@hebo-ai/gateway 0.10.1 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -926,6 +926,42 @@ These attributes appear on the active span and on all metric instruments (reques
926
926
  > To populate custom span attributes, the inbound W3C `baggage` header is supported. Keys in the `hebo.` namespace are mapped to span attributes, with the namespace stripped. For example: `baggage: hebo.user_id=u-123` becomes span attribute `user_id=u-123`.
927
927
  > For `/chat/completions` and `/embeddings`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
928
928
 
929
+ #### Per-Request Trace Control
930
+
931
+ You can override the global `telemetry.signals.gen_ai` level on a per-request basis using the `trace` body parameter. This is useful for selectively enabling detailed traces on specific requests without changing the gateway-wide configuration.
932
+
933
+ The `trace` parameter is accepted on all endpoints (`/chat/completions`, `/embeddings`, `/messages`, `/responses`):
934
+
935
+ ```json
936
+ {
937
+ "model": "openai/gpt-oss-20b",
938
+ "messages": [{ "role": "user", "content": "Hello" }],
939
+ "trace": "full"
940
+ }
941
+ ```
942
+
943
+ Accepted values:
944
+
945
+ - `false` — disables tracing for this request (equivalent to `"off"`)
946
+ - `true` — uses the global default (same as omitting the parameter)
947
+ - `"off"` | `"required"` | `"recommended"` | `"full"` — sets the signal level for this request
948
+
949
+ The resolution order (highest precedence first) is: **hook-set `ctx.trace`**, then the **body `trace` parameter**, then **`cfg.telemetry.signals.gen_ai`**. Because hook-set values take precedence, hooks can always override the body parameter by setting `ctx.trace` directly:
950
+
951
+ ```ts
952
+ hooks: {
953
+ before: (ctx) => {
954
+ // Force full tracing for a specific user
955
+ if (ctx.state.userId === "debug-user") {
956
+ ctx.trace = "full";
957
+ }
958
+ },
959
+ }
960
+ ```
961
+
962
+ > [!NOTE]
963
+ > The `trace` parameter only adjusts the `gen_ai` signal level (the detail of span attributes and metrics) for a single request — it cannot turn telemetry on by itself. The `telemetry.enabled` config field must still be `true` for any telemetry to be emitted.
964
+
929
965
  #### Metrics
930
966
 
931
967
  The Gateway also emits `gen_ai` metrics:
@@ -58,18 +58,18 @@ export const chatCompletions = (config) => {
58
58
  ctx.resolvedProviderId = languageModel.provider;
59
59
  logger.debug(`[chat] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
60
60
  addSpanEvent("hebo.provider.resolved");
61
- const genAiSignalLevel = cfg.telemetry?.signals?.gen_ai;
62
- const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
61
+ ctx.trace ??= ctx.body.trace ?? cfg.telemetry?.signals?.gen_ai;
62
+ const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, ctx.trace);
63
63
  setSpanAttributes(genAiGeneralAttrs);
64
64
  // Convert inputs to AI SDK call options.
65
- const { model: _model, stream, ...inputs } = ctx.body;
65
+ const { model: _model, stream, trace: _trace, ...inputs } = ctx.body;
66
66
  const textOptions = convertToTextCallOptions(inputs);
67
67
  logger.trace({
68
68
  requestId: ctx.requestId,
69
69
  options: textOptions,
70
70
  }, "[chat] AI SDK options");
71
71
  addSpanEvent("hebo.options.prepared");
72
- setSpanAttributes(getChatRequestAttributes(ctx.body, genAiSignalLevel));
72
+ setSpanAttributes(getChatRequestAttributes(ctx.body, ctx.trace));
73
73
  // Build middleware chain (model -> forward params -> provider).
74
74
  const languageModelWithMiddleware = wrapLanguageModel({
75
75
  model: languageModel,
@@ -93,7 +93,7 @@ export const chatCompletions = (config) => {
93
93
  onChunk: () => {
94
94
  if (!ttft) {
95
95
  ttft = performance.now() - start;
96
- recordTimeToFirstToken(ttft, genAiGeneralAttrs, genAiSignalLevel);
96
+ recordTimeToFirstToken(ttft, genAiGeneralAttrs, ctx.trace);
97
97
  }
98
98
  },
99
99
  onFinish: (res) => {
@@ -101,10 +101,10 @@ export const chatCompletions = (config) => {
101
101
  const streamResult = toChatCompletions(res, ctx.resolvedModelId);
102
102
  logger.trace({ requestId: ctx.requestId, result: streamResult }, "[chat] ChatCompletions");
103
103
  addSpanEvent("hebo.result.transformed");
104
- const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
104
+ const genAiResponseAttrs = getChatResponseAttributes(streamResult, ctx.trace);
105
105
  setSpanAttributes(genAiResponseAttrs);
106
- recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
107
- recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
106
+ recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
107
+ recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
108
108
  },
109
109
  experimental_include: {
110
110
  requestBody: false,
@@ -133,19 +133,19 @@ export const chatCompletions = (config) => {
133
133
  });
134
134
  logger.trace({ requestId: ctx.requestId, result }, "[chat] AI SDK result");
135
135
  addSpanEvent("hebo.ai-sdk.completed");
136
- recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
136
+ recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, ctx.trace);
137
137
  // Transform result.
138
138
  ctx.result = toChatCompletions(result, ctx.resolvedModelId);
139
139
  logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] ChatCompletions");
140
140
  addSpanEvent("hebo.result.transformed");
141
- const genAiResponseAttrs = getChatResponseAttributes(ctx.result, genAiSignalLevel);
141
+ const genAiResponseAttrs = getChatResponseAttributes(ctx.result, ctx.trace);
142
142
  setSpanAttributes(genAiResponseAttrs);
143
- recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
143
+ recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
144
144
  if (hooks?.after) {
145
145
  ctx.result = (await hooks.after(ctx)) ?? ctx.result;
146
146
  addSpanEvent("hebo.hooks.after.completed");
147
147
  }
148
- recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
148
+ recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
149
149
  return ctx.result;
150
150
  };
151
151
  return { handler: winterCgHandler(handler, config) };
@@ -1041,6 +1041,12 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
1041
1041
  extra_body: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
1042
1042
  model: z.ZodString;
1043
1043
  stream: z.ZodOptional<z.ZodBoolean>;
1044
+ trace: z.ZodOptional<z.ZodUnion<readonly [z.ZodPipe<z.ZodBoolean, z.ZodTransform<"off" | undefined, boolean>>, z.ZodEnum<{
1045
+ off: "off";
1046
+ required: "required";
1047
+ recommended: "recommended";
1048
+ full: "full";
1049
+ }>]>>;
1044
1050
  }, z.core.$loose>;
1045
1051
  export type ChatCompletionsBody = z.infer<typeof ChatCompletionsBodySchema>;
1046
1052
  export declare const ChatCompletionsFinishReasonSchema: z.ZodEnum<{
@@ -1,5 +1,5 @@
1
1
  import * as z from "zod";
2
- import { CacheControlSchema as ChatCompletionsCacheControlSchema, ReasoningEffortSchema as ChatCompletionsReasoningEffortSchema, ReasoningConfigSchema as ChatCompletionsReasoningConfigSchema, ServiceTierSchema as ChatCompletionsServiceTierSchema, ProviderMetadataSchema as ChatCompletionsProviderMetadataSchema, ContentPartAudioSchema as ChatCompletionsContentPartAudioSchema, } from "../shared/schema";
2
+ import { CacheControlSchema as ChatCompletionsCacheControlSchema, ReasoningEffortSchema as ChatCompletionsReasoningEffortSchema, ReasoningConfigSchema as ChatCompletionsReasoningConfigSchema, ServiceTierSchema as ChatCompletionsServiceTierSchema, ProviderMetadataSchema as ChatCompletionsProviderMetadataSchema, ContentPartAudioSchema as ChatCompletionsContentPartAudioSchema, TraceSchema, } from "../shared/schema";
3
3
  export { ChatCompletionsCacheControlSchema, ChatCompletionsReasoningEffortSchema, ChatCompletionsReasoningConfigSchema, ChatCompletionsServiceTierSchema, ChatCompletionsProviderMetadataSchema, ChatCompletionsContentPartAudioSchema, };
4
4
  export const ChatCompletionsContentPartTextSchema = z.object({
5
5
  type: z.literal("text"),
@@ -181,6 +181,7 @@ const ChatCompletionsInputsSchema = z.object({
181
181
  export const ChatCompletionsBodySchema = z.looseObject({
182
182
  model: z.string(),
183
183
  stream: z.boolean().optional(),
184
+ trace: TraceSchema,
184
185
  ...ChatCompletionsInputsSchema.shape,
185
186
  });
186
187
  export const ChatCompletionsFinishReasonSchema = z.enum([
@@ -403,6 +403,7 @@ export declare const ConversationItemSchema: z.ZodIntersection<z.ZodObject<{
403
403
  incomplete: "incomplete";
404
404
  }>>;
405
405
  extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
406
+ signature: z.ZodOptional<z.ZodString>;
406
407
  }, z.core.$strip>], "type">>;
407
408
  export type ConversationItem = z.infer<typeof ConversationItemSchema>;
408
409
  export declare const ConversationSchema: z.ZodObject<{
@@ -819,6 +820,7 @@ export declare const ConversationCreateParamsSchema: z.ZodObject<{
819
820
  incomplete: "incomplete";
820
821
  }>>;
821
822
  extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
823
+ signature: z.ZodOptional<z.ZodString>;
822
824
  }, z.core.$strip>], "type">>>;
823
825
  metadata: z.ZodOptional<z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodString>>>>;
824
826
  }, z.core.$strip>;
@@ -1225,6 +1227,7 @@ export declare const ConversationItemsAddBodySchema: z.ZodObject<{
1225
1227
  incomplete: "incomplete";
1226
1228
  }>>;
1227
1229
  extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
1230
+ signature: z.ZodOptional<z.ZodString>;
1228
1231
  }, z.core.$strip>], "type">>;
1229
1232
  }, z.core.$strip>;
1230
1233
  export type ConversationItemsAddBody = z.infer<typeof ConversationItemsAddBodySchema>;
@@ -1631,6 +1634,7 @@ export declare const ConversationItemListSchema: z.ZodObject<{
1631
1634
  incomplete: "incomplete";
1632
1635
  }>>;
1633
1636
  extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
1637
+ signature: z.ZodOptional<z.ZodString>;
1634
1638
  }, z.core.$strip>], "type">>>;
1635
1639
  has_more: z.ZodBoolean;
1636
1640
  first_id: z.ZodOptional<z.ZodString>;
@@ -57,15 +57,15 @@ export const embeddings = (config) => {
57
57
  ctx.resolvedProviderId = embeddingModel.provider;
58
58
  logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
59
59
  addSpanEvent("hebo.provider.resolved");
60
- const genAiSignalLevel = cfg.telemetry?.signals?.gen_ai;
61
- const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
60
+ ctx.trace ??= ctx.body.trace ?? cfg.telemetry?.signals?.gen_ai;
61
+ const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, ctx.trace);
62
62
  setSpanAttributes(genAiGeneralAttrs);
63
63
  // Convert inputs to AI SDK call options.
64
- const { model: _model, ...inputs } = ctx.body;
64
+ const { model: _model, trace: _trace, ...inputs } = ctx.body;
65
65
  const embedOptions = convertToEmbedCallOptions(inputs);
66
66
  logger.trace({ requestId: ctx.requestId, options: embedOptions }, "[embeddings] AI SDK options");
67
67
  addSpanEvent("hebo.options.prepared");
68
- setSpanAttributes(getEmbeddingsRequestAttributes(ctx.body, genAiSignalLevel));
68
+ setSpanAttributes(getEmbeddingsRequestAttributes(ctx.body, ctx.trace));
69
69
  // Build middleware chain (model -> forward params -> provider).
70
70
  const embeddingModelWithMiddleware = wrapEmbeddingModel({
71
71
  model: embeddingModel,
@@ -85,14 +85,14 @@ export const embeddings = (config) => {
85
85
  ctx.result = toEmbeddings(result, ctx.modelId);
86
86
  logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] Embeddings");
87
87
  addSpanEvent("hebo.result.transformed");
88
- const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, genAiSignalLevel);
89
- recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
88
+ const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, ctx.trace);
89
+ recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
90
90
  setSpanAttributes(genAiResponseAttrs);
91
91
  if (hooks?.after) {
92
92
  ctx.result = (await hooks.after(ctx)) ?? ctx.result;
93
93
  addSpanEvent("hebo.hooks.after.completed");
94
94
  }
95
- recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
95
+ recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
96
96
  return ctx.result;
97
97
  };
98
98
  return { handler: winterCgHandler(handler, config) };
@@ -14,6 +14,12 @@ export declare const EmbeddingsBodySchema: z.ZodObject<{
14
14
  dimensions: z.ZodOptional<z.ZodInt>;
15
15
  metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
16
16
  model: z.ZodString;
17
+ trace: z.ZodOptional<z.ZodUnion<readonly [z.ZodPipe<z.ZodBoolean, z.ZodTransform<"off" | undefined, boolean>>, z.ZodEnum<{
18
+ off: "off";
19
+ required: "required";
20
+ recommended: "recommended";
21
+ full: "full";
22
+ }>]>>;
17
23
  }, z.core.$loose>;
18
24
  export type EmbeddingsBody = z.infer<typeof EmbeddingsBodySchema>;
19
25
  export declare const EmbeddingsDataSchema: z.ZodObject<{
@@ -1,4 +1,5 @@
1
1
  import * as z from "zod";
2
+ import { TraceSchema } from "../shared/schema";
2
3
  export const EmbeddingsDimensionsSchema = z.int().nonnegative().max(65536);
3
4
  export const EmbeddingsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
4
5
  export const EmbeddingsInputsSchema = z.object({
@@ -8,6 +9,7 @@ export const EmbeddingsInputsSchema = z.object({
8
9
  });
9
10
  export const EmbeddingsBodySchema = z.looseObject({
10
11
  model: z.string(),
12
+ trace: TraceSchema,
11
13
  ...EmbeddingsInputsSchema.shape,
12
14
  });
13
15
  export const EmbeddingsDataSchema = z.object({
@@ -55,14 +55,14 @@ export const messages = (config) => {
55
55
  ctx.resolvedProviderId = languageModel.provider;
56
56
  logger.debug(`[messages] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
57
57
  addSpanEvent("hebo.provider.resolved");
58
- const genAiSignalLevel = cfg.telemetry?.signals?.gen_ai;
59
- const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
58
+ ctx.trace ??= ctx.body.trace ?? cfg.telemetry?.signals?.gen_ai;
59
+ const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, ctx.trace);
60
60
  setSpanAttributes(genAiGeneralAttrs);
61
- const { model: _model, stream, ...inputs } = ctx.body;
61
+ const { model: _model, stream, trace: _trace, ...inputs } = ctx.body;
62
62
  const textOptions = convertToTextCallOptions(inputs);
63
63
  logger.trace({ requestId: ctx.requestId, options: textOptions }, "[messages] AI SDK options");
64
64
  addSpanEvent("hebo.options.prepared");
65
- setSpanAttributes(getMessagesRequestAttributes(ctx.body, genAiSignalLevel));
65
+ setSpanAttributes(getMessagesRequestAttributes(ctx.body, ctx.trace));
66
66
  const languageModelWithMiddleware = wrapLanguageModel({
67
67
  model: languageModel,
68
68
  middleware: modelMiddlewareMatcher.for(ctx.resolvedModelId, languageModel.provider),
@@ -84,7 +84,7 @@ export const messages = (config) => {
84
84
  onChunk: () => {
85
85
  if (!ttft) {
86
86
  ttft = performance.now() - start;
87
- recordTimeToFirstToken(ttft, genAiGeneralAttrs, genAiSignalLevel);
87
+ recordTimeToFirstToken(ttft, genAiGeneralAttrs, ctx.trace);
88
88
  }
89
89
  },
90
90
  onFinish: (res) => {
@@ -92,10 +92,10 @@ export const messages = (config) => {
92
92
  const streamResult = toMessages(res, ctx.resolvedModelId);
93
93
  logger.trace({ requestId: ctx.requestId, result: streamResult }, "[messages] Messages");
94
94
  addSpanEvent("hebo.result.transformed");
95
- const genAiResponseAttrs = getMessagesResponseAttributes(streamResult, genAiSignalLevel, res.finishReason);
95
+ const genAiResponseAttrs = getMessagesResponseAttributes(streamResult, ctx.trace, res.finishReason);
96
96
  setSpanAttributes(genAiResponseAttrs);
97
- recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
98
- recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
97
+ recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
98
+ recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
99
99
  },
100
100
  experimental_include: {
101
101
  requestBody: false,
@@ -124,18 +124,18 @@ export const messages = (config) => {
124
124
  });
125
125
  logger.trace({ requestId: ctx.requestId, result }, "[messages] AI SDK result");
126
126
  addSpanEvent("hebo.ai-sdk.completed");
127
- recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
127
+ recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, ctx.trace);
128
128
  ctx.result = toMessages(result, ctx.resolvedModelId);
129
129
  logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[messages] Messages");
130
130
  addSpanEvent("hebo.result.transformed");
131
- const genAiResponseAttrs = getMessagesResponseAttributes(ctx.result, genAiSignalLevel, result.finishReason);
131
+ const genAiResponseAttrs = getMessagesResponseAttributes(ctx.result, ctx.trace, result.finishReason);
132
132
  setSpanAttributes(genAiResponseAttrs);
133
- recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
133
+ recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
134
134
  if (hooks?.after) {
135
135
  ctx.result = (await hooks.after(ctx)) ?? ctx.result;
136
136
  addSpanEvent("hebo.hooks.after.completed");
137
137
  }
138
- recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
138
+ recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
139
139
  return ctx.result;
140
140
  };
141
141
  return { handler: winterCgHandler(handler, config) };
@@ -452,6 +452,12 @@ export declare const MessagesBodySchema: z.ZodObject<{
452
452
  }, z.core.$strip>>;
453
453
  }, z.core.$strip>>]>>;
454
454
  stream: z.ZodOptional<z.ZodBoolean>;
455
+ trace: z.ZodOptional<z.ZodUnion<readonly [z.ZodPipe<z.ZodBoolean, z.ZodTransform<"off" | undefined, boolean>>, z.ZodEnum<{
456
+ off: "off";
457
+ required: "required";
458
+ recommended: "recommended";
459
+ full: "full";
460
+ }>]>>;
455
461
  temperature: z.ZodOptional<z.ZodNumber>;
456
462
  top_p: z.ZodOptional<z.ZodNumber>;
457
463
  stop_sequences: z.ZodOptional<z.ZodArray<z.ZodString>>;
@@ -1,5 +1,5 @@
1
1
  import * as z from "zod";
2
- import { CacheControlSchema, ProviderMetadataSchema } from "../shared/schema";
2
+ import { CacheControlSchema, ProviderMetadataSchema, TraceSchema } from "../shared/schema";
3
3
  // --- Content Block Schemas ---
4
4
  const TextBlockSchema = z.object({
5
5
  type: z.literal("text"),
@@ -172,6 +172,7 @@ export const MessagesBodySchema = z.object({
172
172
  messages: z.array(MessagesMessageSchema),
173
173
  system: z.union([z.string(), z.array(SystemBlockSchema)]).optional(),
174
174
  stream: z.boolean().optional(),
175
+ trace: TraceSchema,
175
176
  temperature: z.number().optional(),
176
177
  top_p: z.number().optional(),
177
178
  stop_sequences: z.array(z.string()).optional(),
@@ -91,17 +91,24 @@ export function convertToModelMessages(input, instructions) {
91
91
  }
92
92
  function fromReasoningItem(item) {
93
93
  const parts = [];
94
- if (!item.summary || item.summary.length === 0) {
94
+ // Prefer content (full thinking text) over summary when available
95
+ const source = item.content && item.content.length > 0 ? item.content : item.summary;
96
+ if (!source || source.length === 0) {
95
97
  return { role: "assistant", content: parts };
96
98
  }
97
99
  let providerOptions;
98
- if (item.extra_content || item.encrypted_content) {
99
- providerOptions = item.extra_content ?? { unknown: {} };
100
+ if (item.extra_content || item.encrypted_content || item.signature) {
101
+ providerOptions = item.extra_content ? { ...item.extra_content } : { unknown: {} };
102
+ const existing = (providerOptions["unknown"] ?? {});
100
103
  if (item.encrypted_content) {
101
- (providerOptions ??= {})["unknown"] = { redactedData: item.encrypted_content };
104
+ existing["redactedData"] = item.encrypted_content;
102
105
  }
106
+ if (item.signature) {
107
+ existing["signature"] = item.signature;
108
+ }
109
+ providerOptions["unknown"] = existing;
103
110
  }
104
- for (const s of item.summary) {
111
+ for (const s of source) {
105
112
  parts.push({
106
113
  type: "reasoning",
107
114
  text: s.text,
@@ -477,13 +484,17 @@ function toReasoningOutputItem(reasoning) {
477
484
  };
478
485
  if (reasoning.text) {
479
486
  item.summary = [{ type: "summary_text", text: reasoning.text }];
487
+ item.content = [{ type: "reasoning_text", text: reasoning.text }];
480
488
  }
481
489
  const providerMetadata = reasoning.providerMetadata ?? {};
482
490
  item.extra_content = providerMetadata;
483
- const { redactedData } = extractReasoningMetadata(providerMetadata);
491
+ const { redactedData, signature } = extractReasoningMetadata(providerMetadata);
484
492
  if (redactedData) {
485
493
  item.encrypted_content = redactedData;
486
494
  }
495
+ if (signature) {
496
+ item.signature = signature;
497
+ }
487
498
  return item;
488
499
  }
489
500
  function toFunctionCallItem(toolCallId, toolName, input, providerMetadata, status = "completed") {
@@ -560,6 +571,7 @@ export class ResponsesTransformStream extends TransformStream {
560
571
  let reasoningItem;
561
572
  let reasoningOutputIndex = -1;
562
573
  let summaryIndex = 0;
574
+ let reasoningContentIndex = 0;
563
575
  let finishProviderMetadata;
564
576
  const outputItems = [];
565
577
  const inProgressToolCalls = new Map();
@@ -592,8 +604,13 @@ export class ResponsesTransformStream extends TransformStream {
592
604
  type: "summary_text",
593
605
  text: s.text,
594
606
  })),
607
+ content: item.content?.map((c) => ({
608
+ type: "reasoning_text",
609
+ text: c.text,
610
+ })),
595
611
  extra_content: item.extra_content,
596
612
  encrypted_content: item.encrypted_content,
613
+ signature: item.signature,
597
614
  };
598
615
  }
599
616
  if (item.type === "function_call") {
@@ -664,6 +681,21 @@ export class ResponsesTransformStream extends TransformStream {
664
681
  });
665
682
  }
666
683
  }
684
+ if (reasoningItem && reasoningItem.content && reasoningItem.content.length > 0) {
685
+ const lastContentPart = reasoningItem.content[reasoningContentIndex];
686
+ if (lastContentPart) {
687
+ controller.enqueue({
688
+ event: "response.reasoning_text.done",
689
+ data: {
690
+ type: "response.reasoning_text.done",
691
+ item_id: reasoningItem.id,
692
+ output_index: reasoningOutputIndex,
693
+ content_index: reasoningContentIndex,
694
+ text: lastContentPart.text,
695
+ },
696
+ });
697
+ }
698
+ }
667
699
  if (reasoningItem) {
668
700
  reasoningItem.status = "completed";
669
701
  controller.enqueue({
@@ -813,16 +845,21 @@ export class ResponsesTransformStream extends TransformStream {
813
845
  id: uuidv7(),
814
846
  status: "in_progress",
815
847
  summary: [],
848
+ content: [],
816
849
  };
817
850
  const providerMetadata = part.providerMetadata;
818
851
  if (providerMetadata) {
819
852
  reasoningItem.extra_content = providerMetadata;
820
- const { redactedData } = extractReasoningMetadata(providerMetadata);
853
+ const { redactedData, signature } = extractReasoningMetadata(providerMetadata);
821
854
  if (redactedData) {
822
855
  reasoningItem.encrypted_content = redactedData;
823
856
  }
857
+ if (signature) {
858
+ reasoningItem.signature = signature;
859
+ }
824
860
  }
825
861
  reasoningOutputIndex = outputIndex++;
862
+ reasoningContentIndex = 0;
826
863
  outputItems.push(reasoningItem);
827
864
  controller.enqueue({
828
865
  event: "response.output_item.added",
@@ -834,14 +871,17 @@ export class ResponsesTransformStream extends TransformStream {
834
871
  id: reasoningItem.id,
835
872
  status: "in_progress",
836
873
  summary: [],
874
+ content: [],
837
875
  extra_content: reasoningItem.extra_content,
838
876
  encrypted_content: reasoningItem.encrypted_content,
877
+ signature: reasoningItem.signature,
839
878
  },
840
879
  },
841
880
  });
842
881
  break;
843
882
  }
844
883
  case "reasoning-delta": {
884
+ // Summary deltas
845
885
  if (summaryIndex === reasoningItem.summary.length) {
846
886
  const summaryPart = {
847
887
  type: "summary_text",
@@ -873,6 +913,26 @@ export class ResponsesTransformStream extends TransformStream {
873
913
  delta: part.text,
874
914
  },
875
915
  });
916
+ // Content deltas (parallel to summary)
917
+ const contentArr = reasoningItem.content;
918
+ if (reasoningContentIndex === contentArr.length) {
919
+ const contentPart = {
920
+ type: "reasoning_text",
921
+ text: "",
922
+ };
923
+ contentArr.push(contentPart);
924
+ }
925
+ contentArr[reasoningContentIndex].text += part.text;
926
+ controller.enqueue({
927
+ event: "response.reasoning_text.delta",
928
+ data: {
929
+ type: "response.reasoning_text.delta",
930
+ item_id: reasoningItem.id,
931
+ output_index: reasoningOutputIndex,
932
+ content_index: reasoningContentIndex,
933
+ delta: part.text,
934
+ },
935
+ });
876
936
  break;
877
937
  }
878
938
  case "reasoning-end": {
@@ -54,14 +54,14 @@ export const responses = (config) => {
54
54
  ctx.resolvedProviderId = languageModel.provider;
55
55
  logger.debug(`[responses] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
56
56
  addSpanEvent("hebo.provider.resolved");
57
- const genAiSignalLevel = cfg.telemetry?.signals?.gen_ai;
58
- const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
57
+ ctx.trace ??= ctx.body.trace ?? cfg.telemetry?.signals?.gen_ai;
58
+ const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, ctx.trace);
59
59
  setSpanAttributes(genAiGeneralAttrs);
60
- const { model: _model, stream, ...inputs } = ctx.body;
60
+ const { model: _model, stream, trace: _trace, ...inputs } = ctx.body;
61
61
  const textOptions = convertToTextCallOptions(inputs);
62
62
  logger.trace({ requestId: ctx.requestId, options: textOptions }, "[responses] AI SDK options");
63
63
  addSpanEvent("hebo.options.prepared");
64
- setSpanAttributes(getResponsesRequestAttributes(ctx.body, genAiSignalLevel));
64
+ setSpanAttributes(getResponsesRequestAttributes(ctx.body, ctx.trace));
65
65
  const languageModelWithMiddleware = wrapLanguageModel({
66
66
  model: languageModel,
67
67
  middleware: modelMiddlewareMatcher.for(ctx.resolvedModelId, languageModel.provider),
@@ -83,7 +83,7 @@ export const responses = (config) => {
83
83
  onChunk: () => {
84
84
  if (!ttft) {
85
85
  ttft = performance.now() - start;
86
- recordTimeToFirstToken(ttft, genAiGeneralAttrs, genAiSignalLevel);
86
+ recordTimeToFirstToken(ttft, genAiGeneralAttrs, ctx.trace);
87
87
  }
88
88
  },
89
89
  onFinish: (res) => {
@@ -91,10 +91,10 @@ export const responses = (config) => {
91
91
  const streamResult = toResponses(res, ctx.resolvedModelId, ctx.body.metadata);
92
92
  logger.trace({ requestId: ctx.requestId, result: streamResult }, "[responses] Responses");
93
93
  addSpanEvent("hebo.result.transformed");
94
- const genAiResponseAttrs = getResponsesResponseAttributes(streamResult, genAiSignalLevel, res.finishReason);
94
+ const genAiResponseAttrs = getResponsesResponseAttributes(streamResult, ctx.trace, res.finishReason);
95
95
  setSpanAttributes(genAiResponseAttrs);
96
- recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
97
- recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
96
+ recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
97
+ recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
98
98
  },
99
99
  experimental_include: {
100
100
  requestBody: false,
@@ -123,18 +123,18 @@ export const responses = (config) => {
123
123
  });
124
124
  logger.trace({ requestId: ctx.requestId, result }, "[responses] AI SDK result");
125
125
  addSpanEvent("hebo.ai-sdk.completed");
126
- recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
126
+ recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, ctx.trace);
127
127
  ctx.result = toResponses(result, ctx.resolvedModelId, ctx.body.metadata);
128
128
  logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[responses] Responses");
129
129
  addSpanEvent("hebo.result.transformed");
130
- const genAiResponseAttrs = getResponsesResponseAttributes(ctx.result, genAiSignalLevel, result.finishReason);
130
+ const genAiResponseAttrs = getResponsesResponseAttributes(ctx.result, ctx.trace, result.finishReason);
131
131
  setSpanAttributes(genAiResponseAttrs);
132
- recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
132
+ recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
133
133
  if (hooks?.after) {
134
134
  ctx.result = (await hooks.after(ctx)) ?? ctx.result;
135
135
  addSpanEvent("hebo.hooks.after.completed");
136
136
  }
137
- recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
137
+ recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
138
138
  return ctx.result;
139
139
  };
140
140
  return { handler: winterCgHandler(handler, config) };
@@ -554,6 +554,7 @@ export declare const ResponsesReasoningItemSchema: z.ZodObject<{
554
554
  incomplete: "incomplete";
555
555
  }>>;
556
556
  extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
557
+ signature: z.ZodOptional<z.ZodString>;
557
558
  }, z.core.$strip>;
558
559
  export type ResponsesReasoningItem = z.infer<typeof ResponsesReasoningItemSchema>;
559
560
  /**
@@ -956,6 +957,7 @@ export declare const ResponsesInputItemSchema: z.ZodDiscriminatedUnion<[z.ZodDis
956
957
  incomplete: "incomplete";
957
958
  }>>;
958
959
  extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
960
+ signature: z.ZodOptional<z.ZodString>;
959
961
  }, z.core.$strip>], "type">;
960
962
  export type ResponsesInputItem = z.infer<typeof ResponsesInputItemSchema>;
961
963
  import { CacheControlSchema as ResponsesCacheControlSchema, ReasoningEffortSchema as ResponsesReasoningEffortSchema, ReasoningConfigSchema as ResponsesReasoningConfigSchema, ServiceTierSchema as ResponsesServiceTierSchema, ProviderMetadataSchema as ResponsesProviderMetadataSchema, type CacheControl as ResponsesCacheControl, type ReasoningEffort as ResponsesReasoningEffort, type ReasoningConfig as ResponsesReasoningConfig, type ServiceTier as ResponsesServiceTier, type ProviderMetadata as ResponsesProviderMetadata, ContentPartAudioSchema as ResponsesInputAudioSchema, type ContentPartAudio as ResponsesInputAudio } from "../shared/schema";
@@ -1429,6 +1431,7 @@ declare const ResponsesInputsSchema: z.ZodObject<{
1429
1431
  incomplete: "incomplete";
1430
1432
  }>>;
1431
1433
  extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
1434
+ signature: z.ZodOptional<z.ZodString>;
1432
1435
  }, z.core.$strip>], "type">>]>;
1433
1436
  instructions: z.ZodOptional<z.ZodString>;
1434
1437
  tools: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -1923,6 +1926,7 @@ export declare const ResponsesBodySchema: z.ZodObject<{
1923
1926
  incomplete: "incomplete";
1924
1927
  }>>;
1925
1928
  extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
1929
+ signature: z.ZodOptional<z.ZodString>;
1926
1930
  }, z.core.$strip>], "type">>]>;
1927
1931
  instructions: z.ZodOptional<z.ZodString>;
1928
1932
  tools: z.ZodOptional<z.ZodArray<z.ZodObject<{
@@ -2019,6 +2023,12 @@ export declare const ResponsesBodySchema: z.ZodObject<{
2019
2023
  extra_body: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
2020
2024
  model: z.ZodString;
2021
2025
  stream: z.ZodOptional<z.ZodBoolean>;
2026
+ trace: z.ZodOptional<z.ZodUnion<readonly [z.ZodPipe<z.ZodBoolean, z.ZodTransform<"off" | undefined, boolean>>, z.ZodEnum<{
2027
+ off: "off";
2028
+ required: "required";
2029
+ recommended: "recommended";
2030
+ full: "full";
2031
+ }>]>>;
2022
2032
  }, z.core.$strip>;
2023
2033
  export type ResponsesBody = z.infer<typeof ResponsesBodySchema>;
2024
2034
  /**
@@ -2094,6 +2104,7 @@ export declare const ResponsesOutputItemSchema: z.ZodDiscriminatedUnion<[z.ZodOb
2094
2104
  incomplete: "incomplete";
2095
2105
  }>>;
2096
2106
  extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
2107
+ signature: z.ZodOptional<z.ZodString>;
2097
2108
  }, z.core.$strip>], "type">;
2098
2109
  export type ResponsesOutputItem = z.infer<typeof ResponsesOutputItemSchema>;
2099
2110
  /**
@@ -2184,6 +2195,7 @@ export declare const ResponsesSchema: z.ZodObject<{
2184
2195
  incomplete: "incomplete";
2185
2196
  }>>;
2186
2197
  extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
2198
+ signature: z.ZodOptional<z.ZodString>;
2187
2199
  }, z.core.$strip>], "type">>;
2188
2200
  usage: z.ZodNullable<z.ZodObject<{
2189
2201
  input_tokens: z.ZodNumber;
@@ -2270,6 +2282,20 @@ export type ResponseReasoningSummaryPartDoneEvent = SseFrame<{
2270
2282
  summary_index: number;
2271
2283
  part: ResponsesSummaryText;
2272
2284
  }, "response.reasoning_summary_part.done">;
2285
+ export type ResponseReasoningTextDeltaEvent = SseFrame<{
2286
+ type: "response.reasoning_text.delta";
2287
+ item_id: string;
2288
+ output_index: number;
2289
+ content_index: number;
2290
+ delta: string;
2291
+ }, "response.reasoning_text.delta">;
2292
+ export type ResponseReasoningTextDoneEvent = SseFrame<{
2293
+ type: "response.reasoning_text.done";
2294
+ item_id: string;
2295
+ output_index: number;
2296
+ content_index: number;
2297
+ text: string;
2298
+ }, "response.reasoning_text.done">;
2273
2299
  export type ResponseOutputItemDoneEvent = SseFrame<{
2274
2300
  type: "response.output_item.done";
2275
2301
  output_index: number;
@@ -2297,5 +2323,5 @@ export type ResponseFailedEvent = SseFrame<{
2297
2323
  type: "response.failed";
2298
2324
  response: Responses;
2299
2325
  }, "response.failed">;
2300
- export type ResponsesStreamEvent = ResponseCreatedEvent | ResponseInProgressEvent | ResponseOutputItemAddedEvent | ResponseContentPartAddedEvent | ResponseReasoningSummaryPartAddedEvent | ResponseOutputTextDeltaEvent | ResponseReasoningSummaryTextDeltaEvent | ResponseContentPartDoneEvent | ResponseReasoningSummaryPartDoneEvent | ResponseOutputItemDoneEvent | ResponseFunctionCallArgumentsDeltaEvent | ResponseFunctionCallArgumentsDoneEvent | ResponseCompletedEvent | ResponseFailedEvent;
2326
+ export type ResponsesStreamEvent = ResponseCreatedEvent | ResponseInProgressEvent | ResponseOutputItemAddedEvent | ResponseContentPartAddedEvent | ResponseReasoningSummaryPartAddedEvent | ResponseOutputTextDeltaEvent | ResponseReasoningSummaryTextDeltaEvent | ResponseReasoningTextDeltaEvent | ResponseContentPartDoneEvent | ResponseReasoningSummaryPartDoneEvent | ResponseReasoningTextDoneEvent | ResponseOutputItemDoneEvent | ResponseFunctionCallArgumentsDeltaEvent | ResponseFunctionCallArgumentsDoneEvent | ResponseCompletedEvent | ResponseFailedEvent;
2301
2327
  export type ResponsesStream = ReadableStream<ResponsesStreamEvent | SseErrorFrame>;
@@ -147,6 +147,8 @@ export const ResponsesReasoningItemSchema = z.object({
147
147
  status: ResponsesItemStatusSchema.optional(),
148
148
  // Extension origin: Gemini
149
149
  extra_content: ResponsesProviderMetadataSchema.optional().meta({ extension: true }),
150
+ // Extension origin: Anthropic/OpenRouter
151
+ signature: z.string().optional().meta({ extension: true }),
150
152
  });
151
153
  /**
152
154
  * --- Input Items ---
@@ -157,7 +159,7 @@ export const ResponsesInputItemSchema = z.discriminatedUnion("type", [
157
159
  ResponsesFunctionCallOutputSchema,
158
160
  ResponsesReasoningItemSchema,
159
161
  ]);
160
- import { CacheControlSchema as ResponsesCacheControlSchema, ReasoningEffortSchema as ResponsesReasoningEffortSchema, ReasoningConfigSchema as ResponsesReasoningConfigSchema, ServiceTierSchema as ResponsesServiceTierSchema, ProviderMetadataSchema as ResponsesProviderMetadataSchema, ContentPartAudioSchema as ResponsesInputAudioSchema, } from "../shared/schema";
162
+ import { CacheControlSchema as ResponsesCacheControlSchema, ReasoningEffortSchema as ResponsesReasoningEffortSchema, ReasoningConfigSchema as ResponsesReasoningConfigSchema, ServiceTierSchema as ResponsesServiceTierSchema, ProviderMetadataSchema as ResponsesProviderMetadataSchema, ContentPartAudioSchema as ResponsesInputAudioSchema, TraceSchema, } from "../shared/schema";
161
163
  export { ResponsesCacheControlSchema, ResponsesReasoningEffortSchema, ResponsesReasoningConfigSchema, ResponsesServiceTierSchema, ResponsesProviderMetadataSchema, ResponsesInputAudioSchema, };
162
164
  /**
163
165
  * --- Tools ---
@@ -251,6 +253,7 @@ const ResponsesInputsSchema = z.object({
251
253
  export const ResponsesBodySchema = z.object({
252
254
  model: z.string(),
253
255
  stream: z.boolean().optional(),
256
+ trace: TraceSchema,
254
257
  ...ResponsesInputsSchema.shape,
255
258
  });
256
259
  /**
@@ -61,6 +61,16 @@ export declare const ServiceTierSchema: z.ZodEnum<{
61
61
  priority: "priority";
62
62
  }>;
63
63
  export type ServiceTier = z.infer<typeof ServiceTierSchema>;
64
+ /**
65
+ * Per-request trace control.
66
+ * Accepts a boolean (`false` → "off", `true` → stripped) or a signal level string.
67
+ */
68
+ export declare const TraceSchema: z.ZodOptional<z.ZodUnion<readonly [z.ZodPipe<z.ZodBoolean, z.ZodTransform<"off" | undefined, boolean>>, z.ZodEnum<{
69
+ off: "off";
70
+ required: "required";
71
+ recommended: "recommended";
72
+ full: "full";
73
+ }>]>>;
64
74
  export declare const ContentPartAudioSchema: z.ZodObject<{
65
75
  type: z.ZodLiteral<"input_audio">;
66
76
  input_audio: z.ZodObject<{
@@ -40,6 +40,16 @@ const InputAudioSchema = z.object({
40
40
  // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/multimodal/audio-understanding
41
41
  format: InputAudioFormatSchema,
42
42
  });
43
+ /**
44
+ * Per-request trace control.
45
+ * Accepts a boolean (`false` → "off", `true` → stripped) or a signal level string.
46
+ */
47
+ export const TraceSchema = z
48
+ .union([
49
+ z.boolean().transform((v) => (v ? undefined : "off")),
50
+ z.enum(["off", "required", "recommended", "full"]),
51
+ ])
52
+ .optional();
43
53
  export const ContentPartAudioSchema = z.object({
44
54
  type: z.literal("input_audio"),
45
55
  input_audio: InputAudioSchema,
@@ -1,9 +1,14 @@
1
+ import * as z from "zod";
2
+ export declare const AnthropicErrorSchema: z.ZodObject<{
3
+ type: z.ZodLiteral<"error">;
4
+ error: z.ZodObject<{
5
+ type: z.ZodString;
6
+ message: z.ZodString;
7
+ }, z.core.$strip>;
8
+ }, z.core.$strip>;
1
9
  export declare class AnthropicError {
2
- readonly type = "error";
3
- readonly error: {
4
- type: string;
5
- message: string;
6
- };
10
+ readonly type: "error";
11
+ readonly error: z.infer<typeof AnthropicErrorSchema>["error"];
7
12
  constructor(message: string, type?: string);
8
13
  }
9
14
  export declare function toAnthropicError(error: unknown): AnthropicError;
@@ -1,6 +1,14 @@
1
+ import * as z from "zod";
1
2
  import { resolveRequestId } from "../utils/headers";
2
3
  import { toResponse } from "../utils/response";
3
4
  import { getErrorMeta, maybeMaskMessage } from "./utils";
5
+ export const AnthropicErrorSchema = z.object({
6
+ type: z.literal("error"),
7
+ error: z.object({
8
+ type: z.string(),
9
+ message: z.string(),
10
+ }),
11
+ });
4
12
  export class AnthropicError {
5
13
  type = "error";
6
14
  error;
package/dist/lifecycle.js CHANGED
@@ -60,7 +60,7 @@ export const winterCgHandler = (run, config) => {
60
60
  ctx.operation === "embeddings" ||
61
61
  ctx.operation === "messages" ||
62
62
  ctx.operation === "responses") {
63
- recordRequestDuration(performance.now() - start, realStatus, ctx, parsedConfig.telemetry?.signals?.gen_ai);
63
+ recordRequestDuration(performance.now() - start, realStatus, ctx, ctx.trace ?? parsedConfig.telemetry?.signals?.gen_ai);
64
64
  }
65
65
  span.finish();
66
66
  };
@@ -78,7 +78,7 @@ export const bedrockClaudeReasoningMiddleware = {
78
78
  // "adaptive" (which doesn't require budgetTokens), compute a fallback using
79
79
  // the same effort-based logic as other model cases, defaulting to "medium".
80
80
  // Note: Bedrock Converse API doesn't support "adaptive" natively — see vercel/ai#8513
81
- const mappedEffort = effort === "max" ? "xhigh" : effort ?? "medium";
81
+ const mappedEffort = effort === "max" ? "xhigh" : (effort ?? "medium");
82
82
  target.budgetTokens = calculateReasoningBudgetFromEffort(mappedEffort, params.maxOutputTokens ?? 65536, 1024);
83
83
  }
84
84
  }
package/dist/types.d.ts CHANGED
@@ -71,6 +71,12 @@ export type GatewayContext = {
71
71
  * Response object returned by the handler.
72
72
  */
73
73
  response?: Response;
74
+ /**
75
+ * Per-request telemetry signal level override.
76
+ * When set (via body parameter or hook), overrides `cfg.telemetry.signals.gen_ai`
77
+ * for this request's span attributes and metrics.
78
+ */
79
+ trace?: TelemetrySignalLevel;
74
80
  /**
75
81
  * Error thrown during execution.
76
82
  */
@@ -79,9 +85,10 @@ export type GatewayContext = {
79
85
  /**
80
86
  * Hook context: all fields readonly except `state` and `otel`.
81
87
  */
82
- export type HookContext = Omit<Readonly<GatewayContext>, "state" | "otel"> & {
88
+ export type HookContext = Omit<Readonly<GatewayContext>, "state" | "otel" | "trace"> & {
83
89
  state: GatewayContext["state"];
84
90
  otel: GatewayContext["otel"];
91
+ trace: GatewayContext["trace"];
85
92
  };
86
93
  type RequiredHookContext<K extends keyof GatewayContext> = Omit<HookContext, K> & Required<Pick<HookContext, K>>;
87
94
  export type OnRequestHookContext = RequiredHookContext<"request">;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hebo-ai/gateway",
3
- "version": "0.10.1",
3
+ "version": "0.10.3",
4
4
  "description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI /chat/completions, OpenResponses /responses & Anthropic /messages.",
5
5
  "keywords": [
6
6
  "ai",
@@ -208,6 +208,7 @@
208
208
  "lefthook": "^2.1.5",
209
209
  "mysql2": "^3.21.0",
210
210
  "next": "^16.2.3",
211
+ "openai": "^6.34.0",
211
212
  "oxfmt": "^0.44.0",
212
213
  "oxlint": "^1.59.0",
213
214
  "oxlint-tsgolint": "^0.20.0",