@hebo-ai/gateway 0.10.2 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -0
- package/dist/endpoints/chat-completions/handler.js +12 -12
- package/dist/endpoints/chat-completions/schema.d.ts +6 -0
- package/dist/endpoints/chat-completions/schema.js +2 -1
- package/dist/endpoints/conversations/schema.d.ts +4 -0
- package/dist/endpoints/embeddings/handler.js +7 -7
- package/dist/endpoints/embeddings/schema.d.ts +6 -0
- package/dist/endpoints/embeddings/schema.js +2 -0
- package/dist/endpoints/messages/handler.js +12 -12
- package/dist/endpoints/messages/schema.d.ts +6 -0
- package/dist/endpoints/messages/schema.js +2 -1
- package/dist/endpoints/responses/converters.js +67 -7
- package/dist/endpoints/responses/handler.js +12 -12
- package/dist/endpoints/responses/schema.d.ts +27 -1
- package/dist/endpoints/responses/schema.js +4 -1
- package/dist/endpoints/shared/schema.d.ts +10 -0
- package/dist/endpoints/shared/schema.js +10 -0
- package/dist/lifecycle.js +1 -1
- package/dist/providers/bedrock/middleware.js +1 -1
- package/dist/types.d.ts +8 -1
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -926,6 +926,42 @@ These attributes appear on the active span and on all metric instruments (reques
|
|
|
926
926
|
> To populate custom span attributes, the inbound W3C `baggage` header is supported. Keys in the `hebo.` namespace are mapped to span attributes, with the namespace stripped. For example: `baggage: hebo.user_id=u-123` becomes span attribute `user_id=u-123`.
|
|
927
927
|
> For `/chat/completions` and `/embeddings`, request `metadata` (`Record<string, string>`, key 1-64 chars, value up to 512 chars) is also forwarded to spans as `gen_ai.request.metadata.<key>`.
|
|
928
928
|
|
|
929
|
+
#### Per-Request Trace Control
|
|
930
|
+
|
|
931
|
+
You can override the global `telemetry.signals.gen_ai` level on a per-request basis using the `trace` body parameter. This is useful for selectively enabling detailed traces on specific requests without changing the gateway-wide configuration.
|
|
932
|
+
|
|
933
|
+
The `trace` parameter is accepted in the request body of `/chat/completions`, `/embeddings`, `/messages`, and `/responses`, for example:
|
|
934
|
+
|
|
935
|
+
```json
|
|
936
|
+
{
|
|
937
|
+
"model": "openai/gpt-oss-20b",
|
|
938
|
+
"messages": [{ "role": "user", "content": "Hello" }],
|
|
939
|
+
"trace": "full"
|
|
940
|
+
}
|
|
941
|
+
```
|
|
942
|
+
|
|
943
|
+
Accepted values:
|
|
944
|
+
|
|
945
|
+
- `false` — turns off `gen_ai` signals for this request (equivalent to `"off"`)
|
|
946
|
+
- `true` — uses the gateway-wide `telemetry.signals.gen_ai` level (same as omitting the parameter)
|
|
947
|
+
- `"off"` | `"required"` | `"recommended"` | `"full"` — sets the signal level for this request
|
|
948
|
+
|
|
949
|
+
The resolution order is: **hook-set `ctx.trace`** > **body `trace` parameter** > **`cfg.telemetry.signals.gen_ai`**. This means hooks can always override the body parameter by setting `ctx.trace` directly:
|
|
950
|
+
|
|
951
|
+
```ts
|
|
952
|
+
hooks: {
|
|
953
|
+
before: (ctx) => {
|
|
954
|
+
// Force full tracing for a specific user
|
|
955
|
+
if (ctx.state.userId === "debug-user") {
|
|
956
|
+
ctx.trace = "full";
|
|
957
|
+
}
|
|
958
|
+
},
|
|
959
|
+
}
|
|
960
|
+
```
|
|
961
|
+
|
|
962
|
+
> [!NOTE]
|
|
963
|
+
> The `trace` parameter only sets the signal level used for span attributes and metrics — it does not control whether telemetry is enabled globally. The `telemetry.enabled` config field must still be `true` for any telemetry to be emitted.
|
|
964
|
+
|
|
929
965
|
#### Metrics
|
|
930
966
|
|
|
931
967
|
The Gateway also emits `gen_ai` metrics:
|
|
@@ -58,18 +58,18 @@ export const chatCompletions = (config) => {
|
|
|
58
58
|
ctx.resolvedProviderId = languageModel.provider;
|
|
59
59
|
logger.debug(`[chat] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
|
|
60
60
|
addSpanEvent("hebo.provider.resolved");
|
|
61
|
-
|
|
62
|
-
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx,
|
|
61
|
+
ctx.trace ??= ctx.body.trace ?? cfg.telemetry?.signals?.gen_ai;
|
|
62
|
+
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, ctx.trace);
|
|
63
63
|
setSpanAttributes(genAiGeneralAttrs);
|
|
64
64
|
// Convert inputs to AI SDK call options.
|
|
65
|
-
const { model: _model, stream, ...inputs } = ctx.body;
|
|
65
|
+
const { model: _model, stream, trace: _trace, ...inputs } = ctx.body;
|
|
66
66
|
const textOptions = convertToTextCallOptions(inputs);
|
|
67
67
|
logger.trace({
|
|
68
68
|
requestId: ctx.requestId,
|
|
69
69
|
options: textOptions,
|
|
70
70
|
}, "[chat] AI SDK options");
|
|
71
71
|
addSpanEvent("hebo.options.prepared");
|
|
72
|
-
setSpanAttributes(getChatRequestAttributes(ctx.body,
|
|
72
|
+
setSpanAttributes(getChatRequestAttributes(ctx.body, ctx.trace));
|
|
73
73
|
// Build middleware chain (model -> forward params -> provider).
|
|
74
74
|
const languageModelWithMiddleware = wrapLanguageModel({
|
|
75
75
|
model: languageModel,
|
|
@@ -93,7 +93,7 @@ export const chatCompletions = (config) => {
|
|
|
93
93
|
onChunk: () => {
|
|
94
94
|
if (!ttft) {
|
|
95
95
|
ttft = performance.now() - start;
|
|
96
|
-
recordTimeToFirstToken(ttft, genAiGeneralAttrs,
|
|
96
|
+
recordTimeToFirstToken(ttft, genAiGeneralAttrs, ctx.trace);
|
|
97
97
|
}
|
|
98
98
|
},
|
|
99
99
|
onFinish: (res) => {
|
|
@@ -101,10 +101,10 @@ export const chatCompletions = (config) => {
|
|
|
101
101
|
const streamResult = toChatCompletions(res, ctx.resolvedModelId);
|
|
102
102
|
logger.trace({ requestId: ctx.requestId, result: streamResult }, "[chat] ChatCompletions");
|
|
103
103
|
addSpanEvent("hebo.result.transformed");
|
|
104
|
-
const genAiResponseAttrs = getChatResponseAttributes(streamResult,
|
|
104
|
+
const genAiResponseAttrs = getChatResponseAttributes(streamResult, ctx.trace);
|
|
105
105
|
setSpanAttributes(genAiResponseAttrs);
|
|
106
|
-
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs,
|
|
107
|
-
recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs,
|
|
106
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
107
|
+
recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
108
108
|
},
|
|
109
109
|
experimental_include: {
|
|
110
110
|
requestBody: false,
|
|
@@ -133,19 +133,19 @@ export const chatCompletions = (config) => {
|
|
|
133
133
|
});
|
|
134
134
|
logger.trace({ requestId: ctx.requestId, result }, "[chat] AI SDK result");
|
|
135
135
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
136
|
-
recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs,
|
|
136
|
+
recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, ctx.trace);
|
|
137
137
|
// Transform result.
|
|
138
138
|
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
139
139
|
logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] ChatCompletions");
|
|
140
140
|
addSpanEvent("hebo.result.transformed");
|
|
141
|
-
const genAiResponseAttrs = getChatResponseAttributes(ctx.result,
|
|
141
|
+
const genAiResponseAttrs = getChatResponseAttributes(ctx.result, ctx.trace);
|
|
142
142
|
setSpanAttributes(genAiResponseAttrs);
|
|
143
|
-
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs,
|
|
143
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
144
144
|
if (hooks?.after) {
|
|
145
145
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
146
146
|
addSpanEvent("hebo.hooks.after.completed");
|
|
147
147
|
}
|
|
148
|
-
recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs,
|
|
148
|
+
recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
149
149
|
return ctx.result;
|
|
150
150
|
};
|
|
151
151
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -1041,6 +1041,12 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
1041
1041
|
extra_body: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
1042
1042
|
model: z.ZodString;
|
|
1043
1043
|
stream: z.ZodOptional<z.ZodBoolean>;
|
|
1044
|
+
trace: z.ZodOptional<z.ZodUnion<readonly [z.ZodPipe<z.ZodBoolean, z.ZodTransform<"off" | undefined, boolean>>, z.ZodEnum<{
|
|
1045
|
+
off: "off";
|
|
1046
|
+
required: "required";
|
|
1047
|
+
recommended: "recommended";
|
|
1048
|
+
full: "full";
|
|
1049
|
+
}>]>>;
|
|
1044
1050
|
}, z.core.$loose>;
|
|
1045
1051
|
export type ChatCompletionsBody = z.infer<typeof ChatCompletionsBodySchema>;
|
|
1046
1052
|
export declare const ChatCompletionsFinishReasonSchema: z.ZodEnum<{
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
-
import { CacheControlSchema as ChatCompletionsCacheControlSchema, ReasoningEffortSchema as ChatCompletionsReasoningEffortSchema, ReasoningConfigSchema as ChatCompletionsReasoningConfigSchema, ServiceTierSchema as ChatCompletionsServiceTierSchema, ProviderMetadataSchema as ChatCompletionsProviderMetadataSchema, ContentPartAudioSchema as ChatCompletionsContentPartAudioSchema, } from "../shared/schema";
|
|
2
|
+
import { CacheControlSchema as ChatCompletionsCacheControlSchema, ReasoningEffortSchema as ChatCompletionsReasoningEffortSchema, ReasoningConfigSchema as ChatCompletionsReasoningConfigSchema, ServiceTierSchema as ChatCompletionsServiceTierSchema, ProviderMetadataSchema as ChatCompletionsProviderMetadataSchema, ContentPartAudioSchema as ChatCompletionsContentPartAudioSchema, TraceSchema, } from "../shared/schema";
|
|
3
3
|
export { ChatCompletionsCacheControlSchema, ChatCompletionsReasoningEffortSchema, ChatCompletionsReasoningConfigSchema, ChatCompletionsServiceTierSchema, ChatCompletionsProviderMetadataSchema, ChatCompletionsContentPartAudioSchema, };
|
|
4
4
|
export const ChatCompletionsContentPartTextSchema = z.object({
|
|
5
5
|
type: z.literal("text"),
|
|
@@ -181,6 +181,7 @@ const ChatCompletionsInputsSchema = z.object({
|
|
|
181
181
|
export const ChatCompletionsBodySchema = z.looseObject({
|
|
182
182
|
model: z.string(),
|
|
183
183
|
stream: z.boolean().optional(),
|
|
184
|
+
trace: TraceSchema,
|
|
184
185
|
...ChatCompletionsInputsSchema.shape,
|
|
185
186
|
});
|
|
186
187
|
export const ChatCompletionsFinishReasonSchema = z.enum([
|
|
@@ -403,6 +403,7 @@ export declare const ConversationItemSchema: z.ZodIntersection<z.ZodObject<{
|
|
|
403
403
|
incomplete: "incomplete";
|
|
404
404
|
}>>;
|
|
405
405
|
extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
406
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
406
407
|
}, z.core.$strip>], "type">>;
|
|
407
408
|
export type ConversationItem = z.infer<typeof ConversationItemSchema>;
|
|
408
409
|
export declare const ConversationSchema: z.ZodObject<{
|
|
@@ -819,6 +820,7 @@ export declare const ConversationCreateParamsSchema: z.ZodObject<{
|
|
|
819
820
|
incomplete: "incomplete";
|
|
820
821
|
}>>;
|
|
821
822
|
extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
823
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
822
824
|
}, z.core.$strip>], "type">>>;
|
|
823
825
|
metadata: z.ZodOptional<z.ZodOptional<z.ZodNullable<z.ZodRecord<z.ZodString, z.ZodString>>>>;
|
|
824
826
|
}, z.core.$strip>;
|
|
@@ -1225,6 +1227,7 @@ export declare const ConversationItemsAddBodySchema: z.ZodObject<{
|
|
|
1225
1227
|
incomplete: "incomplete";
|
|
1226
1228
|
}>>;
|
|
1227
1229
|
extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
1230
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
1228
1231
|
}, z.core.$strip>], "type">>;
|
|
1229
1232
|
}, z.core.$strip>;
|
|
1230
1233
|
export type ConversationItemsAddBody = z.infer<typeof ConversationItemsAddBodySchema>;
|
|
@@ -1631,6 +1634,7 @@ export declare const ConversationItemListSchema: z.ZodObject<{
|
|
|
1631
1634
|
incomplete: "incomplete";
|
|
1632
1635
|
}>>;
|
|
1633
1636
|
extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
1637
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
1634
1638
|
}, z.core.$strip>], "type">>>;
|
|
1635
1639
|
has_more: z.ZodBoolean;
|
|
1636
1640
|
first_id: z.ZodOptional<z.ZodString>;
|
|
@@ -57,15 +57,15 @@ export const embeddings = (config) => {
|
|
|
57
57
|
ctx.resolvedProviderId = embeddingModel.provider;
|
|
58
58
|
logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
|
|
59
59
|
addSpanEvent("hebo.provider.resolved");
|
|
60
|
-
|
|
61
|
-
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx,
|
|
60
|
+
ctx.trace ??= ctx.body.trace ?? cfg.telemetry?.signals?.gen_ai;
|
|
61
|
+
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, ctx.trace);
|
|
62
62
|
setSpanAttributes(genAiGeneralAttrs);
|
|
63
63
|
// Convert inputs to AI SDK call options.
|
|
64
|
-
const { model: _model, ...inputs } = ctx.body;
|
|
64
|
+
const { model: _model, trace: _trace, ...inputs } = ctx.body;
|
|
65
65
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
66
66
|
logger.trace({ requestId: ctx.requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
67
67
|
addSpanEvent("hebo.options.prepared");
|
|
68
|
-
setSpanAttributes(getEmbeddingsRequestAttributes(ctx.body,
|
|
68
|
+
setSpanAttributes(getEmbeddingsRequestAttributes(ctx.body, ctx.trace));
|
|
69
69
|
// Build middleware chain (model -> forward params -> provider).
|
|
70
70
|
const embeddingModelWithMiddleware = wrapEmbeddingModel({
|
|
71
71
|
model: embeddingModel,
|
|
@@ -85,14 +85,14 @@ export const embeddings = (config) => {
|
|
|
85
85
|
ctx.result = toEmbeddings(result, ctx.modelId);
|
|
86
86
|
logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[chat] Embeddings");
|
|
87
87
|
addSpanEvent("hebo.result.transformed");
|
|
88
|
-
const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result,
|
|
89
|
-
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs,
|
|
88
|
+
const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, ctx.trace);
|
|
89
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
90
90
|
setSpanAttributes(genAiResponseAttrs);
|
|
91
91
|
if (hooks?.after) {
|
|
92
92
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
93
93
|
addSpanEvent("hebo.hooks.after.completed");
|
|
94
94
|
}
|
|
95
|
-
recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs,
|
|
95
|
+
recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
96
96
|
return ctx.result;
|
|
97
97
|
};
|
|
98
98
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -14,6 +14,12 @@ export declare const EmbeddingsBodySchema: z.ZodObject<{
|
|
|
14
14
|
dimensions: z.ZodOptional<z.ZodInt>;
|
|
15
15
|
metadata: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
|
|
16
16
|
model: z.ZodString;
|
|
17
|
+
trace: z.ZodOptional<z.ZodUnion<readonly [z.ZodPipe<z.ZodBoolean, z.ZodTransform<"off" | undefined, boolean>>, z.ZodEnum<{
|
|
18
|
+
off: "off";
|
|
19
|
+
required: "required";
|
|
20
|
+
recommended: "recommended";
|
|
21
|
+
full: "full";
|
|
22
|
+
}>]>>;
|
|
17
23
|
}, z.core.$loose>;
|
|
18
24
|
export type EmbeddingsBody = z.infer<typeof EmbeddingsBodySchema>;
|
|
19
25
|
export declare const EmbeddingsDataSchema: z.ZodObject<{
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
+
import { TraceSchema } from "../shared/schema";
|
|
2
3
|
export const EmbeddingsDimensionsSchema = z.int().nonnegative().max(65536);
|
|
3
4
|
export const EmbeddingsMetadataSchema = z.record(z.string().min(1).max(64), z.string().max(512));
|
|
4
5
|
export const EmbeddingsInputsSchema = z.object({
|
|
@@ -8,6 +9,7 @@ export const EmbeddingsInputsSchema = z.object({
|
|
|
8
9
|
});
|
|
9
10
|
export const EmbeddingsBodySchema = z.looseObject({
|
|
10
11
|
model: z.string(),
|
|
12
|
+
trace: TraceSchema,
|
|
11
13
|
...EmbeddingsInputsSchema.shape,
|
|
12
14
|
});
|
|
13
15
|
export const EmbeddingsDataSchema = z.object({
|
|
@@ -55,14 +55,14 @@ export const messages = (config) => {
|
|
|
55
55
|
ctx.resolvedProviderId = languageModel.provider;
|
|
56
56
|
logger.debug(`[messages] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
|
|
57
57
|
addSpanEvent("hebo.provider.resolved");
|
|
58
|
-
|
|
59
|
-
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx,
|
|
58
|
+
ctx.trace ??= ctx.body.trace ?? cfg.telemetry?.signals?.gen_ai;
|
|
59
|
+
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, ctx.trace);
|
|
60
60
|
setSpanAttributes(genAiGeneralAttrs);
|
|
61
|
-
const { model: _model, stream, ...inputs } = ctx.body;
|
|
61
|
+
const { model: _model, stream, trace: _trace, ...inputs } = ctx.body;
|
|
62
62
|
const textOptions = convertToTextCallOptions(inputs);
|
|
63
63
|
logger.trace({ requestId: ctx.requestId, options: textOptions }, "[messages] AI SDK options");
|
|
64
64
|
addSpanEvent("hebo.options.prepared");
|
|
65
|
-
setSpanAttributes(getMessagesRequestAttributes(ctx.body,
|
|
65
|
+
setSpanAttributes(getMessagesRequestAttributes(ctx.body, ctx.trace));
|
|
66
66
|
const languageModelWithMiddleware = wrapLanguageModel({
|
|
67
67
|
model: languageModel,
|
|
68
68
|
middleware: modelMiddlewareMatcher.for(ctx.resolvedModelId, languageModel.provider),
|
|
@@ -84,7 +84,7 @@ export const messages = (config) => {
|
|
|
84
84
|
onChunk: () => {
|
|
85
85
|
if (!ttft) {
|
|
86
86
|
ttft = performance.now() - start;
|
|
87
|
-
recordTimeToFirstToken(ttft, genAiGeneralAttrs,
|
|
87
|
+
recordTimeToFirstToken(ttft, genAiGeneralAttrs, ctx.trace);
|
|
88
88
|
}
|
|
89
89
|
},
|
|
90
90
|
onFinish: (res) => {
|
|
@@ -92,10 +92,10 @@ export const messages = (config) => {
|
|
|
92
92
|
const streamResult = toMessages(res, ctx.resolvedModelId);
|
|
93
93
|
logger.trace({ requestId: ctx.requestId, result: streamResult }, "[messages] Messages");
|
|
94
94
|
addSpanEvent("hebo.result.transformed");
|
|
95
|
-
const genAiResponseAttrs = getMessagesResponseAttributes(streamResult,
|
|
95
|
+
const genAiResponseAttrs = getMessagesResponseAttributes(streamResult, ctx.trace, res.finishReason);
|
|
96
96
|
setSpanAttributes(genAiResponseAttrs);
|
|
97
|
-
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs,
|
|
98
|
-
recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs,
|
|
97
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
98
|
+
recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
99
99
|
},
|
|
100
100
|
experimental_include: {
|
|
101
101
|
requestBody: false,
|
|
@@ -124,18 +124,18 @@ export const messages = (config) => {
|
|
|
124
124
|
});
|
|
125
125
|
logger.trace({ requestId: ctx.requestId, result }, "[messages] AI SDK result");
|
|
126
126
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
127
|
-
recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs,
|
|
127
|
+
recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, ctx.trace);
|
|
128
128
|
ctx.result = toMessages(result, ctx.resolvedModelId);
|
|
129
129
|
logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[messages] Messages");
|
|
130
130
|
addSpanEvent("hebo.result.transformed");
|
|
131
|
-
const genAiResponseAttrs = getMessagesResponseAttributes(ctx.result,
|
|
131
|
+
const genAiResponseAttrs = getMessagesResponseAttributes(ctx.result, ctx.trace, result.finishReason);
|
|
132
132
|
setSpanAttributes(genAiResponseAttrs);
|
|
133
|
-
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs,
|
|
133
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
134
134
|
if (hooks?.after) {
|
|
135
135
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
136
136
|
addSpanEvent("hebo.hooks.after.completed");
|
|
137
137
|
}
|
|
138
|
-
recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs,
|
|
138
|
+
recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
139
139
|
return ctx.result;
|
|
140
140
|
};
|
|
141
141
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -452,6 +452,12 @@ export declare const MessagesBodySchema: z.ZodObject<{
|
|
|
452
452
|
}, z.core.$strip>>;
|
|
453
453
|
}, z.core.$strip>>]>>;
|
|
454
454
|
stream: z.ZodOptional<z.ZodBoolean>;
|
|
455
|
+
trace: z.ZodOptional<z.ZodUnion<readonly [z.ZodPipe<z.ZodBoolean, z.ZodTransform<"off" | undefined, boolean>>, z.ZodEnum<{
|
|
456
|
+
off: "off";
|
|
457
|
+
required: "required";
|
|
458
|
+
recommended: "recommended";
|
|
459
|
+
full: "full";
|
|
460
|
+
}>]>>;
|
|
455
461
|
temperature: z.ZodOptional<z.ZodNumber>;
|
|
456
462
|
top_p: z.ZodOptional<z.ZodNumber>;
|
|
457
463
|
stop_sequences: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
-
import { CacheControlSchema, ProviderMetadataSchema } from "../shared/schema";
|
|
2
|
+
import { CacheControlSchema, ProviderMetadataSchema, TraceSchema } from "../shared/schema";
|
|
3
3
|
// --- Content Block Schemas ---
|
|
4
4
|
const TextBlockSchema = z.object({
|
|
5
5
|
type: z.literal("text"),
|
|
@@ -172,6 +172,7 @@ export const MessagesBodySchema = z.object({
|
|
|
172
172
|
messages: z.array(MessagesMessageSchema),
|
|
173
173
|
system: z.union([z.string(), z.array(SystemBlockSchema)]).optional(),
|
|
174
174
|
stream: z.boolean().optional(),
|
|
175
|
+
trace: TraceSchema,
|
|
175
176
|
temperature: z.number().optional(),
|
|
176
177
|
top_p: z.number().optional(),
|
|
177
178
|
stop_sequences: z.array(z.string()).optional(),
|
|
@@ -91,17 +91,24 @@ export function convertToModelMessages(input, instructions) {
|
|
|
91
91
|
}
|
|
92
92
|
function fromReasoningItem(item) {
|
|
93
93
|
const parts = [];
|
|
94
|
-
|
|
94
|
+
// Prefer content (full thinking text) over summary when available
|
|
95
|
+
const source = item.content && item.content.length > 0 ? item.content : item.summary;
|
|
96
|
+
if (!source || source.length === 0) {
|
|
95
97
|
return { role: "assistant", content: parts };
|
|
96
98
|
}
|
|
97
99
|
let providerOptions;
|
|
98
|
-
if (item.extra_content || item.encrypted_content) {
|
|
99
|
-
providerOptions = item.extra_content
|
|
100
|
+
if (item.extra_content || item.encrypted_content || item.signature) {
|
|
101
|
+
providerOptions = item.extra_content ? { ...item.extra_content } : { unknown: {} };
|
|
102
|
+
const existing = (providerOptions["unknown"] ?? {});
|
|
100
103
|
if (item.encrypted_content) {
|
|
101
|
-
|
|
104
|
+
existing["redactedData"] = item.encrypted_content;
|
|
102
105
|
}
|
|
106
|
+
if (item.signature) {
|
|
107
|
+
existing["signature"] = item.signature;
|
|
108
|
+
}
|
|
109
|
+
providerOptions["unknown"] = existing;
|
|
103
110
|
}
|
|
104
|
-
for (const s of
|
|
111
|
+
for (const s of source) {
|
|
105
112
|
parts.push({
|
|
106
113
|
type: "reasoning",
|
|
107
114
|
text: s.text,
|
|
@@ -477,13 +484,17 @@ function toReasoningOutputItem(reasoning) {
|
|
|
477
484
|
};
|
|
478
485
|
if (reasoning.text) {
|
|
479
486
|
item.summary = [{ type: "summary_text", text: reasoning.text }];
|
|
487
|
+
item.content = [{ type: "reasoning_text", text: reasoning.text }];
|
|
480
488
|
}
|
|
481
489
|
const providerMetadata = reasoning.providerMetadata ?? {};
|
|
482
490
|
item.extra_content = providerMetadata;
|
|
483
|
-
const { redactedData } = extractReasoningMetadata(providerMetadata);
|
|
491
|
+
const { redactedData, signature } = extractReasoningMetadata(providerMetadata);
|
|
484
492
|
if (redactedData) {
|
|
485
493
|
item.encrypted_content = redactedData;
|
|
486
494
|
}
|
|
495
|
+
if (signature) {
|
|
496
|
+
item.signature = signature;
|
|
497
|
+
}
|
|
487
498
|
return item;
|
|
488
499
|
}
|
|
489
500
|
function toFunctionCallItem(toolCallId, toolName, input, providerMetadata, status = "completed") {
|
|
@@ -560,6 +571,7 @@ export class ResponsesTransformStream extends TransformStream {
|
|
|
560
571
|
let reasoningItem;
|
|
561
572
|
let reasoningOutputIndex = -1;
|
|
562
573
|
let summaryIndex = 0;
|
|
574
|
+
let reasoningContentIndex = 0;
|
|
563
575
|
let finishProviderMetadata;
|
|
564
576
|
const outputItems = [];
|
|
565
577
|
const inProgressToolCalls = new Map();
|
|
@@ -592,8 +604,13 @@ export class ResponsesTransformStream extends TransformStream {
|
|
|
592
604
|
type: "summary_text",
|
|
593
605
|
text: s.text,
|
|
594
606
|
})),
|
|
607
|
+
content: item.content?.map((c) => ({
|
|
608
|
+
type: "reasoning_text",
|
|
609
|
+
text: c.text,
|
|
610
|
+
})),
|
|
595
611
|
extra_content: item.extra_content,
|
|
596
612
|
encrypted_content: item.encrypted_content,
|
|
613
|
+
signature: item.signature,
|
|
597
614
|
};
|
|
598
615
|
}
|
|
599
616
|
if (item.type === "function_call") {
|
|
@@ -664,6 +681,21 @@ export class ResponsesTransformStream extends TransformStream {
|
|
|
664
681
|
});
|
|
665
682
|
}
|
|
666
683
|
}
|
|
684
|
+
if (reasoningItem && reasoningItem.content && reasoningItem.content.length > 0) {
|
|
685
|
+
const lastContentPart = reasoningItem.content[reasoningContentIndex];
|
|
686
|
+
if (lastContentPart) {
|
|
687
|
+
controller.enqueue({
|
|
688
|
+
event: "response.reasoning_text.done",
|
|
689
|
+
data: {
|
|
690
|
+
type: "response.reasoning_text.done",
|
|
691
|
+
item_id: reasoningItem.id,
|
|
692
|
+
output_index: reasoningOutputIndex,
|
|
693
|
+
content_index: reasoningContentIndex,
|
|
694
|
+
text: lastContentPart.text,
|
|
695
|
+
},
|
|
696
|
+
});
|
|
697
|
+
}
|
|
698
|
+
}
|
|
667
699
|
if (reasoningItem) {
|
|
668
700
|
reasoningItem.status = "completed";
|
|
669
701
|
controller.enqueue({
|
|
@@ -813,16 +845,21 @@ export class ResponsesTransformStream extends TransformStream {
|
|
|
813
845
|
id: uuidv7(),
|
|
814
846
|
status: "in_progress",
|
|
815
847
|
summary: [],
|
|
848
|
+
content: [],
|
|
816
849
|
};
|
|
817
850
|
const providerMetadata = part.providerMetadata;
|
|
818
851
|
if (providerMetadata) {
|
|
819
852
|
reasoningItem.extra_content = providerMetadata;
|
|
820
|
-
const { redactedData } = extractReasoningMetadata(providerMetadata);
|
|
853
|
+
const { redactedData, signature } = extractReasoningMetadata(providerMetadata);
|
|
821
854
|
if (redactedData) {
|
|
822
855
|
reasoningItem.encrypted_content = redactedData;
|
|
823
856
|
}
|
|
857
|
+
if (signature) {
|
|
858
|
+
reasoningItem.signature = signature;
|
|
859
|
+
}
|
|
824
860
|
}
|
|
825
861
|
reasoningOutputIndex = outputIndex++;
|
|
862
|
+
reasoningContentIndex = 0;
|
|
826
863
|
outputItems.push(reasoningItem);
|
|
827
864
|
controller.enqueue({
|
|
828
865
|
event: "response.output_item.added",
|
|
@@ -834,14 +871,17 @@ export class ResponsesTransformStream extends TransformStream {
|
|
|
834
871
|
id: reasoningItem.id,
|
|
835
872
|
status: "in_progress",
|
|
836
873
|
summary: [],
|
|
874
|
+
content: [],
|
|
837
875
|
extra_content: reasoningItem.extra_content,
|
|
838
876
|
encrypted_content: reasoningItem.encrypted_content,
|
|
877
|
+
signature: reasoningItem.signature,
|
|
839
878
|
},
|
|
840
879
|
},
|
|
841
880
|
});
|
|
842
881
|
break;
|
|
843
882
|
}
|
|
844
883
|
case "reasoning-delta": {
|
|
884
|
+
// Summary deltas
|
|
845
885
|
if (summaryIndex === reasoningItem.summary.length) {
|
|
846
886
|
const summaryPart = {
|
|
847
887
|
type: "summary_text",
|
|
@@ -873,6 +913,26 @@ export class ResponsesTransformStream extends TransformStream {
|
|
|
873
913
|
delta: part.text,
|
|
874
914
|
},
|
|
875
915
|
});
|
|
916
|
+
// Content deltas (parallel to summary)
|
|
917
|
+
const contentArr = reasoningItem.content;
|
|
918
|
+
if (reasoningContentIndex === contentArr.length) {
|
|
919
|
+
const contentPart = {
|
|
920
|
+
type: "reasoning_text",
|
|
921
|
+
text: "",
|
|
922
|
+
};
|
|
923
|
+
contentArr.push(contentPart);
|
|
924
|
+
}
|
|
925
|
+
contentArr[reasoningContentIndex].text += part.text;
|
|
926
|
+
controller.enqueue({
|
|
927
|
+
event: "response.reasoning_text.delta",
|
|
928
|
+
data: {
|
|
929
|
+
type: "response.reasoning_text.delta",
|
|
930
|
+
item_id: reasoningItem.id,
|
|
931
|
+
output_index: reasoningOutputIndex,
|
|
932
|
+
content_index: reasoningContentIndex,
|
|
933
|
+
delta: part.text,
|
|
934
|
+
},
|
|
935
|
+
});
|
|
876
936
|
break;
|
|
877
937
|
}
|
|
878
938
|
case "reasoning-end": {
|
|
@@ -54,14 +54,14 @@ export const responses = (config) => {
|
|
|
54
54
|
ctx.resolvedProviderId = languageModel.provider;
|
|
55
55
|
logger.debug(`[responses] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
|
|
56
56
|
addSpanEvent("hebo.provider.resolved");
|
|
57
|
-
|
|
58
|
-
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx,
|
|
57
|
+
ctx.trace ??= ctx.body.trace ?? cfg.telemetry?.signals?.gen_ai;
|
|
58
|
+
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, ctx.trace);
|
|
59
59
|
setSpanAttributes(genAiGeneralAttrs);
|
|
60
|
-
const { model: _model, stream, ...inputs } = ctx.body;
|
|
60
|
+
const { model: _model, stream, trace: _trace, ...inputs } = ctx.body;
|
|
61
61
|
const textOptions = convertToTextCallOptions(inputs);
|
|
62
62
|
logger.trace({ requestId: ctx.requestId, options: textOptions }, "[responses] AI SDK options");
|
|
63
63
|
addSpanEvent("hebo.options.prepared");
|
|
64
|
-
setSpanAttributes(getResponsesRequestAttributes(ctx.body,
|
|
64
|
+
setSpanAttributes(getResponsesRequestAttributes(ctx.body, ctx.trace));
|
|
65
65
|
const languageModelWithMiddleware = wrapLanguageModel({
|
|
66
66
|
model: languageModel,
|
|
67
67
|
middleware: modelMiddlewareMatcher.for(ctx.resolvedModelId, languageModel.provider),
|
|
@@ -83,7 +83,7 @@ export const responses = (config) => {
|
|
|
83
83
|
onChunk: () => {
|
|
84
84
|
if (!ttft) {
|
|
85
85
|
ttft = performance.now() - start;
|
|
86
|
-
recordTimeToFirstToken(ttft, genAiGeneralAttrs,
|
|
86
|
+
recordTimeToFirstToken(ttft, genAiGeneralAttrs, ctx.trace);
|
|
87
87
|
}
|
|
88
88
|
},
|
|
89
89
|
onFinish: (res) => {
|
|
@@ -91,10 +91,10 @@ export const responses = (config) => {
|
|
|
91
91
|
const streamResult = toResponses(res, ctx.resolvedModelId, ctx.body.metadata);
|
|
92
92
|
logger.trace({ requestId: ctx.requestId, result: streamResult }, "[responses] Responses");
|
|
93
93
|
addSpanEvent("hebo.result.transformed");
|
|
94
|
-
const genAiResponseAttrs = getResponsesResponseAttributes(streamResult,
|
|
94
|
+
const genAiResponseAttrs = getResponsesResponseAttributes(streamResult, ctx.trace, res.finishReason);
|
|
95
95
|
setSpanAttributes(genAiResponseAttrs);
|
|
96
|
-
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs,
|
|
97
|
-
recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs,
|
|
96
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
97
|
+
recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
98
98
|
},
|
|
99
99
|
experimental_include: {
|
|
100
100
|
requestBody: false,
|
|
@@ -123,18 +123,18 @@ export const responses = (config) => {
|
|
|
123
123
|
});
|
|
124
124
|
logger.trace({ requestId: ctx.requestId, result }, "[responses] AI SDK result");
|
|
125
125
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
126
|
-
recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs,
|
|
126
|
+
recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, ctx.trace);
|
|
127
127
|
ctx.result = toResponses(result, ctx.resolvedModelId, ctx.body.metadata);
|
|
128
128
|
logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[responses] Responses");
|
|
129
129
|
addSpanEvent("hebo.result.transformed");
|
|
130
|
-
const genAiResponseAttrs = getResponsesResponseAttributes(ctx.result,
|
|
130
|
+
const genAiResponseAttrs = getResponsesResponseAttributes(ctx.result, ctx.trace, result.finishReason);
|
|
131
131
|
setSpanAttributes(genAiResponseAttrs);
|
|
132
|
-
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs,
|
|
132
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
133
133
|
if (hooks?.after) {
|
|
134
134
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
135
135
|
addSpanEvent("hebo.hooks.after.completed");
|
|
136
136
|
}
|
|
137
|
-
recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs,
|
|
137
|
+
recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, ctx.trace);
|
|
138
138
|
return ctx.result;
|
|
139
139
|
};
|
|
140
140
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -554,6 +554,7 @@ export declare const ResponsesReasoningItemSchema: z.ZodObject<{
|
|
|
554
554
|
incomplete: "incomplete";
|
|
555
555
|
}>>;
|
|
556
556
|
extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
557
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
557
558
|
}, z.core.$strip>;
|
|
558
559
|
export type ResponsesReasoningItem = z.infer<typeof ResponsesReasoningItemSchema>;
|
|
559
560
|
/**
|
|
@@ -956,6 +957,7 @@ export declare const ResponsesInputItemSchema: z.ZodDiscriminatedUnion<[z.ZodDis
|
|
|
956
957
|
incomplete: "incomplete";
|
|
957
958
|
}>>;
|
|
958
959
|
extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
960
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
959
961
|
}, z.core.$strip>], "type">;
|
|
960
962
|
export type ResponsesInputItem = z.infer<typeof ResponsesInputItemSchema>;
|
|
961
963
|
import { CacheControlSchema as ResponsesCacheControlSchema, ReasoningEffortSchema as ResponsesReasoningEffortSchema, ReasoningConfigSchema as ResponsesReasoningConfigSchema, ServiceTierSchema as ResponsesServiceTierSchema, ProviderMetadataSchema as ResponsesProviderMetadataSchema, type CacheControl as ResponsesCacheControl, type ReasoningEffort as ResponsesReasoningEffort, type ReasoningConfig as ResponsesReasoningConfig, type ServiceTier as ResponsesServiceTier, type ProviderMetadata as ResponsesProviderMetadata, ContentPartAudioSchema as ResponsesInputAudioSchema, type ContentPartAudio as ResponsesInputAudio } from "../shared/schema";
|
|
@@ -1429,6 +1431,7 @@ declare const ResponsesInputsSchema: z.ZodObject<{
|
|
|
1429
1431
|
incomplete: "incomplete";
|
|
1430
1432
|
}>>;
|
|
1431
1433
|
extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
1434
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
1432
1435
|
}, z.core.$strip>], "type">>]>;
|
|
1433
1436
|
instructions: z.ZodOptional<z.ZodString>;
|
|
1434
1437
|
tools: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -1923,6 +1926,7 @@ export declare const ResponsesBodySchema: z.ZodObject<{
|
|
|
1923
1926
|
incomplete: "incomplete";
|
|
1924
1927
|
}>>;
|
|
1925
1928
|
extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
1929
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
1926
1930
|
}, z.core.$strip>], "type">>]>;
|
|
1927
1931
|
instructions: z.ZodOptional<z.ZodString>;
|
|
1928
1932
|
tools: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -2019,6 +2023,12 @@ export declare const ResponsesBodySchema: z.ZodObject<{
|
|
|
2019
2023
|
extra_body: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
2020
2024
|
model: z.ZodString;
|
|
2021
2025
|
stream: z.ZodOptional<z.ZodBoolean>;
|
|
2026
|
+
trace: z.ZodOptional<z.ZodUnion<readonly [z.ZodPipe<z.ZodBoolean, z.ZodTransform<"off" | undefined, boolean>>, z.ZodEnum<{
|
|
2027
|
+
off: "off";
|
|
2028
|
+
required: "required";
|
|
2029
|
+
recommended: "recommended";
|
|
2030
|
+
full: "full";
|
|
2031
|
+
}>]>>;
|
|
2022
2032
|
}, z.core.$strip>;
|
|
2023
2033
|
export type ResponsesBody = z.infer<typeof ResponsesBodySchema>;
|
|
2024
2034
|
/**
|
|
@@ -2094,6 +2104,7 @@ export declare const ResponsesOutputItemSchema: z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2094
2104
|
incomplete: "incomplete";
|
|
2095
2105
|
}>>;
|
|
2096
2106
|
extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
2107
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
2097
2108
|
}, z.core.$strip>], "type">;
|
|
2098
2109
|
export type ResponsesOutputItem = z.infer<typeof ResponsesOutputItemSchema>;
|
|
2099
2110
|
/**
|
|
@@ -2184,6 +2195,7 @@ export declare const ResponsesSchema: z.ZodObject<{
|
|
|
2184
2195
|
incomplete: "incomplete";
|
|
2185
2196
|
}>>;
|
|
2186
2197
|
extra_content: z.ZodOptional<z.ZodType<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown, z.core.$ZodTypeInternals<import("@ai-sdk/provider").SharedV3ProviderMetadata, unknown>>>;
|
|
2198
|
+
signature: z.ZodOptional<z.ZodString>;
|
|
2187
2199
|
}, z.core.$strip>], "type">>;
|
|
2188
2200
|
usage: z.ZodNullable<z.ZodObject<{
|
|
2189
2201
|
input_tokens: z.ZodNumber;
|
|
@@ -2270,6 +2282,20 @@ export type ResponseReasoningSummaryPartDoneEvent = SseFrame<{
|
|
|
2270
2282
|
summary_index: number;
|
|
2271
2283
|
part: ResponsesSummaryText;
|
|
2272
2284
|
}, "response.reasoning_summary_part.done">;
|
|
2285
|
+
export type ResponseReasoningTextDeltaEvent = SseFrame<{
|
|
2286
|
+
type: "response.reasoning_text.delta";
|
|
2287
|
+
item_id: string;
|
|
2288
|
+
output_index: number;
|
|
2289
|
+
content_index: number;
|
|
2290
|
+
delta: string;
|
|
2291
|
+
}, "response.reasoning_text.delta">;
|
|
2292
|
+
export type ResponseReasoningTextDoneEvent = SseFrame<{
|
|
2293
|
+
type: "response.reasoning_text.done";
|
|
2294
|
+
item_id: string;
|
|
2295
|
+
output_index: number;
|
|
2296
|
+
content_index: number;
|
|
2297
|
+
text: string;
|
|
2298
|
+
}, "response.reasoning_text.done">;
|
|
2273
2299
|
export type ResponseOutputItemDoneEvent = SseFrame<{
|
|
2274
2300
|
type: "response.output_item.done";
|
|
2275
2301
|
output_index: number;
|
|
@@ -2297,5 +2323,5 @@ export type ResponseFailedEvent = SseFrame<{
|
|
|
2297
2323
|
type: "response.failed";
|
|
2298
2324
|
response: Responses;
|
|
2299
2325
|
}, "response.failed">;
|
|
2300
|
-
export type ResponsesStreamEvent = ResponseCreatedEvent | ResponseInProgressEvent | ResponseOutputItemAddedEvent | ResponseContentPartAddedEvent | ResponseReasoningSummaryPartAddedEvent | ResponseOutputTextDeltaEvent | ResponseReasoningSummaryTextDeltaEvent | ResponseContentPartDoneEvent | ResponseReasoningSummaryPartDoneEvent | ResponseOutputItemDoneEvent | ResponseFunctionCallArgumentsDeltaEvent | ResponseFunctionCallArgumentsDoneEvent | ResponseCompletedEvent | ResponseFailedEvent;
|
|
2326
|
+
export type ResponsesStreamEvent = ResponseCreatedEvent | ResponseInProgressEvent | ResponseOutputItemAddedEvent | ResponseContentPartAddedEvent | ResponseReasoningSummaryPartAddedEvent | ResponseOutputTextDeltaEvent | ResponseReasoningSummaryTextDeltaEvent | ResponseReasoningTextDeltaEvent | ResponseContentPartDoneEvent | ResponseReasoningSummaryPartDoneEvent | ResponseReasoningTextDoneEvent | ResponseOutputItemDoneEvent | ResponseFunctionCallArgumentsDeltaEvent | ResponseFunctionCallArgumentsDoneEvent | ResponseCompletedEvent | ResponseFailedEvent;
|
|
2301
2327
|
export type ResponsesStream = ReadableStream<ResponsesStreamEvent | SseErrorFrame>;
|
|
@@ -147,6 +147,8 @@ export const ResponsesReasoningItemSchema = z.object({
|
|
|
147
147
|
status: ResponsesItemStatusSchema.optional(),
|
|
148
148
|
// Extension origin: Gemini
|
|
149
149
|
extra_content: ResponsesProviderMetadataSchema.optional().meta({ extension: true }),
|
|
150
|
+
// Extension origin: Anthropic/OpenRouter
|
|
151
|
+
signature: z.string().optional().meta({ extension: true }),
|
|
150
152
|
});
|
|
151
153
|
/**
|
|
152
154
|
* --- Input Items ---
|
|
@@ -157,7 +159,7 @@ export const ResponsesInputItemSchema = z.discriminatedUnion("type", [
|
|
|
157
159
|
ResponsesFunctionCallOutputSchema,
|
|
158
160
|
ResponsesReasoningItemSchema,
|
|
159
161
|
]);
|
|
160
|
-
import { CacheControlSchema as ResponsesCacheControlSchema, ReasoningEffortSchema as ResponsesReasoningEffortSchema, ReasoningConfigSchema as ResponsesReasoningConfigSchema, ServiceTierSchema as ResponsesServiceTierSchema, ProviderMetadataSchema as ResponsesProviderMetadataSchema, ContentPartAudioSchema as ResponsesInputAudioSchema, } from "../shared/schema";
|
|
162
|
+
import { CacheControlSchema as ResponsesCacheControlSchema, ReasoningEffortSchema as ResponsesReasoningEffortSchema, ReasoningConfigSchema as ResponsesReasoningConfigSchema, ServiceTierSchema as ResponsesServiceTierSchema, ProviderMetadataSchema as ResponsesProviderMetadataSchema, ContentPartAudioSchema as ResponsesInputAudioSchema, TraceSchema, } from "../shared/schema";
|
|
161
163
|
export { ResponsesCacheControlSchema, ResponsesReasoningEffortSchema, ResponsesReasoningConfigSchema, ResponsesServiceTierSchema, ResponsesProviderMetadataSchema, ResponsesInputAudioSchema, };
|
|
162
164
|
/**
|
|
163
165
|
* --- Tools ---
|
|
@@ -251,6 +253,7 @@ const ResponsesInputsSchema = z.object({
|
|
|
251
253
|
export const ResponsesBodySchema = z.object({
|
|
252
254
|
model: z.string(),
|
|
253
255
|
stream: z.boolean().optional(),
|
|
256
|
+
trace: TraceSchema,
|
|
254
257
|
...ResponsesInputsSchema.shape,
|
|
255
258
|
});
|
|
256
259
|
/**
|
|
@@ -61,6 +61,16 @@ export declare const ServiceTierSchema: z.ZodEnum<{
|
|
|
61
61
|
priority: "priority";
|
|
62
62
|
}>;
|
|
63
63
|
export type ServiceTier = z.infer<typeof ServiceTierSchema>;
|
|
64
|
+
/**
|
|
65
|
+
* Per-request trace control.
|
|
66
|
+
* Accepts a boolean (`false` → "off", `true` → stripped) or a signal level string.
|
|
67
|
+
*/
|
|
68
|
+
export declare const TraceSchema: z.ZodOptional<z.ZodUnion<readonly [z.ZodPipe<z.ZodBoolean, z.ZodTransform<"off" | undefined, boolean>>, z.ZodEnum<{
|
|
69
|
+
off: "off";
|
|
70
|
+
required: "required";
|
|
71
|
+
recommended: "recommended";
|
|
72
|
+
full: "full";
|
|
73
|
+
}>]>>;
|
|
64
74
|
export declare const ContentPartAudioSchema: z.ZodObject<{
|
|
65
75
|
type: z.ZodLiteral<"input_audio">;
|
|
66
76
|
input_audio: z.ZodObject<{
|
|
@@ -40,6 +40,16 @@ const InputAudioSchema = z.object({
|
|
|
40
40
|
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/multimodal/audio-understanding
|
|
41
41
|
format: InputAudioFormatSchema,
|
|
42
42
|
});
|
|
43
|
+
/**
|
|
44
|
+
* Per-request trace control.
|
|
45
|
+
* Accepts a boolean (`false` → "off", `true` → stripped) or a signal level string.
|
|
46
|
+
*/
|
|
47
|
+
export const TraceSchema = z
|
|
48
|
+
.union([
|
|
49
|
+
z.boolean().transform((v) => (v ? undefined : "off")),
|
|
50
|
+
z.enum(["off", "required", "recommended", "full"]),
|
|
51
|
+
])
|
|
52
|
+
.optional();
|
|
43
53
|
export const ContentPartAudioSchema = z.object({
|
|
44
54
|
type: z.literal("input_audio"),
|
|
45
55
|
input_audio: InputAudioSchema,
|
package/dist/lifecycle.js
CHANGED
|
@@ -60,7 +60,7 @@ export const winterCgHandler = (run, config) => {
|
|
|
60
60
|
ctx.operation === "embeddings" ||
|
|
61
61
|
ctx.operation === "messages" ||
|
|
62
62
|
ctx.operation === "responses") {
|
|
63
|
-
recordRequestDuration(performance.now() - start, realStatus, ctx, parsedConfig.telemetry?.signals?.gen_ai);
|
|
63
|
+
recordRequestDuration(performance.now() - start, realStatus, ctx, ctx.trace ?? parsedConfig.telemetry?.signals?.gen_ai);
|
|
64
64
|
}
|
|
65
65
|
span.finish();
|
|
66
66
|
};
|
|
@@ -78,7 +78,7 @@ export const bedrockClaudeReasoningMiddleware = {
|
|
|
78
78
|
// "adaptive" (which doesn't require budgetTokens), compute a fallback using
|
|
79
79
|
// the same effort-based logic as other model cases, defaulting to "medium".
|
|
80
80
|
// Note: Bedrock Converse API doesn't support "adaptive" natively — see vercel/ai#8513
|
|
81
|
-
const mappedEffort = effort === "max" ? "xhigh" : effort ?? "medium";
|
|
81
|
+
const mappedEffort = effort === "max" ? "xhigh" : (effort ?? "medium");
|
|
82
82
|
target.budgetTokens = calculateReasoningBudgetFromEffort(mappedEffort, params.maxOutputTokens ?? 65536, 1024);
|
|
83
83
|
}
|
|
84
84
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -71,6 +71,12 @@ export type GatewayContext = {
|
|
|
71
71
|
* Response object returned by the handler.
|
|
72
72
|
*/
|
|
73
73
|
response?: Response;
|
|
74
|
+
/**
|
|
75
|
+
* Per-request telemetry signal level override.
|
|
76
|
+
* When set (via body parameter or hook), overrides `cfg.telemetry.signals.gen_ai`
|
|
77
|
+
* for this request's span attributes and metrics.
|
|
78
|
+
*/
|
|
79
|
+
trace?: TelemetrySignalLevel;
|
|
74
80
|
/**
|
|
75
81
|
* Error thrown during execution.
|
|
76
82
|
*/
|
|
@@ -79,9 +85,10 @@ export type GatewayContext = {
|
|
|
79
85
|
/**
|
|
80
86
|
* Hook context: all fields readonly except `state` and `otel`.
|
|
81
87
|
*/
|
|
82
|
-
export type HookContext = Omit<Readonly<GatewayContext>, "state" | "otel"> & {
|
|
88
|
+
export type HookContext = Omit<Readonly<GatewayContext>, "state" | "otel" | "trace"> & {
|
|
83
89
|
state: GatewayContext["state"];
|
|
84
90
|
otel: GatewayContext["otel"];
|
|
91
|
+
trace: GatewayContext["trace"];
|
|
85
92
|
};
|
|
86
93
|
type RequiredHookContext<K extends keyof GatewayContext> = Omit<HookContext, K> & Required<Pick<HookContext, K>>;
|
|
87
94
|
export type OnRequestHookContext = RequiredHookContext<"request">;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hebo-ai/gateway",
|
|
3
|
-
"version": "0.10.2",
|
|
3
|
+
"version": "0.10.3",
|
|
4
4
|
"description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI /chat/completions, OpenResponses /responses & Anthropic /messages.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
@@ -208,6 +208,7 @@
|
|
|
208
208
|
"lefthook": "^2.1.5",
|
|
209
209
|
"mysql2": "^3.21.0",
|
|
210
210
|
"next": "^16.2.3",
|
|
211
|
+
"openai": "^6.34.0",
|
|
211
212
|
"oxfmt": "^0.44.0",
|
|
212
213
|
"oxlint": "^1.59.0",
|
|
213
214
|
"oxlint-tsgolint": "^0.20.0",
|