@hebo-ai/gateway 0.4.0-beta.2 → 0.4.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -5
- package/dist/config.js +21 -7
- package/dist/endpoints/chat-completions/converters.js +2 -2
- package/dist/endpoints/chat-completions/handler.js +31 -25
- package/dist/endpoints/chat-completions/otel.d.ts +6 -0
- package/dist/endpoints/chat-completions/otel.js +121 -0
- package/dist/endpoints/embeddings/handler.js +19 -12
- package/dist/endpoints/embeddings/otel.d.ts +6 -0
- package/dist/endpoints/embeddings/otel.js +35 -0
- package/dist/endpoints/models/handler.js +3 -4
- package/dist/errors/gateway.js +1 -2
- package/dist/errors/openai.js +10 -12
- package/dist/errors/utils.d.ts +1 -3
- package/dist/errors/utils.js +5 -6
- package/dist/gateway.js +1 -1
- package/dist/lifecycle.js +62 -28
- package/dist/middleware/matcher.js +1 -1
- package/dist/models/amazon/presets.d.ts +37 -37
- package/dist/models/amazon/presets.js +1 -1
- package/dist/models/anthropic/presets.d.ts +56 -56
- package/dist/models/cohere/presets.d.ts +54 -54
- package/dist/models/cohere/presets.js +2 -2
- package/dist/models/google/presets.d.ts +31 -31
- package/dist/models/google/presets.js +1 -1
- package/dist/models/meta/presets.d.ts +42 -42
- package/dist/models/openai/presets.d.ts +96 -96
- package/dist/models/openai/presets.js +1 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/voyage/presets.d.ts +92 -92
- package/dist/models/voyage/presets.js +1 -1
- package/dist/providers/registry.js +2 -2
- package/dist/telemetry/baggage.d.ts +1 -0
- package/dist/telemetry/baggage.js +24 -0
- package/dist/telemetry/fetch.d.ts +2 -1
- package/dist/telemetry/fetch.js +13 -3
- package/dist/telemetry/gen-ai.d.ts +4 -0
- package/dist/telemetry/gen-ai.js +42 -0
- package/dist/telemetry/http.d.ts +3 -0
- package/dist/telemetry/http.js +57 -0
- package/dist/telemetry/span.d.ts +6 -3
- package/dist/telemetry/span.js +23 -35
- package/dist/telemetry/stream.d.ts +3 -7
- package/dist/telemetry/stream.js +18 -18
- package/dist/types.d.ts +14 -12
- package/dist/utils/headers.d.ts +1 -1
- package/dist/utils/headers.js +7 -9
- package/dist/utils/request.d.ts +0 -4
- package/dist/utils/request.js +0 -9
- package/dist/utils/response.js +1 -1
- package/package.json +4 -2
- package/src/config.ts +28 -7
- package/src/endpoints/chat-completions/converters.ts +2 -2
- package/src/endpoints/chat-completions/handler.ts +39 -26
- package/src/endpoints/chat-completions/otel.ts +154 -0
- package/src/endpoints/embeddings/handler.test.ts +2 -2
- package/src/endpoints/embeddings/handler.ts +24 -12
- package/src/endpoints/embeddings/otel.ts +56 -0
- package/src/endpoints/models/handler.ts +3 -5
- package/src/errors/gateway.ts +1 -2
- package/src/errors/openai.ts +24 -17
- package/src/errors/utils.ts +5 -7
- package/src/gateway.ts +1 -1
- package/src/lifecycle.ts +73 -31
- package/src/middleware/matcher.ts +1 -1
- package/src/models/amazon/presets.ts +1 -1
- package/src/models/cohere/presets.ts +2 -2
- package/src/models/google/presets.ts +1 -1
- package/src/models/openai/presets.ts +1 -1
- package/src/models/types.ts +1 -1
- package/src/models/voyage/presets.ts +1 -1
- package/src/providers/registry.ts +2 -2
- package/src/telemetry/baggage.ts +27 -0
- package/src/telemetry/fetch.ts +15 -3
- package/src/telemetry/gen-ai.ts +60 -0
- package/src/telemetry/http.ts +65 -0
- package/src/telemetry/span.ts +28 -40
- package/src/telemetry/stream.ts +26 -30
- package/src/types.ts +15 -12
- package/src/utils/headers.ts +8 -19
- package/src/utils/request.ts +0 -11
- package/src/utils/response.ts +1 -1
- package/dist/telemetry/otel.d.ts +0 -2
- package/dist/telemetry/otel.js +0 -46
- package/dist/telemetry/utils.d.ts +0 -4
- package/dist/telemetry/utils.js +0 -223
- package/src/telemetry/otel.ts +0 -87
- package/src/telemetry/utils.ts +0 -273
|
@@ -23,16 +23,24 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
23
23
|
import { logger } from "../../logger";
|
|
24
24
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
25
25
|
import { resolveProvider } from "../../providers/registry";
|
|
26
|
-
import {
|
|
26
|
+
import { recordRequestDuration, recordTokenUsage } from "../../telemetry/gen-ai";
|
|
27
|
+
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
27
28
|
import { resolveRequestId } from "../../utils/headers";
|
|
28
29
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
29
30
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
31
|
+
import {
|
|
32
|
+
getChatGeneralAttributes,
|
|
33
|
+
getChatRequestAttributes,
|
|
34
|
+
getChatResponseAttributes,
|
|
35
|
+
} from "./otel";
|
|
30
36
|
import { ChatCompletionsBodySchema } from "./schema";
|
|
31
37
|
|
|
32
38
|
export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
33
39
|
const hooks = config.hooks;
|
|
34
40
|
|
|
35
41
|
const handler = async (ctx: GatewayContext) => {
|
|
42
|
+
const start = performance.now();
|
|
43
|
+
ctx.operation = "chat";
|
|
36
44
|
addSpanEvent("hebo.handler.started");
|
|
37
45
|
|
|
38
46
|
// Guard: enforce HTTP method early.
|
|
@@ -43,22 +51,21 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
43
51
|
const requestId = resolveRequestId(ctx.request);
|
|
44
52
|
|
|
45
53
|
// Parse + validate input.
|
|
46
|
-
let body;
|
|
47
54
|
try {
|
|
48
|
-
body = await ctx.request.json();
|
|
55
|
+
ctx.body = await ctx.request.json();
|
|
49
56
|
} catch {
|
|
50
57
|
throw new GatewayError("Invalid JSON", 400);
|
|
51
58
|
}
|
|
52
59
|
addSpanEvent("hebo.request.deserialized");
|
|
53
60
|
|
|
54
|
-
const parsed = ChatCompletionsBodySchema.safeParse(body);
|
|
61
|
+
const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
|
|
55
62
|
if (!parsed.success) {
|
|
63
|
+
// FUTURE: add body shape to error message
|
|
56
64
|
throw new GatewayError(z.prettifyError(parsed.error), 400);
|
|
57
65
|
}
|
|
58
66
|
ctx.body = parsed.data;
|
|
59
67
|
addSpanEvent("hebo.request.parsed");
|
|
60
68
|
|
|
61
|
-
ctx.operation = "chat";
|
|
62
69
|
if (hooks?.before) {
|
|
63
70
|
ctx.body = (await hooks.before(ctx as BeforeHookContext)) ?? ctx.body;
|
|
64
71
|
addSpanEvent("hebo.hooks.before.completed");
|
|
@@ -71,10 +78,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
71
78
|
ctx.resolvedModelId =
|
|
72
79
|
(await hooks?.resolveModelId?.(ctx as ResolveModelHookContext)) ?? ctx.modelId;
|
|
73
80
|
logger.debug(`[chat] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
|
|
74
|
-
addSpanEvent("hebo.model.resolved"
|
|
75
|
-
"gen_ai.request.model": ctx.modelId ?? "",
|
|
76
|
-
"gen_ai.response.model": ctx.resolvedModelId ?? "",
|
|
77
|
-
});
|
|
81
|
+
addSpanEvent("hebo.model.resolved");
|
|
78
82
|
|
|
79
83
|
const override = await hooks?.resolveProvider?.(ctx as ResolveProviderHookContext);
|
|
80
84
|
ctx.provider =
|
|
@@ -89,7 +93,11 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
89
93
|
const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
|
|
90
94
|
ctx.resolvedProviderId = languageModel.provider;
|
|
91
95
|
logger.debug(`[chat] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
|
|
92
|
-
addSpanEvent("hebo.provider.resolved"
|
|
96
|
+
addSpanEvent("hebo.provider.resolved");
|
|
97
|
+
|
|
98
|
+
const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
|
|
99
|
+
const genAiGeneralAttrs = getChatGeneralAttributes(ctx, genAiSignalLevel);
|
|
100
|
+
setSpanAttributes(genAiGeneralAttrs);
|
|
93
101
|
|
|
94
102
|
// Convert inputs to AI SDK call options.
|
|
95
103
|
const textOptions = convertToTextCallOptions(inputs);
|
|
@@ -101,6 +109,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
101
109
|
"[chat] AI SDK options",
|
|
102
110
|
);
|
|
103
111
|
addSpanEvent("hebo.options.prepared");
|
|
112
|
+
setSpanAttributes(getChatRequestAttributes(inputs, genAiSignalLevel));
|
|
104
113
|
|
|
105
114
|
// Build middleware chain (model -> forward params -> provider).
|
|
106
115
|
const languageModelWithMiddleware = wrapLanguageModel({
|
|
@@ -116,25 +125,25 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
116
125
|
headers: prepareForwardHeaders(ctx.request),
|
|
117
126
|
// No abort signal here, otherwise we can't detect upstream from client cancellations
|
|
118
127
|
// abortSignal: ctx.request.signal,
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
logger.error({
|
|
122
|
-
requestId,
|
|
123
|
-
err,
|
|
124
|
-
});
|
|
125
|
-
throw error;
|
|
128
|
+
timeout: {
|
|
129
|
+
totalMs: 5 * 60 * 1000,
|
|
126
130
|
},
|
|
127
131
|
onAbort: () => {
|
|
128
132
|
throw new DOMException("Upstream failed", "AbortError");
|
|
129
133
|
},
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
134
|
+
onError: () => {},
|
|
135
|
+
onFinish: (res) => {
|
|
136
|
+
addSpanEvent("hebo.ai-sdk.completed");
|
|
137
|
+
const streamResult = toChatCompletions(
|
|
138
|
+
res as unknown as GenerateTextResult<ToolSet, Output.Output>,
|
|
133
139
|
ctx.resolvedModelId!,
|
|
134
140
|
);
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
141
|
+
addSpanEvent("hebo.result.transformed");
|
|
142
|
+
|
|
143
|
+
const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
|
|
144
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
145
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
146
|
+
recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
|
|
138
147
|
},
|
|
139
148
|
experimental_include: {
|
|
140
149
|
requestBody: false,
|
|
@@ -142,10 +151,8 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
142
151
|
includeRawChunks: false,
|
|
143
152
|
...textOptions,
|
|
144
153
|
});
|
|
145
|
-
addSpanEvent("hebo.ai-sdk.completed");
|
|
146
154
|
|
|
147
155
|
ctx.result = toChatCompletionsStream(result, ctx.resolvedModelId);
|
|
148
|
-
addSpanEvent("hebo.result.transformed");
|
|
149
156
|
|
|
150
157
|
if (hooks?.after) {
|
|
151
158
|
ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
|
|
@@ -161,24 +168,30 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
161
168
|
headers: prepareForwardHeaders(ctx.request),
|
|
162
169
|
// FUTURE: currently can't tell whether upstream or downstream abort
|
|
163
170
|
abortSignal: ctx.request.signal,
|
|
171
|
+
timeout: 5 * 60 * 1000,
|
|
164
172
|
experimental_include: {
|
|
165
173
|
requestBody: false,
|
|
166
174
|
responseBody: false,
|
|
167
175
|
},
|
|
168
|
-
timeout: 5 * 60 * 1000,
|
|
169
176
|
...textOptions,
|
|
170
177
|
});
|
|
171
178
|
logger.trace({ requestId, result }, "[chat] AI SDK result");
|
|
172
179
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
173
180
|
|
|
181
|
+
// Transform result.
|
|
174
182
|
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
175
183
|
addSpanEvent("hebo.result.transformed");
|
|
176
184
|
|
|
185
|
+
const genAiResponseAttrs = getChatResponseAttributes(ctx.result, genAiSignalLevel);
|
|
186
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
187
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
188
|
+
|
|
177
189
|
if (hooks?.after) {
|
|
178
190
|
ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
|
|
179
191
|
addSpanEvent("hebo.hooks.after.completed");
|
|
180
192
|
}
|
|
181
193
|
|
|
194
|
+
recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
|
|
182
195
|
return ctx.result;
|
|
183
196
|
};
|
|
184
197
|
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
+
|
|
3
|
+
import type {
|
|
4
|
+
ChatCompletions,
|
|
5
|
+
ChatCompletionsBody,
|
|
6
|
+
ChatCompletionsContentPart,
|
|
7
|
+
ChatCompletionsMessage,
|
|
8
|
+
} from "./schema";
|
|
9
|
+
|
|
10
|
+
import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
|
|
11
|
+
|
|
12
|
+
const toTextPart = (content: string): Record<string, unknown> => ({ type: "text", content });
|
|
13
|
+
|
|
14
|
+
const toMessageParts = (message: ChatCompletionsMessage): Record<string, unknown>[] => {
|
|
15
|
+
if (message.role === "assistant") {
|
|
16
|
+
const parts: Record<string, unknown>[] = [];
|
|
17
|
+
if (typeof message.content === "string") parts.push(toTextPart(message.content));
|
|
18
|
+
if (Array.isArray(message.tool_calls)) {
|
|
19
|
+
for (const call of message.tool_calls) {
|
|
20
|
+
parts.push({
|
|
21
|
+
type: "tool_call",
|
|
22
|
+
id: call.id,
|
|
23
|
+
name: call.function.name,
|
|
24
|
+
arguments: call.function.arguments,
|
|
25
|
+
});
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
return parts;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
if (message.role === "tool") {
|
|
32
|
+
return [{ type: "tool_call_response", id: message.tool_call_id, content: message.content }];
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
if (message.role === "user") {
|
|
36
|
+
const parts: Record<string, unknown>[] = [];
|
|
37
|
+
if (typeof message.content === "string") parts.push(toTextPart(message.content));
|
|
38
|
+
if (Array.isArray(message.content)) {
|
|
39
|
+
for (const part of message.content as ChatCompletionsContentPart[]) {
|
|
40
|
+
if (part.type === "text") {
|
|
41
|
+
parts.push(toTextPart(part.text));
|
|
42
|
+
} else if (part.type === "image_url") {
|
|
43
|
+
parts.push({ type: "image", content: part.image_url.url });
|
|
44
|
+
} else {
|
|
45
|
+
parts.push({
|
|
46
|
+
type: "file",
|
|
47
|
+
// FUTURE: optionally expose safe metadata without raw binary payloads.
|
|
48
|
+
content: part.file.filename ?? "[REDACTED_BINARY_DATA]",
|
|
49
|
+
media_type: part.file.media_type,
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
return parts;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
return [];
|
|
58
|
+
};
|
|
59
|
+
|
|
60
|
+
export const getChatGeneralAttributes = (
|
|
61
|
+
ctx: GatewayContext,
|
|
62
|
+
signalLevel?: TelemetrySignalLevel,
|
|
63
|
+
): Attributes => {
|
|
64
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
65
|
+
|
|
66
|
+
const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
|
|
67
|
+
|
|
68
|
+
return {
|
|
69
|
+
"gen_ai.operation.name": ctx.operation,
|
|
70
|
+
"gen_ai.request.model": requestModel,
|
|
71
|
+
"gen_ai.response.model": ctx.resolvedModelId,
|
|
72
|
+
"gen_ai.provider.name": ctx.resolvedProviderId,
|
|
73
|
+
};
|
|
74
|
+
};
|
|
75
|
+
|
|
76
|
+
export const getChatRequestAttributes = (
|
|
77
|
+
inputs: ChatCompletionsBody,
|
|
78
|
+
signalLevel?: TelemetrySignalLevel,
|
|
79
|
+
): Attributes => {
|
|
80
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
81
|
+
|
|
82
|
+
const attrs: Attributes = {};
|
|
83
|
+
|
|
84
|
+
if (inputs.seed !== undefined) {
|
|
85
|
+
Object.assign(attrs, { "gen_ai.request.seed": inputs.seed });
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
if (signalLevel !== "required") {
|
|
89
|
+
Object.assign(attrs, {
|
|
90
|
+
"gen_ai.request.stream": inputs.stream,
|
|
91
|
+
"gen_ai.request.frequency_penalty": inputs.frequency_penalty,
|
|
92
|
+
"gen_ai.request.max_tokens": inputs.max_completion_tokens,
|
|
93
|
+
"gen_ai.request.presence_penalty": inputs.presence_penalty,
|
|
94
|
+
"gen_ai.request.stop_sequences": inputs.stop
|
|
95
|
+
? Array.isArray(inputs.stop)
|
|
96
|
+
? inputs.stop
|
|
97
|
+
: [inputs.stop]
|
|
98
|
+
: undefined,
|
|
99
|
+
"gen_ai.request.temperature": inputs.temperature,
|
|
100
|
+
"gen_ai.request.top_p": inputs.top_p,
|
|
101
|
+
});
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
if (signalLevel === "full") {
|
|
105
|
+
Object.assign(attrs, {
|
|
106
|
+
"gen_ai.system_instructions": inputs.messages
|
|
107
|
+
.filter((m) => m.role === "system")
|
|
108
|
+
.map((m) => JSON.stringify({ parts: [toTextPart(m.content)] })),
|
|
109
|
+
"gen_ai.input.messages": inputs.messages
|
|
110
|
+
.filter((m) => m.role !== "system")
|
|
111
|
+
.map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
|
|
112
|
+
"gen_ai.tool.definitions": JSON.stringify(inputs.tools),
|
|
113
|
+
});
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return attrs;
|
|
117
|
+
};
|
|
118
|
+
|
|
119
|
+
export const getChatResponseAttributes = (
|
|
120
|
+
completions: ChatCompletions,
|
|
121
|
+
signalLevel?: TelemetrySignalLevel,
|
|
122
|
+
): Attributes => {
|
|
123
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
124
|
+
|
|
125
|
+
const attrs: Attributes = {
|
|
126
|
+
"gen_ai.response.id": completions.id,
|
|
127
|
+
};
|
|
128
|
+
|
|
129
|
+
if (signalLevel !== "required") {
|
|
130
|
+
Object.assign(attrs, {
|
|
131
|
+
"gen_ai.response.finish_reasons": completions.choices?.map((c) => c.finish_reason),
|
|
132
|
+
"gen_ai.usage.total_tokens": completions.usage?.total_tokens,
|
|
133
|
+
"gen_ai.usage.input_tokens": completions.usage?.prompt_tokens,
|
|
134
|
+
"gen_ai.usage.cached_tokens": completions.usage?.prompt_tokens_details?.cached_tokens,
|
|
135
|
+
"gen_ai.usage.output_tokens": completions.usage?.completion_tokens,
|
|
136
|
+
"gen_ai.usage.reasoning_tokens":
|
|
137
|
+
completions.usage?.completion_tokens_details?.reasoning_tokens,
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
if (signalLevel === "full") {
|
|
142
|
+
Object.assign(attrs, {
|
|
143
|
+
"gen_ai.output.messages": completions.choices?.map((c) =>
|
|
144
|
+
JSON.stringify({
|
|
145
|
+
role: c.message.role,
|
|
146
|
+
parts: toMessageParts(c.message),
|
|
147
|
+
finish_reason: c.finish_reason,
|
|
148
|
+
}),
|
|
149
|
+
),
|
|
150
|
+
});
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
return attrs;
|
|
154
|
+
};
|
|
@@ -45,7 +45,7 @@ describe("Embeddings Handler", () => {
|
|
|
45
45
|
models: {
|
|
46
46
|
"text-embedding-3-small": {
|
|
47
47
|
name: "OpenAI Embedding Model",
|
|
48
|
-
modalities: { input: ["text"], output: ["
|
|
48
|
+
modalities: { input: ["text"], output: ["embedding"] },
|
|
49
49
|
providers: ["openai"],
|
|
50
50
|
},
|
|
51
51
|
"gpt-oss-20b": {
|
|
@@ -68,7 +68,7 @@ describe("Embeddings Handler", () => {
|
|
|
68
68
|
expect(data).toMatchObject({
|
|
69
69
|
error: {
|
|
70
70
|
code: "model_unsupported_operation",
|
|
71
|
-
message: "Model 'gpt-oss-20b' does not support '
|
|
71
|
+
message: "Model 'gpt-oss-20b' does not support 'embedding' output",
|
|
72
72
|
type: "invalid_request_error",
|
|
73
73
|
},
|
|
74
74
|
});
|
|
@@ -16,16 +16,24 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
16
16
|
import { logger } from "../../logger";
|
|
17
17
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
18
18
|
import { resolveProvider } from "../../providers/registry";
|
|
19
|
-
import {
|
|
19
|
+
import { recordRequestDuration, recordTokenUsage } from "../../telemetry/gen-ai";
|
|
20
|
+
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
20
21
|
import { resolveRequestId } from "../../utils/headers";
|
|
21
22
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
22
23
|
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
24
|
+
import {
|
|
25
|
+
getEmbeddingsGeneralAttributes,
|
|
26
|
+
getEmbeddingsRequestAttributes,
|
|
27
|
+
getEmbeddingsResponseAttributes,
|
|
28
|
+
} from "./otel";
|
|
23
29
|
import { EmbeddingsBodySchema } from "./schema";
|
|
24
30
|
|
|
25
31
|
export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
26
32
|
const hooks = config.hooks;
|
|
27
33
|
|
|
28
34
|
const handler = async (ctx: GatewayContext) => {
|
|
35
|
+
const start = performance.now();
|
|
36
|
+
ctx.operation = "embeddings";
|
|
29
37
|
addSpanEvent("hebo.handler.started");
|
|
30
38
|
|
|
31
39
|
// Guard: enforce HTTP method early.
|
|
@@ -36,22 +44,21 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
36
44
|
const requestId = resolveRequestId(ctx.request);
|
|
37
45
|
|
|
38
46
|
// Parse + validate input.
|
|
39
|
-
let body;
|
|
40
47
|
try {
|
|
41
|
-
body = await ctx.request.json();
|
|
48
|
+
ctx.body = await ctx.request.json();
|
|
42
49
|
} catch {
|
|
43
50
|
throw new GatewayError("Invalid JSON", 400);
|
|
44
51
|
}
|
|
45
52
|
addSpanEvent("hebo.request.deserialized");
|
|
46
53
|
|
|
47
|
-
const parsed = EmbeddingsBodySchema.safeParse(body);
|
|
54
|
+
const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
|
|
48
55
|
if (!parsed.success) {
|
|
56
|
+
// FUTURE: add body shape to error message
|
|
49
57
|
throw new GatewayError(z.prettifyError(parsed.error), 400);
|
|
50
58
|
}
|
|
51
59
|
ctx.body = parsed.data;
|
|
52
60
|
addSpanEvent("hebo.request.parsed");
|
|
53
61
|
|
|
54
|
-
ctx.operation = "embeddings";
|
|
55
62
|
if (hooks?.before) {
|
|
56
63
|
ctx.body = (await hooks.before(ctx as BeforeHookContext)) ?? ctx.body;
|
|
57
64
|
addSpanEvent("hebo.hooks.before.completed");
|
|
@@ -64,10 +71,7 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
64
71
|
ctx.resolvedModelId =
|
|
65
72
|
(await hooks?.resolveModelId?.(ctx as ResolveModelHookContext)) ?? ctx.modelId;
|
|
66
73
|
logger.debug(`[embeddings] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
|
|
67
|
-
addSpanEvent("hebo.model.resolved"
|
|
68
|
-
"gen_ai.request.model": ctx.modelId ?? "",
|
|
69
|
-
"gen_ai.response.model": ctx.resolvedModelId ?? "",
|
|
70
|
-
});
|
|
74
|
+
addSpanEvent("hebo.model.resolved");
|
|
71
75
|
|
|
72
76
|
const override = await hooks?.resolveProvider?.(ctx as ResolveProviderHookContext);
|
|
73
77
|
ctx.provider =
|
|
@@ -82,14 +86,17 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
82
86
|
const embeddingModel = ctx.provider.embeddingModel(ctx.resolvedModelId);
|
|
83
87
|
ctx.resolvedProviderId = embeddingModel.provider;
|
|
84
88
|
logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
|
|
85
|
-
addSpanEvent("hebo.provider.resolved"
|
|
86
|
-
|
|
87
|
-
|
|
89
|
+
addSpanEvent("hebo.provider.resolved");
|
|
90
|
+
|
|
91
|
+
const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
|
|
92
|
+
const genAiGeneralAttrs = getEmbeddingsGeneralAttributes(ctx, genAiSignalLevel);
|
|
93
|
+
setSpanAttributes(genAiGeneralAttrs);
|
|
88
94
|
|
|
89
95
|
// Convert inputs to AI SDK call options.
|
|
90
96
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
91
97
|
logger.trace({ requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
92
98
|
addSpanEvent("hebo.options.prepared");
|
|
99
|
+
setSpanAttributes(getEmbeddingsRequestAttributes(inputs, genAiSignalLevel));
|
|
93
100
|
|
|
94
101
|
// Build middleware chain (model -> forward params -> provider).
|
|
95
102
|
const embeddingModelWithMiddleware = wrapEmbeddingModel({
|
|
@@ -108,14 +115,19 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
108
115
|
logger.trace({ requestId, result }, "[embeddings] AI SDK result");
|
|
109
116
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
110
117
|
|
|
118
|
+
// Transform result.
|
|
111
119
|
ctx.result = toEmbeddings(result, ctx.modelId);
|
|
112
120
|
addSpanEvent("hebo.result.transformed");
|
|
121
|
+
const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, genAiSignalLevel);
|
|
122
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
123
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
113
124
|
|
|
114
125
|
if (hooks?.after) {
|
|
115
126
|
ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
|
|
116
127
|
addSpanEvent("hebo.hooks.after.completed");
|
|
117
128
|
}
|
|
118
129
|
|
|
130
|
+
recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
|
|
119
131
|
return ctx.result;
|
|
120
132
|
};
|
|
121
133
|
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
+
|
|
3
|
+
import type { Embeddings, EmbeddingsInputs } from "./schema";
|
|
4
|
+
|
|
5
|
+
import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
|
|
6
|
+
|
|
7
|
+
export const getEmbeddingsGeneralAttributes = (
|
|
8
|
+
ctx: GatewayContext,
|
|
9
|
+
signalLevel?: TelemetrySignalLevel,
|
|
10
|
+
): Attributes => {
|
|
11
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
12
|
+
|
|
13
|
+
const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
|
|
14
|
+
|
|
15
|
+
return {
|
|
16
|
+
"gen_ai.operation.name": ctx.operation,
|
|
17
|
+
"gen_ai.request.model": requestModel,
|
|
18
|
+
"gen_ai.response.model": ctx.resolvedModelId,
|
|
19
|
+
"gen_ai.provider.name": ctx.resolvedProviderId,
|
|
20
|
+
};
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
export const getEmbeddingsRequestAttributes = (
|
|
24
|
+
inputs: EmbeddingsInputs,
|
|
25
|
+
signalLevel?: TelemetrySignalLevel,
|
|
26
|
+
): Attributes => {
|
|
27
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
28
|
+
|
|
29
|
+
const attrs: Attributes = {};
|
|
30
|
+
|
|
31
|
+
if (signalLevel !== "required") {
|
|
32
|
+
Object.assign(attrs, {
|
|
33
|
+
"gen_ai.embeddings.dimension.count": inputs.dimensions,
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
return attrs;
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
export const getEmbeddingsResponseAttributes = (
|
|
41
|
+
embeddings: Embeddings,
|
|
42
|
+
signalLevel?: TelemetrySignalLevel,
|
|
43
|
+
): Attributes => {
|
|
44
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
45
|
+
|
|
46
|
+
const attrs: Attributes = {};
|
|
47
|
+
|
|
48
|
+
if (signalLevel !== "required") {
|
|
49
|
+
Object.assign(attrs, {
|
|
50
|
+
"gen_ai.usage.input_tokens": embeddings.usage?.prompt_tokens,
|
|
51
|
+
"gen_ai.usage.total_tokens": embeddings.usage?.total_tokens,
|
|
52
|
+
});
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
return attrs;
|
|
56
|
+
};
|
|
@@ -7,15 +7,13 @@ import { toModels, toModel } from "./converters";
|
|
|
7
7
|
export const models = (config: GatewayConfig): Endpoint => {
|
|
8
8
|
// eslint-disable-next-line require-await
|
|
9
9
|
const handler = async (ctx: GatewayContext) => {
|
|
10
|
-
|
|
10
|
+
ctx.operation = "models";
|
|
11
11
|
|
|
12
|
-
if (!request || request.method !== "GET") {
|
|
12
|
+
if (!ctx.request || ctx.request.method !== "GET") {
|
|
13
13
|
throw new GatewayError("Method Not Allowed", 405);
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
const rawId = request.url.split("/models/", 2)[1]?.split("?", 1)[0];
|
|
16
|
+
const rawId = ctx.request.url.split("/models/", 2)[1]?.split("?", 1)[0];
|
|
19
17
|
if (!rawId) {
|
|
20
18
|
return toModels(ctx.models);
|
|
21
19
|
}
|
package/src/errors/gateway.ts
CHANGED
|
@@ -9,7 +9,6 @@ export class GatewayError extends Error {
|
|
|
9
9
|
super(msg);
|
|
10
10
|
this.status = status;
|
|
11
11
|
this.code = code ?? STATUS_CODE(status);
|
|
12
|
-
this.cause =
|
|
13
|
-
cause ?? (typeof error === "string" ? undefined : (error as { cause?: unknown }).cause);
|
|
12
|
+
this.cause = cause ?? (typeof error === "string" ? undefined : error);
|
|
14
13
|
}
|
|
15
14
|
}
|
package/src/errors/openai.ts
CHANGED
|
@@ -22,28 +22,35 @@ export class OpenAIError {
|
|
|
22
22
|
}
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
+
const mapType = (status: number) => (status < 500 ? "invalid_request_error" : "server_error");
|
|
26
|
+
|
|
27
|
+
const maybeMaskMessage = (meta: ReturnType<typeof getErrorMeta>, requestId?: string) => {
|
|
28
|
+
if (!(isProduction() && (meta.status >= 500 || meta.code.includes("UPSTREAM")))) {
|
|
29
|
+
return meta.message;
|
|
30
|
+
}
|
|
31
|
+
// FUTURE: always attach requestId to errors (masked and unmasked)
|
|
32
|
+
return `${STATUS_CODE(meta.status)} (${requestId ?? "see requestId in response headers"})`;
|
|
33
|
+
};
|
|
34
|
+
|
|
25
35
|
export function toOpenAIError(error: unknown): OpenAIError {
|
|
26
36
|
const meta = getErrorMeta(error);
|
|
27
|
-
|
|
37
|
+
|
|
38
|
+
return new OpenAIError(maybeMaskMessage(meta), mapType(meta.status), meta.code);
|
|
28
39
|
}
|
|
29
40
|
|
|
30
41
|
export function toOpenAIErrorResponse(error: unknown, responseInit?: ResponseInit) {
|
|
31
42
|
const meta = getErrorMeta(error);
|
|
32
43
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
...responseInit,
|
|
46
|
-
status: meta.status,
|
|
47
|
-
statusText: meta.code,
|
|
48
|
-
});
|
|
44
|
+
return toResponse(
|
|
45
|
+
new OpenAIError(
|
|
46
|
+
maybeMaskMessage(meta, resolveRequestId(responseInit)),
|
|
47
|
+
mapType(meta.status),
|
|
48
|
+
meta.code,
|
|
49
|
+
),
|
|
50
|
+
{
|
|
51
|
+
...responseInit,
|
|
52
|
+
status: meta.status,
|
|
53
|
+
statusText: meta.code,
|
|
54
|
+
},
|
|
55
|
+
);
|
|
49
56
|
}
|
package/src/errors/utils.ts
CHANGED
|
@@ -23,26 +23,24 @@ export const STATUS_CODE = (status: number) => {
|
|
|
23
23
|
return status >= 400 && status < 500 ? STATUS_CODES[400] : STATUS_CODES[500];
|
|
24
24
|
};
|
|
25
25
|
|
|
26
|
+
// FUTURE: always return a wrapped GatewayError?
|
|
26
27
|
export function getErrorMeta(error: unknown) {
|
|
27
28
|
const message = error instanceof Error ? error.message : String(error);
|
|
28
29
|
|
|
29
|
-
let code: string;
|
|
30
30
|
let status: number;
|
|
31
|
-
let
|
|
31
|
+
let code: string;
|
|
32
32
|
|
|
33
33
|
if (error instanceof GatewayError) {
|
|
34
|
-
({
|
|
34
|
+
({ status, code } = error);
|
|
35
35
|
} else {
|
|
36
36
|
const normalized = normalizeAiSdkError(error);
|
|
37
37
|
if (normalized) {
|
|
38
|
-
({
|
|
38
|
+
({ status, code } = normalized);
|
|
39
39
|
} else {
|
|
40
40
|
status = 500;
|
|
41
41
|
code = STATUS_CODE(status);
|
|
42
42
|
}
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
return { code, status, param, type, message };
|
|
45
|
+
return { status, code, message };
|
|
48
46
|
}
|
package/src/gateway.ts
CHANGED
|
@@ -30,7 +30,7 @@ export function gateway(config: GatewayConfig) {
|
|
|
30
30
|
pathname = pathname.slice(basePath.length);
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
-
logger.
|
|
33
|
+
logger.info(`[gateway] ${req.method} ${pathname}`);
|
|
34
34
|
for (const [route, endpoint] of routeEntries) {
|
|
35
35
|
if (pathname === route || pathname.startsWith(route + "/")) {
|
|
36
36
|
return endpoint.handler(req, state);
|