@hebo-ai/gateway 0.4.0-beta.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -5
- package/dist/config.js +21 -7
- package/dist/endpoints/chat-completions/converters.d.ts +3 -3
- package/dist/endpoints/chat-completions/converters.js +16 -8
- package/dist/endpoints/chat-completions/handler.js +34 -27
- package/dist/endpoints/chat-completions/otel.d.ts +6 -0
- package/dist/endpoints/chat-completions/otel.js +127 -0
- package/dist/endpoints/embeddings/handler.js +19 -10
- package/dist/endpoints/embeddings/otel.d.ts +6 -0
- package/dist/endpoints/embeddings/otel.js +35 -0
- package/dist/endpoints/models/handler.js +3 -4
- package/dist/errors/gateway.d.ts +1 -1
- package/dist/errors/gateway.js +3 -4
- package/dist/errors/openai.js +11 -12
- package/dist/errors/utils.d.ts +3 -4
- package/dist/errors/utils.js +6 -6
- package/dist/gateway.js +1 -1
- package/dist/lifecycle.js +71 -29
- package/dist/middleware/matcher.js +1 -1
- package/dist/models/amazon/presets.d.ts +37 -37
- package/dist/models/amazon/presets.js +1 -1
- package/dist/models/anthropic/presets.d.ts +56 -56
- package/dist/models/cohere/presets.d.ts +54 -54
- package/dist/models/cohere/presets.js +2 -2
- package/dist/models/google/presets.d.ts +31 -31
- package/dist/models/google/presets.js +1 -1
- package/dist/models/meta/presets.d.ts +42 -42
- package/dist/models/openai/presets.d.ts +96 -96
- package/dist/models/openai/presets.js +1 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/voyage/presets.d.ts +92 -92
- package/dist/models/voyage/presets.js +1 -1
- package/dist/providers/registry.js +2 -2
- package/dist/telemetry/baggage.d.ts +1 -0
- package/dist/telemetry/baggage.js +24 -0
- package/dist/telemetry/fetch.d.ts +2 -1
- package/dist/telemetry/fetch.js +13 -3
- package/dist/telemetry/gen-ai.d.ts +5 -0
- package/dist/telemetry/gen-ai.js +60 -0
- package/dist/telemetry/http.d.ts +3 -0
- package/dist/telemetry/http.js +57 -0
- package/dist/telemetry/memory.d.ts +2 -0
- package/dist/telemetry/memory.js +27 -0
- package/dist/telemetry/span.d.ts +6 -3
- package/dist/telemetry/span.js +24 -36
- package/dist/telemetry/stream.d.ts +3 -7
- package/dist/telemetry/stream.js +26 -29
- package/dist/types.d.ts +16 -15
- package/dist/utils/headers.d.ts +1 -1
- package/dist/utils/headers.js +7 -9
- package/dist/utils/request.d.ts +0 -4
- package/dist/utils/request.js +0 -9
- package/dist/utils/response.js +1 -1
- package/package.json +5 -2
- package/src/config.ts +28 -7
- package/src/endpoints/chat-completions/converters.ts +18 -11
- package/src/endpoints/chat-completions/handler.ts +46 -28
- package/src/endpoints/chat-completions/otel.ts +161 -0
- package/src/endpoints/embeddings/handler.test.ts +2 -2
- package/src/endpoints/embeddings/handler.ts +28 -10
- package/src/endpoints/embeddings/otel.ts +56 -0
- package/src/endpoints/models/handler.ts +3 -5
- package/src/errors/gateway.ts +5 -5
- package/src/errors/openai.ts +25 -17
- package/src/errors/utils.ts +6 -7
- package/src/gateway.ts +1 -1
- package/src/lifecycle.ts +85 -32
- package/src/middleware/matcher.ts +1 -1
- package/src/models/amazon/presets.ts +1 -1
- package/src/models/cohere/presets.ts +2 -2
- package/src/models/google/presets.ts +1 -1
- package/src/models/openai/presets.ts +1 -1
- package/src/models/types.ts +1 -1
- package/src/models/voyage/presets.ts +1 -1
- package/src/providers/registry.ts +2 -2
- package/src/telemetry/baggage.ts +27 -0
- package/src/telemetry/fetch.ts +15 -3
- package/src/telemetry/gen-ai.ts +88 -0
- package/src/telemetry/http.ts +65 -0
- package/src/telemetry/memory.ts +36 -0
- package/src/telemetry/span.ts +28 -40
- package/src/telemetry/stream.ts +36 -40
- package/src/types.ts +18 -18
- package/src/utils/headers.ts +8 -19
- package/src/utils/request.ts +0 -11
- package/src/utils/response.ts +1 -1
- package/dist/telemetry/otel.d.ts +0 -2
- package/dist/telemetry/otel.js +0 -50
- package/dist/telemetry/utils.d.ts +0 -4
- package/dist/telemetry/utils.js +0 -223
- package/src/telemetry/otel.ts +0 -91
- package/src/telemetry/utils.ts +0 -273
package/src/config.ts
CHANGED
|
@@ -1,16 +1,21 @@
|
|
|
1
1
|
import { isLogger, logger, setLoggerInstance } from "./logger";
|
|
2
2
|
import { createDefaultLogger } from "./logger/default";
|
|
3
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
kParsed,
|
|
5
|
+
type GatewayConfig,
|
|
6
|
+
type GatewayConfigParsed,
|
|
7
|
+
type TelemetrySignalLevel,
|
|
8
|
+
} from "./types";
|
|
4
9
|
|
|
5
10
|
export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
|
|
6
|
-
// If it has been parsed before, just return
|
|
11
|
+
// If it has been parsed before, just return.
|
|
7
12
|
if (kParsed in config) return config as GatewayConfigParsed;
|
|
8
13
|
|
|
9
14
|
const providers = config.providers ?? {};
|
|
10
15
|
const parsedProviders = {} as typeof providers;
|
|
11
16
|
const models = config.models ?? {};
|
|
12
17
|
|
|
13
|
-
// Set the global logger instance
|
|
18
|
+
// Set the global logger instance.
|
|
14
19
|
if (config.logger === undefined) {
|
|
15
20
|
setLoggerInstance(createDefaultLogger({}));
|
|
16
21
|
} else if (config.logger !== null) {
|
|
@@ -23,7 +28,7 @@ export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
|
|
|
23
28
|
);
|
|
24
29
|
}
|
|
25
30
|
|
|
26
|
-
// Strip providers that are not configured
|
|
31
|
+
// Strip providers that are not configured.
|
|
27
32
|
for (const id in providers) {
|
|
28
33
|
const provider = providers[id];
|
|
29
34
|
if (provider === undefined) {
|
|
@@ -37,7 +42,7 @@ export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
|
|
|
37
42
|
throw new Error("No providers configured (config.providers is empty)");
|
|
38
43
|
}
|
|
39
44
|
|
|
40
|
-
// Strip providers that are not configured from models
|
|
45
|
+
// Strip providers that are not configured from models.
|
|
41
46
|
const parsedModels = {} as typeof models;
|
|
42
47
|
const warnings = new Set<string>();
|
|
43
48
|
for (const id in models) {
|
|
@@ -60,12 +65,28 @@ export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
|
|
|
60
65
|
throw new Error("No models configured (config.models is empty)");
|
|
61
66
|
}
|
|
62
67
|
|
|
68
|
+
// Default for the telemetry settings.
|
|
69
|
+
const telemetryEnabled = config.telemetry?.enabled ?? false;
|
|
70
|
+
const telemetrySignals: Record<"http" | "gen_ai" | "hebo", TelemetrySignalLevel> =
|
|
71
|
+
telemetryEnabled
|
|
72
|
+
? {
|
|
73
|
+
http: config.telemetry?.signals?.http ?? "recommended",
|
|
74
|
+
gen_ai: config.telemetry?.signals?.gen_ai ?? "full",
|
|
75
|
+
hebo: config.telemetry?.signals?.hebo ?? "off",
|
|
76
|
+
}
|
|
77
|
+
: {
|
|
78
|
+
http: "off",
|
|
79
|
+
gen_ai: "off",
|
|
80
|
+
hebo: "off",
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
// Return parsed config.
|
|
63
84
|
return {
|
|
64
85
|
...config,
|
|
65
|
-
logger: config.logger,
|
|
66
86
|
telemetry: {
|
|
67
87
|
...config.telemetry,
|
|
68
|
-
enabled:
|
|
88
|
+
enabled: telemetryEnabled,
|
|
89
|
+
signals: telemetrySignals,
|
|
69
90
|
},
|
|
70
91
|
providers: parsedProviders,
|
|
71
92
|
models: parsedModels,
|
|
@@ -185,6 +185,7 @@ export function fromChatCompletionsAssistantMessage(
|
|
|
185
185
|
|
|
186
186
|
if (tool_calls?.length) {
|
|
187
187
|
for (const tc of tool_calls) {
|
|
188
|
+
// eslint-disable-next-line no-shadow
|
|
188
189
|
const { id, function: fn, extra_content } = tc;
|
|
189
190
|
const out: ToolCallPart = {
|
|
190
191
|
type: "tool-call",
|
|
@@ -404,11 +405,12 @@ export function toChatCompletionsResponse(
|
|
|
404
405
|
return toResponse(toChatCompletions(result, model), responseInit);
|
|
405
406
|
}
|
|
406
407
|
|
|
407
|
-
export function toChatCompletionsStream(
|
|
408
|
+
export function toChatCompletionsStream<E extends boolean = false>(
|
|
408
409
|
result: StreamTextResult<ToolSet, Output.Output>,
|
|
409
410
|
model: string,
|
|
410
|
-
|
|
411
|
-
|
|
411
|
+
wrapErrors?: E,
|
|
412
|
+
): ReadableStream<ChatCompletionsChunk | (E extends true ? OpenAIError : Error)> {
|
|
413
|
+
return result.fullStream.pipeThrough(new ChatCompletionsStream(model, wrapErrors));
|
|
412
414
|
}
|
|
413
415
|
|
|
414
416
|
export function toChatCompletionsStreamResponse(
|
|
@@ -416,14 +418,14 @@ export function toChatCompletionsStreamResponse(
|
|
|
416
418
|
model: string,
|
|
417
419
|
responseInit?: ResponseInit,
|
|
418
420
|
): Response {
|
|
419
|
-
return toResponse(toChatCompletionsStream(result, model), responseInit);
|
|
421
|
+
return toResponse(toChatCompletionsStream(result, model, true), responseInit);
|
|
420
422
|
}
|
|
421
423
|
|
|
422
|
-
export class ChatCompletionsStream extends TransformStream<
|
|
424
|
+
export class ChatCompletionsStream<E extends boolean = false> extends TransformStream<
|
|
423
425
|
TextStreamPart<ToolSet>,
|
|
424
|
-
ChatCompletionsChunk | OpenAIError
|
|
426
|
+
ChatCompletionsChunk | (E extends true ? OpenAIError : Error)
|
|
425
427
|
> {
|
|
426
|
-
constructor(model: string) {
|
|
428
|
+
constructor(model: string, wrapErrors?: E) {
|
|
427
429
|
const streamId = `chatcmpl-${crypto.randomUUID()}`;
|
|
428
430
|
const creationTime = Math.floor(Date.now() / 1000);
|
|
429
431
|
let toolCallIndexCounter = 0;
|
|
@@ -534,10 +536,15 @@ export class ChatCompletionsStream extends TransformStream<
|
|
|
534
536
|
}
|
|
535
537
|
|
|
536
538
|
case "error": {
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
539
|
+
let err: Error | OpenAIError;
|
|
540
|
+
if (wrapErrors) {
|
|
541
|
+
err = toOpenAIError(part.error);
|
|
542
|
+
} else if (part.error instanceof Error) {
|
|
543
|
+
err = part.error;
|
|
544
|
+
} else {
|
|
545
|
+
err = new Error(String(part.error));
|
|
546
|
+
}
|
|
547
|
+
controller.enqueue(err as E extends true ? OpenAIError : Error);
|
|
541
548
|
}
|
|
542
549
|
}
|
|
543
550
|
},
|
|
@@ -23,16 +23,28 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
23
23
|
import { logger } from "../../logger";
|
|
24
24
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
25
25
|
import { resolveProvider } from "../../providers/registry";
|
|
26
|
-
import {
|
|
26
|
+
import {
|
|
27
|
+
recordRequestDuration,
|
|
28
|
+
recordTimePerOutputToken,
|
|
29
|
+
recordTokenUsage,
|
|
30
|
+
} from "../../telemetry/gen-ai";
|
|
31
|
+
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
27
32
|
import { resolveRequestId } from "../../utils/headers";
|
|
28
33
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
29
34
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
35
|
+
import {
|
|
36
|
+
getChatGeneralAttributes,
|
|
37
|
+
getChatRequestAttributes,
|
|
38
|
+
getChatResponseAttributes,
|
|
39
|
+
} from "./otel";
|
|
30
40
|
import { ChatCompletionsBodySchema } from "./schema";
|
|
31
41
|
|
|
32
42
|
export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
33
43
|
const hooks = config.hooks;
|
|
34
44
|
|
|
35
45
|
const handler = async (ctx: GatewayContext) => {
|
|
46
|
+
const start = performance.now();
|
|
47
|
+
ctx.operation = "chat";
|
|
36
48
|
addSpanEvent("hebo.handler.started");
|
|
37
49
|
|
|
38
50
|
// Guard: enforce HTTP method early.
|
|
@@ -52,12 +64,12 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
52
64
|
|
|
53
65
|
const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
|
|
54
66
|
if (!parsed.success) {
|
|
55
|
-
|
|
67
|
+
// FUTURE: consider adding body shape to metadata
|
|
68
|
+
throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
|
|
56
69
|
}
|
|
57
70
|
ctx.body = parsed.data;
|
|
58
71
|
addSpanEvent("hebo.request.parsed");
|
|
59
72
|
|
|
60
|
-
ctx.operation = "chat";
|
|
61
73
|
if (hooks?.before) {
|
|
62
74
|
ctx.body = (await hooks.before(ctx as BeforeHookContext)) ?? ctx.body;
|
|
63
75
|
addSpanEvent("hebo.hooks.before.completed");
|
|
@@ -70,10 +82,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
70
82
|
ctx.resolvedModelId =
|
|
71
83
|
(await hooks?.resolveModelId?.(ctx as ResolveModelHookContext)) ?? ctx.modelId;
|
|
72
84
|
logger.debug(`[chat] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
|
|
73
|
-
addSpanEvent("hebo.model.resolved"
|
|
74
|
-
"gen_ai.request.model": ctx.modelId ?? "",
|
|
75
|
-
"gen_ai.response.model": ctx.resolvedModelId ?? "",
|
|
76
|
-
});
|
|
85
|
+
addSpanEvent("hebo.model.resolved");
|
|
77
86
|
|
|
78
87
|
const override = await hooks?.resolveProvider?.(ctx as ResolveProviderHookContext);
|
|
79
88
|
ctx.provider =
|
|
@@ -88,7 +97,11 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
88
97
|
const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
|
|
89
98
|
ctx.resolvedProviderId = languageModel.provider;
|
|
90
99
|
logger.debug(`[chat] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
|
|
91
|
-
addSpanEvent("hebo.provider.resolved"
|
|
100
|
+
addSpanEvent("hebo.provider.resolved");
|
|
101
|
+
|
|
102
|
+
const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
|
|
103
|
+
const genAiGeneralAttrs = getChatGeneralAttributes(ctx, genAiSignalLevel);
|
|
104
|
+
setSpanAttributes(genAiGeneralAttrs);
|
|
92
105
|
|
|
93
106
|
// Convert inputs to AI SDK call options.
|
|
94
107
|
const textOptions = convertToTextCallOptions(inputs);
|
|
@@ -100,6 +113,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
100
113
|
"[chat] AI SDK options",
|
|
101
114
|
);
|
|
102
115
|
addSpanEvent("hebo.options.prepared");
|
|
116
|
+
setSpanAttributes(getChatRequestAttributes(inputs, genAiSignalLevel));
|
|
103
117
|
|
|
104
118
|
// Build middleware chain (model -> forward params -> provider).
|
|
105
119
|
const languageModelWithMiddleware = wrapLanguageModel({
|
|
@@ -113,27 +127,27 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
113
127
|
const result = streamText({
|
|
114
128
|
model: languageModelWithMiddleware,
|
|
115
129
|
headers: prepareForwardHeaders(ctx.request),
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
120
|
-
logger.error({
|
|
121
|
-
requestId,
|
|
122
|
-
err,
|
|
123
|
-
});
|
|
124
|
-
throw error;
|
|
130
|
+
abortSignal: ctx.request.signal,
|
|
131
|
+
timeout: {
|
|
132
|
+
totalMs: 5 * 60 * 1000,
|
|
125
133
|
},
|
|
126
134
|
onAbort: () => {
|
|
127
|
-
throw new DOMException("
|
|
135
|
+
throw new DOMException("The operation was aborted.", "AbortError");
|
|
128
136
|
},
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
137
|
+
onError: () => {},
|
|
138
|
+
onFinish: (res) => {
|
|
139
|
+
addSpanEvent("hebo.ai-sdk.completed");
|
|
140
|
+
const streamResult = toChatCompletions(
|
|
141
|
+
res as unknown as GenerateTextResult<ToolSet, Output.Output>,
|
|
132
142
|
ctx.resolvedModelId!,
|
|
133
143
|
);
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
144
|
+
addSpanEvent("hebo.result.transformed");
|
|
145
|
+
|
|
146
|
+
const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
|
|
147
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
148
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
149
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
150
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
137
151
|
},
|
|
138
152
|
experimental_include: {
|
|
139
153
|
requestBody: false,
|
|
@@ -141,10 +155,8 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
141
155
|
includeRawChunks: false,
|
|
142
156
|
...textOptions,
|
|
143
157
|
});
|
|
144
|
-
addSpanEvent("hebo.ai-sdk.completed");
|
|
145
158
|
|
|
146
159
|
ctx.result = toChatCompletionsStream(result, ctx.resolvedModelId);
|
|
147
|
-
addSpanEvent("hebo.result.transformed");
|
|
148
160
|
|
|
149
161
|
if (hooks?.after) {
|
|
150
162
|
ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
|
|
@@ -158,26 +170,32 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
158
170
|
const result = await generateText({
|
|
159
171
|
model: languageModelWithMiddleware,
|
|
160
172
|
headers: prepareForwardHeaders(ctx.request),
|
|
161
|
-
// FUTURE: currently can't tell whether upstream or downstream abort
|
|
162
173
|
abortSignal: ctx.request.signal,
|
|
174
|
+
timeout: 5 * 60 * 1000,
|
|
163
175
|
experimental_include: {
|
|
164
176
|
requestBody: false,
|
|
165
177
|
responseBody: false,
|
|
166
178
|
},
|
|
167
|
-
timeout: 5 * 60 * 1000,
|
|
168
179
|
...textOptions,
|
|
169
180
|
});
|
|
170
181
|
logger.trace({ requestId, result }, "[chat] AI SDK result");
|
|
171
182
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
172
183
|
|
|
184
|
+
// Transform result.
|
|
173
185
|
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
174
186
|
addSpanEvent("hebo.result.transformed");
|
|
175
187
|
|
|
188
|
+
const genAiResponseAttrs = getChatResponseAttributes(ctx.result, genAiSignalLevel);
|
|
189
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
190
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
191
|
+
|
|
176
192
|
if (hooks?.after) {
|
|
177
193
|
ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
|
|
178
194
|
addSpanEvent("hebo.hooks.after.completed");
|
|
179
195
|
}
|
|
180
196
|
|
|
197
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
198
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
181
199
|
return ctx.result;
|
|
182
200
|
};
|
|
183
201
|
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
+
|
|
3
|
+
import type {
|
|
4
|
+
ChatCompletions,
|
|
5
|
+
ChatCompletionsBody,
|
|
6
|
+
ChatCompletionsContentPart,
|
|
7
|
+
ChatCompletionsMessage,
|
|
8
|
+
} from "./schema";
|
|
9
|
+
|
|
10
|
+
import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
|
|
11
|
+
|
|
12
|
+
const toTextPart = (content: string): Record<string, unknown> => ({ type: "text", content });
|
|
13
|
+
|
|
14
|
+
const toMessageParts = (message: ChatCompletionsMessage): Record<string, unknown>[] => {
|
|
15
|
+
if (message.role === "assistant") {
|
|
16
|
+
const parts: Record<string, unknown>[] = [];
|
|
17
|
+
if (typeof message.content === "string") parts.push(toTextPart(message.content));
|
|
18
|
+
if (Array.isArray(message.tool_calls)) {
|
|
19
|
+
for (const call of message.tool_calls) {
|
|
20
|
+
parts.push({
|
|
21
|
+
type: "tool_call",
|
|
22
|
+
id: call.id,
|
|
23
|
+
name: call.function.name,
|
|
24
|
+
arguments: call.function.arguments,
|
|
25
|
+
});
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
return parts;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
if (message.role === "tool") {
|
|
32
|
+
return [{ type: "tool_call_response", id: message.tool_call_id, content: message.content }];
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
if (message.role === "user") {
|
|
36
|
+
const parts: Record<string, unknown>[] = [];
|
|
37
|
+
if (typeof message.content === "string") parts.push(toTextPart(message.content));
|
|
38
|
+
if (Array.isArray(message.content)) {
|
|
39
|
+
for (const part of message.content as ChatCompletionsContentPart[]) {
|
|
40
|
+
if (part.type === "text") {
|
|
41
|
+
parts.push(toTextPart(part.text));
|
|
42
|
+
} else if (part.type === "image_url") {
|
|
43
|
+
parts.push({ type: "image", content: part.image_url.url });
|
|
44
|
+
} else {
|
|
45
|
+
parts.push({
|
|
46
|
+
type: "file",
|
|
47
|
+
// FUTURE: optionally expose safe metadata without raw binary payloads.
|
|
48
|
+
content: part.file.filename ?? "[REDACTED_BINARY_DATA]",
|
|
49
|
+
media_type: part.file.media_type,
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
return parts;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// FUTURE: remove once Langfuse supports gen_ai.system_instructions
|
|
58
|
+
if (message.role === "system") {
|
|
59
|
+
return [toTextPart(message.content)];
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
return [];
|
|
63
|
+
};
|
|
64
|
+
|
|
65
|
+
export const getChatGeneralAttributes = (
|
|
66
|
+
ctx: GatewayContext,
|
|
67
|
+
signalLevel?: TelemetrySignalLevel,
|
|
68
|
+
): Attributes => {
|
|
69
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
70
|
+
|
|
71
|
+
const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
|
|
72
|
+
|
|
73
|
+
return {
|
|
74
|
+
"gen_ai.operation.name": ctx.operation,
|
|
75
|
+
"gen_ai.request.model": requestModel,
|
|
76
|
+
"gen_ai.response.model": ctx.resolvedModelId,
|
|
77
|
+
"gen_ai.provider.name": ctx.resolvedProviderId,
|
|
78
|
+
};
|
|
79
|
+
};
|
|
80
|
+
|
|
81
|
+
export const getChatRequestAttributes = (
|
|
82
|
+
inputs: ChatCompletionsBody,
|
|
83
|
+
signalLevel?: TelemetrySignalLevel,
|
|
84
|
+
): Attributes => {
|
|
85
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
86
|
+
|
|
87
|
+
const attrs: Attributes = {};
|
|
88
|
+
|
|
89
|
+
if (inputs.seed !== undefined) {
|
|
90
|
+
Object.assign(attrs, { "gen_ai.request.seed": inputs.seed });
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
if (signalLevel !== "required") {
|
|
94
|
+
Object.assign(attrs, {
|
|
95
|
+
"gen_ai.request.stream": inputs.stream,
|
|
96
|
+
"gen_ai.request.frequency_penalty": inputs.frequency_penalty,
|
|
97
|
+
"gen_ai.request.max_tokens": inputs.max_completion_tokens,
|
|
98
|
+
"gen_ai.request.presence_penalty": inputs.presence_penalty,
|
|
99
|
+
"gen_ai.request.stop_sequences": inputs.stop
|
|
100
|
+
? Array.isArray(inputs.stop)
|
|
101
|
+
? inputs.stop
|
|
102
|
+
: [inputs.stop]
|
|
103
|
+
: undefined,
|
|
104
|
+
"gen_ai.request.temperature": inputs.temperature,
|
|
105
|
+
"gen_ai.request.top_p": inputs.top_p,
|
|
106
|
+
});
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
if (signalLevel === "full") {
|
|
110
|
+
Object.assign(attrs, {
|
|
111
|
+
// FUTURE: move system instructions from messages to here
|
|
112
|
+
// blocker: https://github.com/langfuse/langfuse/issues/11607
|
|
113
|
+
// "gen_ai.system_instructions": inputs.messages
|
|
114
|
+
// .filter((m) => m.role === "system")
|
|
115
|
+
// .map((m) => JSON.stringify(toTextPart(m.content))),
|
|
116
|
+
"gen_ai.input.messages": inputs.messages
|
|
117
|
+
//.filter((m) => m.role !== "system")
|
|
118
|
+
.map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
|
|
119
|
+
"gen_ai.tool.definitions": JSON.stringify(inputs.tools),
|
|
120
|
+
});
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
return attrs;
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
export const getChatResponseAttributes = (
|
|
127
|
+
completions: ChatCompletions,
|
|
128
|
+
signalLevel?: TelemetrySignalLevel,
|
|
129
|
+
): Attributes => {
|
|
130
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
131
|
+
|
|
132
|
+
const attrs: Attributes = {
|
|
133
|
+
"gen_ai.response.id": completions.id,
|
|
134
|
+
};
|
|
135
|
+
|
|
136
|
+
if (signalLevel !== "required") {
|
|
137
|
+
Object.assign(attrs, {
|
|
138
|
+
"gen_ai.response.finish_reasons": completions.choices?.map((c) => c.finish_reason),
|
|
139
|
+
"gen_ai.usage.total_tokens": completions.usage?.total_tokens,
|
|
140
|
+
"gen_ai.usage.input_tokens": completions.usage?.prompt_tokens,
|
|
141
|
+
"gen_ai.usage.cached_tokens": completions.usage?.prompt_tokens_details?.cached_tokens,
|
|
142
|
+
"gen_ai.usage.output_tokens": completions.usage?.completion_tokens,
|
|
143
|
+
"gen_ai.usage.reasoning_tokens":
|
|
144
|
+
completions.usage?.completion_tokens_details?.reasoning_tokens,
|
|
145
|
+
});
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
if (signalLevel === "full") {
|
|
149
|
+
Object.assign(attrs, {
|
|
150
|
+
"gen_ai.output.messages": completions.choices?.map((c) =>
|
|
151
|
+
JSON.stringify({
|
|
152
|
+
role: c.message.role,
|
|
153
|
+
parts: toMessageParts(c.message),
|
|
154
|
+
finish_reason: c.finish_reason,
|
|
155
|
+
}),
|
|
156
|
+
),
|
|
157
|
+
});
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
return attrs;
|
|
161
|
+
};
|
|
@@ -45,7 +45,7 @@ describe("Embeddings Handler", () => {
|
|
|
45
45
|
models: {
|
|
46
46
|
"text-embedding-3-small": {
|
|
47
47
|
name: "OpenAI Embedding Model",
|
|
48
|
-
modalities: { input: ["text"], output: ["
|
|
48
|
+
modalities: { input: ["text"], output: ["embedding"] },
|
|
49
49
|
providers: ["openai"],
|
|
50
50
|
},
|
|
51
51
|
"gpt-oss-20b": {
|
|
@@ -68,7 +68,7 @@ describe("Embeddings Handler", () => {
|
|
|
68
68
|
expect(data).toMatchObject({
|
|
69
69
|
error: {
|
|
70
70
|
code: "model_unsupported_operation",
|
|
71
|
-
message: "Model 'gpt-oss-20b' does not support '
|
|
71
|
+
message: "Model 'gpt-oss-20b' does not support 'embedding' output",
|
|
72
72
|
type: "invalid_request_error",
|
|
73
73
|
},
|
|
74
74
|
});
|
|
@@ -16,16 +16,28 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
16
16
|
import { logger } from "../../logger";
|
|
17
17
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
18
18
|
import { resolveProvider } from "../../providers/registry";
|
|
19
|
-
import {
|
|
19
|
+
import {
|
|
20
|
+
recordRequestDuration,
|
|
21
|
+
recordTimePerOutputToken,
|
|
22
|
+
recordTokenUsage,
|
|
23
|
+
} from "../../telemetry/gen-ai";
|
|
24
|
+
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
20
25
|
import { resolveRequestId } from "../../utils/headers";
|
|
21
26
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
22
27
|
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
28
|
+
import {
|
|
29
|
+
getEmbeddingsGeneralAttributes,
|
|
30
|
+
getEmbeddingsRequestAttributes,
|
|
31
|
+
getEmbeddingsResponseAttributes,
|
|
32
|
+
} from "./otel";
|
|
23
33
|
import { EmbeddingsBodySchema } from "./schema";
|
|
24
34
|
|
|
25
35
|
export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
26
36
|
const hooks = config.hooks;
|
|
27
37
|
|
|
28
38
|
const handler = async (ctx: GatewayContext) => {
|
|
39
|
+
const start = performance.now();
|
|
40
|
+
ctx.operation = "embeddings";
|
|
29
41
|
addSpanEvent("hebo.handler.started");
|
|
30
42
|
|
|
31
43
|
// Guard: enforce HTTP method early.
|
|
@@ -45,12 +57,12 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
45
57
|
|
|
46
58
|
const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
|
|
47
59
|
if (!parsed.success) {
|
|
48
|
-
|
|
60
|
+
// FUTURE: consider adding body shape to metadata
|
|
61
|
+
throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
|
|
49
62
|
}
|
|
50
63
|
ctx.body = parsed.data;
|
|
51
64
|
addSpanEvent("hebo.request.parsed");
|
|
52
65
|
|
|
53
|
-
ctx.operation = "embeddings";
|
|
54
66
|
if (hooks?.before) {
|
|
55
67
|
ctx.body = (await hooks.before(ctx as BeforeHookContext)) ?? ctx.body;
|
|
56
68
|
addSpanEvent("hebo.hooks.before.completed");
|
|
@@ -63,10 +75,7 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
63
75
|
ctx.resolvedModelId =
|
|
64
76
|
(await hooks?.resolveModelId?.(ctx as ResolveModelHookContext)) ?? ctx.modelId;
|
|
65
77
|
logger.debug(`[embeddings] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
|
|
66
|
-
addSpanEvent("hebo.model.resolved"
|
|
67
|
-
"gen_ai.request.model": ctx.modelId ?? "",
|
|
68
|
-
"gen_ai.response.model": ctx.resolvedModelId ?? "",
|
|
69
|
-
});
|
|
78
|
+
addSpanEvent("hebo.model.resolved");
|
|
70
79
|
|
|
71
80
|
const override = await hooks?.resolveProvider?.(ctx as ResolveProviderHookContext);
|
|
72
81
|
ctx.provider =
|
|
@@ -81,14 +90,17 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
81
90
|
const embeddingModel = ctx.provider.embeddingModel(ctx.resolvedModelId);
|
|
82
91
|
ctx.resolvedProviderId = embeddingModel.provider;
|
|
83
92
|
logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
|
|
84
|
-
addSpanEvent("hebo.provider.resolved"
|
|
85
|
-
|
|
86
|
-
|
|
93
|
+
addSpanEvent("hebo.provider.resolved");
|
|
94
|
+
|
|
95
|
+
const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
|
|
96
|
+
const genAiGeneralAttrs = getEmbeddingsGeneralAttributes(ctx, genAiSignalLevel);
|
|
97
|
+
setSpanAttributes(genAiGeneralAttrs);
|
|
87
98
|
|
|
88
99
|
// Convert inputs to AI SDK call options.
|
|
89
100
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
90
101
|
logger.trace({ requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
91
102
|
addSpanEvent("hebo.options.prepared");
|
|
103
|
+
setSpanAttributes(getEmbeddingsRequestAttributes(inputs, genAiSignalLevel));
|
|
92
104
|
|
|
93
105
|
// Build middleware chain (model -> forward params -> provider).
|
|
94
106
|
const embeddingModelWithMiddleware = wrapEmbeddingModel({
|
|
@@ -107,14 +119,20 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
107
119
|
logger.trace({ requestId, result }, "[embeddings] AI SDK result");
|
|
108
120
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
109
121
|
|
|
122
|
+
// Transform result.
|
|
110
123
|
ctx.result = toEmbeddings(result, ctx.modelId);
|
|
111
124
|
addSpanEvent("hebo.result.transformed");
|
|
125
|
+
const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, genAiSignalLevel);
|
|
126
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
127
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
112
128
|
|
|
113
129
|
if (hooks?.after) {
|
|
114
130
|
ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
|
|
115
131
|
addSpanEvent("hebo.hooks.after.completed");
|
|
116
132
|
}
|
|
117
133
|
|
|
134
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
135
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
118
136
|
return ctx.result;
|
|
119
137
|
};
|
|
120
138
|
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
+
|
|
3
|
+
import type { Embeddings, EmbeddingsInputs } from "./schema";
|
|
4
|
+
|
|
5
|
+
import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
|
|
6
|
+
|
|
7
|
+
export const getEmbeddingsGeneralAttributes = (
|
|
8
|
+
ctx: GatewayContext,
|
|
9
|
+
signalLevel?: TelemetrySignalLevel,
|
|
10
|
+
): Attributes => {
|
|
11
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
12
|
+
|
|
13
|
+
const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
|
|
14
|
+
|
|
15
|
+
return {
|
|
16
|
+
"gen_ai.operation.name": ctx.operation,
|
|
17
|
+
"gen_ai.request.model": requestModel,
|
|
18
|
+
"gen_ai.response.model": ctx.resolvedModelId,
|
|
19
|
+
"gen_ai.provider.name": ctx.resolvedProviderId,
|
|
20
|
+
};
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
export const getEmbeddingsRequestAttributes = (
|
|
24
|
+
inputs: EmbeddingsInputs,
|
|
25
|
+
signalLevel?: TelemetrySignalLevel,
|
|
26
|
+
): Attributes => {
|
|
27
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
28
|
+
|
|
29
|
+
const attrs: Attributes = {};
|
|
30
|
+
|
|
31
|
+
if (signalLevel !== "required") {
|
|
32
|
+
Object.assign(attrs, {
|
|
33
|
+
"gen_ai.embeddings.dimension.count": inputs.dimensions,
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
return attrs;
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
export const getEmbeddingsResponseAttributes = (
|
|
41
|
+
embeddings: Embeddings,
|
|
42
|
+
signalLevel?: TelemetrySignalLevel,
|
|
43
|
+
): Attributes => {
|
|
44
|
+
if (!signalLevel || signalLevel === "off") return {};
|
|
45
|
+
|
|
46
|
+
const attrs: Attributes = {};
|
|
47
|
+
|
|
48
|
+
if (signalLevel !== "required") {
|
|
49
|
+
Object.assign(attrs, {
|
|
50
|
+
"gen_ai.usage.input_tokens": embeddings.usage?.prompt_tokens,
|
|
51
|
+
"gen_ai.usage.total_tokens": embeddings.usage?.total_tokens,
|
|
52
|
+
});
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
return attrs;
|
|
56
|
+
};
|
|
@@ -7,15 +7,13 @@ import { toModels, toModel } from "./converters";
|
|
|
7
7
|
export const models = (config: GatewayConfig): Endpoint => {
|
|
8
8
|
// eslint-disable-next-line require-await
|
|
9
9
|
const handler = async (ctx: GatewayContext) => {
|
|
10
|
-
|
|
10
|
+
ctx.operation = "models";
|
|
11
11
|
|
|
12
|
-
if (!request || request.method !== "GET") {
|
|
12
|
+
if (!ctx.request || ctx.request.method !== "GET") {
|
|
13
13
|
throw new GatewayError("Method Not Allowed", 405);
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
const rawId = request.url.split("/models/", 2)[1]?.split("?", 1)[0];
|
|
16
|
+
const rawId = ctx.request.url.split("/models/", 2)[1]?.split("?", 1)[0];
|
|
19
17
|
if (!rawId) {
|
|
20
18
|
return toModels(ctx.models);
|
|
21
19
|
}
|
package/src/errors/gateway.ts
CHANGED
|
@@ -4,12 +4,12 @@ export class GatewayError extends Error {
|
|
|
4
4
|
readonly status: number;
|
|
5
5
|
readonly code: string;
|
|
6
6
|
|
|
7
|
-
constructor(error:
|
|
8
|
-
const
|
|
9
|
-
super(
|
|
7
|
+
constructor(error: unknown, status: number, code?: string, cause?: unknown) {
|
|
8
|
+
const isError = error instanceof Error;
|
|
9
|
+
super(isError ? error.message : String(error));
|
|
10
|
+
this.cause = cause ?? (isError ? error : undefined);
|
|
11
|
+
|
|
10
12
|
this.status = status;
|
|
11
13
|
this.code = code ?? STATUS_CODE(status);
|
|
12
|
-
this.cause =
|
|
13
|
-
cause ?? (typeof error === "string" ? undefined : (error as { cause?: unknown }).cause);
|
|
14
14
|
}
|
|
15
15
|
}
|