@hebo-ai/gateway 0.4.0-beta.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/README.md +40 -5
  2. package/dist/config.js +21 -7
  3. package/dist/endpoints/chat-completions/converters.d.ts +3 -3
  4. package/dist/endpoints/chat-completions/converters.js +16 -8
  5. package/dist/endpoints/chat-completions/handler.js +34 -27
  6. package/dist/endpoints/chat-completions/otel.d.ts +6 -0
  7. package/dist/endpoints/chat-completions/otel.js +127 -0
  8. package/dist/endpoints/embeddings/handler.js +19 -10
  9. package/dist/endpoints/embeddings/otel.d.ts +6 -0
  10. package/dist/endpoints/embeddings/otel.js +35 -0
  11. package/dist/endpoints/models/handler.js +3 -4
  12. package/dist/errors/gateway.d.ts +1 -1
  13. package/dist/errors/gateway.js +3 -4
  14. package/dist/errors/openai.js +11 -12
  15. package/dist/errors/utils.d.ts +3 -4
  16. package/dist/errors/utils.js +6 -6
  17. package/dist/gateway.js +1 -1
  18. package/dist/lifecycle.js +71 -29
  19. package/dist/middleware/matcher.js +1 -1
  20. package/dist/models/amazon/presets.d.ts +37 -37
  21. package/dist/models/amazon/presets.js +1 -1
  22. package/dist/models/anthropic/presets.d.ts +56 -56
  23. package/dist/models/cohere/presets.d.ts +54 -54
  24. package/dist/models/cohere/presets.js +2 -2
  25. package/dist/models/google/presets.d.ts +31 -31
  26. package/dist/models/google/presets.js +1 -1
  27. package/dist/models/meta/presets.d.ts +42 -42
  28. package/dist/models/openai/presets.d.ts +96 -96
  29. package/dist/models/openai/presets.js +1 -1
  30. package/dist/models/types.d.ts +1 -1
  31. package/dist/models/voyage/presets.d.ts +92 -92
  32. package/dist/models/voyage/presets.js +1 -1
  33. package/dist/providers/registry.js +2 -2
  34. package/dist/telemetry/baggage.d.ts +1 -0
  35. package/dist/telemetry/baggage.js +24 -0
  36. package/dist/telemetry/fetch.d.ts +2 -1
  37. package/dist/telemetry/fetch.js +13 -3
  38. package/dist/telemetry/gen-ai.d.ts +5 -0
  39. package/dist/telemetry/gen-ai.js +60 -0
  40. package/dist/telemetry/http.d.ts +3 -0
  41. package/dist/telemetry/http.js +57 -0
  42. package/dist/telemetry/memory.d.ts +2 -0
  43. package/dist/telemetry/memory.js +27 -0
  44. package/dist/telemetry/span.d.ts +6 -3
  45. package/dist/telemetry/span.js +24 -36
  46. package/dist/telemetry/stream.d.ts +3 -7
  47. package/dist/telemetry/stream.js +26 -29
  48. package/dist/types.d.ts +16 -15
  49. package/dist/utils/headers.d.ts +1 -1
  50. package/dist/utils/headers.js +7 -9
  51. package/dist/utils/request.d.ts +0 -4
  52. package/dist/utils/request.js +0 -9
  53. package/dist/utils/response.js +1 -1
  54. package/package.json +5 -2
  55. package/src/config.ts +28 -7
  56. package/src/endpoints/chat-completions/converters.ts +18 -11
  57. package/src/endpoints/chat-completions/handler.ts +46 -28
  58. package/src/endpoints/chat-completions/otel.ts +161 -0
  59. package/src/endpoints/embeddings/handler.test.ts +2 -2
  60. package/src/endpoints/embeddings/handler.ts +28 -10
  61. package/src/endpoints/embeddings/otel.ts +56 -0
  62. package/src/endpoints/models/handler.ts +3 -5
  63. package/src/errors/gateway.ts +5 -5
  64. package/src/errors/openai.ts +25 -17
  65. package/src/errors/utils.ts +6 -7
  66. package/src/gateway.ts +1 -1
  67. package/src/lifecycle.ts +85 -32
  68. package/src/middleware/matcher.ts +1 -1
  69. package/src/models/amazon/presets.ts +1 -1
  70. package/src/models/cohere/presets.ts +2 -2
  71. package/src/models/google/presets.ts +1 -1
  72. package/src/models/openai/presets.ts +1 -1
  73. package/src/models/types.ts +1 -1
  74. package/src/models/voyage/presets.ts +1 -1
  75. package/src/providers/registry.ts +2 -2
  76. package/src/telemetry/baggage.ts +27 -0
  77. package/src/telemetry/fetch.ts +15 -3
  78. package/src/telemetry/gen-ai.ts +88 -0
  79. package/src/telemetry/http.ts +65 -0
  80. package/src/telemetry/memory.ts +36 -0
  81. package/src/telemetry/span.ts +28 -40
  82. package/src/telemetry/stream.ts +36 -40
  83. package/src/types.ts +18 -18
  84. package/src/utils/headers.ts +8 -19
  85. package/src/utils/request.ts +0 -11
  86. package/src/utils/response.ts +1 -1
  87. package/dist/telemetry/otel.d.ts +0 -2
  88. package/dist/telemetry/otel.js +0 -50
  89. package/dist/telemetry/utils.d.ts +0 -4
  90. package/dist/telemetry/utils.js +0 -223
  91. package/src/telemetry/otel.ts +0 -91
  92. package/src/telemetry/utils.ts +0 -273
package/src/config.ts CHANGED
@@ -1,16 +1,21 @@
1
1
  import { isLogger, logger, setLoggerInstance } from "./logger";
2
2
  import { createDefaultLogger } from "./logger/default";
3
- import { kParsed, type GatewayConfig, type GatewayConfigParsed } from "./types";
3
+ import {
4
+ kParsed,
5
+ type GatewayConfig,
6
+ type GatewayConfigParsed,
7
+ type TelemetrySignalLevel,
8
+ } from "./types";
4
9
 
5
10
  export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
6
- // If it has been parsed before, just return
11
+ // If it has been parsed before, just return.
7
12
  if (kParsed in config) return config as GatewayConfigParsed;
8
13
 
9
14
  const providers = config.providers ?? {};
10
15
  const parsedProviders = {} as typeof providers;
11
16
  const models = config.models ?? {};
12
17
 
13
- // Set the global logger instance
18
+ // Set the global logger instance.
14
19
  if (config.logger === undefined) {
15
20
  setLoggerInstance(createDefaultLogger({}));
16
21
  } else if (config.logger !== null) {
@@ -23,7 +28,7 @@ export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
23
28
  );
24
29
  }
25
30
 
26
- // Strip providers that are not configured
31
+ // Strip providers that are not configured.
27
32
  for (const id in providers) {
28
33
  const provider = providers[id];
29
34
  if (provider === undefined) {
@@ -37,7 +42,7 @@ export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
37
42
  throw new Error("No providers configured (config.providers is empty)");
38
43
  }
39
44
 
40
- // Strip providers that are not configured from models
45
+ // Strip providers that are not configured from models.
41
46
  const parsedModels = {} as typeof models;
42
47
  const warnings = new Set<string>();
43
48
  for (const id in models) {
@@ -60,12 +65,28 @@ export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
60
65
  throw new Error("No models configured (config.models is empty)");
61
66
  }
62
67
 
68
+ // Default for the telemetry settings.
69
+ const telemetryEnabled = config.telemetry?.enabled ?? false;
70
+ const telemetrySignals: Record<"http" | "gen_ai" | "hebo", TelemetrySignalLevel> =
71
+ telemetryEnabled
72
+ ? {
73
+ http: config.telemetry?.signals?.http ?? "recommended",
74
+ gen_ai: config.telemetry?.signals?.gen_ai ?? "full",
75
+ hebo: config.telemetry?.signals?.hebo ?? "off",
76
+ }
77
+ : {
78
+ http: "off",
79
+ gen_ai: "off",
80
+ hebo: "off",
81
+ };
82
+
83
+ // Return parsed config.
63
84
  return {
64
85
  ...config,
65
- logger: config.logger,
66
86
  telemetry: {
67
87
  ...config.telemetry,
68
- enabled: config.telemetry?.enabled ?? false,
88
+ enabled: telemetryEnabled,
89
+ signals: telemetrySignals,
69
90
  },
70
91
  providers: parsedProviders,
71
92
  models: parsedModels,
@@ -185,6 +185,7 @@ export function fromChatCompletionsAssistantMessage(
185
185
 
186
186
  if (tool_calls?.length) {
187
187
  for (const tc of tool_calls) {
188
+ // eslint-disable-next-line no-shadow
188
189
  const { id, function: fn, extra_content } = tc;
189
190
  const out: ToolCallPart = {
190
191
  type: "tool-call",
@@ -404,11 +405,12 @@ export function toChatCompletionsResponse(
404
405
  return toResponse(toChatCompletions(result, model), responseInit);
405
406
  }
406
407
 
407
- export function toChatCompletionsStream(
408
+ export function toChatCompletionsStream<E extends boolean = false>(
408
409
  result: StreamTextResult<ToolSet, Output.Output>,
409
410
  model: string,
410
- ): ReadableStream<ChatCompletionsChunk | OpenAIError> {
411
- return result.fullStream.pipeThrough(new ChatCompletionsStream(model));
411
+ wrapErrors?: E,
412
+ ): ReadableStream<ChatCompletionsChunk | (E extends true ? OpenAIError : Error)> {
413
+ return result.fullStream.pipeThrough(new ChatCompletionsStream(model, wrapErrors));
412
414
  }
413
415
 
414
416
  export function toChatCompletionsStreamResponse(
@@ -416,14 +418,14 @@ export function toChatCompletionsStreamResponse(
416
418
  model: string,
417
419
  responseInit?: ResponseInit,
418
420
  ): Response {
419
- return toResponse(toChatCompletionsStream(result, model), responseInit);
421
+ return toResponse(toChatCompletionsStream(result, model, true), responseInit);
420
422
  }
421
423
 
422
- export class ChatCompletionsStream extends TransformStream<
424
+ export class ChatCompletionsStream<E extends boolean = false> extends TransformStream<
423
425
  TextStreamPart<ToolSet>,
424
- ChatCompletionsChunk | OpenAIError
426
+ ChatCompletionsChunk | (E extends true ? OpenAIError : Error)
425
427
  > {
426
- constructor(model: string) {
428
+ constructor(model: string, wrapErrors?: E) {
427
429
  const streamId = `chatcmpl-${crypto.randomUUID()}`;
428
430
  const creationTime = Math.floor(Date.now() / 1000);
429
431
  let toolCallIndexCounter = 0;
@@ -534,10 +536,15 @@ export class ChatCompletionsStream extends TransformStream<
534
536
  }
535
537
 
536
538
  case "error": {
537
- const error = part.error;
538
- // FUTURE mask in production mode and return responseID
539
- controller.enqueue(toOpenAIError(error));
540
- break;
539
+ let err: Error | OpenAIError;
540
+ if (wrapErrors) {
541
+ err = toOpenAIError(part.error);
542
+ } else if (part.error instanceof Error) {
543
+ err = part.error;
544
+ } else {
545
+ err = new Error(String(part.error));
546
+ }
547
+ controller.enqueue(err as E extends true ? OpenAIError : Error);
541
548
  }
542
549
  }
543
550
  },
@@ -23,16 +23,28 @@ import { winterCgHandler } from "../../lifecycle";
23
23
  import { logger } from "../../logger";
24
24
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
25
25
  import { resolveProvider } from "../../providers/registry";
26
- import { addSpanEvent } from "../../telemetry/span";
26
+ import {
27
+ recordRequestDuration,
28
+ recordTimePerOutputToken,
29
+ recordTokenUsage,
30
+ } from "../../telemetry/gen-ai";
31
+ import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
27
32
  import { resolveRequestId } from "../../utils/headers";
28
33
  import { prepareForwardHeaders } from "../../utils/request";
29
34
  import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
35
+ import {
36
+ getChatGeneralAttributes,
37
+ getChatRequestAttributes,
38
+ getChatResponseAttributes,
39
+ } from "./otel";
30
40
  import { ChatCompletionsBodySchema } from "./schema";
31
41
 
32
42
  export const chatCompletions = (config: GatewayConfig): Endpoint => {
33
43
  const hooks = config.hooks;
34
44
 
35
45
  const handler = async (ctx: GatewayContext) => {
46
+ const start = performance.now();
47
+ ctx.operation = "chat";
36
48
  addSpanEvent("hebo.handler.started");
37
49
 
38
50
  // Guard: enforce HTTP method early.
@@ -52,12 +64,12 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
52
64
 
53
65
  const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
54
66
  if (!parsed.success) {
55
- throw new GatewayError(z.prettifyError(parsed.error), 400);
67
+ // FUTURE: consider adding body shape to metadata
68
+ throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
56
69
  }
57
70
  ctx.body = parsed.data;
58
71
  addSpanEvent("hebo.request.parsed");
59
72
 
60
- ctx.operation = "chat";
61
73
  if (hooks?.before) {
62
74
  ctx.body = (await hooks.before(ctx as BeforeHookContext)) ?? ctx.body;
63
75
  addSpanEvent("hebo.hooks.before.completed");
@@ -70,10 +82,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
70
82
  ctx.resolvedModelId =
71
83
  (await hooks?.resolveModelId?.(ctx as ResolveModelHookContext)) ?? ctx.modelId;
72
84
  logger.debug(`[chat] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
73
- addSpanEvent("hebo.model.resolved", {
74
- "gen_ai.request.model": ctx.modelId ?? "",
75
- "gen_ai.response.model": ctx.resolvedModelId ?? "",
76
- });
85
+ addSpanEvent("hebo.model.resolved");
77
86
 
78
87
  const override = await hooks?.resolveProvider?.(ctx as ResolveProviderHookContext);
79
88
  ctx.provider =
@@ -88,7 +97,11 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
88
97
  const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
89
98
  ctx.resolvedProviderId = languageModel.provider;
90
99
  logger.debug(`[chat] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
91
- addSpanEvent("hebo.provider.resolved", { "gen_ai.provider.name": ctx.resolvedProviderId });
100
+ addSpanEvent("hebo.provider.resolved");
101
+
102
+ const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
103
+ const genAiGeneralAttrs = getChatGeneralAttributes(ctx, genAiSignalLevel);
104
+ setSpanAttributes(genAiGeneralAttrs);
92
105
 
93
106
  // Convert inputs to AI SDK call options.
94
107
  const textOptions = convertToTextCallOptions(inputs);
@@ -100,6 +113,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
100
113
  "[chat] AI SDK options",
101
114
  );
102
115
  addSpanEvent("hebo.options.prepared");
116
+ setSpanAttributes(getChatRequestAttributes(inputs, genAiSignalLevel));
103
117
 
104
118
  // Build middleware chain (model -> forward params -> provider).
105
119
  const languageModelWithMiddleware = wrapLanguageModel({
@@ -113,27 +127,27 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
113
127
  const result = streamText({
114
128
  model: languageModelWithMiddleware,
115
129
  headers: prepareForwardHeaders(ctx.request),
116
- // No abort signal here, otherwise we can't detect upstream from client cancellations
117
- // abortSignal: ctx.request.signal,
118
- onError: ({ error }) => {
119
- const err = error instanceof Error ? error : new Error(String(error));
120
- logger.error({
121
- requestId,
122
- err,
123
- });
124
- throw error;
130
+ abortSignal: ctx.request.signal,
131
+ timeout: {
132
+ totalMs: 5 * 60 * 1000,
125
133
  },
126
134
  onAbort: () => {
127
- throw new DOMException("Upstream failed", "AbortError");
135
+ throw new DOMException("The operation was aborted.", "AbortError");
128
136
  },
129
- onFinish: (result) => {
130
- ctx.streamResult = toChatCompletions(
131
- result as unknown as GenerateTextResult<ToolSet, Output.Output>,
137
+ onError: () => {},
138
+ onFinish: (res) => {
139
+ addSpanEvent("hebo.ai-sdk.completed");
140
+ const streamResult = toChatCompletions(
141
+ res as unknown as GenerateTextResult<ToolSet, Output.Output>,
132
142
  ctx.resolvedModelId!,
133
143
  );
134
- },
135
- timeout: {
136
- totalMs: 5 * 60 * 1000,
144
+ addSpanEvent("hebo.result.transformed");
145
+
146
+ const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
147
+ setSpanAttributes(genAiResponseAttrs);
148
+ recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
149
+ recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
150
+ recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
137
151
  },
138
152
  experimental_include: {
139
153
  requestBody: false,
@@ -141,10 +155,8 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
141
155
  includeRawChunks: false,
142
156
  ...textOptions,
143
157
  });
144
- addSpanEvent("hebo.ai-sdk.completed");
145
158
 
146
159
  ctx.result = toChatCompletionsStream(result, ctx.resolvedModelId);
147
- addSpanEvent("hebo.result.transformed");
148
160
 
149
161
  if (hooks?.after) {
150
162
  ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
@@ -158,26 +170,32 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
158
170
  const result = await generateText({
159
171
  model: languageModelWithMiddleware,
160
172
  headers: prepareForwardHeaders(ctx.request),
161
- // FUTURE: currently can't tell whether upstream or downstream abort
162
173
  abortSignal: ctx.request.signal,
174
+ timeout: 5 * 60 * 1000,
163
175
  experimental_include: {
164
176
  requestBody: false,
165
177
  responseBody: false,
166
178
  },
167
- timeout: 5 * 60 * 1000,
168
179
  ...textOptions,
169
180
  });
170
181
  logger.trace({ requestId, result }, "[chat] AI SDK result");
171
182
  addSpanEvent("hebo.ai-sdk.completed");
172
183
 
184
+ // Transform result.
173
185
  ctx.result = toChatCompletions(result, ctx.resolvedModelId);
174
186
  addSpanEvent("hebo.result.transformed");
175
187
 
188
+ const genAiResponseAttrs = getChatResponseAttributes(ctx.result, genAiSignalLevel);
189
+ setSpanAttributes(genAiResponseAttrs);
190
+ recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
191
+
176
192
  if (hooks?.after) {
177
193
  ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
178
194
  addSpanEvent("hebo.hooks.after.completed");
179
195
  }
180
196
 
197
+ recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
198
+ recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
181
199
  return ctx.result;
182
200
  };
183
201
 
@@ -0,0 +1,161 @@
1
+ import type { Attributes } from "@opentelemetry/api";
2
+
3
+ import type {
4
+ ChatCompletions,
5
+ ChatCompletionsBody,
6
+ ChatCompletionsContentPart,
7
+ ChatCompletionsMessage,
8
+ } from "./schema";
9
+
10
+ import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
11
+
12
+ const toTextPart = (content: string): Record<string, unknown> => ({ type: "text", content });
13
+
14
+ const toMessageParts = (message: ChatCompletionsMessage): Record<string, unknown>[] => {
15
+ if (message.role === "assistant") {
16
+ const parts: Record<string, unknown>[] = [];
17
+ if (typeof message.content === "string") parts.push(toTextPart(message.content));
18
+ if (Array.isArray(message.tool_calls)) {
19
+ for (const call of message.tool_calls) {
20
+ parts.push({
21
+ type: "tool_call",
22
+ id: call.id,
23
+ name: call.function.name,
24
+ arguments: call.function.arguments,
25
+ });
26
+ }
27
+ }
28
+ return parts;
29
+ }
30
+
31
+ if (message.role === "tool") {
32
+ return [{ type: "tool_call_response", id: message.tool_call_id, content: message.content }];
33
+ }
34
+
35
+ if (message.role === "user") {
36
+ const parts: Record<string, unknown>[] = [];
37
+ if (typeof message.content === "string") parts.push(toTextPart(message.content));
38
+ if (Array.isArray(message.content)) {
39
+ for (const part of message.content as ChatCompletionsContentPart[]) {
40
+ if (part.type === "text") {
41
+ parts.push(toTextPart(part.text));
42
+ } else if (part.type === "image_url") {
43
+ parts.push({ type: "image", content: part.image_url.url });
44
+ } else {
45
+ parts.push({
46
+ type: "file",
47
+ // FUTURE: optionally expose safe metadata without raw binary payloads.
48
+ content: part.file.filename ?? "[REDACTED_BINARY_DATA]",
49
+ media_type: part.file.media_type,
50
+ });
51
+ }
52
+ }
53
+ }
54
+ return parts;
55
+ }
56
+
57
+ // FUTURE: remove once Langfuse supports gen_ai.system_instructions
58
+ if (message.role === "system") {
59
+ return [toTextPart(message.content)];
60
+ }
61
+
62
+ return [];
63
+ };
64
+
65
+ export const getChatGeneralAttributes = (
66
+ ctx: GatewayContext,
67
+ signalLevel?: TelemetrySignalLevel,
68
+ ): Attributes => {
69
+ if (!signalLevel || signalLevel === "off") return {};
70
+
71
+ const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
72
+
73
+ return {
74
+ "gen_ai.operation.name": ctx.operation,
75
+ "gen_ai.request.model": requestModel,
76
+ "gen_ai.response.model": ctx.resolvedModelId,
77
+ "gen_ai.provider.name": ctx.resolvedProviderId,
78
+ };
79
+ };
80
+
81
+ export const getChatRequestAttributes = (
82
+ inputs: ChatCompletionsBody,
83
+ signalLevel?: TelemetrySignalLevel,
84
+ ): Attributes => {
85
+ if (!signalLevel || signalLevel === "off") return {};
86
+
87
+ const attrs: Attributes = {};
88
+
89
+ if (inputs.seed !== undefined) {
90
+ Object.assign(attrs, { "gen_ai.request.seed": inputs.seed });
91
+ }
92
+
93
+ if (signalLevel !== "required") {
94
+ Object.assign(attrs, {
95
+ "gen_ai.request.stream": inputs.stream,
96
+ "gen_ai.request.frequency_penalty": inputs.frequency_penalty,
97
+ "gen_ai.request.max_tokens": inputs.max_completion_tokens,
98
+ "gen_ai.request.presence_penalty": inputs.presence_penalty,
99
+ "gen_ai.request.stop_sequences": inputs.stop
100
+ ? Array.isArray(inputs.stop)
101
+ ? inputs.stop
102
+ : [inputs.stop]
103
+ : undefined,
104
+ "gen_ai.request.temperature": inputs.temperature,
105
+ "gen_ai.request.top_p": inputs.top_p,
106
+ });
107
+ }
108
+
109
+ if (signalLevel === "full") {
110
+ Object.assign(attrs, {
111
+ // FUTURE: move system instructions from messages to here
112
+ // blocker: https://github.com/langfuse/langfuse/issues/11607
113
+ // "gen_ai.system_instructions": inputs.messages
114
+ // .filter((m) => m.role === "system")
115
+ // .map((m) => JSON.stringify(toTextPart(m.content))),
116
+ "gen_ai.input.messages": inputs.messages
117
+ //.filter((m) => m.role !== "system")
118
+ .map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
119
+ "gen_ai.tool.definitions": JSON.stringify(inputs.tools),
120
+ });
121
+ }
122
+
123
+ return attrs;
124
+ };
125
+
126
+ export const getChatResponseAttributes = (
127
+ completions: ChatCompletions,
128
+ signalLevel?: TelemetrySignalLevel,
129
+ ): Attributes => {
130
+ if (!signalLevel || signalLevel === "off") return {};
131
+
132
+ const attrs: Attributes = {
133
+ "gen_ai.response.id": completions.id,
134
+ };
135
+
136
+ if (signalLevel !== "required") {
137
+ Object.assign(attrs, {
138
+ "gen_ai.response.finish_reasons": completions.choices?.map((c) => c.finish_reason),
139
+ "gen_ai.usage.total_tokens": completions.usage?.total_tokens,
140
+ "gen_ai.usage.input_tokens": completions.usage?.prompt_tokens,
141
+ "gen_ai.usage.cached_tokens": completions.usage?.prompt_tokens_details?.cached_tokens,
142
+ "gen_ai.usage.output_tokens": completions.usage?.completion_tokens,
143
+ "gen_ai.usage.reasoning_tokens":
144
+ completions.usage?.completion_tokens_details?.reasoning_tokens,
145
+ });
146
+ }
147
+
148
+ if (signalLevel === "full") {
149
+ Object.assign(attrs, {
150
+ "gen_ai.output.messages": completions.choices?.map((c) =>
151
+ JSON.stringify({
152
+ role: c.message.role,
153
+ parts: toMessageParts(c.message),
154
+ finish_reason: c.finish_reason,
155
+ }),
156
+ ),
157
+ });
158
+ }
159
+
160
+ return attrs;
161
+ };
@@ -45,7 +45,7 @@ describe("Embeddings Handler", () => {
45
45
  models: {
46
46
  "text-embedding-3-small": {
47
47
  name: "OpenAI Embedding Model",
48
- modalities: { input: ["text"], output: ["embeddings"] },
48
+ modalities: { input: ["text"], output: ["embedding"] },
49
49
  providers: ["openai"],
50
50
  },
51
51
  "gpt-oss-20b": {
@@ -68,7 +68,7 @@ describe("Embeddings Handler", () => {
68
68
  expect(data).toMatchObject({
69
69
  error: {
70
70
  code: "model_unsupported_operation",
71
- message: "Model 'gpt-oss-20b' does not support 'embeddings' output",
71
+ message: "Model 'gpt-oss-20b' does not support 'embedding' output",
72
72
  type: "invalid_request_error",
73
73
  },
74
74
  });
@@ -16,16 +16,28 @@ import { winterCgHandler } from "../../lifecycle";
16
16
  import { logger } from "../../logger";
17
17
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
18
18
  import { resolveProvider } from "../../providers/registry";
19
- import { addSpanEvent } from "../../telemetry/span";
19
+ import {
20
+ recordRequestDuration,
21
+ recordTimePerOutputToken,
22
+ recordTokenUsage,
23
+ } from "../../telemetry/gen-ai";
24
+ import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
20
25
  import { resolveRequestId } from "../../utils/headers";
21
26
  import { prepareForwardHeaders } from "../../utils/request";
22
27
  import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
28
+ import {
29
+ getEmbeddingsGeneralAttributes,
30
+ getEmbeddingsRequestAttributes,
31
+ getEmbeddingsResponseAttributes,
32
+ } from "./otel";
23
33
  import { EmbeddingsBodySchema } from "./schema";
24
34
 
25
35
  export const embeddings = (config: GatewayConfig): Endpoint => {
26
36
  const hooks = config.hooks;
27
37
 
28
38
  const handler = async (ctx: GatewayContext) => {
39
+ const start = performance.now();
40
+ ctx.operation = "embeddings";
29
41
  addSpanEvent("hebo.handler.started");
30
42
 
31
43
  // Guard: enforce HTTP method early.
@@ -45,12 +57,12 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
45
57
 
46
58
  const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
47
59
  if (!parsed.success) {
48
- throw new GatewayError(z.prettifyError(parsed.error), 400);
60
+ // FUTURE: consider adding body shape to metadata
61
+ throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
49
62
  }
50
63
  ctx.body = parsed.data;
51
64
  addSpanEvent("hebo.request.parsed");
52
65
 
53
- ctx.operation = "embeddings";
54
66
  if (hooks?.before) {
55
67
  ctx.body = (await hooks.before(ctx as BeforeHookContext)) ?? ctx.body;
56
68
  addSpanEvent("hebo.hooks.before.completed");
@@ -63,10 +75,7 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
63
75
  ctx.resolvedModelId =
64
76
  (await hooks?.resolveModelId?.(ctx as ResolveModelHookContext)) ?? ctx.modelId;
65
77
  logger.debug(`[embeddings] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
66
- addSpanEvent("hebo.model.resolved", {
67
- "gen_ai.request.model": ctx.modelId ?? "",
68
- "gen_ai.response.model": ctx.resolvedModelId ?? "",
69
- });
78
+ addSpanEvent("hebo.model.resolved");
70
79
 
71
80
  const override = await hooks?.resolveProvider?.(ctx as ResolveProviderHookContext);
72
81
  ctx.provider =
@@ -81,14 +90,17 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
81
90
  const embeddingModel = ctx.provider.embeddingModel(ctx.resolvedModelId);
82
91
  ctx.resolvedProviderId = embeddingModel.provider;
83
92
  logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
84
- addSpanEvent("hebo.provider.resolved", {
85
- "gen_ai.provider.name": ctx.resolvedProviderId,
86
- });
93
+ addSpanEvent("hebo.provider.resolved");
94
+
95
+ const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
96
+ const genAiGeneralAttrs = getEmbeddingsGeneralAttributes(ctx, genAiSignalLevel);
97
+ setSpanAttributes(genAiGeneralAttrs);
87
98
 
88
99
  // Convert inputs to AI SDK call options.
89
100
  const embedOptions = convertToEmbedCallOptions(inputs);
90
101
  logger.trace({ requestId, options: embedOptions }, "[embeddings] AI SDK options");
91
102
  addSpanEvent("hebo.options.prepared");
103
+ setSpanAttributes(getEmbeddingsRequestAttributes(inputs, genAiSignalLevel));
92
104
 
93
105
  // Build middleware chain (model -> forward params -> provider).
94
106
  const embeddingModelWithMiddleware = wrapEmbeddingModel({
@@ -107,14 +119,20 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
107
119
  logger.trace({ requestId, result }, "[embeddings] AI SDK result");
108
120
  addSpanEvent("hebo.ai-sdk.completed");
109
121
 
122
+ // Transform result.
110
123
  ctx.result = toEmbeddings(result, ctx.modelId);
111
124
  addSpanEvent("hebo.result.transformed");
125
+ const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, genAiSignalLevel);
126
+ recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
127
+ setSpanAttributes(genAiResponseAttrs);
112
128
 
113
129
  if (hooks?.after) {
114
130
  ctx.result = (await hooks.after(ctx as AfterHookContext)) ?? ctx.result;
115
131
  addSpanEvent("hebo.hooks.after.completed");
116
132
  }
117
133
 
134
+ recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
135
+ recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
118
136
  return ctx.result;
119
137
  };
120
138
 
@@ -0,0 +1,56 @@
1
+ import type { Attributes } from "@opentelemetry/api";
2
+
3
+ import type { Embeddings, EmbeddingsInputs } from "./schema";
4
+
5
+ import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
6
+
7
+ export const getEmbeddingsGeneralAttributes = (
8
+ ctx: GatewayContext,
9
+ signalLevel?: TelemetrySignalLevel,
10
+ ): Attributes => {
11
+ if (!signalLevel || signalLevel === "off") return {};
12
+
13
+ const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
14
+
15
+ return {
16
+ "gen_ai.operation.name": ctx.operation,
17
+ "gen_ai.request.model": requestModel,
18
+ "gen_ai.response.model": ctx.resolvedModelId,
19
+ "gen_ai.provider.name": ctx.resolvedProviderId,
20
+ };
21
+ };
22
+
23
+ export const getEmbeddingsRequestAttributes = (
24
+ inputs: EmbeddingsInputs,
25
+ signalLevel?: TelemetrySignalLevel,
26
+ ): Attributes => {
27
+ if (!signalLevel || signalLevel === "off") return {};
28
+
29
+ const attrs: Attributes = {};
30
+
31
+ if (signalLevel !== "required") {
32
+ Object.assign(attrs, {
33
+ "gen_ai.embeddings.dimension.count": inputs.dimensions,
34
+ });
35
+ }
36
+
37
+ return attrs;
38
+ };
39
+
40
+ export const getEmbeddingsResponseAttributes = (
41
+ embeddings: Embeddings,
42
+ signalLevel?: TelemetrySignalLevel,
43
+ ): Attributes => {
44
+ if (!signalLevel || signalLevel === "off") return {};
45
+
46
+ const attrs: Attributes = {};
47
+
48
+ if (signalLevel !== "required") {
49
+ Object.assign(attrs, {
50
+ "gen_ai.usage.input_tokens": embeddings.usage?.prompt_tokens,
51
+ "gen_ai.usage.total_tokens": embeddings.usage?.total_tokens,
52
+ });
53
+ }
54
+
55
+ return attrs;
56
+ };
@@ -7,15 +7,13 @@ import { toModels, toModel } from "./converters";
7
7
  export const models = (config: GatewayConfig): Endpoint => {
8
8
  // eslint-disable-next-line require-await
9
9
  const handler = async (ctx: GatewayContext) => {
10
- const request = ctx.request;
10
+ ctx.operation = "models";
11
11
 
12
- if (!request || request.method !== "GET") {
12
+ if (!ctx.request || ctx.request.method !== "GET") {
13
13
  throw new GatewayError("Method Not Allowed", 405);
14
14
  }
15
15
 
16
- ctx.operation = "models";
17
-
18
- const rawId = request.url.split("/models/", 2)[1]?.split("?", 1)[0];
16
+ const rawId = ctx.request.url.split("/models/", 2)[1]?.split("?", 1)[0];
19
17
  if (!rawId) {
20
18
  return toModels(ctx.models);
21
19
  }
@@ -4,12 +4,12 @@ export class GatewayError extends Error {
4
4
  readonly status: number;
5
5
  readonly code: string;
6
6
 
7
- constructor(error: string | Error, status: number, code?: string, cause?: unknown) {
8
- const msg = typeof error === "string" ? error : error.message;
9
- super(msg);
7
+ constructor(error: unknown, status: number, code?: string, cause?: unknown) {
8
+ const isError = error instanceof Error;
9
+ super(isError ? error.message : String(error));
10
+ this.cause = cause ?? (isError ? error : undefined);
11
+
10
12
  this.status = status;
11
13
  this.code = code ?? STATUS_CODE(status);
12
- this.cause =
13
- cause ?? (typeof error === "string" ? undefined : (error as { cause?: unknown }).cause);
14
14
  }
15
15
  }