@hebo-ai/gateway 0.4.0-beta.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/README.md +40 -5
  2. package/dist/config.js +21 -7
  3. package/dist/endpoints/chat-completions/converters.d.ts +3 -3
  4. package/dist/endpoints/chat-completions/converters.js +16 -8
  5. package/dist/endpoints/chat-completions/handler.js +34 -27
  6. package/dist/endpoints/chat-completions/otel.d.ts +6 -0
  7. package/dist/endpoints/chat-completions/otel.js +127 -0
  8. package/dist/endpoints/embeddings/handler.js +19 -10
  9. package/dist/endpoints/embeddings/otel.d.ts +6 -0
  10. package/dist/endpoints/embeddings/otel.js +35 -0
  11. package/dist/endpoints/models/handler.js +3 -4
  12. package/dist/errors/gateway.d.ts +1 -1
  13. package/dist/errors/gateway.js +3 -4
  14. package/dist/errors/openai.js +11 -12
  15. package/dist/errors/utils.d.ts +3 -4
  16. package/dist/errors/utils.js +6 -6
  17. package/dist/gateway.js +1 -1
  18. package/dist/lifecycle.js +71 -29
  19. package/dist/middleware/matcher.js +1 -1
  20. package/dist/models/amazon/presets.d.ts +37 -37
  21. package/dist/models/amazon/presets.js +1 -1
  22. package/dist/models/anthropic/presets.d.ts +56 -56
  23. package/dist/models/cohere/presets.d.ts +54 -54
  24. package/dist/models/cohere/presets.js +2 -2
  25. package/dist/models/google/presets.d.ts +31 -31
  26. package/dist/models/google/presets.js +1 -1
  27. package/dist/models/meta/presets.d.ts +42 -42
  28. package/dist/models/openai/presets.d.ts +96 -96
  29. package/dist/models/openai/presets.js +1 -1
  30. package/dist/models/types.d.ts +1 -1
  31. package/dist/models/voyage/presets.d.ts +92 -92
  32. package/dist/models/voyage/presets.js +1 -1
  33. package/dist/providers/registry.js +2 -2
  34. package/dist/telemetry/baggage.d.ts +1 -0
  35. package/dist/telemetry/baggage.js +24 -0
  36. package/dist/telemetry/fetch.d.ts +2 -1
  37. package/dist/telemetry/fetch.js +13 -3
  38. package/dist/telemetry/gen-ai.d.ts +5 -0
  39. package/dist/telemetry/gen-ai.js +60 -0
  40. package/dist/telemetry/http.d.ts +3 -0
  41. package/dist/telemetry/http.js +57 -0
  42. package/dist/telemetry/memory.d.ts +2 -0
  43. package/dist/telemetry/memory.js +27 -0
  44. package/dist/telemetry/span.d.ts +6 -3
  45. package/dist/telemetry/span.js +24 -36
  46. package/dist/telemetry/stream.d.ts +3 -7
  47. package/dist/telemetry/stream.js +26 -29
  48. package/dist/types.d.ts +16 -15
  49. package/dist/utils/headers.d.ts +1 -1
  50. package/dist/utils/headers.js +7 -9
  51. package/dist/utils/request.d.ts +0 -4
  52. package/dist/utils/request.js +0 -9
  53. package/dist/utils/response.js +1 -1
  54. package/package.json +5 -2
  55. package/src/config.ts +28 -7
  56. package/src/endpoints/chat-completions/converters.ts +18 -11
  57. package/src/endpoints/chat-completions/handler.ts +46 -28
  58. package/src/endpoints/chat-completions/otel.ts +161 -0
  59. package/src/endpoints/embeddings/handler.test.ts +2 -2
  60. package/src/endpoints/embeddings/handler.ts +28 -10
  61. package/src/endpoints/embeddings/otel.ts +56 -0
  62. package/src/endpoints/models/handler.ts +3 -5
  63. package/src/errors/gateway.ts +5 -5
  64. package/src/errors/openai.ts +25 -17
  65. package/src/errors/utils.ts +6 -7
  66. package/src/gateway.ts +1 -1
  67. package/src/lifecycle.ts +85 -32
  68. package/src/middleware/matcher.ts +1 -1
  69. package/src/models/amazon/presets.ts +1 -1
  70. package/src/models/cohere/presets.ts +2 -2
  71. package/src/models/google/presets.ts +1 -1
  72. package/src/models/openai/presets.ts +1 -1
  73. package/src/models/types.ts +1 -1
  74. package/src/models/voyage/presets.ts +1 -1
  75. package/src/providers/registry.ts +2 -2
  76. package/src/telemetry/baggage.ts +27 -0
  77. package/src/telemetry/fetch.ts +15 -3
  78. package/src/telemetry/gen-ai.ts +88 -0
  79. package/src/telemetry/http.ts +65 -0
  80. package/src/telemetry/memory.ts +36 -0
  81. package/src/telemetry/span.ts +28 -40
  82. package/src/telemetry/stream.ts +36 -40
  83. package/src/types.ts +18 -18
  84. package/src/utils/headers.ts +8 -19
  85. package/src/utils/request.ts +0 -11
  86. package/src/utils/response.ts +1 -1
  87. package/dist/telemetry/otel.d.ts +0 -2
  88. package/dist/telemetry/otel.js +0 -50
  89. package/dist/telemetry/utils.d.ts +0 -4
  90. package/dist/telemetry/utils.js +0 -223
  91. package/src/telemetry/otel.ts +0 -91
  92. package/src/telemetry/utils.ts +0 -273
package/README.md CHANGED
@@ -19,6 +19,7 @@ Learn more in our blog post: [Yet Another AI Gateway?](https://hebo.ai/blog/2601
  - 🗂️ Model catalog with extensible metadata capabilities.
  - 🪝 Hook system to customize routing, auth, rate limits, and shape responses.
  - 🧰 Low-level OpenAI-compatible schema, converters, and middleware helpers.
+ - 👁️ OpenTelemetry support for GenAI semantic conventions (Langfuse-compatible).

  ## 📦 Installation

@@ -615,12 +616,20 @@ const gw = gateway({
    telemetry: {
      // default: false
      enabled: true,
-     // default: TraceProivder from @opentelemetry/api singleton
+     // default: TraceProvider from @opentelemetry/api singleton
      tracer: trace.getTracer("my-gateway"),
-     // "required" = minimal baseline attributes
-     // "recommended" = practical operational attributes (request/response metadata, genai model/usage fields)
-     // "full" = also include body fields (e.g. genai input/output messages)
-     attributes: "full",
+     // Telemetry levels by namespace:
+     // "off" | "required" | "recommended" | "full"
+     signals: {
+       // gen_ai.* semantic attributes
+       gen_ai: "full",
+       // http.*, url.*, server.* semantic attributes
+       http: "recommended",
+       // hebo-specific telemetry:
+       // - recommended: hebo.* span events
+       // - full: hebo.* span events + fetch instrumentation
+       hebo: "recommended",
+     },
    },
  });
  ```
@@ -633,6 +642,32 @@ https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/

  For observability integration that is not otel compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.

+ #### Langfuse
+
+ Hebo telemetry spans are OpenTelemetry-compatible, so you can send them to Langfuse via `@langfuse/otel`.
+
+ ```ts
+ import { gateway } from "@hebo-ai/gateway";
+ import { LangfuseSpanProcessor } from "@langfuse/otel";
+ import { context } from "@opentelemetry/api";
+ import { AsyncLocalStorageContextManager } from "@opentelemetry/context-async-hooks";
+ import { BasicTracerProvider } from "@opentelemetry/sdk-trace-base";
+
+ context.setGlobalContextManager(new AsyncLocalStorageContextManager().enable());
+
+ const gw = gateway({
+   // ...
+   telemetry: {
+     enabled: true,
+     tracer: new BasicTracerProvider({
+       spanProcessors: [new LangfuseSpanProcessor()],
+     }).getTracer("hebo"),
+   },
+ });
+ ```
+
+ Langfuse credentials are read from environment variables by the Langfuse OTel SDK (`LANGFUSE_PUBLIC_KEY`, `LANGFUSE_SECRET_KEY`, `LANGFUSE_BASE_URL`).
+
  ### Passing Framework State to Hooks

  You can pass per-request info from your framework into the gateway via the second `state` argument on the handler, then read it in hooks through `ctx.state`.
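
For context, a minimal sketch of that state-passing flow. The `userId` field, the route wiring, and the `gw.chatCompletions.handler` property path are illustrative assumptions, not the package's confirmed API surface:

```ts
import { gateway } from "@hebo-ai/gateway";

const gw = gateway({
  // ...providers, models...
  hooks: {
    before: (ctx) => {
      // Read whatever the framework passed as the handler's second argument.
      const state = ctx.state as { userId?: string } | undefined; // shape is hypothetical
      console.log("authenticated user:", state?.userId);
    },
  },
});

// In a WinterCG-style route, forward per-request info as the second argument:
export default {
  fetch: (request: Request) =>
    gw.chatCompletions.handler(request, { userId: "user_123" }), // property path assumed
};
```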
package/dist/config.js CHANGED
@@ -1,14 +1,14 @@
  import { isLogger, logger, setLoggerInstance } from "./logger";
  import { createDefaultLogger } from "./logger/default";
- import { kParsed } from "./types";
+ import { kParsed, } from "./types";
  export const parseConfig = (config) => {
-     // If it has been parsed before, just return
+     // If it has been parsed before, just return.
      if (kParsed in config)
          return config;
      const providers = config.providers ?? {};
      const parsedProviders = {};
      const models = config.models ?? {};
-     // Set the global logger instance
+     // Set the global logger instance.
      if (config.logger === undefined) {
          setLoggerInstance(createDefaultLogger({}));
      }
@@ -18,7 +18,7 @@ export const parseConfig = (config) => {
          ? `[logger] custom logger configured`
          : `[logger] logger configured: level=${config.logger.level}`);
      }
-     // Strip providers that are not configured
+     // Strip providers that are not configured.
      for (const id in providers) {
          const provider = providers[id];
          if (provider === undefined) {
@@ -30,7 +30,7 @@ export const parseConfig = (config) => {
      if (Object.keys(parsedProviders).length === 0) {
          throw new Error("No providers configured (config.providers is empty)");
      }
-     // Strip providers that are not configured from models
+     // Strip providers that are not configured from models.
      const parsedModels = {};
      const warnings = new Set();
      for (const id in models) {
@@ -51,12 +51,26 @@ export const parseConfig = (config) => {
      if (Object.keys(parsedModels).length === 0) {
          throw new Error("No models configured (config.models is empty)");
      }
+     // Default for the telemetry settings.
+     const telemetryEnabled = config.telemetry?.enabled ?? false;
+     const telemetrySignals = telemetryEnabled
+         ? {
+             http: config.telemetry?.signals?.http ?? "recommended",
+             gen_ai: config.telemetry?.signals?.gen_ai ?? "full",
+             hebo: config.telemetry?.signals?.hebo ?? "off",
+         }
+         : {
+             http: "off",
+             gen_ai: "off",
+             hebo: "off",
+         };
+     // Return parsed config.
      return {
          ...config,
-         logger: config.logger,
          telemetry: {
              ...config.telemetry,
-             enabled: config.telemetry?.enabled ?? false,
+             enabled: telemetryEnabled,
+             signals: telemetrySignals,
          },
          providers: parsedProviders,
          models: parsedModels,
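
The net effect of the defaulting above: with telemetry enabled, unspecified namespaces fall back to `http: "recommended"`, `gen_ai: "full"`, `hebo: "off"`; with telemetry disabled or omitted, every namespace is forced to `"off"`. A sketch with hypothetical provider and model entries:

```ts
import { parseConfig } from "./config"; // internal module shown above; import path assumed

const parsed = parseConfig({
  providers: { openai: { apiKey: "..." } },        // illustrative provider config
  models: { "gpt-4o": { providers: ["openai"] } }, // illustrative model entry
  telemetry: { enabled: true, signals: { hebo: "recommended" } },
});

console.log(parsed.telemetry.signals);
// => { http: "recommended", gen_ai: "full", hebo: "recommended" }
```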
package/dist/endpoints/chat-completions/converters.d.ts CHANGED
@@ -25,10 +25,10 @@ export declare const convertToToolSet: (tools: ChatCompletionsTool[] | undefined
  export declare const convertToToolChoice: (toolChoice: ChatCompletionsToolChoice | undefined) => ToolChoice<ToolSet> | undefined;
  export declare function toChatCompletions(result: GenerateTextResult<ToolSet, Output.Output>, model: string): ChatCompletions;
  export declare function toChatCompletionsResponse(result: GenerateTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
- export declare function toChatCompletionsStream(result: StreamTextResult<ToolSet, Output.Output>, model: string): ReadableStream<ChatCompletionsChunk | OpenAIError>;
+ export declare function toChatCompletionsStream<E extends boolean = false>(result: StreamTextResult<ToolSet, Output.Output>, model: string, wrapErrors?: E): ReadableStream<ChatCompletionsChunk | (E extends true ? OpenAIError : Error)>;
  export declare function toChatCompletionsStreamResponse(result: StreamTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
- export declare class ChatCompletionsStream extends TransformStream<TextStreamPart<ToolSet>, ChatCompletionsChunk | OpenAIError> {
-     constructor(model: string);
+ export declare class ChatCompletionsStream<E extends boolean = false> extends TransformStream<TextStreamPart<ToolSet>, ChatCompletionsChunk | (E extends true ? OpenAIError : Error)> {
+     constructor(model: string, wrapErrors?: E);
  }
  export declare const toChatCompletionsAssistantMessage: (result: GenerateTextResult<ToolSet, Output.Output>) => ChatCompletionsAssistantMessage;
  export declare function toReasoningDetail(reasoning: ReasoningOutput, id: string, index: number): ChatCompletionsReasoningDetail;
package/dist/endpoints/chat-completions/converters.js CHANGED
@@ -94,6 +94,7 @@ export function fromChatCompletionsAssistantMessage(message) {
      }
      if (tool_calls?.length) {
          for (const tc of tool_calls) {
+             // eslint-disable-next-line no-shadow
              const { id, function: fn, extra_content } = tc;
              const out = {
                  type: "tool-call",
@@ -274,14 +275,14 @@ export function toChatCompletions(result, model) {
  export function toChatCompletionsResponse(result, model, responseInit) {
      return toResponse(toChatCompletions(result, model), responseInit);
  }
- export function toChatCompletionsStream(result, model) {
-     return result.fullStream.pipeThrough(new ChatCompletionsStream(model));
+ export function toChatCompletionsStream(result, model, wrapErrors) {
+     return result.fullStream.pipeThrough(new ChatCompletionsStream(model, wrapErrors));
  }
  export function toChatCompletionsStreamResponse(result, model, responseInit) {
-     return toResponse(toChatCompletionsStream(result, model), responseInit);
+     return toResponse(toChatCompletionsStream(result, model, true), responseInit);
  }
  export class ChatCompletionsStream extends TransformStream {
-     constructor(model) {
+     constructor(model, wrapErrors) {
          const streamId = `chatcmpl-${crypto.randomUUID()}`;
          const creationTime = Math.floor(Date.now() / 1000);
          let toolCallIndexCounter = 0;
@@ -347,10 +348,17 @@ export class ChatCompletionsStream extends TransformStream {
                      break;
                  }
                  case "error": {
-                     const error = part.error;
-                     // FUTURE mask in production mode and return responseID
-                     controller.enqueue(toOpenAIError(error));
-                     break;
+                     let err;
+                     if (wrapErrors) {
+                         err = toOpenAIError(part.error);
+                     }
+                     else if (part.error instanceof Error) {
+                         err = part.error;
+                     }
+                     else {
+                         err = new Error(String(part.error));
+                     }
+                     controller.enqueue(err);
                  }
              }
          },
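
A quick sketch of what the new `wrapErrors` flag means for stream consumers; `result` stands in for the AI SDK's `StreamTextResult` and the model string is illustrative:

```ts
import { toChatCompletionsStream } from "@hebo-ai/gateway"; // import path assumed

declare const result: any; // loosely typed stand-in for StreamTextResult

// Default (wrapErrors omitted): error parts surface as plain Error instances,
// convenient for internal consumers that want to inspect or rethrow them.
const internalStream = toChatCompletionsStream(result, "gpt-4o");
// => ReadableStream<ChatCompletionsChunk | Error>

// wrapErrors = true: errors become OpenAI-shaped error payloads, which is
// what toChatCompletionsStreamResponse now passes for HTTP responses.
const wireStream = toChatCompletionsStream(result, "gpt-4o", true);
// => ReadableStream<ChatCompletionsChunk | OpenAIError>
```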
package/dist/endpoints/chat-completions/handler.js CHANGED
@@ -5,14 +5,18 @@ import { winterCgHandler } from "../../lifecycle";
  import { logger } from "../../logger";
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
  import { resolveProvider } from "../../providers/registry";
- import { addSpanEvent } from "../../telemetry/span";
+ import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
+ import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
  import { resolveRequestId } from "../../utils/headers";
  import { prepareForwardHeaders } from "../../utils/request";
  import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
+ import { getChatGeneralAttributes, getChatRequestAttributes, getChatResponseAttributes, } from "./otel";
  import { ChatCompletionsBodySchema } from "./schema";
  export const chatCompletions = (config) => {
      const hooks = config.hooks;
      const handler = async (ctx) => {
+         const start = performance.now();
+         ctx.operation = "chat";
          addSpanEvent("hebo.handler.started");
          // Guard: enforce HTTP method early.
          if (!ctx.request || ctx.request.method !== "POST") {
@@ -29,11 +33,11 @@ export const chatCompletions = (config) => {
          addSpanEvent("hebo.request.deserialized");
          const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
          if (!parsed.success) {
-             throw new GatewayError(z.prettifyError(parsed.error), 400);
+             // FUTURE: consider adding body shape to metadata
+             throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
          }
          ctx.body = parsed.data;
          addSpanEvent("hebo.request.parsed");
-         ctx.operation = "chat";
          if (hooks?.before) {
              ctx.body = (await hooks.before(ctx)) ?? ctx.body;
              addSpanEvent("hebo.hooks.before.completed");
@@ -44,10 +48,7 @@ export const chatCompletions = (config) => {
          ctx.resolvedModelId =
              (await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
          logger.debug(`[chat] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
-         addSpanEvent("hebo.model.resolved", {
-             "gen_ai.request.model": ctx.modelId ?? "",
-             "gen_ai.response.model": ctx.resolvedModelId ?? "",
-         });
+         addSpanEvent("hebo.model.resolved");
          const override = await hooks?.resolveProvider?.(ctx);
          ctx.provider =
              override ??
@@ -60,7 +61,10 @@ export const chatCompletions = (config) => {
          const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
          ctx.resolvedProviderId = languageModel.provider;
          logger.debug(`[chat] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
-         addSpanEvent("hebo.provider.resolved", { "gen_ai.provider.name": ctx.resolvedProviderId });
+         addSpanEvent("hebo.provider.resolved");
+         const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
+         const genAiGeneralAttrs = getChatGeneralAttributes(ctx, genAiSignalLevel);
+         setSpanAttributes(genAiGeneralAttrs);
          // Convert inputs to AI SDK call options.
          const textOptions = convertToTextCallOptions(inputs);
          logger.trace({
@@ -68,6 +72,7 @@ export const chatCompletions = (config) => {
              options: textOptions,
          }, "[chat] AI SDK options");
          addSpanEvent("hebo.options.prepared");
+         setSpanAttributes(getChatRequestAttributes(inputs, genAiSignalLevel));
          // Build middleware chain (model -> forward params -> provider).
          const languageModelWithMiddleware = wrapLanguageModel({
              model: languageModel,
@@ -79,24 +84,23 @@ export const chatCompletions = (config) => {
          const result = streamText({
              model: languageModelWithMiddleware,
              headers: prepareForwardHeaders(ctx.request),
-             // No abort signal here, otherwise we can't detect upstream from client cancellations
-             // abortSignal: ctx.request.signal,
-             onError: ({ error }) => {
-                 const err = error instanceof Error ? error : new Error(String(error));
-                 logger.error({
-                     requestId,
-                     err,
-                 });
-                 throw error;
+             abortSignal: ctx.request.signal,
+             timeout: {
+                 totalMs: 5 * 60 * 1000,
              },
              onAbort: () => {
-                 throw new DOMException("Upstream failed", "AbortError");
+                 throw new DOMException("The operation was aborted.", "AbortError");
              },
-             onFinish: (result) => {
-                 ctx.streamResult = toChatCompletions(result, ctx.resolvedModelId);
-             },
-             timeout: {
-                 totalMs: 5 * 60 * 1000,
+             onError: () => { },
+             onFinish: (res) => {
+                 addSpanEvent("hebo.ai-sdk.completed");
+                 const streamResult = toChatCompletions(res, ctx.resolvedModelId);
+                 addSpanEvent("hebo.result.transformed");
+                 const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
+                 setSpanAttributes(genAiResponseAttrs);
+                 recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
+                 recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
+                 recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
              },
              experimental_include: {
                  requestBody: false,
@@ -104,9 +108,7 @@ export const chatCompletions = (config) => {
              includeRawChunks: false,
              ...textOptions,
          });
-         addSpanEvent("hebo.ai-sdk.completed");
          ctx.result = toChatCompletionsStream(result, ctx.resolvedModelId);
-         addSpanEvent("hebo.result.transformed");
          if (hooks?.after) {
              ctx.result = (await hooks.after(ctx)) ?? ctx.result;
              addSpanEvent("hebo.hooks.after.completed");
@@ -117,23 +119,28 @@ export const chatCompletions = (config) => {
          const result = await generateText({
              model: languageModelWithMiddleware,
              headers: prepareForwardHeaders(ctx.request),
-             // FUTURE: currently can't tell whether upstream or downstream abort
              abortSignal: ctx.request.signal,
+             timeout: 5 * 60 * 1000,
              experimental_include: {
                  requestBody: false,
                  responseBody: false,
              },
-             timeout: 5 * 60 * 1000,
              ...textOptions,
          });
          logger.trace({ requestId, result }, "[chat] AI SDK result");
          addSpanEvent("hebo.ai-sdk.completed");
+         // Transform result.
          ctx.result = toChatCompletions(result, ctx.resolvedModelId);
          addSpanEvent("hebo.result.transformed");
+         const genAiResponseAttrs = getChatResponseAttributes(ctx.result, genAiSignalLevel);
+         setSpanAttributes(genAiResponseAttrs);
+         recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
          if (hooks?.after) {
              ctx.result = (await hooks.after(ctx)) ?? ctx.result;
              addSpanEvent("hebo.hooks.after.completed");
          }
+         recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
+         recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
          return ctx.result;
      };
      return { handler: winterCgHandler(handler, config) };
package/dist/endpoints/chat-completions/otel.d.ts ADDED
@@ -0,0 +1,6 @@
+ import type { Attributes } from "@opentelemetry/api";
+ import type { ChatCompletions, ChatCompletionsBody } from "./schema";
+ import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
+ export declare const getChatGeneralAttributes: (ctx: GatewayContext, signalLevel?: TelemetrySignalLevel) => Attributes;
+ export declare const getChatRequestAttributes: (inputs: ChatCompletionsBody, signalLevel?: TelemetrySignalLevel) => Attributes;
+ export declare const getChatResponseAttributes: (completions: ChatCompletions, signalLevel?: TelemetrySignalLevel) => Attributes;
package/dist/endpoints/chat-completions/otel.js ADDED
@@ -0,0 +1,127 @@
+ import {} from "../../types";
+ const toTextPart = (content) => ({ type: "text", content });
+ const toMessageParts = (message) => {
+     if (message.role === "assistant") {
+         const parts = [];
+         if (typeof message.content === "string")
+             parts.push(toTextPart(message.content));
+         if (Array.isArray(message.tool_calls)) {
+             for (const call of message.tool_calls) {
+                 parts.push({
+                     type: "tool_call",
+                     id: call.id,
+                     name: call.function.name,
+                     arguments: call.function.arguments,
+                 });
+             }
+         }
+         return parts;
+     }
+     if (message.role === "tool") {
+         return [{ type: "tool_call_response", id: message.tool_call_id, content: message.content }];
+     }
+     if (message.role === "user") {
+         const parts = [];
+         if (typeof message.content === "string")
+             parts.push(toTextPart(message.content));
+         if (Array.isArray(message.content)) {
+             for (const part of message.content) {
+                 if (part.type === "text") {
+                     parts.push(toTextPart(part.text));
+                 }
+                 else if (part.type === "image_url") {
+                     parts.push({ type: "image", content: part.image_url.url });
+                 }
+                 else {
+                     parts.push({
+                         type: "file",
+                         // FUTURE: optionally expose safe metadata without raw binary payloads.
+                         content: part.file.filename ?? "[REDACTED_BINARY_DATA]",
+                         media_type: part.file.media_type,
+                     });
+                 }
+             }
+         }
+         return parts;
+     }
+     // FUTURE: remove once Langfuse supports gen_ai.system_instructions
+     if (message.role === "system") {
+         return [toTextPart(message.content)];
+     }
+     return [];
+ };
+ export const getChatGeneralAttributes = (ctx, signalLevel) => {
+     if (!signalLevel || signalLevel === "off")
+         return {};
+     const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
+     return {
+         "gen_ai.operation.name": ctx.operation,
+         "gen_ai.request.model": requestModel,
+         "gen_ai.response.model": ctx.resolvedModelId,
+         "gen_ai.provider.name": ctx.resolvedProviderId,
+     };
+ };
+ export const getChatRequestAttributes = (inputs, signalLevel) => {
+     if (!signalLevel || signalLevel === "off")
+         return {};
+     const attrs = {};
+     if (inputs.seed !== undefined) {
+         Object.assign(attrs, { "gen_ai.request.seed": inputs.seed });
+     }
+     if (signalLevel !== "required") {
+         Object.assign(attrs, {
+             "gen_ai.request.stream": inputs.stream,
+             "gen_ai.request.frequency_penalty": inputs.frequency_penalty,
+             "gen_ai.request.max_tokens": inputs.max_completion_tokens,
+             "gen_ai.request.presence_penalty": inputs.presence_penalty,
+             "gen_ai.request.stop_sequences": inputs.stop
+                 ? Array.isArray(inputs.stop)
+                     ? inputs.stop
+                     : [inputs.stop]
+                 : undefined,
+             "gen_ai.request.temperature": inputs.temperature,
+             "gen_ai.request.top_p": inputs.top_p,
+         });
+     }
+     if (signalLevel === "full") {
+         Object.assign(attrs, {
+             // FUTURE: move system instructions from messages to here
+             // blocker: https://github.com/langfuse/langfuse/issues/11607
+             // "gen_ai.system_instructions": inputs.messages
+             //     .filter((m) => m.role === "system")
+             //     .map((m) => JSON.stringify(toTextPart(m.content))),
+             "gen_ai.input.messages": inputs.messages
+                 //.filter((m) => m.role !== "system")
+                 .map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
+             "gen_ai.tool.definitions": JSON.stringify(inputs.tools),
+         });
+     }
+     return attrs;
+ };
+ export const getChatResponseAttributes = (completions, signalLevel) => {
+     if (!signalLevel || signalLevel === "off")
+         return {};
+     const attrs = {
+         "gen_ai.response.id": completions.id,
+     };
+     if (signalLevel !== "required") {
+         Object.assign(attrs, {
+             "gen_ai.response.finish_reasons": completions.choices?.map((c) => c.finish_reason),
+             "gen_ai.usage.total_tokens": completions.usage?.total_tokens,
+             "gen_ai.usage.input_tokens": completions.usage?.prompt_tokens,
+             "gen_ai.usage.cached_tokens": completions.usage?.prompt_tokens_details?.cached_tokens,
+             "gen_ai.usage.output_tokens": completions.usage?.completion_tokens,
+             "gen_ai.usage.reasoning_tokens": completions.usage?.completion_tokens_details?.reasoning_tokens,
+         });
+     }
+     if (signalLevel === "full") {
+         Object.assign(attrs, {
+             "gen_ai.output.messages": completions.choices?.map((c) => JSON.stringify({
+                 role: c.message.role,
+                 parts: toMessageParts(c.message),
+                 finish_reason: c.finish_reason,
+             })),
+         });
+     }
+     return attrs;
+ };
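
To make the `"full"` level concrete, here is what `getChatRequestAttributes` would emit for a hypothetical multimodal user message (values invented for illustration):

```ts
const message = {
  role: "user",
  content: [
    { type: "text", text: "Describe this image" },
    { type: "image_url", image_url: { url: "https://example.com/cat.png" } },
  ],
};

// toMessageParts(message) yields:
//   [{ type: "text", content: "Describe this image" },
//    { type: "image", content: "https://example.com/cat.png" }]
// and at signal level "full" the resulting span attribute is:
// "gen_ai.input.messages": [
//   '{"role":"user","parts":[{"type":"text","content":"Describe this image"},' +
//   '{"type":"image","content":"https://example.com/cat.png"}]}'
// ]
```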
package/dist/endpoints/embeddings/handler.js CHANGED
@@ -5,14 +5,18 @@ import { winterCgHandler } from "../../lifecycle";
  import { logger } from "../../logger";
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
  import { resolveProvider } from "../../providers/registry";
- import { addSpanEvent } from "../../telemetry/span";
+ import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
+ import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
  import { resolveRequestId } from "../../utils/headers";
  import { prepareForwardHeaders } from "../../utils/request";
  import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
+ import { getEmbeddingsGeneralAttributes, getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes, } from "./otel";
  import { EmbeddingsBodySchema } from "./schema";
  export const embeddings = (config) => {
      const hooks = config.hooks;
      const handler = async (ctx) => {
+         const start = performance.now();
+         ctx.operation = "embeddings";
          addSpanEvent("hebo.handler.started");
          // Guard: enforce HTTP method early.
          if (!ctx.request || ctx.request.method !== "POST") {
@@ -29,11 +33,11 @@ export const embeddings = (config) => {
          addSpanEvent("hebo.request.deserialized");
          const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
          if (!parsed.success) {
-             throw new GatewayError(z.prettifyError(parsed.error), 400);
+             // FUTURE: consider adding body shape to metadata
+             throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
          }
          ctx.body = parsed.data;
          addSpanEvent("hebo.request.parsed");
-         ctx.operation = "embeddings";
          if (hooks?.before) {
              ctx.body = (await hooks.before(ctx)) ?? ctx.body;
              addSpanEvent("hebo.hooks.before.completed");
@@ -44,10 +48,7 @@ export const embeddings = (config) => {
          ctx.resolvedModelId =
              (await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
          logger.debug(`[embeddings] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
-         addSpanEvent("hebo.model.resolved", {
-             "gen_ai.request.model": ctx.modelId ?? "",
-             "gen_ai.response.model": ctx.resolvedModelId ?? "",
-         });
+         addSpanEvent("hebo.model.resolved");
          const override = await hooks?.resolveProvider?.(ctx);
          ctx.provider =
              override ??
@@ -60,13 +61,15 @@ export const embeddings = (config) => {
          const embeddingModel = ctx.provider.embeddingModel(ctx.resolvedModelId);
          ctx.resolvedProviderId = embeddingModel.provider;
          logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
-         addSpanEvent("hebo.provider.resolved", {
-             "gen_ai.provider.name": ctx.resolvedProviderId,
-         });
+         addSpanEvent("hebo.provider.resolved");
+         const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
+         const genAiGeneralAttrs = getEmbeddingsGeneralAttributes(ctx, genAiSignalLevel);
+         setSpanAttributes(genAiGeneralAttrs);
          // Convert inputs to AI SDK call options.
          const embedOptions = convertToEmbedCallOptions(inputs);
          logger.trace({ requestId, options: embedOptions }, "[embeddings] AI SDK options");
          addSpanEvent("hebo.options.prepared");
+         setSpanAttributes(getEmbeddingsRequestAttributes(inputs, genAiSignalLevel));
          // Build middleware chain (model -> forward params -> provider).
          const embeddingModelWithMiddleware = wrapEmbeddingModel({
              model: embeddingModel,
@@ -82,12 +85,18 @@ export const embeddings = (config) => {
          });
          logger.trace({ requestId, result }, "[embeddings] AI SDK result");
          addSpanEvent("hebo.ai-sdk.completed");
+         // Transform result.
          ctx.result = toEmbeddings(result, ctx.modelId);
          addSpanEvent("hebo.result.transformed");
+         const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, genAiSignalLevel);
+         recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
+         setSpanAttributes(genAiResponseAttrs);
          if (hooks?.after) {
              ctx.result = (await hooks.after(ctx)) ?? ctx.result;
              addSpanEvent("hebo.hooks.after.completed");
          }
+         recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
+         recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
          return ctx.result;
      };
      return { handler: winterCgHandler(handler, config) };
package/dist/endpoints/embeddings/otel.d.ts ADDED
@@ -0,0 +1,6 @@
+ import type { Attributes } from "@opentelemetry/api";
+ import type { Embeddings, EmbeddingsInputs } from "./schema";
+ import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
+ export declare const getEmbeddingsGeneralAttributes: (ctx: GatewayContext, signalLevel?: TelemetrySignalLevel) => Attributes;
+ export declare const getEmbeddingsRequestAttributes: (inputs: EmbeddingsInputs, signalLevel?: TelemetrySignalLevel) => Attributes;
+ export declare const getEmbeddingsResponseAttributes: (embeddings: Embeddings, signalLevel?: TelemetrySignalLevel) => Attributes;
package/dist/endpoints/embeddings/otel.js ADDED
@@ -0,0 +1,35 @@
+ import {} from "../../types";
+ export const getEmbeddingsGeneralAttributes = (ctx, signalLevel) => {
+     if (!signalLevel || signalLevel === "off")
+         return {};
+     const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
+     return {
+         "gen_ai.operation.name": ctx.operation,
+         "gen_ai.request.model": requestModel,
+         "gen_ai.response.model": ctx.resolvedModelId,
+         "gen_ai.provider.name": ctx.resolvedProviderId,
+     };
+ };
+ export const getEmbeddingsRequestAttributes = (inputs, signalLevel) => {
+     if (!signalLevel || signalLevel === "off")
+         return {};
+     const attrs = {};
+     if (signalLevel !== "required") {
+         Object.assign(attrs, {
+             "gen_ai.embeddings.dimension.count": inputs.dimensions,
+         });
+     }
+     return attrs;
+ };
+ export const getEmbeddingsResponseAttributes = (embeddings, signalLevel) => {
+     if (!signalLevel || signalLevel === "off")
+         return {};
+     const attrs = {};
+     if (signalLevel !== "required") {
+         Object.assign(attrs, {
+             "gen_ai.usage.input_tokens": embeddings.usage?.prompt_tokens,
+             "gen_ai.usage.total_tokens": embeddings.usage?.total_tokens,
+         });
+     }
+     return attrs;
+ };
package/dist/endpoints/models/handler.js CHANGED
@@ -4,12 +4,11 @@ import { toModels, toModel } from "./converters";
  export const models = (config) => {
      // eslint-disable-next-line require-await
      const handler = async (ctx) => {
-         const request = ctx.request;
-         if (!request || request.method !== "GET") {
+         ctx.operation = "models";
+         if (!ctx.request || ctx.request.method !== "GET") {
              throw new GatewayError("Method Not Allowed", 405);
          }
-         ctx.operation = "models";
-         const rawId = request.url.split("/models/", 2)[1]?.split("?", 1)[0];
+         const rawId = ctx.request.url.split("/models/", 2)[1]?.split("?", 1)[0];
          if (!rawId) {
              return toModels(ctx.models);
          }
package/dist/errors/gateway.d.ts CHANGED
@@ -1,5 +1,5 @@
  export declare class GatewayError extends Error {
      readonly status: number;
      readonly code: string;
-     constructor(error: string | Error, status: number, code?: string, cause?: unknown);
+     constructor(error: unknown, status: number, code?: string, cause?: unknown);
  }
package/dist/errors/gateway.js CHANGED
@@ -3,11 +3,10 @@ export class GatewayError extends Error {
      status;
      code;
      constructor(error, status, code, cause) {
-         const msg = typeof error === "string" ? error : error.message;
-         super(msg);
+         const isError = error instanceof Error;
+         super(isError ? error.message : String(error));
+         this.cause = cause ?? (isError ? error : undefined);
          this.status = status;
          this.code = code ?? STATUS_CODE(status);
-         this.cause =
-             cause ?? (typeof error === "string" ? undefined : error.cause);
      }
  }
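
A short sketch of how the widened `GatewayError` constructor behaves; the error values are illustrative:

```ts
import { GatewayError } from "@hebo-ai/gateway"; // export path assumed

// Non-Error inputs are stringified into the message; no cause is attached.
const e1 = new GatewayError("Method Not Allowed", 405);

// Error inputs keep their message and are attached as the cause,
// so upstream failures remain inspectable via `err.cause`.
const upstream = new Error("connect ECONNREFUSED");
const e2 = new GatewayError(upstream, 502);
console.log(e2.message);            // "connect ECONNREFUSED"
console.log(e2.cause === upstream); // true
console.log(e2.code);               // derived via STATUS_CODE(502) when no code is given
```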