@hebo-ai/gateway 0.4.0-beta.4 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +34 -7
  2. package/dist/endpoints/chat-completions/converters.d.ts +3 -3
  3. package/dist/endpoints/chat-completions/converters.js +15 -7
  4. package/dist/endpoints/chat-completions/handler.js +9 -9
  5. package/dist/endpoints/chat-completions/otel.js +10 -4
  6. package/dist/endpoints/embeddings/handler.js +5 -4
  7. package/dist/errors/gateway.d.ts +1 -1
  8. package/dist/errors/gateway.js +3 -3
  9. package/dist/errors/openai.js +2 -1
  10. package/dist/errors/utils.d.ts +2 -1
  11. package/dist/errors/utils.js +1 -0
  12. package/dist/lifecycle.js +14 -6
  13. package/dist/models/anthropic/presets.d.ts +463 -0
  14. package/dist/models/anthropic/presets.js +10 -2
  15. package/dist/models/types.d.ts +1 -1
  16. package/dist/models/types.js +1 -0
  17. package/dist/providers/bedrock/canonical.js +1 -0
  18. package/dist/telemetry/gen-ai.d.ts +2 -1
  19. package/dist/telemetry/gen-ai.js +21 -3
  20. package/dist/telemetry/memory.d.ts +2 -0
  21. package/dist/telemetry/memory.js +27 -0
  22. package/dist/telemetry/span.js +1 -1
  23. package/dist/telemetry/stream.d.ts +1 -1
  24. package/dist/telemetry/stream.js +25 -28
  25. package/dist/types.d.ts +2 -3
  26. package/package.json +2 -1
  27. package/src/endpoints/chat-completions/converters.ts +17 -10
  28. package/src/endpoints/chat-completions/handler.ts +13 -9
  29. package/src/endpoints/chat-completions/otel.ts +11 -4
  30. package/src/endpoints/embeddings/handler.ts +9 -4
  31. package/src/errors/gateway.ts +5 -4
  32. package/src/errors/openai.ts +2 -1
  33. package/src/errors/utils.ts +1 -0
  34. package/src/lifecycle.ts +17 -6
  35. package/src/models/anthropic/presets.ts +14 -2
  36. package/src/models/types.ts +1 -0
  37. package/src/providers/bedrock/canonical.ts +1 -0
  38. package/src/telemetry/gen-ai.ts +31 -3
  39. package/src/telemetry/memory.ts +36 -0
  40. package/src/telemetry/span.ts +1 -1
  41. package/src/telemetry/stream.ts +31 -31
  42. package/src/types.ts +3 -6
@@ -1,3 +1,3 @@
1
1
  export declare const wrapStream: (src: ReadableStream, hooks: {
2
2
  onDone?: (status: number, reason: unknown) => void;
3
- }, signal?: AbortSignal) => ReadableStream;
3
+ }) => ReadableStream;
@@ -1,43 +1,37 @@
1
- const isErrorChunk = (v) => !!v?.error;
2
- export const wrapStream = (src, hooks, signal) => {
3
- let finishOnce = false;
4
- const finish = (status, reason) => {
5
- if (finishOnce)
6
- return;
7
- finishOnce = true;
8
- hooks.onDone?.(status, reason ?? signal?.reason);
1
+ import { toOpenAIError } from "../errors/openai";
2
+ const isErrorChunk = (v) => v instanceof Error || !!v?.error;
3
+ export const wrapStream = (src, hooks) => {
4
+ let finished = false;
5
+ const done = (reader, controller, status, reason) => {
6
+ if (!finished) {
7
+ finished = true;
8
+ hooks.onDone?.(status, reason);
9
+ }
10
+ reader.cancel(reason).catch(() => { });
11
+ controller.close();
9
12
  };
10
13
  return new ReadableStream({
11
14
  async start(controller) {
12
15
  const reader = src.getReader();
13
- const close = (status, reason) => {
14
- finish(status, reason);
15
- reader.cancel(reason).catch(() => { });
16
- controller.close();
17
- };
18
16
  try {
19
17
  for (;;) {
20
- if (signal?.aborted) {
21
- close(499, signal.reason);
22
- return;
23
- }
24
18
  // eslint-disable-next-line no-await-in-loop
25
- const { value, done } = await reader.read();
26
- if (done)
19
+ const { value, done: eof } = await reader.read();
20
+ if (eof)
27
21
  break;
28
- controller.enqueue(value);
29
- if (isErrorChunk(value)) {
30
- const status = value.error.type === "invalid_request_error" ? 422 : 502;
31
- close(status, value.error.message);
22
+ const out = isErrorChunk(value) ? toOpenAIError(value) : value;
23
+ controller.enqueue(out);
24
+ if (out !== value) {
25
+ const status = out.error?.type === "invalid_request_error" ? 422 : 502;
26
+ done(reader, controller, status, value);
32
27
  return;
33
28
  }
34
29
  }
35
- finish(200);
36
- controller.close();
30
+ done(reader, controller, 200);
37
31
  }
38
32
  catch (err) {
39
- const status = signal?.aborted ? 499 : err?.name === "AbortError" ? 503 : 502;
40
- close(status, err);
33
+ controller.enqueue(toOpenAIError(err));
34
+ done(reader, controller, 502, err);
41
35
  }
42
36
  finally {
43
37
  try {
@@ -47,7 +41,10 @@ export const wrapStream = (src, hooks, signal) => {
47
41
  }
48
42
  },
49
43
  cancel(reason) {
50
- finish(499, reason);
44
+ if (!finished) {
45
+ finished = true;
46
+ hooks.onDone?.(499, reason);
47
+ }
51
48
  src.cancel(reason).catch(() => { });
52
49
  },
53
50
  });
package/dist/types.d.ts CHANGED
@@ -3,7 +3,6 @@ import type { Tracer } from "@opentelemetry/api";
3
3
  import type { ChatCompletions, ChatCompletionsBody, ChatCompletionsChunk } from "./endpoints/chat-completions/schema";
4
4
  import type { Embeddings, EmbeddingsBody } from "./endpoints/embeddings/schema";
5
5
  import type { Model, ModelList } from "./endpoints/models";
6
- import type { OpenAIError } from "./errors/openai";
7
6
  import type { Logger, LoggerConfig } from "./logger";
8
7
  import type { ModelCatalog, ModelId } from "./models/types";
9
8
  import type { ProviderId, ProviderRegistry } from "./providers/types";
@@ -67,7 +66,7 @@ export type GatewayContext = {
67
66
  /**
68
67
  * Result returned by the handler (pre-response).
69
68
  */
70
- result?: ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | Model | ModelList;
69
+ result?: ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | Model | ModelList;
71
70
  /**
72
71
  * Response object returned by the handler.
73
72
  */
@@ -115,7 +114,7 @@ export type GatewayHooks = {
115
114
  * Runs after the endpoint handler.
116
115
  * @returns Result to replace, or undefined to keep original.
117
116
  */
118
- after?: (ctx: AfterHookContext) => void | ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | Promise<void | ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings>;
117
+ after?: (ctx: AfterHookContext) => void | ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | Promise<void | ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings>;
119
118
  /**
120
119
  * Runs after the lifecycle has produced the final Response.
121
120
  * @returns Replacement Response, or undefined to keep original.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hebo-ai/gateway",
3
- "version": "0.4.0-beta.4",
3
+ "version": "0.4.1",
4
4
  "description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
5
5
  "keywords": [
6
6
  "ai",
@@ -168,6 +168,7 @@
168
168
  "@ai-sdk/groq": "^3.0.19",
169
169
  "@ai-sdk/openai": "^3.0.23",
170
170
  "@aws-sdk/credential-providers": "^3.981.0",
171
+ "@langfuse/otel": "^4.6.1",
171
172
  "@mjackson/node-fetch-server": "^0.7.0",
172
173
  "@opentelemetry/api": "^1.9.0",
173
174
  "@opentelemetry/context-async-hooks": "^2.5.1",
@@ -405,11 +405,12 @@ export function toChatCompletionsResponse(
405
405
  return toResponse(toChatCompletions(result, model), responseInit);
406
406
  }
407
407
 
408
- export function toChatCompletionsStream(
408
+ export function toChatCompletionsStream<E extends boolean = false>(
409
409
  result: StreamTextResult<ToolSet, Output.Output>,
410
410
  model: string,
411
- ): ReadableStream<ChatCompletionsChunk | OpenAIError> {
412
- return result.fullStream.pipeThrough(new ChatCompletionsStream(model));
411
+ wrapErrors?: E,
412
+ ): ReadableStream<ChatCompletionsChunk | (E extends true ? OpenAIError : Error)> {
413
+ return result.fullStream.pipeThrough(new ChatCompletionsStream(model, wrapErrors));
413
414
  }
414
415
 
415
416
  export function toChatCompletionsStreamResponse(
@@ -417,14 +418,14 @@ export function toChatCompletionsStreamResponse(
417
418
  model: string,
418
419
  responseInit?: ResponseInit,
419
420
  ): Response {
420
- return toResponse(toChatCompletionsStream(result, model), responseInit);
421
+ return toResponse(toChatCompletionsStream(result, model, true), responseInit);
421
422
  }
422
423
 
423
- export class ChatCompletionsStream extends TransformStream<
424
+ export class ChatCompletionsStream<E extends boolean = false> extends TransformStream<
424
425
  TextStreamPart<ToolSet>,
425
- ChatCompletionsChunk | OpenAIError
426
+ ChatCompletionsChunk | (E extends true ? OpenAIError : Error)
426
427
  > {
427
- constructor(model: string) {
428
+ constructor(model: string, wrapErrors?: E) {
428
429
  const streamId = `chatcmpl-${crypto.randomUUID()}`;
429
430
  const creationTime = Math.floor(Date.now() / 1000);
430
431
  let toolCallIndexCounter = 0;
@@ -535,9 +536,15 @@ export class ChatCompletionsStream extends TransformStream<
535
536
  }
536
537
 
537
538
  case "error": {
538
- const error = part.error;
539
- controller.enqueue(toOpenAIError(error));
540
- controller.terminate();
539
+ let err: Error | OpenAIError;
540
+ if (wrapErrors) {
541
+ err = toOpenAIError(part.error);
542
+ } else if (part.error instanceof Error) {
543
+ err = part.error;
544
+ } else {
545
+ err = new Error(String(part.error));
546
+ }
547
+ controller.enqueue(err as E extends true ? OpenAIError : Error);
541
548
  }
542
549
  }
543
550
  },
@@ -23,7 +23,11 @@ import { winterCgHandler } from "../../lifecycle";
23
23
  import { logger } from "../../logger";
24
24
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
25
25
  import { resolveProvider } from "../../providers/registry";
26
- import { recordRequestDuration, recordTokenUsage } from "../../telemetry/gen-ai";
26
+ import {
27
+ recordRequestDuration,
28
+ recordTimePerOutputToken,
29
+ recordTokenUsage,
30
+ } from "../../telemetry/gen-ai";
27
31
  import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
28
32
  import { resolveRequestId } from "../../utils/headers";
29
33
  import { prepareForwardHeaders } from "../../utils/request";
@@ -60,8 +64,8 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
60
64
 
61
65
  const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
62
66
  if (!parsed.success) {
63
- // FUTURE: add body shape to error message
64
- throw new GatewayError(z.prettifyError(parsed.error), 400);
67
+ // FUTURE: consider adding body shape to metadata
68
+ throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
65
69
  }
66
70
  ctx.body = parsed.data;
67
71
  addSpanEvent("hebo.request.parsed");
@@ -123,13 +127,12 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
123
127
  const result = streamText({
124
128
  model: languageModelWithMiddleware,
125
129
  headers: prepareForwardHeaders(ctx.request),
126
- // No abort signal here, otherwise we can't detect upstream from client cancellations
127
- // abortSignal: ctx.request.signal,
130
+ abortSignal: ctx.request.signal,
128
131
  timeout: {
129
132
  totalMs: 5 * 60 * 1000,
130
133
  },
131
134
  onAbort: () => {
132
- throw new DOMException("Upstream failed", "AbortError");
135
+ throw new DOMException("The operation was aborted.", "AbortError");
133
136
  },
134
137
  onError: () => {},
135
138
  onFinish: (res) => {
@@ -143,7 +146,8 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
143
146
  const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
144
147
  setSpanAttributes(genAiResponseAttrs);
145
148
  recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
146
- recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
149
+ recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
150
+ recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
147
151
  },
148
152
  experimental_include: {
149
153
  requestBody: false,
@@ -166,7 +170,6 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
166
170
  const result = await generateText({
167
171
  model: languageModelWithMiddleware,
168
172
  headers: prepareForwardHeaders(ctx.request),
169
- // FUTURE: currently can't tell whether upstream or downstream abort
170
173
  abortSignal: ctx.request.signal,
171
174
  timeout: 5 * 60 * 1000,
172
175
  experimental_include: {
@@ -191,7 +194,8 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
191
194
  addSpanEvent("hebo.hooks.after.completed");
192
195
  }
193
196
 
194
- recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
197
+ recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
198
+ recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
195
199
  return ctx.result;
196
200
  };
197
201
 
@@ -54,6 +54,11 @@ const toMessageParts = (message: ChatCompletionsMessage): Record<string, unknown
54
54
  return parts;
55
55
  }
56
56
 
57
+ // FUTURE: remove once Langfuse supports gen_ai.system_instructions
58
+ if (message.role === "system") {
59
+ return [toTextPart(message.content)];
60
+ }
61
+
57
62
  return [];
58
63
  };
59
64
 
@@ -103,11 +108,13 @@ export const getChatRequestAttributes = (
103
108
 
104
109
  if (signalLevel === "full") {
105
110
  Object.assign(attrs, {
106
- "gen_ai.system_instructions": inputs.messages
107
- .filter((m) => m.role === "system")
108
- .map((m) => JSON.stringify({ parts: [toTextPart(m.content)] })),
111
+ // FUTURE: move system instructions from messages to here
112
+ // blocker: https://github.com/langfuse/langfuse/issues/11607
113
+ // "gen_ai.system_instructions": inputs.messages
114
+ // .filter((m) => m.role === "system")
115
+ // .map((m) => JSON.stringify(toTextPart(m.content))),
109
116
  "gen_ai.input.messages": inputs.messages
110
- .filter((m) => m.role !== "system")
117
+ //.filter((m) => m.role !== "system")
111
118
  .map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
112
119
  "gen_ai.tool.definitions": JSON.stringify(inputs.tools),
113
120
  });
@@ -16,7 +16,11 @@ import { winterCgHandler } from "../../lifecycle";
16
16
  import { logger } from "../../logger";
17
17
  import { modelMiddlewareMatcher } from "../../middleware/matcher";
18
18
  import { resolveProvider } from "../../providers/registry";
19
- import { recordRequestDuration, recordTokenUsage } from "../../telemetry/gen-ai";
19
+ import {
20
+ recordRequestDuration,
21
+ recordTimePerOutputToken,
22
+ recordTokenUsage,
23
+ } from "../../telemetry/gen-ai";
20
24
  import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
21
25
  import { resolveRequestId } from "../../utils/headers";
22
26
  import { prepareForwardHeaders } from "../../utils/request";
@@ -53,8 +57,8 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
53
57
 
54
58
  const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
55
59
  if (!parsed.success) {
56
- // FUTURE: add body shape to error message
57
- throw new GatewayError(z.prettifyError(parsed.error), 400);
60
+ // FUTURE: consider adding body shape to metadata
61
+ throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
58
62
  }
59
63
  ctx.body = parsed.data;
60
64
  addSpanEvent("hebo.request.parsed");
@@ -127,7 +131,8 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
127
131
  addSpanEvent("hebo.hooks.after.completed");
128
132
  }
129
133
 
130
- recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
134
+ recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
135
+ recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
131
136
  return ctx.result;
132
137
  };
133
138
 
@@ -4,11 +4,12 @@ export class GatewayError extends Error {
4
4
  readonly status: number;
5
5
  readonly code: string;
6
6
 
7
- constructor(error: string | Error, status: number, code?: string, cause?: unknown) {
8
- const msg = typeof error === "string" ? error : error.message;
9
- super(msg);
7
+ constructor(error: unknown, status: number, code?: string, cause?: unknown) {
8
+ const isError = error instanceof Error;
9
+ super(isError ? error.message : String(error));
10
+ this.cause = cause ?? (isError ? error : undefined);
11
+
10
12
  this.status = status;
11
13
  this.code = code ?? STATUS_CODE(status);
12
- this.cause = cause ?? (typeof error === "string" ? undefined : error);
13
14
  }
14
15
  }
@@ -25,7 +25,8 @@ export class OpenAIError {
25
25
  const mapType = (status: number) => (status < 500 ? "invalid_request_error" : "server_error");
26
26
 
27
27
  const maybeMaskMessage = (meta: ReturnType<typeof getErrorMeta>, requestId?: string) => {
28
- if (!(isProduction() && (meta.status >= 500 || meta.code.includes("UPSTREAM")))) {
28
+ // FUTURE: consider masking all upstream errors, also 4xx
29
+ if (!(isProduction() && meta.status >= 500)) {
29
30
  return meta.message;
30
31
  }
31
32
  // FUTURE: always attach requestId to errors (masked and unmasked)
@@ -11,6 +11,7 @@ export const STATUS_CODES = {
11
11
  409: "CONFLICT",
12
12
  422: "UNPROCESSABLE_ENTITY",
13
13
  429: "TOO_MANY_REQUESTS",
14
+ 499: "CLIENT_CLOSED_REQUEST",
14
15
  500: "INTERNAL_SERVER_ERROR",
15
16
  502: "BAD_GATEWAY",
16
17
  503: "SERVICE_UNAVAILABLE",
package/src/lifecycle.ts CHANGED
@@ -6,11 +6,13 @@ import type {
6
6
  } from "./types";
7
7
 
8
8
  import { parseConfig } from "./config";
9
+ import { GatewayError } from "./errors/gateway";
9
10
  import { toOpenAIErrorResponse } from "./errors/openai";
10
11
  import { logger } from "./logger";
11
12
  import { getBaggageAttributes } from "./telemetry/baggage";
12
13
  import { initFetch } from "./telemetry/fetch";
13
14
  import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
15
+ import { recordV8jsMemory } from "./telemetry/memory";
14
16
  import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
15
17
  import { wrapStream } from "./telemetry/stream";
16
18
  import { resolveRequestId } from "./utils/headers";
@@ -23,7 +25,7 @@ export const winterCgHandler = (
23
25
  ) => {
24
26
  const parsedConfig = parseConfig(config);
25
27
 
26
- if (parsedConfig.telemetry!.enabled) {
28
+ if (parsedConfig.telemetry?.enabled) {
27
29
  setSpanTracer(parsedConfig.telemetry?.tracer);
28
30
  setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
29
31
  initFetch(parsedConfig.telemetry?.signals?.hebo);
@@ -58,18 +60,22 @@ export const winterCgHandler = (
58
60
  );
59
61
  }
60
62
 
61
- const realStatus = status === 200 ? (ctx.response?.status ?? status) : status;
63
+ let realStatus = status;
64
+ if (ctx.request.signal.aborted) realStatus = 499;
65
+ else if (status === 200 && ctx.response?.status) realStatus = ctx.response.status;
66
+
62
67
  if (realStatus !== 200) {
63
- // FUTURE: in-stream errors are redacted in prod
64
68
  (realStatus >= 500 ? logger.error : logger.warn)({
65
69
  requestId: resolveRequestId(ctx.request),
66
- err: reason,
70
+ err: reason ?? ctx.request.signal.reason,
67
71
  });
68
72
 
69
73
  if (realStatus >= 500) span.recordError(reason);
70
74
  }
71
75
  span.setAttributes({ "http.response.status_code_effective": realStatus });
72
76
 
77
+ recordV8jsMemory(parsedConfig.telemetry?.signals?.hebo);
78
+
73
79
  span.finish();
74
80
  };
75
81
 
@@ -89,7 +95,7 @@ export const winterCgHandler = (
89
95
  ctx.result = (await span.runWithContext(() => run(ctx))) as typeof ctx.result;
90
96
 
91
97
  if (ctx.result instanceof ReadableStream) {
92
- ctx.result = wrapStream(ctx.result, { onDone: finalize }, ctx.request.signal);
98
+ ctx.result = wrapStream(ctx.result, { onDone: finalize });
93
99
  }
94
100
 
95
101
  ctx.response = toResponse(ctx.result!, prepareResponseInit(ctx.request));
@@ -108,7 +114,12 @@ export const winterCgHandler = (
108
114
  finalize(ctx.response.status);
109
115
  }
110
116
  } catch (error) {
111
- ctx.response = toOpenAIErrorResponse(error, prepareResponseInit(ctx.request));
117
+ ctx.response = toOpenAIErrorResponse(
118
+ ctx.request.signal.aborted
119
+ ? new GatewayError(error ?? ctx.request.signal.reason, 499)
120
+ : error,
121
+ prepareResponseInit(ctx.request),
122
+ );
112
123
  finalize(ctx.response.status, error);
113
124
  }
114
125
 
@@ -65,6 +65,18 @@ export const claudeSonnet45 = presetFor<CanonicalModelId, CatalogModel>()(
65
65
  } satisfies DeepPartial<CatalogModel>,
66
66
  );
67
67
 
68
+ export const claudeSonnet46 = presetFor<CanonicalModelId, CatalogModel>()(
69
+ "anthropic/claude-sonnet-4.6" as const,
70
+ {
71
+ ...CLAUDE_BASE,
72
+ ...CLAUDE_PDF_MODALITIES,
73
+ name: "Claude Sonnet 4.6",
74
+ capabilities: [...CLAUDE_BASE.capabilities, "reasoning"],
75
+ created: "2026-02-17",
76
+ knowledge: "2025-08",
77
+ } satisfies DeepPartial<CatalogModel>,
78
+ );
79
+
68
80
  export const claudeSonnet4 = presetFor<CanonicalModelId, CatalogModel>()(
69
81
  "anthropic/claude-sonnet-4" as const,
70
82
  {
@@ -149,7 +161,7 @@ export const claudeOpus4 = presetFor<CanonicalModelId, CatalogModel>()(
149
161
  );
150
162
 
151
163
  const claudeAtomic = {
152
- "v4.6": [claudeOpus46],
164
+ "v4.6": [claudeSonnet46, claudeOpus46],
153
165
  "v4.5": [claudeHaiku45, claudeSonnet45, claudeOpus45],
154
166
  "v4.1": [claudeOpus41],
155
167
  v4: [claudeSonnet4, claudeOpus4],
@@ -157,7 +169,7 @@ const claudeAtomic = {
157
169
  "v3.5": [claudeSonnet35, claudeHaiku35],
158
170
  v3: [claudeHaiku3],
159
171
  haiku: [claudeHaiku45, claudeHaiku35, claudeHaiku3],
160
- sonnet: [claudeSonnet45, claudeSonnet4, claudeSonnet37, claudeSonnet35],
172
+ sonnet: [claudeSonnet46, claudeSonnet45, claudeSonnet4, claudeSonnet37, claudeSonnet35],
161
173
  opus: [claudeOpus46, claudeOpus45, claudeOpus41, claudeOpus4],
162
174
  } as const;
163
175
 
@@ -3,6 +3,7 @@ import type { ProviderId } from "../providers/types";
3
3
  export const CANONICAL_MODEL_IDS = [
4
4
  // Anthropic
5
5
  "anthropic/claude-opus-4.6",
6
+ "anthropic/claude-sonnet-4.6",
6
7
  "anthropic/claude-haiku-4.5",
7
8
  "anthropic/claude-sonnet-4.5",
8
9
  "anthropic/claude-opus-4.5",
@@ -13,6 +13,7 @@ import { withCanonicalIds } from "../registry";
13
13
  const MAPPING = {
14
14
  // Require Inference Profiles and can't be resolved from standard name mapping
15
15
  "anthropic/claude-haiku-4.5": "{ip}anthropic.claude-haiku-4-5-20251001-v1:0",
16
+ "anthropic/claude-sonnet-4.6": "{ip}anthropic.claude-sonnet-4-6",
16
17
  "anthropic/claude-sonnet-4.5": "{ip}anthropic.claude-sonnet-4-5-20250929-v1:0",
17
18
  "anthropic/claude-opus-4.6": "{ip}anthropic.claude-opus-4-6-v1",
18
19
  "anthropic/claude-opus-4.5": "{ip}anthropic.claude-opus-4-5-20251101-v1:0",
@@ -2,7 +2,7 @@ import { metrics, type Attributes } from "@opentelemetry/api";
2
2
 
3
3
  import type { TelemetrySignalLevel } from "../types";
4
4
 
5
- const meter = metrics.getMeter("@hebo-ai/gateway");
5
+ const meter = metrics.getMeter("@hebo/gateway");
6
6
 
7
7
  const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.duration", {
8
8
  description: "End-to-end gateway request duration",
@@ -14,6 +14,16 @@ const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.du
14
14
  },
15
15
  });
16
16
 
17
+ const timePerOutputTokenHistogram = meter.createHistogram("gen_ai.server.time_per_output_token", {
18
+ description: "End-to-end gateway request duration per output token",
19
+ unit: "s",
20
+ advice: {
21
+ explicitBucketBoundaries: [
22
+ 0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5,
23
+ ],
24
+ },
25
+ });
26
+
17
27
  const tokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
18
28
  description: "Token usage reported by upstream model responses",
19
29
  unit: "{token}",
@@ -27,13 +37,31 @@ const tokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
27
37
 
28
38
  // FUTURE: record unsuccessful calls
29
39
  export const recordRequestDuration = (
30
- duration: number,
40
+ start: number,
31
41
  attrs: Attributes,
32
42
  signalLevel?: TelemetrySignalLevel,
33
43
  ) => {
34
44
  if (!signalLevel || signalLevel === "off") return;
35
45
 
36
- requestDurationHistogram.record(duration / 1000, attrs);
46
+ requestDurationHistogram.record((performance.now() - start) / 1000, attrs);
47
+ };
48
+
49
+ // FUTURE: record unsuccessful calls
50
+ export const recordTimePerOutputToken = (
51
+ start: number,
52
+ tokenAttrs: Attributes,
53
+ metricAttrs: Attributes,
54
+ signalLevel?: TelemetrySignalLevel,
55
+ ) => {
56
+ if (!signalLevel || (signalLevel !== "recommended" && signalLevel !== "full")) return;
57
+
58
+ const outputTokens = tokenAttrs["gen_ai.usage.output_tokens"];
59
+ if (typeof outputTokens !== "number" || outputTokens <= 0) return;
60
+
61
+ timePerOutputTokenHistogram.record(
62
+ (performance.now() - start) / 1000 / outputTokens,
63
+ metricAttrs,
64
+ );
37
65
  };
38
66
 
39
67
  // FUTURE: record unsuccessful calls
@@ -0,0 +1,36 @@
1
+ import { metrics } from "@opentelemetry/api";
2
+
3
+ import type { TelemetrySignalLevel } from "../types";
4
+
5
+ const meter = metrics.getMeter("@hebo/gateway");
6
+ const defaultHeapSpaceAttrs = { "v8js.heap.space.name": "total" } as const;
7
+
8
+ const heapUsedCounter = meter.createUpDownCounter("v8js.memory.heap.used", {
9
+ description: "Used bytes in the V8 heap",
10
+ unit: "By",
11
+ });
12
+
13
+ const heapSpacePhysicalSizeCounter = meter.createUpDownCounter(
14
+ "v8js.memory.heap.space.physical_size",
15
+ {
16
+ description: "Physical bytes allocated for the V8 heap space",
17
+ unit: "By",
18
+ },
19
+ );
20
+
21
+ const isEnabled = (level?: TelemetrySignalLevel) => level === "recommended" || level === "full";
22
+
23
+ export const recordV8jsMemory = (level?: TelemetrySignalLevel) => {
24
+ if (!isEnabled(level)) return;
25
+
26
+ let usage;
27
+ try {
28
+ usage = globalThis.process?.memoryUsage?.();
29
+ } catch {
30
+ return;
31
+ }
32
+ if (!usage) return;
33
+
34
+ heapUsedCounter.add(usage.heapUsed, defaultHeapSpaceAttrs);
35
+ heapSpacePhysicalSizeCounter.add(usage.rss, defaultHeapSpaceAttrs);
36
+ };
@@ -4,7 +4,7 @@ import { INVALID_SPAN_CONTEXT, SpanKind, SpanStatusCode, context, trace } from "
4
4
 
5
5
  import type { TelemetrySignalLevel } from "../types";
6
6
 
7
- const DEFAULT_TRACER_NAME = "@hebo-ai/gateway";
7
+ const DEFAULT_TRACER_NAME = "@hebo/gateway";
8
8
 
9
9
  let spanTracer: Tracer | undefined;
10
10
  let spanEventsEnabled = false;