@hebo-ai/gateway 0.11.1 → 0.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -160,6 +160,7 @@ export const getChatResponseAttributes = (completions, signalLevel) => {
160
160
  "gen_ai.usage.total_tokens": completions.usage?.total_tokens,
161
161
  "gen_ai.usage.input_tokens": completions.usage?.prompt_tokens,
162
162
  "gen_ai.usage.cache_read.input_tokens": completions.usage?.prompt_tokens_details?.cached_tokens,
163
+ "gen_ai.usage.cache_creation.input_tokens": completions.usage?.prompt_tokens_details?.cache_write_tokens,
163
164
  "gen_ai.usage.output_tokens": completions.usage?.completion_tokens,
164
165
  "gen_ai.usage.reasoning.output_tokens": completions.usage?.completion_tokens_details?.reasoning_tokens,
165
166
  });
@@ -6,7 +6,7 @@ export declare function createParamsMapper(mappers: ((v: unknown) => unknown)[])
6
6
  * Normalizes an object (row) by applying a chain of atomic mappers.
7
7
  * Mappers are expected to mutate the object for performance and to avoid spreads.
8
8
  */
9
- export declare function createRowMapper<T>(mappers: ((row: Record<string, unknown>) => Record<string, unknown>)[]): (row: Record<string, unknown>) => T;
9
+ export declare function createRowMapper<T>(mappers: ((row: T) => T)[]): (row: T) => T;
10
10
  /**
11
11
  * Atomic mappers for input parameters.
12
12
  */
@@ -541,17 +541,20 @@ export class MessagesTransformStream extends TransformStream {
541
541
  }
542
542
  case "tool-input-start": {
543
543
  currentToolCallId = part.id;
544
+ const contentBlock = {
545
+ type: "tool_use",
546
+ id: part.id,
547
+ name: normalizeToolName(part.toolName),
548
+ input: {},
549
+ };
550
+ if (part.providerMetadata)
551
+ contentBlock.extra_content = part.providerMetadata;
544
552
  controller.enqueue({
545
553
  event: "content_block_start",
546
554
  data: {
547
555
  type: "content_block_start",
548
556
  index: blockIndex,
549
- content_block: {
550
- type: "tool_use",
551
- id: part.id,
552
- name: normalizeToolName(part.toolName),
553
- input: {},
554
- },
557
+ content_block: contentBlock,
555
558
  },
556
559
  });
557
560
  break;
@@ -622,14 +625,12 @@ export class MessagesTransformStream extends TransformStream {
622
625
  }
623
626
  case "finish": {
624
627
  const stopReason = mapStopReason(part.finishReason);
625
- const totalOutputTokens = part.totalUsage?.outputTokens ?? 0;
626
- const totalInputTokens = part.totalUsage?.inputTokens ?? 0;
627
628
  controller.enqueue({
628
629
  event: "message_delta",
629
630
  data: {
630
631
  type: "message_delta",
631
632
  delta: { stop_reason: stopReason, stop_sequence: null },
632
- usage: { output_tokens: totalOutputTokens, input_tokens: totalInputTokens },
633
+ usage: mapUsage(part.totalUsage),
633
634
  },
634
635
  });
635
636
  controller.enqueue({
@@ -647,10 +647,7 @@ export type MessageDeltaEvent = SseFrame<{
647
647
  stop_reason: MessagesStopReason;
648
648
  stop_sequence: string | null;
649
649
  };
650
- usage: {
651
- output_tokens: number;
652
- input_tokens?: number;
653
- };
650
+ usage: MessagesUsage;
654
651
  }, "message_delta">;
655
652
  export type MessageStopEvent = SseFrame<{
656
653
  type: "message_stop";
@@ -6,10 +6,19 @@ const normalizeApiCallError = (error) => {
6
6
  const statusText = `UPSTREAM_${STATUS_TEXT(status)}`;
7
7
  return new GatewayError(error, status, statusText, undefined, error.responseHeaders ?? undefined);
8
8
  };
9
+ // `AbortError` / `TimeoutError` (raised by the AI SDK's internal `timeout` controller,
10
+ // `AbortSignal.timeout`, or an aborted upstream `fetch`) reach us as plain DOMExceptions
11
+ // that none of the AI SDK error classes match. Treat them as upstream gateway timeouts
12
+ // so they surface as 504 with retry headers rather than defaulting to 500/502.
13
+ // Inbound client disconnects are caught earlier in `lifecycle.ts` and overridden to 499.
14
+ const isUpstreamAbortError = (error) => error instanceof Error && (error.name === "AbortError" || error.name === "TimeoutError");
9
15
  export const normalizeAiSdkError = (error) => {
10
16
  if (APICallError.isInstance(error)) {
11
17
  return normalizeApiCallError(error);
12
18
  }
19
+ if (isUpstreamAbortError(error)) {
20
+ return new GatewayError(error, 504, `UPSTREAM_${STATUS_TEXT(504)}`);
21
+ }
13
22
  if (RetryError.isInstance(error)) {
14
23
  if (APICallError.isInstance(error.lastError)) {
15
24
  return normalizeApiCallError(error.lastError);
package/dist/lifecycle.js CHANGED
@@ -50,11 +50,12 @@ export const winterCgHandler = (run, config) => {
50
50
  else if (status === 200 && ctx.response?.status)
51
51
  realStatus = ctx.response.status;
52
52
  if (realStatus !== 200) {
53
+ const err = reason ?? ctx.request.signal.reason;
53
54
  logger[realStatus >= 500 ? "error" : "warn"]({
54
55
  requestId: ctx.requestId,
55
- err: reason ?? ctx.request.signal.reason,
56
+ err,
56
57
  });
57
- span.recordError(reason, true);
58
+ span.recordError(err, true);
58
59
  }
59
60
  span.setAttributes({ "http.response.status_code_effective": realStatus });
60
61
  if (ctx.operation === "chat" ||
@@ -249,6 +249,30 @@ export declare const gpt54Pro: import("../../utils").Preset<"openai/gpt-5.4-pro"
249
249
  capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
250
250
  providers: readonly ["openai", "azure"];
251
251
  }>;
252
+ export declare const gpt55: import("../../utils").Preset<"openai/gpt-5.5", CatalogModel, {
253
+ name: string;
254
+ created: string;
255
+ knowledge: string;
256
+ context: number;
257
+ modalities: {
258
+ input: readonly ["text", "image"];
259
+ output: readonly ["text"];
260
+ };
261
+ capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
262
+ providers: readonly ["openai", "azure"];
263
+ }>;
264
+ export declare const gpt55Pro: import("../../utils").Preset<"openai/gpt-5.5-pro", CatalogModel, {
265
+ name: string;
266
+ created: string;
267
+ knowledge: string;
268
+ context: number;
269
+ modalities: {
270
+ input: readonly ["text", "image"];
271
+ output: readonly ["text"];
272
+ };
273
+ capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
274
+ providers: readonly ["openai", "azure"];
275
+ }>;
252
276
  export declare const textEmbedding3Small: import("../../utils").Preset<"openai/text-embedding-3-small", CatalogModel, {
253
277
  name: string;
254
278
  created: string;
@@ -388,7 +412,18 @@ export declare const gptOss: {
388
412
  }>];
389
413
  };
390
414
  export declare const gpt: {
391
- readonly latest: readonly [import("../../utils").Preset<"openai/gpt-5.4", CatalogModel, {
415
+ readonly latest: readonly [import("../../utils").Preset<"openai/gpt-5.5", CatalogModel, {
416
+ name: string;
417
+ created: string;
418
+ knowledge: string;
419
+ context: number;
420
+ modalities: {
421
+ input: readonly ["text", "image"];
422
+ output: readonly ["text"];
423
+ };
424
+ capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
425
+ providers: readonly ["openai", "azure"];
426
+ }>, import("../../utils").Preset<"openai/gpt-5.5-pro", CatalogModel, {
392
427
  name: string;
393
428
  created: string;
394
429
  knowledge: string;
@@ -651,6 +686,28 @@ export declare const gpt: {
651
686
  };
652
687
  capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
653
688
  providers: readonly ["openai", "azure"];
689
+ }> | import("../../utils").Preset<"openai/gpt-5.5", CatalogModel, {
690
+ name: string;
691
+ created: string;
692
+ knowledge: string;
693
+ context: number;
694
+ modalities: {
695
+ input: readonly ["text", "image"];
696
+ output: readonly ["text"];
697
+ };
698
+ capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
699
+ providers: readonly ["openai", "azure"];
700
+ }> | import("../../utils").Preset<"openai/gpt-5.5-pro", CatalogModel, {
701
+ name: string;
702
+ created: string;
703
+ knowledge: string;
704
+ context: number;
705
+ modalities: {
706
+ input: readonly ["text", "image"];
707
+ output: readonly ["text"];
708
+ };
709
+ capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
710
+ providers: readonly ["openai", "azure"];
654
711
  }>)[];
655
712
  readonly "v5.x": readonly [import("../../utils").Preset<"openai/gpt-5", CatalogModel, {
656
713
  name: string;
@@ -870,6 +927,28 @@ export declare const gpt: {
870
927
  };
871
928
  capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
872
929
  providers: readonly ["openai", "azure"];
930
+ }>, import("../../utils").Preset<"openai/gpt-5.5", CatalogModel, {
931
+ name: string;
932
+ created: string;
933
+ knowledge: string;
934
+ context: number;
935
+ modalities: {
936
+ input: readonly ["text", "image"];
937
+ output: readonly ["text"];
938
+ };
939
+ capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
940
+ providers: readonly ["openai", "azure"];
941
+ }>, import("../../utils").Preset<"openai/gpt-5.5-pro", CatalogModel, {
942
+ name: string;
943
+ created: string;
944
+ knowledge: string;
945
+ context: number;
946
+ modalities: {
947
+ input: readonly ["text", "image"];
948
+ output: readonly ["text"];
949
+ };
950
+ capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
951
+ providers: readonly ["openai", "azure"];
873
952
  }>];
874
953
  readonly v5: readonly [import("../../utils").Preset<"openai/gpt-5", CatalogModel, {
875
954
  name: string;
@@ -1094,6 +1173,29 @@ export declare const gpt: {
1094
1173
  capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
1095
1174
  providers: readonly ["openai", "azure"];
1096
1175
  }>];
1176
+ readonly "v5.5": readonly [import("../../utils").Preset<"openai/gpt-5.5", CatalogModel, {
1177
+ name: string;
1178
+ created: string;
1179
+ knowledge: string;
1180
+ context: number;
1181
+ modalities: {
1182
+ input: readonly ["text", "image"];
1183
+ output: readonly ["text"];
1184
+ };
1185
+ capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
1186
+ providers: readonly ["openai", "azure"];
1187
+ }>, import("../../utils").Preset<"openai/gpt-5.5-pro", CatalogModel, {
1188
+ name: string;
1189
+ created: string;
1190
+ knowledge: string;
1191
+ context: number;
1192
+ modalities: {
1193
+ input: readonly ["text", "image"];
1194
+ output: readonly ["text"];
1195
+ };
1196
+ capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
1197
+ providers: readonly ["openai", "azure"];
1198
+ }>];
1097
1199
  readonly codex: readonly [import("../../utils").Preset<"openai/gpt-5-codex", CatalogModel, {
1098
1200
  name: string;
1099
1201
  created: string;
@@ -1237,6 +1339,17 @@ export declare const gpt: {
1237
1339
  };
1238
1340
  capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
1239
1341
  providers: readonly ["openai", "azure"];
1342
+ }>, import("../../utils").Preset<"openai/gpt-5.5-pro", CatalogModel, {
1343
+ name: string;
1344
+ created: string;
1345
+ knowledge: string;
1346
+ context: number;
1347
+ modalities: {
1348
+ input: readonly ["text", "image"];
1349
+ output: readonly ["text"];
1350
+ };
1351
+ capabilities: readonly ["attachments", "reasoning", "tool_call", "structured_output", "temperature"];
1352
+ providers: readonly ["openai", "azure"];
1240
1353
  }>];
1241
1354
  };
1242
1355
  export declare const textEmbeddings: {
@@ -194,6 +194,20 @@ export const gpt54Pro = presetFor()("openai/gpt-5.4-pro", {
194
194
  knowledge: "2025-08",
195
195
  context: 1050000,
196
196
  });
197
+ export const gpt55 = presetFor()("openai/gpt-5.5", {
198
+ ...GPT_BASE,
199
+ name: "GPT-5.5",
200
+ created: "2026-04-22",
201
+ knowledge: "2025-08",
202
+ context: 1050000,
203
+ });
204
+ export const gpt55Pro = presetFor()("openai/gpt-5.5-pro", {
205
+ ...GPT_PRO_BASE,
206
+ name: "GPT-5.5 Pro",
207
+ created: "2026-04-24",
208
+ knowledge: "2025-12",
209
+ context: 1050000,
210
+ });
197
211
  export const textEmbedding3Small = presetFor()("openai/text-embedding-3-small", {
198
212
  ...EMBEDDINGS_BASE,
199
213
  name: "Text Embedding 3 Small",
@@ -230,6 +244,7 @@ const gptAtomic = {
230
244
  "v5.2": [gpt52, gpt52Chat, gpt52Pro, gpt52Codex],
231
245
  "v5.3": [gpt53Codex, gpt53CodexSpark, gpt53Chat],
232
246
  "v5.4": [gpt54, gpt54Mini, gpt54Nano, gpt54Pro],
247
+ "v5.5": [gpt55, gpt55Pro],
233
248
  codex: [
234
249
  gpt5Codex,
235
250
  gpt51Codex,
@@ -240,7 +255,7 @@ const gptAtomic = {
240
255
  gpt53CodexSpark,
241
256
  ],
242
257
  chat: [gpt51Chat, gpt52Chat, gpt53Chat],
243
- pro: [gpt5Pro, gpt52Pro, gpt54Pro],
258
+ pro: [gpt5Pro, gpt52Pro, gpt54Pro, gpt55Pro],
244
259
  };
245
260
  const gptGroups = {
246
261
  "v5.x": [
@@ -249,6 +264,7 @@ const gptGroups = {
249
264
  ...gptAtomic["v5.2"],
250
265
  ...gptAtomic["v5.3"],
251
266
  ...gptAtomic["v5.4"],
267
+ ...gptAtomic["v5.5"],
252
268
  ],
253
269
  };
254
270
  const textEmbeddingsAtomic = {
@@ -266,7 +282,8 @@ export const gptOss = {
266
282
  export const gpt = {
267
283
  ...gptAtomic,
268
284
  ...gptGroups,
269
- latest: [gpt54, gpt54Mini, gpt54Nano],
285
+ // 5.5 Mini/Nano not released yet; keep 5.4 small variants in `latest` until they ship.
286
+ latest: [gpt55, gpt55Pro, gpt54Mini, gpt54Nano],
270
287
  all: Object.values(gptAtomic).flat(),
271
288
  };
272
289
  export const textEmbeddings = {
@@ -1,5 +1,5 @@
1
1
  import type { ProviderId } from "../providers/types";
2
- export declare const CANONICAL_MODEL_IDS: readonly ["anthropic/claude-opus-4.7", "anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "anthropic/claude-haiku-4.5", "anthropic/claude-sonnet-4.5", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.1", "anthropic/claude-opus-4", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-3.7", "anthropic/claude-sonnet-3.5", "anthropic/claude-haiku-3.5", "anthropic/claude-haiku-3", "openai/gpt-oss-20b", "openai/gpt-oss-120b", "openai/gpt-5", "openai/gpt-5-pro", "openai/gpt-5.2", "openai/gpt-5.2-chat", "openai/gpt-5.2-pro", "openai/gpt-5.2-codex", "openai/gpt-5.3-codex", "openai/gpt-5.3-codex-spark", "openai/gpt-5.3-chat", "openai/gpt-5.4", "openai/gpt-5.4-mini", "openai/gpt-5.4-nano", "openai/gpt-5.4-pro", "openai/gpt-5-mini", "openai/gpt-5-nano", "openai/gpt-5-codex", "openai/gpt-5.1-codex", "openai/gpt-5.1-codex-max", "openai/gpt-5.1-codex-mini", "openai/gpt-5.1-chat", "openai/gpt-5.1", "openai/text-embedding-3-small", "openai/text-embedding-3-large", "amazon/nova-micro", "amazon/nova-lite", "amazon/nova-pro", "amazon/nova-premier", "amazon/nova-2-lite", "amazon/nova-2-multimodal-embeddings", "google/gemini-2.5-flash-lite", "google/gemini-2.5-flash", "google/gemini-2.5-pro", "google/gemini-3-flash-preview", "google/gemini-3.1-flash-lite-preview", "google/gemini-3.1-pro-preview", "google/gemini-embedding-2", "google/embedding-001", "google/gemma-3-1b", "google/gemma-3-4b", "google/gemma-3-12b", "google/gemma-3-27b", "google/gemma-4-e2b", "google/gemma-4-e4b", "google/gemma-4-26b-a4b", "google/gemma-4-31b", "meta/llama-3.1-8b", "meta/llama-3.1-70b", "meta/llama-3.1-405b", "meta/llama-3.2-1b", "meta/llama-3.2-3b", "meta/llama-3.2-11b", "meta/llama-3.2-90b", "meta/llama-3.3-70b", "meta/llama-4-scout", "meta/llama-4-maverick", "cohere/embed-v4.0", "cohere/embed-english-v3.0", "cohere/embed-english-light-v3.0", "cohere/embed-multilingual-v3.0", "cohere/embed-multilingual-light-v3.0", "cohere/command-a", "cohere/command-r7b", "cohere/command-a-translate", "cohere/command-a-reasoning", "cohere/command-a-vision", "cohere/command-r", "cohere/command-r-plus", "minimax/m2.5", "minimax/m2.7", "moonshot/kimi-k2.5", "moonshot/kimi-k2.6", "xai/grok-4.1-fast", "xai/grok-4.1-fast-reasoning", "xai/grok-4.2", "xai/grok-4.2-reasoning", "xai/grok-4.2-multi-agent", "deepseek/deepseek-v3.2", "voyage/voyage-2-code", "voyage/voyage-2-law", "voyage/voyage-2-finance", "voyage/voyage-3-code", "voyage/voyage-3-large", "voyage/voyage-3.5-lite", "voyage/voyage-3.5", "voyage/voyage-4-lite", "voyage/voyage-4", "voyage/voyage-4-large", "alibaba/qwen3-235b", "alibaba/qwen3-32b", "alibaba/qwen3.5-plus", "alibaba/qwen3.5-flash", "alibaba/qwen3.5-397b", "alibaba/qwen3.5-122b", "alibaba/qwen3.5-35b", "alibaba/qwen3.5-27b", "alibaba/qwen3.5-9b", "alibaba/qwen3.5-4b", "alibaba/qwen3.5-2b", "alibaba/qwen3.5-0.8b", "alibaba/qwen3.6-plus", "alibaba/qwen3.6-flash", "alibaba/qwen3.6-27b", "alibaba/qwen3.6-max-preview", "alibaba/qwen3-coder-next", "alibaba/qwen3-vl-235b", "alibaba/qwen3-embedding-0.6b", "alibaba/qwen3-embedding-4b", "alibaba/qwen3-embedding-8b", "zhipu/glm-5", "zhipu/glm-5-turbo", "zhipu/glm-5.1"];
2
+ export declare const CANONICAL_MODEL_IDS: readonly ["anthropic/claude-opus-4.7", "anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "anthropic/claude-haiku-4.5", "anthropic/claude-sonnet-4.5", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.1", "anthropic/claude-opus-4", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-3.7", "anthropic/claude-sonnet-3.5", "anthropic/claude-haiku-3.5", "anthropic/claude-haiku-3", "openai/gpt-oss-20b", "openai/gpt-oss-120b", "openai/gpt-5", "openai/gpt-5-pro", "openai/gpt-5.2", "openai/gpt-5.2-chat", "openai/gpt-5.2-pro", "openai/gpt-5.2-codex", "openai/gpt-5.3-codex", "openai/gpt-5.3-codex-spark", "openai/gpt-5.3-chat", "openai/gpt-5.4", "openai/gpt-5.4-mini", "openai/gpt-5.4-nano", "openai/gpt-5.4-pro", "openai/gpt-5.5", "openai/gpt-5.5-pro", "openai/gpt-5-mini", "openai/gpt-5-nano", "openai/gpt-5-codex", "openai/gpt-5.1-codex", "openai/gpt-5.1-codex-max", "openai/gpt-5.1-codex-mini", "openai/gpt-5.1-chat", "openai/gpt-5.1", "openai/text-embedding-3-small", "openai/text-embedding-3-large", "amazon/nova-micro", "amazon/nova-lite", "amazon/nova-pro", "amazon/nova-premier", "amazon/nova-2-lite", "amazon/nova-2-multimodal-embeddings", "google/gemini-2.5-flash-lite", "google/gemini-2.5-flash", "google/gemini-2.5-pro", "google/gemini-3-flash-preview", "google/gemini-3.1-flash-lite-preview", "google/gemini-3.1-pro-preview", "google/gemini-embedding-2", "google/embedding-001", "google/gemma-3-1b", "google/gemma-3-4b", "google/gemma-3-12b", "google/gemma-3-27b", "google/gemma-4-e2b", "google/gemma-4-e4b", "google/gemma-4-26b-a4b", "google/gemma-4-31b", "meta/llama-3.1-8b", "meta/llama-3.1-70b", "meta/llama-3.1-405b", "meta/llama-3.2-1b", "meta/llama-3.2-3b", "meta/llama-3.2-11b", "meta/llama-3.2-90b", "meta/llama-3.3-70b", "meta/llama-4-scout", "meta/llama-4-maverick", "cohere/embed-v4.0", "cohere/embed-english-v3.0", "cohere/embed-english-light-v3.0", "cohere/embed-multilingual-v3.0", "cohere/embed-multilingual-light-v3.0", "cohere/command-a", "cohere/command-r7b", "cohere/command-a-translate", "cohere/command-a-reasoning", "cohere/command-a-vision", "cohere/command-r", "cohere/command-r-plus", "minimax/m2.5", "minimax/m2.7", "moonshot/kimi-k2.5", "moonshot/kimi-k2.6", "xai/grok-4.1-fast", "xai/grok-4.1-fast-reasoning", "xai/grok-4.2", "xai/grok-4.2-reasoning", "xai/grok-4.2-multi-agent", "deepseek/deepseek-v3.2", "voyage/voyage-2-code", "voyage/voyage-2-law", "voyage/voyage-2-finance", "voyage/voyage-3-code", "voyage/voyage-3-large", "voyage/voyage-3.5-lite", "voyage/voyage-3.5", "voyage/voyage-4-lite", "voyage/voyage-4", "voyage/voyage-4-large", "alibaba/qwen3-235b", "alibaba/qwen3-32b", "alibaba/qwen3.5-plus", "alibaba/qwen3.5-flash", "alibaba/qwen3.5-397b", "alibaba/qwen3.5-122b", "alibaba/qwen3.5-35b", "alibaba/qwen3.5-27b", "alibaba/qwen3.5-9b", "alibaba/qwen3.5-4b", "alibaba/qwen3.5-2b", "alibaba/qwen3.5-0.8b", "alibaba/qwen3.6-plus", "alibaba/qwen3.6-flash", "alibaba/qwen3.6-27b", "alibaba/qwen3.6-max-preview", "alibaba/qwen3-coder-next", "alibaba/qwen3-vl-235b", "alibaba/qwen3-embedding-0.6b", "alibaba/qwen3-embedding-4b", "alibaba/qwen3-embedding-8b", "zhipu/glm-5", "zhipu/glm-5-turbo", "zhipu/glm-5.1"];
3
3
  export type CanonicalModelId = (typeof CANONICAL_MODEL_IDS)[number];
4
4
  export type ModelId = CanonicalModelId | (string & {});
5
5
  export type CatalogModel = {
@@ -29,6 +29,8 @@ export const CANONICAL_MODEL_IDS = [
29
29
  "openai/gpt-5.4-mini",
30
30
  "openai/gpt-5.4-nano",
31
31
  "openai/gpt-5.4-pro",
32
+ "openai/gpt-5.5",
33
+ "openai/gpt-5.5-pro",
32
34
  "openai/gpt-5-mini",
33
35
  "openai/gpt-5-nano",
34
36
  "openai/gpt-5-codex",
@@ -1,5 +1,6 @@
1
1
  import { metrics } from "@opentelemetry/api";
2
2
  import { STATUS_TEXT } from "../errors/utils";
3
+ import { logger } from "../logger";
3
4
  const getMeter = () => metrics.getMeter("@hebo/gateway");
4
5
  let requestDurationHistogram;
5
6
  let timePerOutputTokenHistogram;
@@ -93,20 +94,59 @@ export const recordTimePerOutputToken = (start, ttft, tokenAttrs, metricAttrs, s
93
94
  return;
94
95
  getTimePerOutputTokenHistogram().record((performance.now() - start - ttft) / 1000 / (outputTokens - 1), metricAttrs);
95
96
  };
97
+ // Partitioning follows OTel semconv PR #3624:
98
+ // https://github.com/open-telemetry/semantic-conventions/pull/3624
99
+ // When a cache or reasoning breakdown is reported, partitioned data points sum
100
+ // to the total and a bare {type} point MUST NOT be emitted alongside them.
96
101
  // FUTURE: record unsuccessful calls
97
102
  export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
98
- if (!signalLevel || (signalLevel !== "recommended" && signalLevel !== "full"))
103
+ if (signalLevel !== "recommended" && signalLevel !== "full")
99
104
  return;
100
- const record = (value, tokenType) => {
101
- if (typeof value !== "number")
102
- return;
103
- getTokenUsageHistogram().record(value, Object.assign({}, metricAttrs, { "gen_ai.token.type": tokenType }));
105
+ const histogram = getTokenUsageHistogram();
106
+ const emit = (value, extra) => {
107
+ if (value > 0)
108
+ histogram.record(value, { ...metricAttrs, ...extra });
104
109
  };
105
- record(tokenAttrs["gen_ai.usage.input_tokens"], "input");
106
- record(tokenAttrs["gen_ai.usage.output_tokens"], "output");
107
- // FUTURE: "cached" and "reasoning" token types are not yet in the OTel standard — monitor:
108
- // https://github.com/open-telemetry/semantic-conventions/issues/1959
109
- // https://github.com/open-telemetry/semantic-conventions/issues/3341
110
- record(tokenAttrs["gen_ai.usage.cache_read.input_tokens"], "cached");
111
- record(tokenAttrs["gen_ai.usage.reasoning.output_tokens"], "reasoning");
110
+ emitInputTokens(emit, tokenAttrs);
111
+ emitOutputTokens(emit, tokenAttrs);
112
+ };
113
+ const emitInputTokens = (emit, tokenAttrs) => {
114
+ const total = tokenAttrs["gen_ai.usage.input_tokens"];
115
+ if (total === undefined)
116
+ return;
117
+ const cacheRead = tokenAttrs["gen_ai.usage.cache_read.input_tokens"];
118
+ const cacheCreation = tokenAttrs["gen_ai.usage.cache_creation.input_tokens"];
119
+ if (cacheRead === undefined && cacheCreation === undefined) {
120
+ emit(total, { "gen_ai.token.type": "input" });
121
+ return;
122
+ }
123
+ const read = cacheRead ?? 0;
124
+ const creation = cacheCreation ?? 0;
125
+ let uncached = total - read - creation;
126
+ if (uncached < 0) {
127
+ logger.warn({ inputTokens: total, cacheRead: read, cacheCreation: creation }, "[telemetry] input token cache partitions exceed total; clamping uncached to 0");
128
+ uncached = 0;
129
+ }
130
+ emit(read, { "gen_ai.token.type": "input", "gen_ai.token.cache": "read" });
131
+ emit(creation, { "gen_ai.token.type": "input", "gen_ai.token.cache": "creation" });
132
+ emit(uncached, { "gen_ai.token.type": "input", "gen_ai.token.cache": "uncached" });
133
+ };
134
+ const emitOutputTokens = (emit, tokenAttrs) => {
135
+ const total = tokenAttrs["gen_ai.usage.output_tokens"];
136
+ if (total === undefined)
137
+ return;
138
+ const reasoning = tokenAttrs["gen_ai.usage.reasoning.output_tokens"];
139
+ if (reasoning === undefined) {
140
+ emit(total, { "gen_ai.token.type": "output" });
141
+ return;
142
+ }
143
+ let reasoned = reasoning;
144
+ let nonReasoning = total - reasoning;
145
+ if (nonReasoning < 0) {
146
+ logger.warn({ outputTokens: total, reasoningTokens: reasoning }, "[telemetry] reasoning tokens exceed output total; clamping non-reasoning to 0");
147
+ reasoned = total;
148
+ nonReasoning = 0;
149
+ }
150
+ emit(reasoned, { "gen_ai.token.type": "output", "gen_ai.token.reasoning": true });
151
+ emit(nonReasoning, { "gen_ai.token.type": "output", "gen_ai.token.reasoning": false });
112
152
  };
@@ -31,6 +31,7 @@ export function deepMerge(base, override) {
31
31
  }
32
32
  return out;
33
33
  }
34
+ // oxlint-disable-next-line no-unnecessary-type-parameters
34
35
  export function presetFor() {
35
36
  return function preset(id, base) {
36
37
  return (override) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hebo-ai/gateway",
3
- "version": "0.11.1",
3
+ "version": "0.11.3",
4
4
  "description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI /chat/completions, OpenResponses /responses & Anthropic /messages.",
5
5
  "keywords": [
6
6
  "ai",
@@ -252,57 +252,57 @@
252
252
  "fix": "bun lint:staged && bun format:staged"
253
253
  },
254
254
  "dependencies": {
255
- "@ai-sdk/provider": "^3.0.8",
256
- "ai": "^6.0.168",
255
+ "@ai-sdk/provider": "^3.0.9",
256
+ "ai": "^6.0.169",
257
257
  "lru-cache": "^11.3.5",
258
- "uuid": "^13.0.0",
258
+ "uuid": "^14.0.0",
259
259
  "zod": "^4.3.6"
260
260
  },
261
261
  "devDependencies": {
262
- "@ai-sdk/alibaba": "^1.0.17",
263
- "@ai-sdk/amazon-bedrock": "^4.0.96",
264
- "@ai-sdk/anthropic": "^3.0.71",
265
- "@ai-sdk/cohere": "^3.0.30",
266
- "@ai-sdk/deepinfra": "^2.0.45",
267
- "@ai-sdk/deepseek": "^2.0.29",
268
- "@ai-sdk/fireworks": "^2.0.46",
269
- "@ai-sdk/google-vertex": "^4.0.112",
270
- "@ai-sdk/groq": "^3.0.35",
271
- "@ai-sdk/moonshotai": "^2.0.16",
272
- "@ai-sdk/openai": "^3.0.53",
273
- "@ai-sdk/togetherai": "^2.0.45",
274
- "@ai-sdk/xai": "^3.0.83",
275
- "@anthropic-ai/sdk": "^0.88.0",
276
- "@aws-sdk/credential-providers": "^3.1035.0",
262
+ "@ai-sdk/alibaba": "^1.0.18",
263
+ "@ai-sdk/amazon-bedrock": "^4.0.97",
264
+ "@ai-sdk/anthropic": "^3.0.72",
265
+ "@ai-sdk/cohere": "^3.0.31",
266
+ "@ai-sdk/deepinfra": "^2.0.46",
267
+ "@ai-sdk/deepseek": "^2.0.30",
268
+ "@ai-sdk/fireworks": "^2.0.47",
269
+ "@ai-sdk/google-vertex": "^4.0.113",
270
+ "@ai-sdk/groq": "^3.0.36",
271
+ "@ai-sdk/moonshotai": "^2.0.17",
272
+ "@ai-sdk/openai": "^3.0.54",
273
+ "@ai-sdk/togetherai": "^2.0.46",
274
+ "@ai-sdk/xai": "^3.0.84",
275
+ "@anthropic-ai/sdk": "^0.91.1",
276
+ "@aws-sdk/credential-providers": "^3.1038.0",
277
277
  "@langfuse/otel": "^5.2.0",
278
- "@libsql/client": "^0.17.2",
278
+ "@libsql/client": "^0.17.3",
279
279
  "@mjackson/node-fetch-server": "^0.7.0",
280
280
  "@opentelemetry/api": "^1.9.1",
281
281
  "@opentelemetry/context-async-hooks": "^2.7.0",
282
282
  "@opentelemetry/sdk-trace-base": "^2.7.0",
283
- "@tanstack/react-router": "^1.168.23",
284
- "@tanstack/react-start": "^1.167.42",
283
+ "@tanstack/react-router": "^1.168.25",
284
+ "@tanstack/react-start": "^1.167.50",
285
285
  "@types/better-sqlite3": "^7.6.13",
286
- "@types/bun": "1.3.12",
286
+ "@types/bun": "1.3.13",
287
287
  "@types/pg": "^8.20.0",
288
288
  "@types/react": "^19.2.14",
289
289
  "@types/react-dom": "^19.2.3",
290
290
  "@types/uuid": "^11.0.0",
291
291
  "better-sqlite3": "^12.9.0",
292
292
  "elysia": "^1.4.28",
293
- "hono": "^4.12.14",
293
+ "hono": "^4.12.15",
294
294
  "lefthook": "^2.1.6",
295
- "mysql2": "^3.22.2",
295
+ "mysql2": "^3.22.3",
296
296
  "next": "^16.2.4",
297
- "openai": "^6.34.0",
298
- "oxfmt": "^0.44.0",
299
- "oxlint": "^1.61.0",
300
- "oxlint-tsgolint": "^0.20.0",
297
+ "openai": "^6.35.0",
298
+ "oxfmt": "^0.46.0",
299
+ "oxlint": "^1.62.0",
300
+ "oxlint-tsgolint": "^0.22.1",
301
301
  "pg": "^8.20.0",
302
302
  "pino": "^10.3.1",
303
303
  "postgres": "^3.4.9",
304
304
  "typescript": "^6.0.3",
305
- "vite": "^7.3.2",
305
+ "vite": "^8.0.10",
306
306
  "vite-tsconfig-paths": "^6.1.1",
307
307
  "voyage-ai-provider": "^3.0.0",
308
308
  "zhipu-ai-provider": "^0.3.0"
@@ -315,12 +315,12 @@
315
315
  "@ai-sdk/deepinfra": "^2.0.45",
316
316
  "@ai-sdk/deepseek": "^2.0.29",
317
317
  "@ai-sdk/fireworks": "^2.0.46",
318
- "@ai-sdk/google": "^3.0.64",
318
+ "@ai-sdk/google": "^3.0.65",
319
319
  "@ai-sdk/google-vertex": "^4.0.80",
320
320
  "@ai-sdk/groq": "^3.0.29",
321
321
  "@ai-sdk/moonshotai": "^2.0.16",
322
322
  "@ai-sdk/openai": "^3.0.41",
323
- "@ai-sdk/openai-compatible": "^2.0.41",
323
+ "@ai-sdk/openai-compatible": "^2.0.42",
324
324
  "@ai-sdk/togetherai": "^2.0.45",
325
325
  "@ai-sdk/xai": "^3.0.83",
326
326
  "@libsql/client": "^0.14.0",