@hebo-ai/gateway 0.11.2 → 0.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/endpoints/chat-completions/otel.js +1 -0
- package/dist/endpoints/messages/converters.js +1 -3
- package/dist/endpoints/messages/otel.js +0 -1
- package/dist/endpoints/messages/schema.d.ts +1 -4
- package/dist/errors/ai-sdk.js +9 -0
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/lifecycle.js +3 -2
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +1 -0
- package/dist/models/xai/middleware.js +6 -1
- package/dist/models/xai/presets.d.ts +36 -23
- package/dist/models/xai/presets.js +9 -1
- package/dist/providers/xai/canonical.js +1 -0
- package/dist/telemetry/gen-ai.js +54 -14
- package/dist/utils/headers.d.ts +1 -0
- package/dist/utils/headers.js +1 -1
- package/package.json +24 -24
package/README.md
CHANGED
|
@@ -251,7 +251,7 @@ Out-of-the-box model presets:
|
|
|
251
251
|
Voyage: `voyage` (`v2`, `v3`, `v3.5`, `v4`, `v2.x`, `v3.x`, `v4.x`, `latest`, `all`)
|
|
252
252
|
|
|
253
253
|
- **xAI** — `@hebo-ai/gateway/models/xai`
|
|
254
|
-
Grok: `grok` (`v4.1`, `v4.2`, `latest`, `all`)
|
|
254
|
+
Grok: `grok` (`v4.1`, `v4.2`, `v4.3`, `latest`, `all`)
|
|
255
255
|
|
|
256
256
|
- **Z.ai** — `@hebo-ai/gateway/models/zai`
|
|
257
257
|
GLM: `glm` (`v5`, `v5.1`, `v5.x`, `latest`, `all`)
|
|
@@ -160,6 +160,7 @@ export const getChatResponseAttributes = (completions, signalLevel) => {
|
|
|
160
160
|
"gen_ai.usage.total_tokens": completions.usage?.total_tokens,
|
|
161
161
|
"gen_ai.usage.input_tokens": completions.usage?.prompt_tokens,
|
|
162
162
|
"gen_ai.usage.cache_read.input_tokens": completions.usage?.prompt_tokens_details?.cached_tokens,
|
|
163
|
+
"gen_ai.usage.cache_creation.input_tokens": completions.usage?.prompt_tokens_details?.cache_write_tokens,
|
|
163
164
|
"gen_ai.usage.output_tokens": completions.usage?.completion_tokens,
|
|
164
165
|
"gen_ai.usage.reasoning.output_tokens": completions.usage?.completion_tokens_details?.reasoning_tokens,
|
|
165
166
|
});
|
|
@@ -625,14 +625,12 @@ export class MessagesTransformStream extends TransformStream {
|
|
|
625
625
|
}
|
|
626
626
|
case "finish": {
|
|
627
627
|
const stopReason = mapStopReason(part.finishReason);
|
|
628
|
-
const totalOutputTokens = part.totalUsage?.outputTokens ?? 0;
|
|
629
|
-
const totalInputTokens = part.totalUsage?.inputTokens ?? 0;
|
|
630
628
|
controller.enqueue({
|
|
631
629
|
event: "message_delta",
|
|
632
630
|
data: {
|
|
633
631
|
type: "message_delta",
|
|
634
632
|
delta: { stop_reason: stopReason, stop_sequence: null },
|
|
635
|
-
usage:
|
|
633
|
+
usage: mapUsage(part.totalUsage),
|
|
636
634
|
},
|
|
637
635
|
});
|
|
638
636
|
controller.enqueue({
|
|
@@ -102,7 +102,6 @@ export const getMessagesRequestAttributes = (body, signalLevel) => {
|
|
|
102
102
|
if (signalLevel !== "required") {
|
|
103
103
|
Object.assign(attrs, {
|
|
104
104
|
"gen_ai.request.stream": body.stream,
|
|
105
|
-
"gen_ai.request.service_tier": body.service_tier,
|
|
106
105
|
"gen_ai.request.max_tokens": body.max_tokens,
|
|
107
106
|
"gen_ai.request.temperature": body.temperature,
|
|
108
107
|
"gen_ai.request.top_p": body.top_p,
|
|
@@ -647,10 +647,7 @@ export type MessageDeltaEvent = SseFrame<{
|
|
|
647
647
|
stop_reason: MessagesStopReason;
|
|
648
648
|
stop_sequence: string | null;
|
|
649
649
|
};
|
|
650
|
-
usage:
|
|
651
|
-
output_tokens: number;
|
|
652
|
-
input_tokens?: number;
|
|
653
|
-
};
|
|
650
|
+
usage: MessagesUsage;
|
|
654
651
|
}, "message_delta">;
|
|
655
652
|
export type MessageStopEvent = SseFrame<{
|
|
656
653
|
type: "message_stop";
|
package/dist/errors/ai-sdk.js
CHANGED
|
@@ -6,10 +6,19 @@ const normalizeApiCallError = (error) => {
|
|
|
6
6
|
const statusText = `UPSTREAM_${STATUS_TEXT(status)}`;
|
|
7
7
|
return new GatewayError(error, status, statusText, undefined, error.responseHeaders ?? undefined);
|
|
8
8
|
};
|
|
9
|
+
// `AbortError` / `TimeoutError` (raised by the AI SDK's internal `timeout` controller,
|
|
10
|
+
// `AbortSignal.timeout`, or an aborted upstream `fetch`) reach us as plain DOMExceptions
|
|
11
|
+
// that none of the AI SDK error classes match. Treat them as upstream gateway timeouts
|
|
12
|
+
// so they surface as 504 with retry headers rather than defaulting to 500/502.
|
|
13
|
+
// Inbound client disconnects are caught earlier in `lifecycle.ts` and overridden to 499.
|
|
14
|
+
const isUpstreamAbortError = (error) => error instanceof Error && (error.name === "AbortError" || error.name === "TimeoutError");
|
|
9
15
|
export const normalizeAiSdkError = (error) => {
|
|
10
16
|
if (APICallError.isInstance(error)) {
|
|
11
17
|
return normalizeApiCallError(error);
|
|
12
18
|
}
|
|
19
|
+
if (isUpstreamAbortError(error)) {
|
|
20
|
+
return new GatewayError(error, 504, `UPSTREAM_${STATUS_TEXT(504)}`);
|
|
21
|
+
}
|
|
13
22
|
if (RetryError.isInstance(error)) {
|
|
14
23
|
if (APICallError.isInstance(error.lastError)) {
|
|
15
24
|
return normalizeApiCallError(error.lastError);
|
package/dist/index.d.ts
CHANGED
|
@@ -10,4 +10,4 @@ export * from "./models/catalog";
|
|
|
10
10
|
export * from "./models/types";
|
|
11
11
|
export * from "./providers/registry";
|
|
12
12
|
export * from "./providers/types";
|
|
13
|
-
export { FORWARD_HEADER_ALLOWLIST } from "./utils";
|
|
13
|
+
export { FORWARD_HEADER_ALLOWLIST, RESPONSE_HEADER_ALLOWLIST } from "./utils";
|
package/dist/index.js
CHANGED
|
@@ -9,4 +9,4 @@ export * from "./models/catalog";
|
|
|
9
9
|
export * from "./models/types";
|
|
10
10
|
export * from "./providers/registry";
|
|
11
11
|
export * from "./providers/types";
|
|
12
|
-
export { FORWARD_HEADER_ALLOWLIST } from "./utils";
|
|
12
|
+
export { FORWARD_HEADER_ALLOWLIST, RESPONSE_HEADER_ALLOWLIST } from "./utils";
|
package/dist/lifecycle.js
CHANGED
|
@@ -50,11 +50,12 @@ export const winterCgHandler = (run, config) => {
|
|
|
50
50
|
else if (status === 200 && ctx.response?.status)
|
|
51
51
|
realStatus = ctx.response.status;
|
|
52
52
|
if (realStatus !== 200) {
|
|
53
|
+
const err = reason ?? ctx.request.signal.reason;
|
|
53
54
|
logger[realStatus >= 500 ? "error" : "warn"]({
|
|
54
55
|
requestId: ctx.requestId,
|
|
55
|
-
err
|
|
56
|
+
err,
|
|
56
57
|
});
|
|
57
|
-
span.recordError(
|
|
58
|
+
span.recordError(err, true);
|
|
58
59
|
}
|
|
59
60
|
span.setAttributes({ "http.response.status_code_effective": realStatus });
|
|
60
61
|
if (ctx.operation === "chat" ||
|
package/dist/models/types.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { ProviderId } from "../providers/types";
|
|
2
|
-
export declare const CANONICAL_MODEL_IDS: readonly ["anthropic/claude-opus-4.7", "anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "anthropic/claude-haiku-4.5", "anthropic/claude-sonnet-4.5", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.1", "anthropic/claude-opus-4", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-3.7", "anthropic/claude-sonnet-3.5", "anthropic/claude-haiku-3.5", "anthropic/claude-haiku-3", "openai/gpt-oss-20b", "openai/gpt-oss-120b", "openai/gpt-5", "openai/gpt-5-pro", "openai/gpt-5.2", "openai/gpt-5.2-chat", "openai/gpt-5.2-pro", "openai/gpt-5.2-codex", "openai/gpt-5.3-codex", "openai/gpt-5.3-codex-spark", "openai/gpt-5.3-chat", "openai/gpt-5.4", "openai/gpt-5.4-mini", "openai/gpt-5.4-nano", "openai/gpt-5.4-pro", "openai/gpt-5.5", "openai/gpt-5.5-pro", "openai/gpt-5-mini", "openai/gpt-5-nano", "openai/gpt-5-codex", "openai/gpt-5.1-codex", "openai/gpt-5.1-codex-max", "openai/gpt-5.1-codex-mini", "openai/gpt-5.1-chat", "openai/gpt-5.1", "openai/text-embedding-3-small", "openai/text-embedding-3-large", "amazon/nova-micro", "amazon/nova-lite", "amazon/nova-pro", "amazon/nova-premier", "amazon/nova-2-lite", "amazon/nova-2-multimodal-embeddings", "google/gemini-2.5-flash-lite", "google/gemini-2.5-flash", "google/gemini-2.5-pro", "google/gemini-3-flash-preview", "google/gemini-3.1-flash-lite-preview", "google/gemini-3.1-pro-preview", "google/gemini-embedding-2", "google/embedding-001", "google/gemma-3-1b", "google/gemma-3-4b", "google/gemma-3-12b", "google/gemma-3-27b", "google/gemma-4-e2b", "google/gemma-4-e4b", "google/gemma-4-26b-a4b", "google/gemma-4-31b", "meta/llama-3.1-8b", "meta/llama-3.1-70b", "meta/llama-3.1-405b", "meta/llama-3.2-1b", "meta/llama-3.2-3b", "meta/llama-3.2-11b", "meta/llama-3.2-90b", "meta/llama-3.3-70b", "meta/llama-4-scout", "meta/llama-4-maverick", "cohere/embed-v4.0", "cohere/embed-english-v3.0", "cohere/embed-english-light-v3.0", "cohere/embed-multilingual-v3.0", "cohere/embed-multilingual-light-v3.0", "cohere/command-a", "cohere/command-r7b", "cohere/command-a-translate", "cohere/command-a-reasoning", "cohere/command-a-vision", "cohere/command-r", "cohere/command-r-plus", "minimax/m2.5", "minimax/m2.7", "moonshot/kimi-k2.5", "moonshot/kimi-k2.6", "xai/grok-4.1-fast", "xai/grok-4.1-fast-reasoning", "xai/grok-4.2", "xai/grok-4.2-reasoning", "xai/grok-4.2-multi-agent", "deepseek/deepseek-v3.2", "voyage/voyage-2-code", "voyage/voyage-2-law", "voyage/voyage-2-finance", "voyage/voyage-3-code", "voyage/voyage-3-large", "voyage/voyage-3.5-lite", "voyage/voyage-3.5", "voyage/voyage-4-lite", "voyage/voyage-4", "voyage/voyage-4-large", "alibaba/qwen3-235b", "alibaba/qwen3-32b", "alibaba/qwen3.5-plus", "alibaba/qwen3.5-flash", "alibaba/qwen3.5-397b", "alibaba/qwen3.5-122b", "alibaba/qwen3.5-35b", "alibaba/qwen3.5-27b", "alibaba/qwen3.5-9b", "alibaba/qwen3.5-4b", "alibaba/qwen3.5-2b", "alibaba/qwen3.5-0.8b", "alibaba/qwen3.6-plus", "alibaba/qwen3.6-flash", "alibaba/qwen3.6-27b", "alibaba/qwen3.6-max-preview", "alibaba/qwen3-coder-next", "alibaba/qwen3-vl-235b", "alibaba/qwen3-embedding-0.6b", "alibaba/qwen3-embedding-4b", "alibaba/qwen3-embedding-8b", "zhipu/glm-5", "zhipu/glm-5-turbo", "zhipu/glm-5.1"];
|
|
2
|
+
export declare const CANONICAL_MODEL_IDS: readonly ["anthropic/claude-opus-4.7", "anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "anthropic/claude-haiku-4.5", "anthropic/claude-sonnet-4.5", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.1", "anthropic/claude-opus-4", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-3.7", "anthropic/claude-sonnet-3.5", "anthropic/claude-haiku-3.5", "anthropic/claude-haiku-3", "openai/gpt-oss-20b", "openai/gpt-oss-120b", "openai/gpt-5", "openai/gpt-5-pro", "openai/gpt-5.2", "openai/gpt-5.2-chat", "openai/gpt-5.2-pro", "openai/gpt-5.2-codex", "openai/gpt-5.3-codex", "openai/gpt-5.3-codex-spark", "openai/gpt-5.3-chat", "openai/gpt-5.4", "openai/gpt-5.4-mini", "openai/gpt-5.4-nano", "openai/gpt-5.4-pro", "openai/gpt-5.5", "openai/gpt-5.5-pro", "openai/gpt-5-mini", "openai/gpt-5-nano", "openai/gpt-5-codex", "openai/gpt-5.1-codex", "openai/gpt-5.1-codex-max", "openai/gpt-5.1-codex-mini", "openai/gpt-5.1-chat", "openai/gpt-5.1", "openai/text-embedding-3-small", "openai/text-embedding-3-large", "amazon/nova-micro", "amazon/nova-lite", "amazon/nova-pro", "amazon/nova-premier", "amazon/nova-2-lite", "amazon/nova-2-multimodal-embeddings", "google/gemini-2.5-flash-lite", "google/gemini-2.5-flash", "google/gemini-2.5-pro", "google/gemini-3-flash-preview", "google/gemini-3.1-flash-lite-preview", "google/gemini-3.1-pro-preview", "google/gemini-embedding-2", "google/embedding-001", "google/gemma-3-1b", "google/gemma-3-4b", "google/gemma-3-12b", "google/gemma-3-27b", "google/gemma-4-e2b", "google/gemma-4-e4b", "google/gemma-4-26b-a4b", "google/gemma-4-31b", "meta/llama-3.1-8b", "meta/llama-3.1-70b", "meta/llama-3.1-405b", "meta/llama-3.2-1b", "meta/llama-3.2-3b", "meta/llama-3.2-11b", "meta/llama-3.2-90b", "meta/llama-3.3-70b", "meta/llama-4-scout", "meta/llama-4-maverick", "cohere/embed-v4.0", "cohere/embed-english-v3.0", "cohere/embed-english-light-v3.0", "cohere/embed-multilingual-v3.0", "cohere/embed-multilingual-light-v3.0", "cohere/command-a", "cohere/command-r7b", "cohere/command-a-translate", "cohere/command-a-reasoning", "cohere/command-a-vision", "cohere/command-r", "cohere/command-r-plus", "minimax/m2.5", "minimax/m2.7", "moonshot/kimi-k2.5", "moonshot/kimi-k2.6", "xai/grok-4.1-fast", "xai/grok-4.1-fast-reasoning", "xai/grok-4.2", "xai/grok-4.2-reasoning", "xai/grok-4.2-multi-agent", "xai/grok-4.3", "deepseek/deepseek-v3.2", "voyage/voyage-2-code", "voyage/voyage-2-law", "voyage/voyage-2-finance", "voyage/voyage-3-code", "voyage/voyage-3-large", "voyage/voyage-3.5-lite", "voyage/voyage-3.5", "voyage/voyage-4-lite", "voyage/voyage-4", "voyage/voyage-4-large", "alibaba/qwen3-235b", "alibaba/qwen3-32b", "alibaba/qwen3.5-plus", "alibaba/qwen3.5-flash", "alibaba/qwen3.5-397b", "alibaba/qwen3.5-122b", "alibaba/qwen3.5-35b", "alibaba/qwen3.5-27b", "alibaba/qwen3.5-9b", "alibaba/qwen3.5-4b", "alibaba/qwen3.5-2b", "alibaba/qwen3.5-0.8b", "alibaba/qwen3.6-plus", "alibaba/qwen3.6-flash", "alibaba/qwen3.6-27b", "alibaba/qwen3.6-max-preview", "alibaba/qwen3-coder-next", "alibaba/qwen3-vl-235b", "alibaba/qwen3-embedding-0.6b", "alibaba/qwen3-embedding-4b", "alibaba/qwen3-embedding-8b", "zhipu/glm-5", "zhipu/glm-5-turbo", "zhipu/glm-5.1"];
|
|
3
3
|
export type CanonicalModelId = (typeof CANONICAL_MODEL_IDS)[number];
|
|
4
4
|
export type ModelId = CanonicalModelId | (string & {});
|
|
5
5
|
export type CatalogModel = {
|
package/dist/models/types.js
CHANGED
|
@@ -34,4 +34,9 @@ export const xaiReasoningMiddleware = {
|
|
|
34
34
|
return params;
|
|
35
35
|
},
|
|
36
36
|
};
|
|
37
|
-
modelMiddlewareMatcher.useForModel([
|
|
37
|
+
modelMiddlewareMatcher.useForModel([
|
|
38
|
+
"xai/grok-4.1-fast-reasoning",
|
|
39
|
+
"xai/grok-4.2-reasoning",
|
|
40
|
+
"xai/grok-4.2-multi-agent",
|
|
41
|
+
"xai/grok-4.3",
|
|
42
|
+
], { language: [xaiReasoningMiddleware] });
|
|
@@ -59,40 +59,30 @@ export declare const grok42MultiAgent: import("../../utils").Preset<"xai/grok-4.
|
|
|
59
59
|
providers: readonly ["xai"];
|
|
60
60
|
context: number;
|
|
61
61
|
}>;
|
|
62
|
+
export declare const grok43: import("../../utils").Preset<"xai/grok-4.3", CatalogModel, {
|
|
63
|
+
name: string;
|
|
64
|
+
created: string;
|
|
65
|
+
knowledge: string;
|
|
66
|
+
context: number;
|
|
67
|
+
capabilities: readonly ["tool_call", "structured_output", "reasoning", "temperature"];
|
|
68
|
+
modalities: {
|
|
69
|
+
input: readonly ["text", "image"];
|
|
70
|
+
output: readonly ["text"];
|
|
71
|
+
};
|
|
72
|
+
providers: readonly ["xai"];
|
|
73
|
+
}>;
|
|
62
74
|
export declare const grok: {
|
|
63
|
-
readonly latest: readonly [import("../../utils").Preset<"xai/grok-4.
|
|
75
|
+
readonly latest: readonly [import("../../utils").Preset<"xai/grok-4.3", CatalogModel, {
|
|
64
76
|
name: string;
|
|
65
77
|
created: string;
|
|
66
78
|
knowledge: string;
|
|
67
|
-
modalities: {
|
|
68
|
-
input: readonly ["text", "image"];
|
|
69
|
-
output: readonly ["text"];
|
|
70
|
-
};
|
|
71
|
-
capabilities: readonly ["tool_call", "structured_output", "temperature"];
|
|
72
|
-
providers: readonly ["xai"];
|
|
73
|
-
context: number;
|
|
74
|
-
}>, import("../../utils").Preset<"xai/grok-4.2-reasoning", CatalogModel, {
|
|
75
|
-
name: string;
|
|
76
|
-
created: string;
|
|
77
|
-
knowledge: string;
|
|
78
|
-
capabilities: readonly ["tool_call", "structured_output", "reasoning", "temperature"];
|
|
79
|
-
modalities: {
|
|
80
|
-
input: readonly ["text", "image"];
|
|
81
|
-
output: readonly ["text"];
|
|
82
|
-
};
|
|
83
|
-
providers: readonly ["xai"];
|
|
84
79
|
context: number;
|
|
85
|
-
}>, import("../../utils").Preset<"xai/grok-4.2-multi-agent", CatalogModel, {
|
|
86
|
-
name: string;
|
|
87
|
-
created: string;
|
|
88
|
-
knowledge: string;
|
|
89
80
|
capabilities: readonly ["tool_call", "structured_output", "reasoning", "temperature"];
|
|
90
81
|
modalities: {
|
|
91
82
|
input: readonly ["text", "image"];
|
|
92
83
|
output: readonly ["text"];
|
|
93
84
|
};
|
|
94
85
|
providers: readonly ["xai"];
|
|
95
|
-
context: number;
|
|
96
86
|
}>];
|
|
97
87
|
readonly all: (import("../../utils").Preset<"xai/grok-4.1-fast", CatalogModel, {
|
|
98
88
|
name: string;
|
|
@@ -149,6 +139,17 @@ export declare const grok: {
|
|
|
149
139
|
};
|
|
150
140
|
providers: readonly ["xai"];
|
|
151
141
|
context: number;
|
|
142
|
+
}> | import("../../utils").Preset<"xai/grok-4.3", CatalogModel, {
|
|
143
|
+
name: string;
|
|
144
|
+
created: string;
|
|
145
|
+
knowledge: string;
|
|
146
|
+
context: number;
|
|
147
|
+
capabilities: readonly ["tool_call", "structured_output", "reasoning", "temperature"];
|
|
148
|
+
modalities: {
|
|
149
|
+
input: readonly ["text", "image"];
|
|
150
|
+
output: readonly ["text"];
|
|
151
|
+
};
|
|
152
|
+
providers: readonly ["xai"];
|
|
152
153
|
}>)[];
|
|
153
154
|
readonly "v4.1": readonly [import("../../utils").Preset<"xai/grok-4.1-fast", CatalogModel, {
|
|
154
155
|
name: string;
|
|
@@ -207,4 +208,16 @@ export declare const grok: {
|
|
|
207
208
|
providers: readonly ["xai"];
|
|
208
209
|
context: number;
|
|
209
210
|
}>];
|
|
211
|
+
readonly "v4.3": readonly [import("../../utils").Preset<"xai/grok-4.3", CatalogModel, {
|
|
212
|
+
name: string;
|
|
213
|
+
created: string;
|
|
214
|
+
knowledge: string;
|
|
215
|
+
context: number;
|
|
216
|
+
capabilities: readonly ["tool_call", "structured_output", "reasoning", "temperature"];
|
|
217
|
+
modalities: {
|
|
218
|
+
input: readonly ["text", "image"];
|
|
219
|
+
output: readonly ["text"];
|
|
220
|
+
};
|
|
221
|
+
providers: readonly ["xai"];
|
|
222
|
+
}>];
|
|
210
223
|
};
|
|
@@ -42,14 +42,22 @@ export const grok42MultiAgent = presetFor()("xai/grok-4.2-multi-agent", {
|
|
|
42
42
|
created: "2026-03-16",
|
|
43
43
|
knowledge: "2024-11",
|
|
44
44
|
});
|
|
45
|
+
export const grok43 = presetFor()("xai/grok-4.3", {
|
|
46
|
+
...GROK_REASONING_BASE,
|
|
47
|
+
name: "Grok 4.3",
|
|
48
|
+
created: "2026-05-01",
|
|
49
|
+
knowledge: "2024-11",
|
|
50
|
+
context: 1000000,
|
|
51
|
+
});
|
|
45
52
|
const grokAtomic = {
|
|
46
53
|
"v4.1": [grok41Fast, grok41FastReasoning],
|
|
47
54
|
"v4.2": [grok42, grok42Reasoning, grok42MultiAgent],
|
|
55
|
+
"v4.3": [grok43],
|
|
48
56
|
};
|
|
49
57
|
const grokGroups = {};
|
|
50
58
|
export const grok = {
|
|
51
59
|
...grokAtomic,
|
|
52
60
|
...grokGroups,
|
|
53
|
-
latest: [
|
|
61
|
+
latest: [grok43],
|
|
54
62
|
all: Object.values(grokAtomic).flat(),
|
|
55
63
|
};
|
|
@@ -5,6 +5,7 @@ const MAPPING = {
|
|
|
5
5
|
"xai/grok-4.2": "grok-4.20-0309-non-reasoning",
|
|
6
6
|
"xai/grok-4.2-reasoning": "grok-4.20-0309-reasoning",
|
|
7
7
|
"xai/grok-4.2-multi-agent": "grok-4.20-multi-agent-0309",
|
|
8
|
+
"xai/grok-4.3": "grok-4.3",
|
|
8
9
|
};
|
|
9
10
|
export const withCanonicalIdsForXai = (provider, extraMapping) => withCanonicalIds(provider, {
|
|
10
11
|
mapping: { ...MAPPING, ...extraMapping },
|
package/dist/telemetry/gen-ai.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { metrics } from "@opentelemetry/api";
|
|
2
2
|
import { STATUS_TEXT } from "../errors/utils";
|
|
3
|
+
import { logger } from "../logger";
|
|
3
4
|
const getMeter = () => metrics.getMeter("@hebo/gateway");
|
|
4
5
|
let requestDurationHistogram;
|
|
5
6
|
let timePerOutputTokenHistogram;
|
|
@@ -54,14 +55,14 @@ export const getGenAiGeneralAttributes = (ctx, signalLevel) => {
|
|
|
54
55
|
if (!signalLevel || signalLevel === "off")
|
|
55
56
|
return {};
|
|
56
57
|
const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
|
|
57
|
-
const serviceTier = typeof ctx.body?.service_tier === "string" ? ctx.body.service_tier :
|
|
58
|
+
const serviceTier = typeof ctx.body?.service_tier === "string" ? ctx.body.service_tier : "auto";
|
|
58
59
|
const attrs = {
|
|
59
60
|
"gen_ai.operation.name": ctx.operation,
|
|
60
61
|
"gen_ai.request.model": requestModel,
|
|
61
62
|
"gen_ai.response.model": ctx.resolvedModelId,
|
|
62
63
|
"gen_ai.provider.name": ctx.resolvedProviderId,
|
|
63
64
|
};
|
|
64
|
-
if (signalLevel !== "required"
|
|
65
|
+
if (signalLevel !== "required") {
|
|
65
66
|
attrs["gen_ai.request.service_tier"] = serviceTier;
|
|
66
67
|
}
|
|
67
68
|
for (const [key, value] of Object.entries(ctx.otel)) {
|
|
@@ -93,20 +94,59 @@ export const recordTimePerOutputToken = (start, ttft, tokenAttrs, metricAttrs, s
|
|
|
93
94
|
return;
|
|
94
95
|
getTimePerOutputTokenHistogram().record((performance.now() - start - ttft) / 1000 / (outputTokens - 1), metricAttrs);
|
|
95
96
|
};
|
|
97
|
+
// Partitioning follows OTel semconv PR #3624:
|
|
98
|
+
// https://github.com/open-telemetry/semantic-conventions/pull/3624
|
|
99
|
+
// When a cache or reasoning breakdown is reported, partitioned data points sum
|
|
100
|
+
// to the total and a bare {type} point MUST NOT be emitted alongside them.
|
|
96
101
|
// FUTURE: record unsuccessful calls
|
|
97
102
|
export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
|
|
98
|
-
if (
|
|
103
|
+
if (signalLevel !== "recommended" && signalLevel !== "full")
|
|
99
104
|
return;
|
|
100
|
-
const
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
105
|
+
const histogram = getTokenUsageHistogram();
|
|
106
|
+
const emit = (value, extra) => {
|
|
107
|
+
if (value > 0)
|
|
108
|
+
histogram.record(value, { ...metricAttrs, ...extra });
|
|
104
109
|
};
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
110
|
+
emitInputTokens(emit, tokenAttrs);
|
|
111
|
+
emitOutputTokens(emit, tokenAttrs);
|
|
112
|
+
};
|
|
113
|
+
const emitInputTokens = (emit, tokenAttrs) => {
|
|
114
|
+
const total = tokenAttrs["gen_ai.usage.input_tokens"];
|
|
115
|
+
if (total === undefined)
|
|
116
|
+
return;
|
|
117
|
+
const cacheRead = tokenAttrs["gen_ai.usage.cache_read.input_tokens"];
|
|
118
|
+
const cacheCreation = tokenAttrs["gen_ai.usage.cache_creation.input_tokens"];
|
|
119
|
+
if (cacheRead === undefined && cacheCreation === undefined) {
|
|
120
|
+
emit(total, { "gen_ai.token.type": "input" });
|
|
121
|
+
return;
|
|
122
|
+
}
|
|
123
|
+
const read = cacheRead ?? 0;
|
|
124
|
+
const creation = cacheCreation ?? 0;
|
|
125
|
+
let uncached = total - read - creation;
|
|
126
|
+
if (uncached < 0) {
|
|
127
|
+
logger.warn({ inputTokens: total, cacheRead: read, cacheCreation: creation }, "[telemetry] input token cache partitions exceed total; clamping uncached to 0");
|
|
128
|
+
uncached = 0;
|
|
129
|
+
}
|
|
130
|
+
emit(read, { "gen_ai.token.type": "input", "gen_ai.token.cache": "read" });
|
|
131
|
+
emit(creation, { "gen_ai.token.type": "input", "gen_ai.token.cache": "creation" });
|
|
132
|
+
emit(uncached, { "gen_ai.token.type": "input", "gen_ai.token.cache": "uncached" });
|
|
133
|
+
};
|
|
134
|
+
const emitOutputTokens = (emit, tokenAttrs) => {
|
|
135
|
+
const total = tokenAttrs["gen_ai.usage.output_tokens"];
|
|
136
|
+
if (total === undefined)
|
|
137
|
+
return;
|
|
138
|
+
const reasoning = tokenAttrs["gen_ai.usage.reasoning.output_tokens"];
|
|
139
|
+
if (reasoning === undefined) {
|
|
140
|
+
emit(total, { "gen_ai.token.type": "output" });
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
143
|
+
let reasoned = reasoning;
|
|
144
|
+
let nonReasoning = total - reasoning;
|
|
145
|
+
if (nonReasoning < 0) {
|
|
146
|
+
logger.warn({ outputTokens: total, reasoningTokens: reasoning }, "[telemetry] reasoning tokens exceed output total; clamping non-reasoning to 0");
|
|
147
|
+
reasoned = total;
|
|
148
|
+
nonReasoning = 0;
|
|
149
|
+
}
|
|
150
|
+
emit(reasoned, { "gen_ai.token.type": "output", "gen_ai.token.reasoning": true });
|
|
151
|
+
emit(nonReasoning, { "gen_ai.token.type": "output", "gen_ai.token.reasoning": false });
|
|
112
152
|
};
|
package/dist/utils/headers.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ export declare const REQUEST_ID_HEADER = "x-request-id";
|
|
|
2
2
|
export declare const RETRY_AFTER_HEADER = "retry-after";
|
|
3
3
|
export declare const RETRY_AFTER_MS_HEADER = "retry-after-ms";
|
|
4
4
|
export declare const X_SHOULD_RETRY_HEADER = "x-should-retry";
|
|
5
|
+
export declare const RESPONSE_HEADER_ALLOWLIST: readonly ["retry-after", "retry-after-ms", "x-should-retry"];
|
|
5
6
|
type HeaderSource = Request | ResponseInit | undefined;
|
|
6
7
|
export declare const resolveRequestId: (source: HeaderSource) => string | undefined;
|
|
7
8
|
export declare const filterResponseHeaders: (upstream?: HeadersInit) => Record<string, string>;
|
package/dist/utils/headers.js
CHANGED
|
@@ -2,7 +2,7 @@ export const REQUEST_ID_HEADER = "x-request-id";
|
|
|
2
2
|
export const RETRY_AFTER_HEADER = "retry-after";
|
|
3
3
|
export const RETRY_AFTER_MS_HEADER = "retry-after-ms";
|
|
4
4
|
export const X_SHOULD_RETRY_HEADER = "x-should-retry";
|
|
5
|
-
const RESPONSE_HEADER_ALLOWLIST = [
|
|
5
|
+
export const RESPONSE_HEADER_ALLOWLIST = [
|
|
6
6
|
RETRY_AFTER_HEADER,
|
|
7
7
|
RETRY_AFTER_MS_HEADER,
|
|
8
8
|
X_SHOULD_RETRY_HEADER,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hebo-ai/gateway",
|
|
3
|
-
"version": "0.11.
|
|
3
|
+
"version": "0.11.4",
|
|
4
4
|
"description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI /chat/completions, OpenResponses /responses & Anthropic /messages.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
@@ -252,36 +252,36 @@
|
|
|
252
252
|
"fix": "bun lint:staged && bun format:staged"
|
|
253
253
|
},
|
|
254
254
|
"dependencies": {
|
|
255
|
-
"@ai-sdk/provider": "^3.0.
|
|
256
|
-
"ai": "^6.0.
|
|
255
|
+
"@ai-sdk/provider": "^3.0.9",
|
|
256
|
+
"ai": "^6.0.169",
|
|
257
257
|
"lru-cache": "^11.3.5",
|
|
258
258
|
"uuid": "^14.0.0",
|
|
259
259
|
"zod": "^4.3.6"
|
|
260
260
|
},
|
|
261
261
|
"devDependencies": {
|
|
262
|
-
"@ai-sdk/alibaba": "^1.0.
|
|
263
|
-
"@ai-sdk/amazon-bedrock": "^4.0.
|
|
264
|
-
"@ai-sdk/anthropic": "^3.0.
|
|
265
|
-
"@ai-sdk/cohere": "^3.0.
|
|
266
|
-
"@ai-sdk/deepinfra": "^2.0.
|
|
267
|
-
"@ai-sdk/deepseek": "^2.0.
|
|
268
|
-
"@ai-sdk/fireworks": "^2.0.
|
|
269
|
-
"@ai-sdk/google-vertex": "^4.0.
|
|
270
|
-
"@ai-sdk/groq": "^3.0.
|
|
271
|
-
"@ai-sdk/moonshotai": "^2.0.
|
|
272
|
-
"@ai-sdk/openai": "^3.0.
|
|
273
|
-
"@ai-sdk/togetherai": "^2.0.
|
|
274
|
-
"@ai-sdk/xai": "^3.0.
|
|
262
|
+
"@ai-sdk/alibaba": "^1.0.18",
|
|
263
|
+
"@ai-sdk/amazon-bedrock": "^4.0.97",
|
|
264
|
+
"@ai-sdk/anthropic": "^3.0.72",
|
|
265
|
+
"@ai-sdk/cohere": "^3.0.31",
|
|
266
|
+
"@ai-sdk/deepinfra": "^2.0.46",
|
|
267
|
+
"@ai-sdk/deepseek": "^2.0.30",
|
|
268
|
+
"@ai-sdk/fireworks": "^2.0.47",
|
|
269
|
+
"@ai-sdk/google-vertex": "^4.0.113",
|
|
270
|
+
"@ai-sdk/groq": "^3.0.36",
|
|
271
|
+
"@ai-sdk/moonshotai": "^2.0.17",
|
|
272
|
+
"@ai-sdk/openai": "^3.0.54",
|
|
273
|
+
"@ai-sdk/togetherai": "^2.0.46",
|
|
274
|
+
"@ai-sdk/xai": "^3.0.84",
|
|
275
275
|
"@anthropic-ai/sdk": "^0.91.1",
|
|
276
|
-
"@aws-sdk/credential-providers": "^3.
|
|
276
|
+
"@aws-sdk/credential-providers": "^3.1038.0",
|
|
277
277
|
"@langfuse/otel": "^5.2.0",
|
|
278
278
|
"@libsql/client": "^0.17.3",
|
|
279
279
|
"@mjackson/node-fetch-server": "^0.7.0",
|
|
280
280
|
"@opentelemetry/api": "^1.9.1",
|
|
281
281
|
"@opentelemetry/context-async-hooks": "^2.7.0",
|
|
282
282
|
"@opentelemetry/sdk-trace-base": "^2.7.0",
|
|
283
|
-
"@tanstack/react-router": "^1.168.
|
|
284
|
-
"@tanstack/react-start": "^1.167.
|
|
283
|
+
"@tanstack/react-router": "^1.168.25",
|
|
284
|
+
"@tanstack/react-start": "^1.167.50",
|
|
285
285
|
"@types/better-sqlite3": "^7.6.13",
|
|
286
286
|
"@types/bun": "1.3.13",
|
|
287
287
|
"@types/pg": "^8.20.0",
|
|
@@ -294,10 +294,10 @@
|
|
|
294
294
|
"lefthook": "^2.1.6",
|
|
295
295
|
"mysql2": "^3.22.3",
|
|
296
296
|
"next": "^16.2.4",
|
|
297
|
-
"openai": "^6.
|
|
297
|
+
"openai": "^6.35.0",
|
|
298
298
|
"oxfmt": "^0.46.0",
|
|
299
|
-
"oxlint": "^1.
|
|
300
|
-
"oxlint-tsgolint": "^0.22.
|
|
299
|
+
"oxlint": "^1.62.0",
|
|
300
|
+
"oxlint-tsgolint": "^0.22.1",
|
|
301
301
|
"pg": "^8.20.0",
|
|
302
302
|
"pino": "^10.3.1",
|
|
303
303
|
"postgres": "^3.4.9",
|
|
@@ -315,12 +315,12 @@
|
|
|
315
315
|
"@ai-sdk/deepinfra": "^2.0.45",
|
|
316
316
|
"@ai-sdk/deepseek": "^2.0.29",
|
|
317
317
|
"@ai-sdk/fireworks": "^2.0.46",
|
|
318
|
-
"@ai-sdk/google": "^3.0.
|
|
318
|
+
"@ai-sdk/google": "^3.0.65",
|
|
319
319
|
"@ai-sdk/google-vertex": "^4.0.80",
|
|
320
320
|
"@ai-sdk/groq": "^3.0.29",
|
|
321
321
|
"@ai-sdk/moonshotai": "^2.0.16",
|
|
322
322
|
"@ai-sdk/openai": "^3.0.41",
|
|
323
|
-
"@ai-sdk/openai-compatible": "^2.0.
|
|
323
|
+
"@ai-sdk/openai-compatible": "^2.0.42",
|
|
324
324
|
"@ai-sdk/togetherai": "^2.0.45",
|
|
325
325
|
"@ai-sdk/xai": "^3.0.83",
|
|
326
326
|
"@libsql/client": "^0.14.0",
|