@hebo-ai/gateway 0.6.2-rc0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/endpoints/chat-completions/converters.js +26 -21
- package/dist/endpoints/chat-completions/handler.js +2 -0
- package/dist/endpoints/chat-completions/otel.js +1 -1
- package/dist/endpoints/chat-completions/schema.d.ts +4 -18
- package/dist/endpoints/chat-completions/schema.js +14 -17
- package/dist/endpoints/embeddings/handler.js +2 -0
- package/dist/endpoints/embeddings/otel.js +5 -0
- package/dist/endpoints/embeddings/schema.d.ts +6 -0
- package/dist/endpoints/embeddings/schema.js +4 -1
- package/dist/endpoints/models/converters.js +3 -3
- package/dist/lifecycle.js +2 -2
- package/dist/logger/default.js +3 -3
- package/dist/logger/index.d.ts +2 -5
- package/dist/middleware/common.js +1 -0
- package/dist/middleware/utils.js +0 -3
- package/dist/models/amazon/middleware.js +8 -5
- package/dist/models/anthropic/middleware.js +13 -13
- package/dist/models/catalog.js +5 -1
- package/dist/models/cohere/middleware.js +7 -5
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/models/google/middleware.js +29 -25
- package/dist/models/openai/middleware.js +13 -9
- package/dist/models/voyage/middleware.js +2 -1
- package/dist/providers/bedrock/middleware.js +21 -23
- package/dist/providers/registry.js +3 -0
- package/dist/telemetry/fetch.js +7 -2
- package/dist/telemetry/gen-ai.js +15 -12
- package/dist/telemetry/memory.d.ts +1 -1
- package/dist/telemetry/memory.js +30 -14
- package/dist/telemetry/span.js +1 -1
- package/dist/telemetry/stream.js +30 -23
- package/dist/utils/env.js +4 -2
- package/dist/utils/preset.js +1 -0
- package/dist/utils/response.js +3 -1
- package/package.json +36 -50
- package/src/config.ts +0 -98
- package/src/endpoints/chat-completions/converters.test.ts +0 -631
- package/src/endpoints/chat-completions/converters.ts +0 -899
- package/src/endpoints/chat-completions/handler.test.ts +0 -391
- package/src/endpoints/chat-completions/handler.ts +0 -201
- package/src/endpoints/chat-completions/index.ts +0 -4
- package/src/endpoints/chat-completions/otel.test.ts +0 -315
- package/src/endpoints/chat-completions/otel.ts +0 -214
- package/src/endpoints/chat-completions/schema.ts +0 -364
- package/src/endpoints/embeddings/converters.ts +0 -51
- package/src/endpoints/embeddings/handler.test.ts +0 -133
- package/src/endpoints/embeddings/handler.ts +0 -137
- package/src/endpoints/embeddings/index.ts +0 -4
- package/src/endpoints/embeddings/otel.ts +0 -40
- package/src/endpoints/embeddings/schema.ts +0 -36
- package/src/endpoints/models/converters.ts +0 -56
- package/src/endpoints/models/handler.test.ts +0 -122
- package/src/endpoints/models/handler.ts +0 -37
- package/src/endpoints/models/index.ts +0 -3
- package/src/endpoints/models/schema.ts +0 -37
- package/src/errors/ai-sdk.ts +0 -99
- package/src/errors/gateway.ts +0 -17
- package/src/errors/openai.ts +0 -57
- package/src/errors/utils.ts +0 -47
- package/src/gateway.ts +0 -50
- package/src/index.ts +0 -19
- package/src/lifecycle.ts +0 -135
- package/src/logger/default.ts +0 -105
- package/src/logger/index.ts +0 -42
- package/src/middleware/common.test.ts +0 -215
- package/src/middleware/common.ts +0 -163
- package/src/middleware/debug.ts +0 -37
- package/src/middleware/matcher.ts +0 -161
- package/src/middleware/utils.ts +0 -34
- package/src/models/amazon/index.ts +0 -2
- package/src/models/amazon/middleware.test.ts +0 -133
- package/src/models/amazon/middleware.ts +0 -79
- package/src/models/amazon/presets.ts +0 -104
- package/src/models/anthropic/index.ts +0 -2
- package/src/models/anthropic/middleware.test.ts +0 -643
- package/src/models/anthropic/middleware.ts +0 -148
- package/src/models/anthropic/presets.ts +0 -191
- package/src/models/catalog.ts +0 -13
- package/src/models/cohere/index.ts +0 -2
- package/src/models/cohere/middleware.test.ts +0 -138
- package/src/models/cohere/middleware.ts +0 -76
- package/src/models/cohere/presets.ts +0 -186
- package/src/models/google/index.ts +0 -2
- package/src/models/google/middleware.test.ts +0 -298
- package/src/models/google/middleware.ts +0 -137
- package/src/models/google/presets.ts +0 -118
- package/src/models/meta/index.ts +0 -1
- package/src/models/meta/presets.ts +0 -143
- package/src/models/openai/index.ts +0 -2
- package/src/models/openai/middleware.test.ts +0 -189
- package/src/models/openai/middleware.ts +0 -103
- package/src/models/openai/presets.ts +0 -280
- package/src/models/types.ts +0 -114
- package/src/models/voyage/index.ts +0 -2
- package/src/models/voyage/middleware.test.ts +0 -28
- package/src/models/voyage/middleware.ts +0 -23
- package/src/models/voyage/presets.ts +0 -126
- package/src/providers/anthropic/canonical.ts +0 -17
- package/src/providers/anthropic/index.ts +0 -1
- package/src/providers/bedrock/canonical.ts +0 -87
- package/src/providers/bedrock/index.ts +0 -2
- package/src/providers/bedrock/middleware.test.ts +0 -303
- package/src/providers/bedrock/middleware.ts +0 -128
- package/src/providers/cohere/canonical.ts +0 -26
- package/src/providers/cohere/index.ts +0 -1
- package/src/providers/groq/canonical.ts +0 -21
- package/src/providers/groq/index.ts +0 -1
- package/src/providers/openai/canonical.ts +0 -16
- package/src/providers/openai/index.ts +0 -1
- package/src/providers/registry.test.ts +0 -44
- package/src/providers/registry.ts +0 -165
- package/src/providers/types.ts +0 -20
- package/src/providers/vertex/canonical.ts +0 -17
- package/src/providers/vertex/index.ts +0 -1
- package/src/providers/voyage/canonical.ts +0 -16
- package/src/providers/voyage/index.ts +0 -1
- package/src/telemetry/ai-sdk.ts +0 -46
- package/src/telemetry/baggage.ts +0 -27
- package/src/telemetry/fetch.ts +0 -62
- package/src/telemetry/gen-ai.ts +0 -113
- package/src/telemetry/http.ts +0 -62
- package/src/telemetry/index.ts +0 -1
- package/src/telemetry/memory.ts +0 -36
- package/src/telemetry/span.ts +0 -85
- package/src/telemetry/stream.ts +0 -64
- package/src/types.ts +0 -223
- package/src/utils/env.ts +0 -7
- package/src/utils/headers.ts +0 -27
- package/src/utils/preset.ts +0 -65
- package/src/utils/request.test.ts +0 -75
- package/src/utils/request.ts +0 -52
- package/src/utils/response.ts +0 -84
- package/src/utils/url.ts +0 -26
|
@@ -11,14 +11,15 @@ export const geminiDimensionsMiddleware = {
|
|
|
11
11
|
const dimensions = unknown["dimensions"];
|
|
12
12
|
if (!dimensions)
|
|
13
13
|
return params;
|
|
14
|
-
(params.providerOptions["google"] ??= {})
|
|
14
|
+
const target = (params.providerOptions["google"] ??= {});
|
|
15
|
+
target.outputDimensionality = dimensions;
|
|
15
16
|
delete unknown["dimensions"];
|
|
16
17
|
return params;
|
|
17
18
|
},
|
|
18
19
|
};
|
|
19
20
|
// https://ai.google.dev/gemini-api/docs/thinking#thinking-levels
|
|
20
21
|
export function mapGeminiReasoningEffort(effort, modelId) {
|
|
21
|
-
if (modelId.includes("
|
|
22
|
+
if (modelId.includes("pro")) {
|
|
22
23
|
switch (effort) {
|
|
23
24
|
case "none":
|
|
24
25
|
case "minimal":
|
|
@@ -28,26 +29,22 @@ export function mapGeminiReasoningEffort(effort, modelId) {
|
|
|
28
29
|
return "medium";
|
|
29
30
|
case "high":
|
|
30
31
|
case "xhigh":
|
|
31
|
-
case "max":
|
|
32
32
|
return "high";
|
|
33
33
|
}
|
|
34
34
|
}
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
return "high";
|
|
48
|
-
}
|
|
35
|
+
// Flash
|
|
36
|
+
switch (effort) {
|
|
37
|
+
case "none":
|
|
38
|
+
case "minimal":
|
|
39
|
+
return "minimal";
|
|
40
|
+
case "low":
|
|
41
|
+
return "low";
|
|
42
|
+
case "medium":
|
|
43
|
+
return "medium";
|
|
44
|
+
case "high":
|
|
45
|
+
case "xhigh":
|
|
46
|
+
return "high";
|
|
49
47
|
}
|
|
50
|
-
return effort;
|
|
51
48
|
}
|
|
52
49
|
export const GEMINI_DEFAULT_MAX_OUTPUT_TOKENS = 65536;
|
|
53
50
|
export const GEMINI_2_5_PRO_MIN_THINKING_BUDGET = 128;
|
|
@@ -58,6 +55,9 @@ export const geminiReasoningMiddleware = {
|
|
|
58
55
|
const unknown = params.providerOptions?.["unknown"];
|
|
59
56
|
if (!unknown)
|
|
60
57
|
return params;
|
|
58
|
+
// If thinking options exist, just pass through
|
|
59
|
+
if (unknown["thinking_config"])
|
|
60
|
+
return params;
|
|
61
61
|
const reasoning = unknown["reasoning"];
|
|
62
62
|
if (!reasoning)
|
|
63
63
|
return params;
|
|
@@ -65,19 +65,19 @@ export const geminiReasoningMiddleware = {
|
|
|
65
65
|
const modelId = model.modelId;
|
|
66
66
|
if (modelId.includes("gemini-2")) {
|
|
67
67
|
const is25Pro = modelId.includes("gemini-2.5-pro");
|
|
68
|
-
target
|
|
68
|
+
target.thinkingConfig = {
|
|
69
69
|
thinkingBudget: reasoning.max_tokens ??
|
|
70
70
|
calculateReasoningBudgetFromEffort(reasoning.effort ?? "none", params.maxOutputTokens ?? GEMINI_DEFAULT_MAX_OUTPUT_TOKENS, is25Pro ? GEMINI_2_5_PRO_MIN_THINKING_BUDGET : 0),
|
|
71
71
|
};
|
|
72
72
|
}
|
|
73
73
|
else if (modelId.includes("gemini-3") && reasoning.effort) {
|
|
74
|
-
target
|
|
74
|
+
target.thinkingConfig = {
|
|
75
75
|
thinkingLevel: mapGeminiReasoningEffort(reasoning.effort, modelId),
|
|
76
76
|
};
|
|
77
77
|
// FUTURE: warn if model is gemini-3 and max_tokens (unsupported) was ignored
|
|
78
78
|
}
|
|
79
|
-
(target
|
|
80
|
-
|
|
79
|
+
const thinkingConfig = (target.thinkingConfig ??= {});
|
|
80
|
+
thinkingConfig.includeThoughts = reasoning.enabled ? !reasoning.exclude : false;
|
|
81
81
|
delete unknown["reasoning"];
|
|
82
82
|
return params;
|
|
83
83
|
},
|
|
@@ -91,9 +91,13 @@ export const geminiPromptCachingMiddleware = {
|
|
|
91
91
|
const unknown = params.providerOptions?.["unknown"];
|
|
92
92
|
if (!unknown)
|
|
93
93
|
return params;
|
|
94
|
-
|
|
95
|
-
if (
|
|
96
|
-
|
|
94
|
+
// If cached_content options exist, just pass through
|
|
95
|
+
if (unknown["cached_content"])
|
|
96
|
+
return params;
|
|
97
|
+
const promptCacheKey = unknown["prompt_cache_key"];
|
|
98
|
+
if (promptCacheKey) {
|
|
99
|
+
(params.providerOptions["google"] ??= {}).cachedContent =
|
|
100
|
+
promptCacheKey;
|
|
97
101
|
}
|
|
98
102
|
delete unknown["cached_content"];
|
|
99
103
|
return params;
|
|
@@ -10,21 +10,25 @@ export const openAIDimensionsMiddleware = {
|
|
|
10
10
|
const dimensions = unknown["dimensions"];
|
|
11
11
|
if (!dimensions)
|
|
12
12
|
return params;
|
|
13
|
-
(params.providerOptions["openai"] ??= {})
|
|
13
|
+
const target = (params.providerOptions["openai"] ??= {});
|
|
14
|
+
target.dimensions = dimensions;
|
|
14
15
|
delete unknown["dimensions"];
|
|
15
16
|
return params;
|
|
16
17
|
},
|
|
17
18
|
};
|
|
18
19
|
function mapGptOssReasoningEffort(effort) {
|
|
19
20
|
switch (effort) {
|
|
21
|
+
case undefined:
|
|
22
|
+
case "none":
|
|
23
|
+
return;
|
|
24
|
+
case "minimal":
|
|
25
|
+
case "low":
|
|
26
|
+
return "low";
|
|
20
27
|
case "medium":
|
|
21
28
|
return "medium";
|
|
22
29
|
case "high":
|
|
23
30
|
case "xhigh":
|
|
24
|
-
case "max":
|
|
25
31
|
return "high";
|
|
26
|
-
default:
|
|
27
|
-
return "low";
|
|
28
32
|
}
|
|
29
33
|
}
|
|
30
34
|
export const openAIReasoningMiddleware = {
|
|
@@ -41,13 +45,13 @@ export const openAIReasoningMiddleware = {
|
|
|
41
45
|
const isGptOss = model.modelId.includes("gpt-oss");
|
|
42
46
|
if (isGptOss) {
|
|
43
47
|
// FUTURE: warn that unable to disable reasoning for gpt-oss models
|
|
44
|
-
target
|
|
48
|
+
target.reasoningEffort = mapGptOssReasoningEffort(reasoning.effort);
|
|
45
49
|
}
|
|
46
50
|
else if (reasoning.enabled === false) {
|
|
47
|
-
target
|
|
51
|
+
target.reasoningEffort = "none";
|
|
48
52
|
}
|
|
49
53
|
else if (reasoning.effort) {
|
|
50
|
-
target
|
|
54
|
+
target.reasoningEffort = reasoning.effort;
|
|
51
55
|
}
|
|
52
56
|
// FUTURE: warn that reasoning.max_tokens (not supported) was ignored
|
|
53
57
|
delete unknown["reasoning"];
|
|
@@ -67,9 +71,9 @@ export const openAIPromptCachingMiddleware = {
|
|
|
67
71
|
if (key || retention) {
|
|
68
72
|
const target = (params.providerOptions["openai"] ??= {});
|
|
69
73
|
if (key)
|
|
70
|
-
target
|
|
74
|
+
target.promptCacheKey = key;
|
|
71
75
|
if (retention)
|
|
72
|
-
target
|
|
76
|
+
target.promptCacheRetention = retention;
|
|
73
77
|
}
|
|
74
78
|
delete unknown["prompt_cache_key"];
|
|
75
79
|
delete unknown["prompt_cache_retention"];
|
|
@@ -10,7 +10,8 @@ export const voyageDimensionsMiddleware = {
|
|
|
10
10
|
const dimensions = unknown["dimensions"];
|
|
11
11
|
if (!dimensions)
|
|
12
12
|
return params;
|
|
13
|
-
(params.providerOptions["voyage"] ??= {})
|
|
13
|
+
const target = (params.providerOptions["voyage"] ??= {});
|
|
14
|
+
target.outputDimension = dimensions;
|
|
14
15
|
delete unknown["dimensions"];
|
|
15
16
|
return params;
|
|
16
17
|
},
|
|
@@ -7,14 +7,15 @@ export const bedrockGptReasoningMiddleware = {
|
|
|
7
7
|
if (!model.modelId.includes("gpt"))
|
|
8
8
|
return params;
|
|
9
9
|
const bedrock = params.providerOptions?.["bedrock"];
|
|
10
|
-
if (!bedrock
|
|
10
|
+
if (!bedrock)
|
|
11
11
|
return params;
|
|
12
|
-
const effort = bedrock
|
|
12
|
+
const effort = bedrock.reasoningEffort;
|
|
13
13
|
if (effort === undefined)
|
|
14
14
|
return params;
|
|
15
|
-
const target = (bedrock
|
|
16
|
-
|
|
17
|
-
|
|
15
|
+
const target = (bedrock.reasoningConfig ??= {});
|
|
16
|
+
// @ts-expect-error AI SDK does accept this
|
|
17
|
+
target.maxReasoningEffort = effort;
|
|
18
|
+
delete bedrock.reasoningEffort;
|
|
18
19
|
return params;
|
|
19
20
|
},
|
|
20
21
|
};
|
|
@@ -25,28 +26,25 @@ export const bedrockClaudeReasoningMiddleware = {
|
|
|
25
26
|
if (!model.modelId.includes("claude"))
|
|
26
27
|
return params;
|
|
27
28
|
const bedrock = params.providerOptions?.["bedrock"];
|
|
28
|
-
if (!bedrock
|
|
29
|
+
if (!bedrock)
|
|
29
30
|
return params;
|
|
30
|
-
const thinking = bedrock
|
|
31
|
-
const effort = bedrock
|
|
31
|
+
const thinking = bedrock.thinking;
|
|
32
|
+
const effort = bedrock.effort;
|
|
32
33
|
if (!thinking && effort === undefined)
|
|
33
34
|
return params;
|
|
34
|
-
const target = (bedrock
|
|
35
|
+
const target = (bedrock.reasoningConfig ??= {});
|
|
35
36
|
if (thinking && typeof thinking === "object") {
|
|
36
|
-
|
|
37
|
-
if (
|
|
38
|
-
target
|
|
39
|
-
}
|
|
40
|
-
if (thinkingOptions["budgetTokens"] !== undefined) {
|
|
41
|
-
target["budgetTokens"] = thinkingOptions["budgetTokens"];
|
|
37
|
+
target.type = thinking.type;
|
|
38
|
+
if ("budgetTokens" in thinking && thinking.budgetTokens !== undefined) {
|
|
39
|
+
target.budgetTokens = thinking.budgetTokens;
|
|
42
40
|
}
|
|
43
41
|
}
|
|
44
42
|
// FUTURE: bedrock currently does not support "effort" for other 4.x models
|
|
45
43
|
if (effort !== undefined && isClaude46(model.modelId)) {
|
|
46
|
-
target
|
|
44
|
+
target.maxReasoningEffort = effort;
|
|
47
45
|
}
|
|
48
|
-
delete bedrock
|
|
49
|
-
delete bedrock
|
|
46
|
+
delete bedrock.thinking;
|
|
47
|
+
delete bedrock.effort;
|
|
50
48
|
return params;
|
|
51
49
|
},
|
|
52
50
|
};
|
|
@@ -79,18 +77,18 @@ export const bedrockPromptCachingMiddleware = {
|
|
|
79
77
|
delete entryBedrock["cacheControl"];
|
|
80
78
|
};
|
|
81
79
|
for (const message of params.prompt) {
|
|
82
|
-
processCacheControl(message
|
|
83
|
-
if (!Array.isArray(message
|
|
80
|
+
processCacheControl(message.providerOptions);
|
|
81
|
+
if (!Array.isArray(message.content))
|
|
84
82
|
continue;
|
|
85
|
-
for (const part of message
|
|
86
|
-
processCacheControl(part
|
|
83
|
+
for (const part of message.content) {
|
|
84
|
+
processCacheControl(part.providerOptions);
|
|
87
85
|
}
|
|
88
86
|
lastCacheableBlock = message;
|
|
89
87
|
}
|
|
90
88
|
const bedrock = params.providerOptions?.["bedrock"];
|
|
91
89
|
const cacheControl = bedrock?.["cacheControl"];
|
|
92
90
|
if (cacheControl && !hasExplicitCacheControl && lastCacheableBlock) {
|
|
93
|
-
((lastCacheableBlock
|
|
91
|
+
((lastCacheableBlock.providerOptions ??= {})["bedrock"] ??= {})["cachePoint"] =
|
|
94
92
|
toBedrockCachePoint(model.modelId, cacheControl);
|
|
95
93
|
}
|
|
96
94
|
delete bedrock?.["cacheControl"];
|
|
@@ -55,7 +55,9 @@ export const withCanonicalIds = (provider, config = {}) => {
|
|
|
55
55
|
};
|
|
56
56
|
const needsFallbackWrap = stripNamespace || normalizeDelimiters || namespaceSeparator !== "/" || !!prefix || !!postfix;
|
|
57
57
|
// FUTURE: use embeddingModel instead of textEmbeddingModel once voyage supports it
|
|
58
|
+
// oxlint-disable-next-line unbound-method
|
|
58
59
|
const languageModel = provider.languageModel;
|
|
60
|
+
// oxlint-disable-next-line unbound-method, no-deprecated
|
|
59
61
|
const embeddingModel = provider.textEmbeddingModel;
|
|
60
62
|
const fallbackProvider = needsFallbackWrap
|
|
61
63
|
? {
|
|
@@ -69,6 +71,7 @@ export const withCanonicalIds = (provider, config = {}) => {
|
|
|
69
71
|
embeddingModel: (id) => {
|
|
70
72
|
const mapped = applyFallbackAffixes(normalizeId(id));
|
|
71
73
|
logger.debug(`[canonical] mapped ${id} to ${mapped}`);
|
|
74
|
+
// oxlint-disable-next-line no-deprecated
|
|
72
75
|
return embeddingModel(mapped);
|
|
73
76
|
},
|
|
74
77
|
}
|
package/dist/telemetry/fetch.js
CHANGED
|
@@ -16,8 +16,13 @@ const getRequestAttributes = (input, init) => {
|
|
|
16
16
|
attrs["url.full"] = input.url;
|
|
17
17
|
return attrs;
|
|
18
18
|
};
|
|
19
|
-
const shouldTraceFetch = (init) =>
|
|
20
|
-
init
|
|
19
|
+
const shouldTraceFetch = (init) => {
|
|
20
|
+
const h = init?.headers;
|
|
21
|
+
if (!h || typeof h !== "object" || Array.isArray(h) || h instanceof Headers)
|
|
22
|
+
return false;
|
|
23
|
+
const ua = h["user-agent"];
|
|
24
|
+
return typeof ua === "string" && ua.includes("ai-sdk/provider-utils");
|
|
25
|
+
};
|
|
21
26
|
const otelFetch = (input, init) => {
|
|
22
27
|
const original = g[ORIGINAL_FETCH_KEY];
|
|
23
28
|
if (!fetchTracingEnabled)
|
package/dist/telemetry/gen-ai.js
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import { metrics } from "@opentelemetry/api";
|
|
2
2
|
import { STATUS_CODE } from "../errors/utils";
|
|
3
|
-
const
|
|
4
|
-
|
|
3
|
+
const getMeter = () => metrics.getMeter("@hebo/gateway");
|
|
4
|
+
let requestDurationHistogram;
|
|
5
|
+
let timePerOutputTokenHistogram;
|
|
6
|
+
let tokenUsageHistogram;
|
|
7
|
+
const getRequestDurationHistogram = () => (requestDurationHistogram ??= getMeter().createHistogram("gen_ai.server.request.duration", {
|
|
5
8
|
description: "End-to-end gateway request duration",
|
|
6
9
|
unit: "s",
|
|
7
10
|
advice: {
|
|
@@ -9,8 +12,8 @@ const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.du
|
|
|
9
12
|
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 240,
|
|
10
13
|
],
|
|
11
14
|
},
|
|
12
|
-
});
|
|
13
|
-
const
|
|
15
|
+
}));
|
|
16
|
+
const getTimePerOutputTokenHistogram = () => (timePerOutputTokenHistogram ??= getMeter().createHistogram("gen_ai.server.time_per_output_token", {
|
|
14
17
|
description: "End-to-end gateway request duration per output token",
|
|
15
18
|
unit: "s",
|
|
16
19
|
advice: {
|
|
@@ -18,17 +21,17 @@ const timePerOutputTokenHistogram = meter.createHistogram("gen_ai.server.time_pe
|
|
|
18
21
|
0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5,
|
|
19
22
|
],
|
|
20
23
|
},
|
|
21
|
-
});
|
|
22
|
-
const
|
|
24
|
+
}));
|
|
25
|
+
const getTokenUsageHistogram = () => (tokenUsageHistogram ??= getMeter().createHistogram("gen_ai.client.token.usage", {
|
|
23
26
|
description: "Token usage reported by upstream model responses",
|
|
24
27
|
unit: "{token}",
|
|
25
28
|
advice: {
|
|
26
29
|
explicitBucketBoundaries: [
|
|
27
|
-
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072,
|
|
28
|
-
524288, 1048576,
|
|
30
|
+
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072,
|
|
31
|
+
262144, 524288, 1048576,
|
|
29
32
|
],
|
|
30
33
|
},
|
|
31
|
-
});
|
|
34
|
+
}));
|
|
32
35
|
export const getGenAiGeneralAttributes = (ctx, signalLevel) => {
|
|
33
36
|
if (!signalLevel || signalLevel === "off")
|
|
34
37
|
return {};
|
|
@@ -47,7 +50,7 @@ export const recordRequestDuration = (duration, status, ctx, signalLevel) => {
|
|
|
47
50
|
if (status !== 200) {
|
|
48
51
|
attrs["error.type"] = `${status} ${STATUS_CODE(status).toLowerCase()}`;
|
|
49
52
|
}
|
|
50
|
-
|
|
53
|
+
getRequestDurationHistogram().record(duration / 1000, attrs);
|
|
51
54
|
};
|
|
52
55
|
// FUTURE: record unsuccessful calls
|
|
53
56
|
export const recordTimePerOutputToken = (start, tokenAttrs, metricAttrs, signalLevel) => {
|
|
@@ -56,7 +59,7 @@ export const recordTimePerOutputToken = (start, tokenAttrs, metricAttrs, signalL
|
|
|
56
59
|
const outputTokens = tokenAttrs["gen_ai.usage.output_tokens"];
|
|
57
60
|
if (typeof outputTokens !== "number" || outputTokens <= 0)
|
|
58
61
|
return;
|
|
59
|
-
|
|
62
|
+
getTimePerOutputTokenHistogram().record((performance.now() - start) / 1000 / outputTokens, metricAttrs);
|
|
60
63
|
};
|
|
61
64
|
// FUTURE: record unsuccessful calls
|
|
62
65
|
export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
|
|
@@ -65,7 +68,7 @@ export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
|
|
|
65
68
|
const record = (value, tokenType) => {
|
|
66
69
|
if (typeof value !== "number")
|
|
67
70
|
return;
|
|
68
|
-
|
|
71
|
+
getTokenUsageHistogram().record(value, Object.assign({}, metricAttrs, { "gen_ai.token.type": tokenType }));
|
|
69
72
|
};
|
|
70
73
|
record(tokenAttrs["gen_ai.usage.input_tokens"], "input");
|
|
71
74
|
record(tokenAttrs["gen_ai.usage.output_tokens"], "output");
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
import type { TelemetrySignalLevel } from "../types";
|
|
2
|
-
export declare const
|
|
2
|
+
export declare const observeV8jsMemoryMetrics: (level?: TelemetrySignalLevel) => void;
|
package/dist/telemetry/memory.js
CHANGED
|
@@ -1,18 +1,9 @@
|
|
|
1
1
|
import { metrics } from "@opentelemetry/api";
|
|
2
|
-
const
|
|
2
|
+
const getMeter = () => metrics.getMeter("@hebo/gateway");
|
|
3
3
|
const defaultHeapSpaceAttrs = { "v8js.heap.space.name": "total" };
|
|
4
|
-
|
|
5
|
-
description: "Used bytes in the V8 heap",
|
|
6
|
-
unit: "By",
|
|
7
|
-
});
|
|
8
|
-
const heapSpacePhysicalSizeCounter = meter.createUpDownCounter("v8js.memory.heap.space.physical_size", {
|
|
9
|
-
description: "Physical bytes allocated for the V8 heap space",
|
|
10
|
-
unit: "By",
|
|
11
|
-
});
|
|
4
|
+
let registered = false;
|
|
12
5
|
const isEnabled = (level) => level === "recommended" || level === "full";
|
|
13
|
-
|
|
14
|
-
if (!isEnabled(level))
|
|
15
|
-
return;
|
|
6
|
+
const observeMemory = (observe) => {
|
|
16
7
|
let usage;
|
|
17
8
|
try {
|
|
18
9
|
usage = globalThis.process?.memoryUsage?.();
|
|
@@ -22,6 +13,31 @@ export const recordV8jsMemory = (level) => {
|
|
|
22
13
|
}
|
|
23
14
|
if (!usage)
|
|
24
15
|
return;
|
|
25
|
-
|
|
26
|
-
|
|
16
|
+
observe(usage.heapUsed, usage.rss);
|
|
17
|
+
};
|
|
18
|
+
export const observeV8jsMemoryMetrics = (level) => {
|
|
19
|
+
if (!isEnabled(level) || registered)
|
|
20
|
+
return;
|
|
21
|
+
registered = true;
|
|
22
|
+
const meter = getMeter();
|
|
23
|
+
meter
|
|
24
|
+
.createObservableGauge("v8js.memory.heap.used", {
|
|
25
|
+
description: "Used bytes in the V8 heap",
|
|
26
|
+
unit: "By",
|
|
27
|
+
})
|
|
28
|
+
.addCallback((result) => {
|
|
29
|
+
observeMemory((heapUsed) => {
|
|
30
|
+
result.observe(heapUsed, defaultHeapSpaceAttrs);
|
|
31
|
+
});
|
|
32
|
+
});
|
|
33
|
+
meter
|
|
34
|
+
.createObservableGauge("v8js.memory.heap.space.physical_size", {
|
|
35
|
+
description: "Physical bytes allocated for the V8 heap space",
|
|
36
|
+
unit: "By",
|
|
37
|
+
})
|
|
38
|
+
.addCallback((result) => {
|
|
39
|
+
observeMemory((_, rss) => {
|
|
40
|
+
result.observe(rss, defaultHeapSpaceAttrs);
|
|
41
|
+
});
|
|
42
|
+
});
|
|
27
43
|
};
|
package/dist/telemetry/span.js
CHANGED
package/dist/telemetry/stream.js
CHANGED
|
@@ -1,51 +1,58 @@
|
|
|
1
1
|
import { toOpenAIError } from "../errors/openai";
|
|
2
|
-
const isErrorChunk = (v) => v instanceof Error ||
|
|
2
|
+
const isErrorChunk = (v) => v instanceof Error || (typeof v === "object" && v !== null && "error" in v);
|
|
3
3
|
export const wrapStream = (src, hooks) => {
|
|
4
4
|
let finished = false;
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
let reader;
|
|
6
|
+
const done = (controller, status, reason) => {
|
|
7
|
+
if (finished)
|
|
8
|
+
return;
|
|
9
|
+
finished = true;
|
|
10
|
+
hooks.onDone?.(status, reason);
|
|
11
|
+
if (status !== 200) {
|
|
12
|
+
reader?.cancel(reason).catch(() => { });
|
|
13
|
+
}
|
|
14
|
+
try {
|
|
15
|
+
controller.close();
|
|
9
16
|
}
|
|
10
|
-
|
|
11
|
-
controller.close();
|
|
17
|
+
catch { }
|
|
12
18
|
};
|
|
13
19
|
return new ReadableStream({
|
|
14
20
|
async start(controller) {
|
|
15
|
-
|
|
21
|
+
reader = src.getReader();
|
|
16
22
|
try {
|
|
17
23
|
for (;;) {
|
|
18
|
-
// oxlint-disable-next-line no-await-in-loop
|
|
24
|
+
// oxlint-disable-next-line no-await-in-loop, no-unsafe-assignment
|
|
19
25
|
const { value, done: eof } = await reader.read();
|
|
20
26
|
if (eof)
|
|
21
27
|
break;
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
const status = out.error?.type === "invalid_request_error" ? 422 : 502;
|
|
26
|
-
done(reader, controller, status, value);
|
|
28
|
+
controller.enqueue(value);
|
|
29
|
+
if (isErrorChunk(value)) {
|
|
30
|
+
done(controller, toOpenAIError(value).error.type === "invalid_request_error" ? 422 : 502, value);
|
|
27
31
|
return;
|
|
28
32
|
}
|
|
29
33
|
}
|
|
30
|
-
done(
|
|
34
|
+
done(controller, 200);
|
|
31
35
|
}
|
|
32
36
|
catch (err) {
|
|
33
|
-
|
|
34
|
-
|
|
37
|
+
try {
|
|
38
|
+
controller.enqueue(toOpenAIError(err));
|
|
39
|
+
}
|
|
40
|
+
catch { }
|
|
41
|
+
done(controller, 502, err);
|
|
35
42
|
}
|
|
36
43
|
finally {
|
|
37
44
|
try {
|
|
38
|
-
reader
|
|
45
|
+
reader?.releaseLock();
|
|
39
46
|
}
|
|
40
47
|
catch { }
|
|
41
48
|
}
|
|
42
49
|
},
|
|
43
50
|
cancel(reason) {
|
|
44
|
-
if (
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
51
|
+
if (finished)
|
|
52
|
+
return;
|
|
53
|
+
finished = true;
|
|
54
|
+
hooks.onDone?.(499, reason);
|
|
55
|
+
reader?.cancel(reason).catch(() => { });
|
|
49
56
|
},
|
|
50
57
|
});
|
|
51
58
|
};
|
package/dist/utils/env.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
const NODE_ENV = typeof process === "undefined"
|
|
2
|
-
?
|
|
3
|
-
|
|
2
|
+
? // oxlint-disable-next-line no-unsafe-member-access
|
|
3
|
+
(globalThis.NODE_ENV ?? globalThis.ENV?.NODE_ENV)
|
|
4
|
+
: // oxlint-disable-next-line no-unsafe-assignment
|
|
5
|
+
process.env?.NODE_ENV;
|
|
4
6
|
export const isProduction = () => NODE_ENV === "production";
|
|
5
7
|
export const isTest = () => NODE_ENV === "test";
|
package/dist/utils/preset.js
CHANGED
package/dist/utils/response.js
CHANGED
|
@@ -19,7 +19,9 @@ export const mergeResponseInit = (defaultHeaders, responseInit) => {
|
|
|
19
19
|
const headers = new Headers(defaultHeaders);
|
|
20
20
|
const override = responseInit?.headers;
|
|
21
21
|
if (override) {
|
|
22
|
-
new Headers(override).forEach((value, key) =>
|
|
22
|
+
new Headers(override).forEach((value, key) => {
|
|
23
|
+
headers.set(key, value);
|
|
24
|
+
});
|
|
23
25
|
}
|
|
24
26
|
if (!responseInit)
|
|
25
27
|
return { headers };
|