@hebo-ai/gateway 0.6.1 → 0.6.2-rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/endpoints/chat-completions/converters.js +7 -3
- package/dist/endpoints/chat-completions/handler.js +2 -0
- package/dist/endpoints/chat-completions/otel.js +1 -1
- package/dist/endpoints/chat-completions/schema.d.ts +10 -2
- package/dist/endpoints/chat-completions/schema.js +11 -1
- package/dist/endpoints/embeddings/handler.js +2 -0
- package/dist/endpoints/models/converters.js +3 -3
- package/dist/lifecycle.js +2 -2
- package/dist/logger/default.js +3 -3
- package/dist/logger/index.d.ts +2 -5
- package/dist/middleware/common.js +1 -0
- package/dist/middleware/utils.js +0 -2
- package/dist/models/amazon/middleware.js +2 -0
- package/dist/models/catalog.js +5 -1
- package/dist/models/openai/middleware.js +6 -2
- package/dist/providers/registry.js +3 -0
- package/dist/telemetry/fetch.js +7 -2
- package/dist/telemetry/gen-ai.js +15 -12
- package/dist/telemetry/memory.d.ts +1 -1
- package/dist/telemetry/memory.js +30 -14
- package/dist/telemetry/span.js +1 -1
- package/dist/telemetry/stream.js +30 -23
- package/dist/utils/env.js +4 -2
- package/dist/utils/preset.js +1 -0
- package/dist/utils/response.js +3 -1
- package/package.json +32 -50
- package/src/config.ts +0 -98
- package/src/endpoints/chat-completions/converters.test.ts +0 -631
- package/src/endpoints/chat-completions/converters.ts +0 -899
- package/src/endpoints/chat-completions/handler.test.ts +0 -391
- package/src/endpoints/chat-completions/handler.ts +0 -201
- package/src/endpoints/chat-completions/index.ts +0 -4
- package/src/endpoints/chat-completions/otel.test.ts +0 -315
- package/src/endpoints/chat-completions/otel.ts +0 -214
- package/src/endpoints/chat-completions/schema.ts +0 -354
- package/src/endpoints/embeddings/converters.ts +0 -51
- package/src/endpoints/embeddings/handler.test.ts +0 -133
- package/src/endpoints/embeddings/handler.ts +0 -137
- package/src/endpoints/embeddings/index.ts +0 -4
- package/src/endpoints/embeddings/otel.ts +0 -40
- package/src/endpoints/embeddings/schema.ts +0 -36
- package/src/endpoints/models/converters.ts +0 -56
- package/src/endpoints/models/handler.test.ts +0 -122
- package/src/endpoints/models/handler.ts +0 -37
- package/src/endpoints/models/index.ts +0 -3
- package/src/endpoints/models/schema.ts +0 -37
- package/src/errors/ai-sdk.ts +0 -99
- package/src/errors/gateway.ts +0 -17
- package/src/errors/openai.ts +0 -57
- package/src/errors/utils.ts +0 -47
- package/src/gateway.ts +0 -50
- package/src/index.ts +0 -19
- package/src/lifecycle.ts +0 -135
- package/src/logger/default.ts +0 -105
- package/src/logger/index.ts +0 -42
- package/src/middleware/common.test.ts +0 -215
- package/src/middleware/common.ts +0 -163
- package/src/middleware/debug.ts +0 -37
- package/src/middleware/matcher.ts +0 -161
- package/src/middleware/utils.ts +0 -34
- package/src/models/amazon/index.ts +0 -2
- package/src/models/amazon/middleware.test.ts +0 -133
- package/src/models/amazon/middleware.ts +0 -79
- package/src/models/amazon/presets.ts +0 -104
- package/src/models/anthropic/index.ts +0 -2
- package/src/models/anthropic/middleware.test.ts +0 -643
- package/src/models/anthropic/middleware.ts +0 -148
- package/src/models/anthropic/presets.ts +0 -191
- package/src/models/catalog.ts +0 -13
- package/src/models/cohere/index.ts +0 -2
- package/src/models/cohere/middleware.test.ts +0 -138
- package/src/models/cohere/middleware.ts +0 -76
- package/src/models/cohere/presets.ts +0 -186
- package/src/models/google/index.ts +0 -2
- package/src/models/google/middleware.test.ts +0 -298
- package/src/models/google/middleware.ts +0 -137
- package/src/models/google/presets.ts +0 -118
- package/src/models/meta/index.ts +0 -1
- package/src/models/meta/presets.ts +0 -143
- package/src/models/openai/index.ts +0 -2
- package/src/models/openai/middleware.test.ts +0 -189
- package/src/models/openai/middleware.ts +0 -103
- package/src/models/openai/presets.ts +0 -280
- package/src/models/types.ts +0 -114
- package/src/models/voyage/index.ts +0 -2
- package/src/models/voyage/middleware.test.ts +0 -28
- package/src/models/voyage/middleware.ts +0 -23
- package/src/models/voyage/presets.ts +0 -126
- package/src/providers/anthropic/canonical.ts +0 -17
- package/src/providers/anthropic/index.ts +0 -1
- package/src/providers/bedrock/canonical.ts +0 -87
- package/src/providers/bedrock/index.ts +0 -2
- package/src/providers/bedrock/middleware.test.ts +0 -303
- package/src/providers/bedrock/middleware.ts +0 -128
- package/src/providers/cohere/canonical.ts +0 -26
- package/src/providers/cohere/index.ts +0 -1
- package/src/providers/groq/canonical.ts +0 -21
- package/src/providers/groq/index.ts +0 -1
- package/src/providers/openai/canonical.ts +0 -16
- package/src/providers/openai/index.ts +0 -1
- package/src/providers/registry.test.ts +0 -44
- package/src/providers/registry.ts +0 -165
- package/src/providers/types.ts +0 -20
- package/src/providers/vertex/canonical.ts +0 -17
- package/src/providers/vertex/index.ts +0 -1
- package/src/providers/voyage/canonical.ts +0 -16
- package/src/providers/voyage/index.ts +0 -1
- package/src/telemetry/ai-sdk.ts +0 -46
- package/src/telemetry/baggage.ts +0 -27
- package/src/telemetry/fetch.ts +0 -62
- package/src/telemetry/gen-ai.ts +0 -113
- package/src/telemetry/http.ts +0 -62
- package/src/telemetry/index.ts +0 -1
- package/src/telemetry/memory.ts +0 -36
- package/src/telemetry/span.ts +0 -85
- package/src/telemetry/stream.ts +0 -64
- package/src/types.ts +0 -223
- package/src/utils/env.ts +0 -7
- package/src/utils/headers.ts +0 -27
- package/src/utils/preset.ts +0 -65
- package/src/utils/request.test.ts +0 -75
- package/src/utils/request.ts +0 -52
- package/src/utils/response.ts +0 -84
- package/src/utils/url.ts +0 -26
package/README.md
CHANGED
|
@@ -569,7 +569,7 @@ Accepted request fields:
|
|
|
569
569
|
|
|
570
570
|
- `prompt_cache_key` + `prompt_cache_retention` (OpenAI style)
|
|
571
571
|
- `cache_control` (OpenRouter / Vercel / Claude style)
|
|
572
|
-
- `cached_content` (Gemini style)
|
|
572
|
+
- `extra_body { google: { cached_content } }` (Gemini style)
|
|
573
573
|
|
|
574
574
|
```json
|
|
575
575
|
{
|
|
@@ -6,9 +6,9 @@ import { toResponse } from "../../utils/response";
|
|
|
6
6
|
import { parseDataUrl } from "../../utils/url";
|
|
7
7
|
// --- Request Flow ---
|
|
8
8
|
export function convertToTextCallOptions(params) {
|
|
9
|
-
const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention,
|
|
9
|
+
const { messages, tools, tool_choice, temperature, max_tokens, max_completion_tokens, response_format, reasoning_effort, reasoning, prompt_cache_key, prompt_cache_retention, extra_body, cache_control, frequency_penalty, presence_penalty, seed, stop, top_p, ...rest } = params;
|
|
10
10
|
Object.assign(rest, parseReasoningOptions(reasoning_effort, reasoning));
|
|
11
|
-
Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, cached_content, cache_control));
|
|
11
|
+
Object.assign(rest, parsePromptCachingOptions(prompt_cache_key, prompt_cache_retention, extra_body?.google?.cached_content, cache_control));
|
|
12
12
|
const { toolChoice, activeTools } = convertToToolChoiceOptions(tool_choice);
|
|
13
13
|
return {
|
|
14
14
|
messages: convertToModelMessages(messages),
|
|
@@ -192,7 +192,7 @@ export function fromChatCompletionsContent(content) {
|
|
|
192
192
|
return fromFilePart(part.file.data, part.file.media_type, part.file.filename, part.cache_control);
|
|
193
193
|
case "input_audio":
|
|
194
194
|
return fromFilePart(part.input_audio.data, `audio/${part.input_audio.format}`, undefined, part.cache_control);
|
|
195
|
-
|
|
195
|
+
case "text": {
|
|
196
196
|
const out = {
|
|
197
197
|
type: "text",
|
|
198
198
|
text: part.text,
|
|
@@ -204,6 +204,8 @@ export function fromChatCompletionsContent(content) {
|
|
|
204
204
|
}
|
|
205
205
|
return out;
|
|
206
206
|
}
|
|
207
|
+
default:
|
|
208
|
+
throw new Error(`Unhandled content part type: ${part.type}`);
|
|
207
209
|
}
|
|
208
210
|
});
|
|
209
211
|
}
|
|
@@ -305,6 +307,7 @@ function parseToolResult(content) {
|
|
|
305
307
|
}
|
|
306
308
|
function parseJsonOrText(content) {
|
|
307
309
|
try {
|
|
310
|
+
// oxlint-disable-next-line no-unsafe-assignment
|
|
308
311
|
return { type: "json", value: JSON.parse(content) };
|
|
309
312
|
}
|
|
310
313
|
catch {
|
|
@@ -414,6 +417,7 @@ export class ChatCompletionsStream extends TransformStream {
|
|
|
414
417
|
};
|
|
415
418
|
super({
|
|
416
419
|
transform(part, controller) {
|
|
420
|
+
// oxlint-disable-next-line switch-exhaustiveness-check
|
|
417
421
|
switch (part.type) {
|
|
418
422
|
case "text-delta": {
|
|
419
423
|
controller.enqueue(createChunk({ role: "assistant", content: part.text }, part.providerMetadata));
|
|
@@ -23,6 +23,7 @@ export const chatCompletions = (config) => {
|
|
|
23
23
|
}
|
|
24
24
|
// Parse + validate input.
|
|
25
25
|
try {
|
|
26
|
+
// oxlint-disable-next-line no-unsafe-assignment
|
|
26
27
|
ctx.body = await ctx.request.json();
|
|
27
28
|
}
|
|
28
29
|
catch {
|
|
@@ -66,6 +67,7 @@ export const chatCompletions = (config) => {
|
|
|
66
67
|
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
|
|
67
68
|
setSpanAttributes(genAiGeneralAttrs);
|
|
68
69
|
// Convert inputs to AI SDK call options.
|
|
70
|
+
// oxlint-disable-next-line no-unsafe-argument
|
|
69
71
|
const textOptions = convertToTextCallOptions(inputs);
|
|
70
72
|
logger.trace({
|
|
71
73
|
requestId: ctx.requestId,
|
|
@@ -96,7 +96,7 @@ const toMessageParts = (message) => {
|
|
|
96
96
|
case "system":
|
|
97
97
|
return toTextParts(message.content);
|
|
98
98
|
default:
|
|
99
|
-
|
|
99
|
+
throw new Error(`Unhandled content part type: ${message.role}`);
|
|
100
100
|
}
|
|
101
101
|
};
|
|
102
102
|
export const getChatRequestAttributes = (inputs, signalLevel) => {
|
|
@@ -658,7 +658,11 @@ declare const ChatCompletionsInputsSchema: z.ZodObject<{
|
|
|
658
658
|
in_memory: "in_memory";
|
|
659
659
|
"24h": "24h";
|
|
660
660
|
}>>;
|
|
661
|
-
|
|
661
|
+
extra_body: z.ZodOptional<z.ZodObject<{
|
|
662
|
+
google: z.ZodOptional<z.ZodObject<{
|
|
663
|
+
cached_content: z.ZodOptional<z.ZodString>;
|
|
664
|
+
}, z.core.$strip>>;
|
|
665
|
+
}, z.core.$strip>>;
|
|
662
666
|
cache_control: z.ZodOptional<z.ZodObject<{
|
|
663
667
|
type: z.ZodLiteral<"ephemeral">;
|
|
664
668
|
ttl: z.ZodOptional<z.ZodString>;
|
|
@@ -866,7 +870,11 @@ export declare const ChatCompletionsBodySchema: z.ZodObject<{
|
|
|
866
870
|
in_memory: "in_memory";
|
|
867
871
|
"24h": "24h";
|
|
868
872
|
}>>;
|
|
869
|
-
|
|
873
|
+
extra_body: z.ZodOptional<z.ZodObject<{
|
|
874
|
+
google: z.ZodOptional<z.ZodObject<{
|
|
875
|
+
cached_content: z.ZodOptional<z.ZodString>;
|
|
876
|
+
}, z.core.$strip>>;
|
|
877
|
+
}, z.core.$strip>>;
|
|
870
878
|
cache_control: z.ZodOptional<z.ZodObject<{
|
|
871
879
|
type: z.ZodLiteral<"ephemeral">;
|
|
872
880
|
ttl: z.ZodOptional<z.ZodString>;
|
|
@@ -213,7 +213,17 @@ const ChatCompletionsInputsSchema = z.object({
|
|
|
213
213
|
prompt_cache_key: z.string().optional(),
|
|
214
214
|
prompt_cache_retention: z.enum(["in_memory", "24h"]).optional(),
|
|
215
215
|
// Extension origin: Gemini explicit cache handle
|
|
216
|
-
|
|
216
|
+
// FUTURE: generalize extra_body handling
|
|
217
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/migrate/openai/overview
|
|
218
|
+
extra_body: z
|
|
219
|
+
.object({
|
|
220
|
+
google: z
|
|
221
|
+
.object({
|
|
222
|
+
cached_content: z.string().optional().meta({ extension: true }),
|
|
223
|
+
})
|
|
224
|
+
.optional(),
|
|
225
|
+
})
|
|
226
|
+
.optional(),
|
|
217
227
|
// Extension origin: OpenRouter/Vercel/Anthropic
|
|
218
228
|
cache_control: ChatCompletionsCacheControlSchema.optional().meta({ extension: true }),
|
|
219
229
|
// Extension origin: OpenRouter
|
|
@@ -23,6 +23,7 @@ export const embeddings = (config) => {
|
|
|
23
23
|
}
|
|
24
24
|
// Parse + validate input.
|
|
25
25
|
try {
|
|
26
|
+
// oxlint-disable-next-line no-unsafe-assignment
|
|
26
27
|
ctx.body = await ctx.request.json();
|
|
27
28
|
}
|
|
28
29
|
catch {
|
|
@@ -65,6 +66,7 @@ export const embeddings = (config) => {
|
|
|
65
66
|
const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
|
|
66
67
|
setSpanAttributes(genAiGeneralAttrs);
|
|
67
68
|
// Convert inputs to AI SDK call options.
|
|
69
|
+
// oxlint-disable-next-line no-unsafe-argument
|
|
68
70
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
69
71
|
logger.trace({ requestId: ctx.requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
70
72
|
addSpanEvent("hebo.options.prepared");
|
|
@@ -12,13 +12,13 @@ export function toModel(id, catalogModel) {
|
|
|
12
12
|
id,
|
|
13
13
|
object: "model",
|
|
14
14
|
created: createdTimestamp,
|
|
15
|
-
owned_by: id.split("/")[0]
|
|
15
|
+
owned_by: id.split("/")[0] ?? "system",
|
|
16
16
|
architecture: {
|
|
17
|
-
input_modalities: modalities?.input
|
|
17
|
+
input_modalities: modalities?.input ?? [],
|
|
18
18
|
modality: modalities?.input &&
|
|
19
19
|
modalities?.output &&
|
|
20
20
|
`${modalities.input?.[0]}->${modalities.output?.[0]}`,
|
|
21
|
-
output_modalities: modalities?.output
|
|
21
|
+
output_modalities: modalities?.output ?? [],
|
|
22
22
|
},
|
|
23
23
|
endpoints: providers?.map((provider) => ({
|
|
24
24
|
tag: provider,
|
package/dist/lifecycle.js
CHANGED
|
@@ -6,7 +6,7 @@ import { getBaggageAttributes } from "./telemetry/baggage";
|
|
|
6
6
|
import { instrumentFetch } from "./telemetry/fetch";
|
|
7
7
|
import { recordRequestDuration } from "./telemetry/gen-ai";
|
|
8
8
|
import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
|
|
9
|
-
import {
|
|
9
|
+
import { observeV8jsMemoryMetrics } from "./telemetry/memory";
|
|
10
10
|
import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
|
|
11
11
|
import { wrapStream } from "./telemetry/stream";
|
|
12
12
|
import { resolveOrCreateRequestId } from "./utils/request";
|
|
@@ -17,6 +17,7 @@ export const winterCgHandler = (run, config) => {
|
|
|
17
17
|
setSpanTracer(parsedConfig.telemetry?.tracer);
|
|
18
18
|
setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
|
|
19
19
|
instrumentFetch(parsedConfig.telemetry?.signals?.hebo);
|
|
20
|
+
observeV8jsMemoryMetrics(parsedConfig.telemetry?.signals?.hebo);
|
|
20
21
|
}
|
|
21
22
|
return async (request, state) => {
|
|
22
23
|
const start = performance.now();
|
|
@@ -57,7 +58,6 @@ export const winterCgHandler = (run, config) => {
|
|
|
57
58
|
if (ctx.operation === "chat" || ctx.operation === "embeddings") {
|
|
58
59
|
recordRequestDuration(performance.now() - start, realStatus, ctx, parsedConfig.telemetry?.signals?.gen_ai);
|
|
59
60
|
}
|
|
60
|
-
recordV8jsMemory(parsedConfig.telemetry?.signals?.hebo);
|
|
61
61
|
span.finish();
|
|
62
62
|
};
|
|
63
63
|
try {
|
package/dist/logger/default.js
CHANGED
|
@@ -37,8 +37,6 @@ function serializeError(err, _seen) {
|
|
|
37
37
|
return out;
|
|
38
38
|
}
|
|
39
39
|
const buildLogObject = (level, args) => {
|
|
40
|
-
if (args.length === 0)
|
|
41
|
-
return {};
|
|
42
40
|
const [first, second] = args;
|
|
43
41
|
let obj;
|
|
44
42
|
let err;
|
|
@@ -70,7 +68,9 @@ const buildLogObject = (level, args) => {
|
|
|
70
68
|
...obj,
|
|
71
69
|
};
|
|
72
70
|
};
|
|
73
|
-
const makeLogFn = (level, write) => (...args) =>
|
|
71
|
+
const makeLogFn = (level, write) => (...args) => {
|
|
72
|
+
write(JSON.stringify(buildLogObject(level, args)));
|
|
73
|
+
};
|
|
74
74
|
export const createDefaultLogger = (config) => {
|
|
75
75
|
if (config.level === "silent" || getDefaultLogLevel() === "silent") {
|
|
76
76
|
return { trace: noop, debug: noop, info: noop, warn: noop, error: noop };
|
package/dist/logger/index.d.ts
CHANGED
|
@@ -1,8 +1,5 @@
|
|
|
1
|
-
export type
|
|
2
|
-
|
|
3
|
-
(obj: Record<string, unknown>, msg?: string): void;
|
|
4
|
-
(err: Error, msg?: string): void;
|
|
5
|
-
};
|
|
1
|
+
export type LogArgs = [msg: string] | [obj: Record<string, unknown>, msg?: string] | [err: Error, msg?: string];
|
|
2
|
+
export type LogFn = (...args: LogArgs) => void;
|
|
6
3
|
export type Logger = Record<"trace" | "debug" | "info" | "warn" | "error", LogFn>;
|
|
7
4
|
export type LogLevel = "trace" | "debug" | "info" | "warn" | "error" | "silent";
|
|
8
5
|
export type LoggerConfig = {
|
package/dist/middleware/utils.js
CHANGED
package/dist/models/catalog.js
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
export function defineModelCatalog(...inputs) {
|
|
2
2
|
const catalogs = inputs.flat().map((input) => (typeof input === "function" ? input() : input));
|
|
3
|
-
|
|
3
|
+
const out = {};
|
|
4
|
+
for (const catalog of catalogs) {
|
|
5
|
+
Object.assign(out, catalog);
|
|
6
|
+
}
|
|
7
|
+
return out;
|
|
4
8
|
}
|
|
@@ -17,14 +17,18 @@ export const openAIDimensionsMiddleware = {
|
|
|
17
17
|
};
|
|
18
18
|
function mapGptOssReasoningEffort(effort) {
|
|
19
19
|
switch (effort) {
|
|
20
|
+
case undefined:
|
|
21
|
+
case "none":
|
|
22
|
+
return;
|
|
23
|
+
case "minimal":
|
|
24
|
+
case "low":
|
|
25
|
+
return "low";
|
|
20
26
|
case "medium":
|
|
21
27
|
return "medium";
|
|
22
28
|
case "high":
|
|
23
29
|
case "xhigh":
|
|
24
30
|
case "max":
|
|
25
31
|
return "high";
|
|
26
|
-
default:
|
|
27
|
-
return "low";
|
|
28
32
|
}
|
|
29
33
|
}
|
|
30
34
|
export const openAIReasoningMiddleware = {
|
|
@@ -55,7 +55,9 @@ export const withCanonicalIds = (provider, config = {}) => {
|
|
|
55
55
|
};
|
|
56
56
|
const needsFallbackWrap = stripNamespace || normalizeDelimiters || namespaceSeparator !== "/" || !!prefix || !!postfix;
|
|
57
57
|
// FUTURE: use embeddingModel instead of textEmbeddingModel once voyage supports it
|
|
58
|
+
// oxlint-disable-next-line unbound-method
|
|
58
59
|
const languageModel = provider.languageModel;
|
|
60
|
+
// oxlint-disable-next-line unbound-method, no-deprecated
|
|
59
61
|
const embeddingModel = provider.textEmbeddingModel;
|
|
60
62
|
const fallbackProvider = needsFallbackWrap
|
|
61
63
|
? {
|
|
@@ -69,6 +71,7 @@ export const withCanonicalIds = (provider, config = {}) => {
|
|
|
69
71
|
embeddingModel: (id) => {
|
|
70
72
|
const mapped = applyFallbackAffixes(normalizeId(id));
|
|
71
73
|
logger.debug(`[canonical] mapped ${id} to ${mapped}`);
|
|
74
|
+
// oxlint-disable-next-line no-deprecated
|
|
72
75
|
return embeddingModel(mapped);
|
|
73
76
|
},
|
|
74
77
|
}
|
package/dist/telemetry/fetch.js
CHANGED
|
@@ -16,8 +16,13 @@ const getRequestAttributes = (input, init) => {
|
|
|
16
16
|
attrs["url.full"] = input.url;
|
|
17
17
|
return attrs;
|
|
18
18
|
};
|
|
19
|
-
const shouldTraceFetch = (init) =>
|
|
20
|
-
init
|
|
19
|
+
const shouldTraceFetch = (init) => {
|
|
20
|
+
const h = init?.headers;
|
|
21
|
+
if (!h || typeof h !== "object" || Array.isArray(h) || h instanceof Headers)
|
|
22
|
+
return false;
|
|
23
|
+
const ua = h["user-agent"];
|
|
24
|
+
return typeof ua === "string" && ua.includes("ai-sdk/provider-utils");
|
|
25
|
+
};
|
|
21
26
|
const otelFetch = (input, init) => {
|
|
22
27
|
const original = g[ORIGINAL_FETCH_KEY];
|
|
23
28
|
if (!fetchTracingEnabled)
|
package/dist/telemetry/gen-ai.js
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import { metrics } from "@opentelemetry/api";
|
|
2
2
|
import { STATUS_CODE } from "../errors/utils";
|
|
3
|
-
const
|
|
4
|
-
|
|
3
|
+
const getMeter = () => metrics.getMeter("@hebo/gateway");
|
|
4
|
+
let requestDurationHistogram;
|
|
5
|
+
let timePerOutputTokenHistogram;
|
|
6
|
+
let tokenUsageHistogram;
|
|
7
|
+
const getRequestDurationHistogram = () => (requestDurationHistogram ??= getMeter().createHistogram("gen_ai.server.request.duration", {
|
|
5
8
|
description: "End-to-end gateway request duration",
|
|
6
9
|
unit: "s",
|
|
7
10
|
advice: {
|
|
@@ -9,8 +12,8 @@ const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.du
|
|
|
9
12
|
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 240,
|
|
10
13
|
],
|
|
11
14
|
},
|
|
12
|
-
});
|
|
13
|
-
const
|
|
15
|
+
}));
|
|
16
|
+
const getTimePerOutputTokenHistogram = () => (timePerOutputTokenHistogram ??= getMeter().createHistogram("gen_ai.server.time_per_output_token", {
|
|
14
17
|
description: "End-to-end gateway request duration per output token",
|
|
15
18
|
unit: "s",
|
|
16
19
|
advice: {
|
|
@@ -18,17 +21,17 @@ const timePerOutputTokenHistogram = meter.createHistogram("gen_ai.server.time_pe
|
|
|
18
21
|
0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5,
|
|
19
22
|
],
|
|
20
23
|
},
|
|
21
|
-
});
|
|
22
|
-
const
|
|
24
|
+
}));
|
|
25
|
+
const getTokenUsageHistogram = () => (tokenUsageHistogram ??= getMeter().createHistogram("gen_ai.client.token.usage", {
|
|
23
26
|
description: "Token usage reported by upstream model responses",
|
|
24
27
|
unit: "{token}",
|
|
25
28
|
advice: {
|
|
26
29
|
explicitBucketBoundaries: [
|
|
27
|
-
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072,
|
|
28
|
-
524288, 1048576,
|
|
30
|
+
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072,
|
|
31
|
+
262144, 524288, 1048576,
|
|
29
32
|
],
|
|
30
33
|
},
|
|
31
|
-
});
|
|
34
|
+
}));
|
|
32
35
|
export const getGenAiGeneralAttributes = (ctx, signalLevel) => {
|
|
33
36
|
if (!signalLevel || signalLevel === "off")
|
|
34
37
|
return {};
|
|
@@ -47,7 +50,7 @@ export const recordRequestDuration = (duration, status, ctx, signalLevel) => {
|
|
|
47
50
|
if (status !== 200) {
|
|
48
51
|
attrs["error.type"] = `${status} ${STATUS_CODE(status).toLowerCase()}`;
|
|
49
52
|
}
|
|
50
|
-
|
|
53
|
+
getRequestDurationHistogram().record(duration / 1000, attrs);
|
|
51
54
|
};
|
|
52
55
|
// FUTURE: record unsuccessful calls
|
|
53
56
|
export const recordTimePerOutputToken = (start, tokenAttrs, metricAttrs, signalLevel) => {
|
|
@@ -56,7 +59,7 @@ export const recordTimePerOutputToken = (start, tokenAttrs, metricAttrs, signalL
|
|
|
56
59
|
const outputTokens = tokenAttrs["gen_ai.usage.output_tokens"];
|
|
57
60
|
if (typeof outputTokens !== "number" || outputTokens <= 0)
|
|
58
61
|
return;
|
|
59
|
-
|
|
62
|
+
getTimePerOutputTokenHistogram().record((performance.now() - start) / 1000 / outputTokens, metricAttrs);
|
|
60
63
|
};
|
|
61
64
|
// FUTURE: record unsuccessful calls
|
|
62
65
|
export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
|
|
@@ -65,7 +68,7 @@ export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
|
|
|
65
68
|
const record = (value, tokenType) => {
|
|
66
69
|
if (typeof value !== "number")
|
|
67
70
|
return;
|
|
68
|
-
|
|
71
|
+
getTokenUsageHistogram().record(value, Object.assign({}, metricAttrs, { "gen_ai.token.type": tokenType }));
|
|
69
72
|
};
|
|
70
73
|
record(tokenAttrs["gen_ai.usage.input_tokens"], "input");
|
|
71
74
|
record(tokenAttrs["gen_ai.usage.output_tokens"], "output");
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
import type { TelemetrySignalLevel } from "../types";
|
|
2
|
-
export declare const
|
|
2
|
+
export declare const observeV8jsMemoryMetrics: (level?: TelemetrySignalLevel) => void;
|
package/dist/telemetry/memory.js
CHANGED
|
@@ -1,18 +1,9 @@
|
|
|
1
1
|
import { metrics } from "@opentelemetry/api";
|
|
2
|
-
const
|
|
2
|
+
const getMeter = () => metrics.getMeter("@hebo/gateway");
|
|
3
3
|
const defaultHeapSpaceAttrs = { "v8js.heap.space.name": "total" };
|
|
4
|
-
|
|
5
|
-
description: "Used bytes in the V8 heap",
|
|
6
|
-
unit: "By",
|
|
7
|
-
});
|
|
8
|
-
const heapSpacePhysicalSizeCounter = meter.createUpDownCounter("v8js.memory.heap.space.physical_size", {
|
|
9
|
-
description: "Physical bytes allocated for the V8 heap space",
|
|
10
|
-
unit: "By",
|
|
11
|
-
});
|
|
4
|
+
let registered = false;
|
|
12
5
|
const isEnabled = (level) => level === "recommended" || level === "full";
|
|
13
|
-
|
|
14
|
-
if (!isEnabled(level))
|
|
15
|
-
return;
|
|
6
|
+
const observeMemory = (observe) => {
|
|
16
7
|
let usage;
|
|
17
8
|
try {
|
|
18
9
|
usage = globalThis.process?.memoryUsage?.();
|
|
@@ -22,6 +13,31 @@ export const recordV8jsMemory = (level) => {
|
|
|
22
13
|
}
|
|
23
14
|
if (!usage)
|
|
24
15
|
return;
|
|
25
|
-
|
|
26
|
-
|
|
16
|
+
observe(usage.heapUsed, usage.rss);
|
|
17
|
+
};
|
|
18
|
+
export const observeV8jsMemoryMetrics = (level) => {
|
|
19
|
+
if (!isEnabled(level) || registered)
|
|
20
|
+
return;
|
|
21
|
+
registered = true;
|
|
22
|
+
const meter = getMeter();
|
|
23
|
+
meter
|
|
24
|
+
.createObservableGauge("v8js.memory.heap.used", {
|
|
25
|
+
description: "Used bytes in the V8 heap",
|
|
26
|
+
unit: "By",
|
|
27
|
+
})
|
|
28
|
+
.addCallback((result) => {
|
|
29
|
+
observeMemory((heapUsed) => {
|
|
30
|
+
result.observe(heapUsed, defaultHeapSpaceAttrs);
|
|
31
|
+
});
|
|
32
|
+
});
|
|
33
|
+
meter
|
|
34
|
+
.createObservableGauge("v8js.memory.heap.space.physical_size", {
|
|
35
|
+
description: "Physical bytes allocated for the V8 heap space",
|
|
36
|
+
unit: "By",
|
|
37
|
+
})
|
|
38
|
+
.addCallback((result) => {
|
|
39
|
+
observeMemory((_, rss) => {
|
|
40
|
+
result.observe(rss, defaultHeapSpaceAttrs);
|
|
41
|
+
});
|
|
42
|
+
});
|
|
27
43
|
};
|
package/dist/telemetry/span.js
CHANGED
package/dist/telemetry/stream.js
CHANGED
|
@@ -1,51 +1,58 @@
|
|
|
1
1
|
import { toOpenAIError } from "../errors/openai";
|
|
2
|
-
const isErrorChunk = (v) => v instanceof Error ||
|
|
2
|
+
const isErrorChunk = (v) => v instanceof Error || (typeof v === "object" && v !== null && "error" in v);
|
|
3
3
|
export const wrapStream = (src, hooks) => {
|
|
4
4
|
let finished = false;
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
let reader;
|
|
6
|
+
const done = (controller, status, reason) => {
|
|
7
|
+
if (finished)
|
|
8
|
+
return;
|
|
9
|
+
finished = true;
|
|
10
|
+
hooks.onDone?.(status, reason);
|
|
11
|
+
if (status !== 200) {
|
|
12
|
+
reader?.cancel(reason).catch(() => { });
|
|
13
|
+
}
|
|
14
|
+
try {
|
|
15
|
+
controller.close();
|
|
9
16
|
}
|
|
10
|
-
|
|
11
|
-
controller.close();
|
|
17
|
+
catch { }
|
|
12
18
|
};
|
|
13
19
|
return new ReadableStream({
|
|
14
20
|
async start(controller) {
|
|
15
|
-
|
|
21
|
+
reader = src.getReader();
|
|
16
22
|
try {
|
|
17
23
|
for (;;) {
|
|
18
|
-
// oxlint-disable-next-line no-await-in-loop
|
|
24
|
+
// oxlint-disable-next-line no-await-in-loop, no-unsafe-assignment
|
|
19
25
|
const { value, done: eof } = await reader.read();
|
|
20
26
|
if (eof)
|
|
21
27
|
break;
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
const status = out.error?.type === "invalid_request_error" ? 422 : 502;
|
|
26
|
-
done(reader, controller, status, value);
|
|
28
|
+
controller.enqueue(value);
|
|
29
|
+
if (isErrorChunk(value)) {
|
|
30
|
+
done(controller, toOpenAIError(value).error.type === "invalid_request_error" ? 422 : 502, value);
|
|
27
31
|
return;
|
|
28
32
|
}
|
|
29
33
|
}
|
|
30
|
-
done(
|
|
34
|
+
done(controller, 200);
|
|
31
35
|
}
|
|
32
36
|
catch (err) {
|
|
33
|
-
|
|
34
|
-
|
|
37
|
+
try {
|
|
38
|
+
controller.enqueue(toOpenAIError(err));
|
|
39
|
+
}
|
|
40
|
+
catch { }
|
|
41
|
+
done(controller, 502, err);
|
|
35
42
|
}
|
|
36
43
|
finally {
|
|
37
44
|
try {
|
|
38
|
-
reader
|
|
45
|
+
reader?.releaseLock();
|
|
39
46
|
}
|
|
40
47
|
catch { }
|
|
41
48
|
}
|
|
42
49
|
},
|
|
43
50
|
cancel(reason) {
|
|
44
|
-
if (
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
51
|
+
if (finished)
|
|
52
|
+
return;
|
|
53
|
+
finished = true;
|
|
54
|
+
hooks.onDone?.(499, reason);
|
|
55
|
+
reader?.cancel(reason).catch(() => { });
|
|
49
56
|
},
|
|
50
57
|
});
|
|
51
58
|
};
|
package/dist/utils/env.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
const NODE_ENV = typeof process === "undefined"
|
|
2
|
-
?
|
|
3
|
-
|
|
2
|
+
? // oxlint-disable-next-line no-unsafe-member-access
|
|
3
|
+
(globalThis.NODE_ENV ?? globalThis.ENV?.NODE_ENV)
|
|
4
|
+
: // oxlint-disable-next-line no-unsafe-assignment
|
|
5
|
+
process.env?.NODE_ENV;
|
|
4
6
|
export const isProduction = () => NODE_ENV === "production";
|
|
5
7
|
export const isTest = () => NODE_ENV === "test";
|
package/dist/utils/preset.js
CHANGED
package/dist/utils/response.js
CHANGED
|
@@ -19,7 +19,9 @@ export const mergeResponseInit = (defaultHeaders, responseInit) => {
|
|
|
19
19
|
const headers = new Headers(defaultHeaders);
|
|
20
20
|
const override = responseInit?.headers;
|
|
21
21
|
if (override) {
|
|
22
|
-
new Headers(override).forEach((value, key) =>
|
|
22
|
+
new Headers(override).forEach((value, key) => {
|
|
23
|
+
headers.set(key, value);
|
|
24
|
+
});
|
|
23
25
|
}
|
|
24
26
|
if (!responseInit)
|
|
25
27
|
return { headers };
|