@hebo-ai/gateway 0.4.0-beta.2 → 0.4.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -5
- package/dist/config.js +21 -7
- package/dist/endpoints/chat-completions/converters.js +2 -2
- package/dist/endpoints/chat-completions/handler.js +31 -25
- package/dist/endpoints/chat-completions/otel.d.ts +6 -0
- package/dist/endpoints/chat-completions/otel.js +121 -0
- package/dist/endpoints/embeddings/handler.js +19 -12
- package/dist/endpoints/embeddings/otel.d.ts +6 -0
- package/dist/endpoints/embeddings/otel.js +35 -0
- package/dist/endpoints/models/handler.js +3 -4
- package/dist/errors/gateway.js +1 -2
- package/dist/errors/openai.js +10 -12
- package/dist/errors/utils.d.ts +1 -3
- package/dist/errors/utils.js +5 -6
- package/dist/gateway.js +1 -1
- package/dist/lifecycle.js +62 -28
- package/dist/middleware/matcher.js +1 -1
- package/dist/models/amazon/presets.d.ts +37 -37
- package/dist/models/amazon/presets.js +1 -1
- package/dist/models/anthropic/presets.d.ts +56 -56
- package/dist/models/cohere/presets.d.ts +54 -54
- package/dist/models/cohere/presets.js +2 -2
- package/dist/models/google/presets.d.ts +31 -31
- package/dist/models/google/presets.js +1 -1
- package/dist/models/meta/presets.d.ts +42 -42
- package/dist/models/openai/presets.d.ts +96 -96
- package/dist/models/openai/presets.js +1 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/voyage/presets.d.ts +92 -92
- package/dist/models/voyage/presets.js +1 -1
- package/dist/providers/registry.js +2 -2
- package/dist/telemetry/baggage.d.ts +1 -0
- package/dist/telemetry/baggage.js +24 -0
- package/dist/telemetry/fetch.d.ts +2 -1
- package/dist/telemetry/fetch.js +13 -3
- package/dist/telemetry/gen-ai.d.ts +4 -0
- package/dist/telemetry/gen-ai.js +42 -0
- package/dist/telemetry/http.d.ts +3 -0
- package/dist/telemetry/http.js +57 -0
- package/dist/telemetry/span.d.ts +6 -3
- package/dist/telemetry/span.js +23 -35
- package/dist/telemetry/stream.d.ts +3 -7
- package/dist/telemetry/stream.js +18 -18
- package/dist/types.d.ts +14 -12
- package/dist/utils/headers.d.ts +1 -1
- package/dist/utils/headers.js +7 -9
- package/dist/utils/request.d.ts +0 -4
- package/dist/utils/request.js +0 -9
- package/dist/utils/response.js +1 -1
- package/package.json +4 -2
- package/src/config.ts +28 -7
- package/src/endpoints/chat-completions/converters.ts +2 -2
- package/src/endpoints/chat-completions/handler.ts +39 -26
- package/src/endpoints/chat-completions/otel.ts +154 -0
- package/src/endpoints/embeddings/handler.test.ts +2 -2
- package/src/endpoints/embeddings/handler.ts +24 -12
- package/src/endpoints/embeddings/otel.ts +56 -0
- package/src/endpoints/models/handler.ts +3 -5
- package/src/errors/gateway.ts +1 -2
- package/src/errors/openai.ts +24 -17
- package/src/errors/utils.ts +5 -7
- package/src/gateway.ts +1 -1
- package/src/lifecycle.ts +73 -31
- package/src/middleware/matcher.ts +1 -1
- package/src/models/amazon/presets.ts +1 -1
- package/src/models/cohere/presets.ts +2 -2
- package/src/models/google/presets.ts +1 -1
- package/src/models/openai/presets.ts +1 -1
- package/src/models/types.ts +1 -1
- package/src/models/voyage/presets.ts +1 -1
- package/src/providers/registry.ts +2 -2
- package/src/telemetry/baggage.ts +27 -0
- package/src/telemetry/fetch.ts +15 -3
- package/src/telemetry/gen-ai.ts +60 -0
- package/src/telemetry/http.ts +65 -0
- package/src/telemetry/span.ts +28 -40
- package/src/telemetry/stream.ts +26 -30
- package/src/types.ts +15 -12
- package/src/utils/headers.ts +8 -19
- package/src/utils/request.ts +0 -11
- package/src/utils/response.ts +1 -1
- package/dist/telemetry/otel.d.ts +0 -2
- package/dist/telemetry/otel.js +0 -46
- package/dist/telemetry/utils.d.ts +0 -4
- package/dist/telemetry/utils.js +0 -223
- package/src/telemetry/otel.ts +0 -87
- package/src/telemetry/utils.ts +0 -273
package/README.md
CHANGED
|
@@ -615,12 +615,20 @@ const gw = gateway({
|
|
|
615
615
|
telemetry: {
|
|
616
616
|
// default: false
|
|
617
617
|
enabled: true,
|
|
618
|
-
// default:
|
|
618
|
+
// default: TracerProvider from @opentelemetry/api singleton
|
|
619
619
|
tracer: trace.getTracer("my-gateway"),
|
|
620
|
-
//
|
|
621
|
-
// "
|
|
622
|
-
|
|
623
|
-
|
|
620
|
+
// Telemetry levels by namespace:
|
|
621
|
+
// "off" | "required" | "recommended" | "full"
|
|
622
|
+
signals: {
|
|
623
|
+
// gen_ai.* semantic attributes
|
|
624
|
+
gen_ai: "full",
|
|
625
|
+
// http.*, url.*, server.* semantic attributes
|
|
626
|
+
http: "recommended",
|
|
627
|
+
// hebo-specific telemetry:
|
|
628
|
+
// - recommended: hebo.* span events
|
|
629
|
+
// - full: hebo.* span events + fetch instrumentation
|
|
630
|
+
hebo: "recommended",
|
|
631
|
+
},
|
|
624
632
|
},
|
|
625
633
|
});
|
|
626
634
|
```
|
package/dist/config.js
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
import { isLogger, logger, setLoggerInstance } from "./logger";
|
|
2
2
|
import { createDefaultLogger } from "./logger/default";
|
|
3
|
-
import { kParsed } from "./types";
|
|
3
|
+
import { kParsed, } from "./types";
|
|
4
4
|
export const parseConfig = (config) => {
|
|
5
|
-
// If it has been parsed before, just return
|
|
5
|
+
// If it has been parsed before, just return.
|
|
6
6
|
if (kParsed in config)
|
|
7
7
|
return config;
|
|
8
8
|
const providers = config.providers ?? {};
|
|
9
9
|
const parsedProviders = {};
|
|
10
10
|
const models = config.models ?? {};
|
|
11
|
-
// Set the global logger instance
|
|
11
|
+
// Set the global logger instance.
|
|
12
12
|
if (config.logger === undefined) {
|
|
13
13
|
setLoggerInstance(createDefaultLogger({}));
|
|
14
14
|
}
|
|
@@ -18,7 +18,7 @@ export const parseConfig = (config) => {
|
|
|
18
18
|
? `[logger] custom logger configured`
|
|
19
19
|
: `[logger] logger configured: level=${config.logger.level}`);
|
|
20
20
|
}
|
|
21
|
-
// Strip providers that are not configured
|
|
21
|
+
// Strip providers that are not configured.
|
|
22
22
|
for (const id in providers) {
|
|
23
23
|
const provider = providers[id];
|
|
24
24
|
if (provider === undefined) {
|
|
@@ -30,7 +30,7 @@ export const parseConfig = (config) => {
|
|
|
30
30
|
if (Object.keys(parsedProviders).length === 0) {
|
|
31
31
|
throw new Error("No providers configured (config.providers is empty)");
|
|
32
32
|
}
|
|
33
|
-
// Strip providers that are not configured from models
|
|
33
|
+
// Strip providers that are not configured from models.
|
|
34
34
|
const parsedModels = {};
|
|
35
35
|
const warnings = new Set();
|
|
36
36
|
for (const id in models) {
|
|
@@ -51,12 +51,26 @@ export const parseConfig = (config) => {
|
|
|
51
51
|
if (Object.keys(parsedModels).length === 0) {
|
|
52
52
|
throw new Error("No models configured (config.models is empty)");
|
|
53
53
|
}
|
|
54
|
+
// Default for the telemetry settings.
|
|
55
|
+
const telemetryEnabled = config.telemetry?.enabled ?? false;
|
|
56
|
+
const telemetrySignals = telemetryEnabled
|
|
57
|
+
? {
|
|
58
|
+
http: config.telemetry?.signals?.http ?? "recommended",
|
|
59
|
+
gen_ai: config.telemetry?.signals?.gen_ai ?? "full",
|
|
60
|
+
hebo: config.telemetry?.signals?.hebo ?? "off",
|
|
61
|
+
}
|
|
62
|
+
: {
|
|
63
|
+
http: "off",
|
|
64
|
+
gen_ai: "off",
|
|
65
|
+
hebo: "off",
|
|
66
|
+
};
|
|
67
|
+
// Return parsed config.
|
|
54
68
|
return {
|
|
55
69
|
...config,
|
|
56
|
-
logger: config.logger,
|
|
57
70
|
telemetry: {
|
|
58
71
|
...config.telemetry,
|
|
59
|
-
enabled:
|
|
72
|
+
enabled: telemetryEnabled,
|
|
73
|
+
signals: telemetrySignals,
|
|
60
74
|
},
|
|
61
75
|
providers: parsedProviders,
|
|
62
76
|
models: parsedModels,
|
|
@@ -94,6 +94,7 @@ export function fromChatCompletionsAssistantMessage(message) {
|
|
|
94
94
|
}
|
|
95
95
|
if (tool_calls?.length) {
|
|
96
96
|
for (const tc of tool_calls) {
|
|
97
|
+
// eslint-disable-next-line no-shadow
|
|
97
98
|
const { id, function: fn, extra_content } = tc;
|
|
98
99
|
const out = {
|
|
99
100
|
type: "tool-call",
|
|
@@ -348,9 +349,8 @@ export class ChatCompletionsStream extends TransformStream {
|
|
|
348
349
|
}
|
|
349
350
|
case "error": {
|
|
350
351
|
const error = part.error;
|
|
351
|
-
// FUTURE mask in production mode and return responseID
|
|
352
352
|
controller.enqueue(toOpenAIError(error));
|
|
353
|
-
|
|
353
|
+
controller.terminate();
|
|
354
354
|
}
|
|
355
355
|
}
|
|
356
356
|
},
|
|
@@ -5,14 +5,18 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
|
-
import {
|
|
8
|
+
import { recordRequestDuration, recordTokenUsage } from "../../telemetry/gen-ai";
|
|
9
|
+
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
9
10
|
import { resolveRequestId } from "../../utils/headers";
|
|
10
11
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
11
12
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
13
|
+
import { getChatGeneralAttributes, getChatRequestAttributes, getChatResponseAttributes, } from "./otel";
|
|
12
14
|
import { ChatCompletionsBodySchema } from "./schema";
|
|
13
15
|
export const chatCompletions = (config) => {
|
|
14
16
|
const hooks = config.hooks;
|
|
15
17
|
const handler = async (ctx) => {
|
|
18
|
+
const start = performance.now();
|
|
19
|
+
ctx.operation = "chat";
|
|
16
20
|
addSpanEvent("hebo.handler.started");
|
|
17
21
|
// Guard: enforce HTTP method early.
|
|
18
22
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
@@ -20,21 +24,20 @@ export const chatCompletions = (config) => {
|
|
|
20
24
|
}
|
|
21
25
|
const requestId = resolveRequestId(ctx.request);
|
|
22
26
|
// Parse + validate input.
|
|
23
|
-
let body;
|
|
24
27
|
try {
|
|
25
|
-
body = await ctx.request.json();
|
|
28
|
+
ctx.body = await ctx.request.json();
|
|
26
29
|
}
|
|
27
30
|
catch {
|
|
28
31
|
throw new GatewayError("Invalid JSON", 400);
|
|
29
32
|
}
|
|
30
33
|
addSpanEvent("hebo.request.deserialized");
|
|
31
|
-
const parsed = ChatCompletionsBodySchema.safeParse(body);
|
|
34
|
+
const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
|
|
32
35
|
if (!parsed.success) {
|
|
36
|
+
// FUTURE: add body shape to error message
|
|
33
37
|
throw new GatewayError(z.prettifyError(parsed.error), 400);
|
|
34
38
|
}
|
|
35
39
|
ctx.body = parsed.data;
|
|
36
40
|
addSpanEvent("hebo.request.parsed");
|
|
37
|
-
ctx.operation = "chat";
|
|
38
41
|
if (hooks?.before) {
|
|
39
42
|
ctx.body = (await hooks.before(ctx)) ?? ctx.body;
|
|
40
43
|
addSpanEvent("hebo.hooks.before.completed");
|
|
@@ -45,10 +48,7 @@ export const chatCompletions = (config) => {
|
|
|
45
48
|
ctx.resolvedModelId =
|
|
46
49
|
(await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
|
|
47
50
|
logger.debug(`[chat] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
|
|
48
|
-
addSpanEvent("hebo.model.resolved"
|
|
49
|
-
"gen_ai.request.model": ctx.modelId ?? "",
|
|
50
|
-
"gen_ai.response.model": ctx.resolvedModelId ?? "",
|
|
51
|
-
});
|
|
51
|
+
addSpanEvent("hebo.model.resolved");
|
|
52
52
|
const override = await hooks?.resolveProvider?.(ctx);
|
|
53
53
|
ctx.provider =
|
|
54
54
|
override ??
|
|
@@ -61,7 +61,10 @@ export const chatCompletions = (config) => {
|
|
|
61
61
|
const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
|
|
62
62
|
ctx.resolvedProviderId = languageModel.provider;
|
|
63
63
|
logger.debug(`[chat] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
|
|
64
|
-
addSpanEvent("hebo.provider.resolved"
|
|
64
|
+
addSpanEvent("hebo.provider.resolved");
|
|
65
|
+
const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
|
|
66
|
+
const genAiGeneralAttrs = getChatGeneralAttributes(ctx, genAiSignalLevel);
|
|
67
|
+
setSpanAttributes(genAiGeneralAttrs);
|
|
65
68
|
// Convert inputs to AI SDK call options.
|
|
66
69
|
const textOptions = convertToTextCallOptions(inputs);
|
|
67
70
|
logger.trace({
|
|
@@ -69,6 +72,7 @@ export const chatCompletions = (config) => {
|
|
|
69
72
|
options: textOptions,
|
|
70
73
|
}, "[chat] AI SDK options");
|
|
71
74
|
addSpanEvent("hebo.options.prepared");
|
|
75
|
+
setSpanAttributes(getChatRequestAttributes(inputs, genAiSignalLevel));
|
|
72
76
|
// Build middleware chain (model -> forward params -> provider).
|
|
73
77
|
const languageModelWithMiddleware = wrapLanguageModel({
|
|
74
78
|
model: languageModel,
|
|
@@ -82,22 +86,21 @@ export const chatCompletions = (config) => {
|
|
|
82
86
|
headers: prepareForwardHeaders(ctx.request),
|
|
83
87
|
// No abort signal here, otherwise we can't detect upstream from client cancellations
|
|
84
88
|
// abortSignal: ctx.request.signal,
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
logger.error({
|
|
88
|
-
requestId,
|
|
89
|
-
err,
|
|
90
|
-
});
|
|
91
|
-
throw error;
|
|
89
|
+
timeout: {
|
|
90
|
+
totalMs: 5 * 60 * 1000,
|
|
92
91
|
},
|
|
93
92
|
onAbort: () => {
|
|
94
93
|
throw new DOMException("Upstream failed", "AbortError");
|
|
95
94
|
},
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
95
|
+
onError: () => { },
|
|
96
|
+
onFinish: (res) => {
|
|
97
|
+
addSpanEvent("hebo.ai-sdk.completed");
|
|
98
|
+
const streamResult = toChatCompletions(res, ctx.resolvedModelId);
|
|
99
|
+
addSpanEvent("hebo.result.transformed");
|
|
100
|
+
const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
|
|
101
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
102
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
103
|
+
recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
|
|
101
104
|
},
|
|
102
105
|
experimental_include: {
|
|
103
106
|
requestBody: false,
|
|
@@ -105,9 +108,7 @@ export const chatCompletions = (config) => {
|
|
|
105
108
|
includeRawChunks: false,
|
|
106
109
|
...textOptions,
|
|
107
110
|
});
|
|
108
|
-
addSpanEvent("hebo.ai-sdk.completed");
|
|
109
111
|
ctx.result = toChatCompletionsStream(result, ctx.resolvedModelId);
|
|
110
|
-
addSpanEvent("hebo.result.transformed");
|
|
111
112
|
if (hooks?.after) {
|
|
112
113
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
113
114
|
addSpanEvent("hebo.hooks.after.completed");
|
|
@@ -120,21 +121,26 @@ export const chatCompletions = (config) => {
|
|
|
120
121
|
headers: prepareForwardHeaders(ctx.request),
|
|
121
122
|
// FUTURE: currently can't tell whether upstream or downstream abort
|
|
122
123
|
abortSignal: ctx.request.signal,
|
|
124
|
+
timeout: 5 * 60 * 1000,
|
|
123
125
|
experimental_include: {
|
|
124
126
|
requestBody: false,
|
|
125
127
|
responseBody: false,
|
|
126
128
|
},
|
|
127
|
-
timeout: 5 * 60 * 1000,
|
|
128
129
|
...textOptions,
|
|
129
130
|
});
|
|
130
131
|
logger.trace({ requestId, result }, "[chat] AI SDK result");
|
|
131
132
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
133
|
+
// Transform result.
|
|
132
134
|
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
133
135
|
addSpanEvent("hebo.result.transformed");
|
|
136
|
+
const genAiResponseAttrs = getChatResponseAttributes(ctx.result, genAiSignalLevel);
|
|
137
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
138
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
134
139
|
if (hooks?.after) {
|
|
135
140
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
136
141
|
addSpanEvent("hebo.hooks.after.completed");
|
|
137
142
|
}
|
|
143
|
+
recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
|
|
138
144
|
return ctx.result;
|
|
139
145
|
};
|
|
140
146
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
+
import type { ChatCompletions, ChatCompletionsBody } from "./schema";
|
|
3
|
+
import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
|
|
4
|
+
export declare const getChatGeneralAttributes: (ctx: GatewayContext, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
5
|
+
export declare const getChatRequestAttributes: (inputs: ChatCompletionsBody, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
6
|
+
export declare const getChatResponseAttributes: (completions: ChatCompletions, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import {} from "../../types";
|
|
2
|
+
const toTextPart = (content) => ({ type: "text", content });
|
|
3
|
+
const toMessageParts = (message) => {
|
|
4
|
+
if (message.role === "assistant") {
|
|
5
|
+
const parts = [];
|
|
6
|
+
if (typeof message.content === "string")
|
|
7
|
+
parts.push(toTextPart(message.content));
|
|
8
|
+
if (Array.isArray(message.tool_calls)) {
|
|
9
|
+
for (const call of message.tool_calls) {
|
|
10
|
+
parts.push({
|
|
11
|
+
type: "tool_call",
|
|
12
|
+
id: call.id,
|
|
13
|
+
name: call.function.name,
|
|
14
|
+
arguments: call.function.arguments,
|
|
15
|
+
});
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
return parts;
|
|
19
|
+
}
|
|
20
|
+
if (message.role === "tool") {
|
|
21
|
+
return [{ type: "tool_call_response", id: message.tool_call_id, content: message.content }];
|
|
22
|
+
}
|
|
23
|
+
if (message.role === "user") {
|
|
24
|
+
const parts = [];
|
|
25
|
+
if (typeof message.content === "string")
|
|
26
|
+
parts.push(toTextPart(message.content));
|
|
27
|
+
if (Array.isArray(message.content)) {
|
|
28
|
+
for (const part of message.content) {
|
|
29
|
+
if (part.type === "text") {
|
|
30
|
+
parts.push(toTextPart(part.text));
|
|
31
|
+
}
|
|
32
|
+
else if (part.type === "image_url") {
|
|
33
|
+
parts.push({ type: "image", content: part.image_url.url });
|
|
34
|
+
}
|
|
35
|
+
else {
|
|
36
|
+
parts.push({
|
|
37
|
+
type: "file",
|
|
38
|
+
// FUTURE: optionally expose safe metadata without raw binary payloads.
|
|
39
|
+
content: part.file.filename ?? "[REDACTED_BINARY_DATA]",
|
|
40
|
+
media_type: part.file.media_type,
|
|
41
|
+
});
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
return parts;
|
|
46
|
+
}
|
|
47
|
+
return [];
|
|
48
|
+
};
|
|
49
|
+
export const getChatGeneralAttributes = (ctx, signalLevel) => {
|
|
50
|
+
if (!signalLevel || signalLevel === "off")
|
|
51
|
+
return {};
|
|
52
|
+
const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
|
|
53
|
+
return {
|
|
54
|
+
"gen_ai.operation.name": ctx.operation,
|
|
55
|
+
"gen_ai.request.model": requestModel,
|
|
56
|
+
"gen_ai.response.model": ctx.resolvedModelId,
|
|
57
|
+
"gen_ai.provider.name": ctx.resolvedProviderId,
|
|
58
|
+
};
|
|
59
|
+
};
|
|
60
|
+
export const getChatRequestAttributes = (inputs, signalLevel) => {
|
|
61
|
+
if (!signalLevel || signalLevel === "off")
|
|
62
|
+
return {};
|
|
63
|
+
const attrs = {};
|
|
64
|
+
if (inputs.seed !== undefined) {
|
|
65
|
+
Object.assign(attrs, { "gen_ai.request.seed": inputs.seed });
|
|
66
|
+
}
|
|
67
|
+
if (signalLevel !== "required") {
|
|
68
|
+
Object.assign(attrs, {
|
|
69
|
+
"gen_ai.request.stream": inputs.stream,
|
|
70
|
+
"gen_ai.request.frequency_penalty": inputs.frequency_penalty,
|
|
71
|
+
"gen_ai.request.max_tokens": inputs.max_completion_tokens,
|
|
72
|
+
"gen_ai.request.presence_penalty": inputs.presence_penalty,
|
|
73
|
+
"gen_ai.request.stop_sequences": inputs.stop
|
|
74
|
+
? Array.isArray(inputs.stop)
|
|
75
|
+
? inputs.stop
|
|
76
|
+
: [inputs.stop]
|
|
77
|
+
: undefined,
|
|
78
|
+
"gen_ai.request.temperature": inputs.temperature,
|
|
79
|
+
"gen_ai.request.top_p": inputs.top_p,
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
if (signalLevel === "full") {
|
|
83
|
+
Object.assign(attrs, {
|
|
84
|
+
"gen_ai.system_instructions": inputs.messages
|
|
85
|
+
.filter((m) => m.role === "system")
|
|
86
|
+
.map((m) => JSON.stringify({ parts: [toTextPart(m.content)] })),
|
|
87
|
+
"gen_ai.input.messages": inputs.messages
|
|
88
|
+
.filter((m) => m.role !== "system")
|
|
89
|
+
.map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
|
|
90
|
+
"gen_ai.tool.definitions": JSON.stringify(inputs.tools),
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
return attrs;
|
|
94
|
+
};
|
|
95
|
+
export const getChatResponseAttributes = (completions, signalLevel) => {
|
|
96
|
+
if (!signalLevel || signalLevel === "off")
|
|
97
|
+
return {};
|
|
98
|
+
const attrs = {
|
|
99
|
+
"gen_ai.response.id": completions.id,
|
|
100
|
+
};
|
|
101
|
+
if (signalLevel !== "required") {
|
|
102
|
+
Object.assign(attrs, {
|
|
103
|
+
"gen_ai.response.finish_reasons": completions.choices?.map((c) => c.finish_reason),
|
|
104
|
+
"gen_ai.usage.total_tokens": completions.usage?.total_tokens,
|
|
105
|
+
"gen_ai.usage.input_tokens": completions.usage?.prompt_tokens,
|
|
106
|
+
"gen_ai.usage.cached_tokens": completions.usage?.prompt_tokens_details?.cached_tokens,
|
|
107
|
+
"gen_ai.usage.output_tokens": completions.usage?.completion_tokens,
|
|
108
|
+
"gen_ai.usage.reasoning_tokens": completions.usage?.completion_tokens_details?.reasoning_tokens,
|
|
109
|
+
});
|
|
110
|
+
}
|
|
111
|
+
if (signalLevel === "full") {
|
|
112
|
+
Object.assign(attrs, {
|
|
113
|
+
"gen_ai.output.messages": completions.choices?.map((c) => JSON.stringify({
|
|
114
|
+
role: c.message.role,
|
|
115
|
+
parts: toMessageParts(c.message),
|
|
116
|
+
finish_reason: c.finish_reason,
|
|
117
|
+
})),
|
|
118
|
+
});
|
|
119
|
+
}
|
|
120
|
+
return attrs;
|
|
121
|
+
};
|
|
@@ -5,14 +5,18 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
|
-
import {
|
|
8
|
+
import { recordRequestDuration, recordTokenUsage } from "../../telemetry/gen-ai";
|
|
9
|
+
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
9
10
|
import { resolveRequestId } from "../../utils/headers";
|
|
10
11
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
11
12
|
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
13
|
+
import { getEmbeddingsGeneralAttributes, getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes, } from "./otel";
|
|
12
14
|
import { EmbeddingsBodySchema } from "./schema";
|
|
13
15
|
export const embeddings = (config) => {
|
|
14
16
|
const hooks = config.hooks;
|
|
15
17
|
const handler = async (ctx) => {
|
|
18
|
+
const start = performance.now();
|
|
19
|
+
ctx.operation = "embeddings";
|
|
16
20
|
addSpanEvent("hebo.handler.started");
|
|
17
21
|
// Guard: enforce HTTP method early.
|
|
18
22
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
@@ -20,21 +24,20 @@ export const embeddings = (config) => {
|
|
|
20
24
|
}
|
|
21
25
|
const requestId = resolveRequestId(ctx.request);
|
|
22
26
|
// Parse + validate input.
|
|
23
|
-
let body;
|
|
24
27
|
try {
|
|
25
|
-
body = await ctx.request.json();
|
|
28
|
+
ctx.body = await ctx.request.json();
|
|
26
29
|
}
|
|
27
30
|
catch {
|
|
28
31
|
throw new GatewayError("Invalid JSON", 400);
|
|
29
32
|
}
|
|
30
33
|
addSpanEvent("hebo.request.deserialized");
|
|
31
|
-
const parsed = EmbeddingsBodySchema.safeParse(body);
|
|
34
|
+
const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
|
|
32
35
|
if (!parsed.success) {
|
|
36
|
+
// FUTURE: add body shape to error message
|
|
33
37
|
throw new GatewayError(z.prettifyError(parsed.error), 400);
|
|
34
38
|
}
|
|
35
39
|
ctx.body = parsed.data;
|
|
36
40
|
addSpanEvent("hebo.request.parsed");
|
|
37
|
-
ctx.operation = "embeddings";
|
|
38
41
|
if (hooks?.before) {
|
|
39
42
|
ctx.body = (await hooks.before(ctx)) ?? ctx.body;
|
|
40
43
|
addSpanEvent("hebo.hooks.before.completed");
|
|
@@ -45,10 +48,7 @@ export const embeddings = (config) => {
|
|
|
45
48
|
ctx.resolvedModelId =
|
|
46
49
|
(await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
|
|
47
50
|
logger.debug(`[embeddings] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
|
|
48
|
-
addSpanEvent("hebo.model.resolved"
|
|
49
|
-
"gen_ai.request.model": ctx.modelId ?? "",
|
|
50
|
-
"gen_ai.response.model": ctx.resolvedModelId ?? "",
|
|
51
|
-
});
|
|
51
|
+
addSpanEvent("hebo.model.resolved");
|
|
52
52
|
const override = await hooks?.resolveProvider?.(ctx);
|
|
53
53
|
ctx.provider =
|
|
54
54
|
override ??
|
|
@@ -61,13 +61,15 @@ export const embeddings = (config) => {
|
|
|
61
61
|
const embeddingModel = ctx.provider.embeddingModel(ctx.resolvedModelId);
|
|
62
62
|
ctx.resolvedProviderId = embeddingModel.provider;
|
|
63
63
|
logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
|
|
64
|
-
addSpanEvent("hebo.provider.resolved"
|
|
65
|
-
|
|
66
|
-
|
|
64
|
+
addSpanEvent("hebo.provider.resolved");
|
|
65
|
+
const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
|
|
66
|
+
const genAiGeneralAttrs = getEmbeddingsGeneralAttributes(ctx, genAiSignalLevel);
|
|
67
|
+
setSpanAttributes(genAiGeneralAttrs);
|
|
67
68
|
// Convert inputs to AI SDK call options.
|
|
68
69
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
69
70
|
logger.trace({ requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
70
71
|
addSpanEvent("hebo.options.prepared");
|
|
72
|
+
setSpanAttributes(getEmbeddingsRequestAttributes(inputs, genAiSignalLevel));
|
|
71
73
|
// Build middleware chain (model -> forward params -> provider).
|
|
72
74
|
const embeddingModelWithMiddleware = wrapEmbeddingModel({
|
|
73
75
|
model: embeddingModel,
|
|
@@ -83,12 +85,17 @@ export const embeddings = (config) => {
|
|
|
83
85
|
});
|
|
84
86
|
logger.trace({ requestId, result }, "[embeddings] AI SDK result");
|
|
85
87
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
88
|
+
// Transform result.
|
|
86
89
|
ctx.result = toEmbeddings(result, ctx.modelId);
|
|
87
90
|
addSpanEvent("hebo.result.transformed");
|
|
91
|
+
const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, genAiSignalLevel);
|
|
92
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
93
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
88
94
|
if (hooks?.after) {
|
|
89
95
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
90
96
|
addSpanEvent("hebo.hooks.after.completed");
|
|
91
97
|
}
|
|
98
|
+
recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
|
|
92
99
|
return ctx.result;
|
|
93
100
|
};
|
|
94
101
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
+
import type { Embeddings, EmbeddingsInputs } from "./schema";
|
|
3
|
+
import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
|
|
4
|
+
export declare const getEmbeddingsGeneralAttributes: (ctx: GatewayContext, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
5
|
+
export declare const getEmbeddingsRequestAttributes: (inputs: EmbeddingsInputs, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
6
|
+
export declare const getEmbeddingsResponseAttributes: (embeddings: Embeddings, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import {} from "../../types";
|
|
2
|
+
export const getEmbeddingsGeneralAttributes = (ctx, signalLevel) => {
|
|
3
|
+
if (!signalLevel || signalLevel === "off")
|
|
4
|
+
return {};
|
|
5
|
+
const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
|
|
6
|
+
return {
|
|
7
|
+
"gen_ai.operation.name": ctx.operation,
|
|
8
|
+
"gen_ai.request.model": requestModel,
|
|
9
|
+
"gen_ai.response.model": ctx.resolvedModelId,
|
|
10
|
+
"gen_ai.provider.name": ctx.resolvedProviderId,
|
|
11
|
+
};
|
|
12
|
+
};
|
|
13
|
+
export const getEmbeddingsRequestAttributes = (inputs, signalLevel) => {
|
|
14
|
+
if (!signalLevel || signalLevel === "off")
|
|
15
|
+
return {};
|
|
16
|
+
const attrs = {};
|
|
17
|
+
if (signalLevel !== "required") {
|
|
18
|
+
Object.assign(attrs, {
|
|
19
|
+
"gen_ai.embeddings.dimension.count": inputs.dimensions,
|
|
20
|
+
});
|
|
21
|
+
}
|
|
22
|
+
return attrs;
|
|
23
|
+
};
|
|
24
|
+
export const getEmbeddingsResponseAttributes = (embeddings, signalLevel) => {
|
|
25
|
+
if (!signalLevel || signalLevel === "off")
|
|
26
|
+
return {};
|
|
27
|
+
const attrs = {};
|
|
28
|
+
if (signalLevel !== "required") {
|
|
29
|
+
Object.assign(attrs, {
|
|
30
|
+
"gen_ai.usage.input_tokens": embeddings.usage?.prompt_tokens,
|
|
31
|
+
"gen_ai.usage.total_tokens": embeddings.usage?.total_tokens,
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
return attrs;
|
|
35
|
+
};
|
|
@@ -4,12 +4,11 @@ import { toModels, toModel } from "./converters";
|
|
|
4
4
|
export const models = (config) => {
|
|
5
5
|
// eslint-disable-next-line require-await
|
|
6
6
|
const handler = async (ctx) => {
|
|
7
|
-
|
|
8
|
-
if (!request || request.method !== "GET") {
|
|
7
|
+
ctx.operation = "models";
|
|
8
|
+
if (!ctx.request || ctx.request.method !== "GET") {
|
|
9
9
|
throw new GatewayError("Method Not Allowed", 405);
|
|
10
10
|
}
|
|
11
|
-
|
|
12
|
-
const rawId = request.url.split("/models/", 2)[1]?.split("?", 1)[0];
|
|
11
|
+
const rawId = ctx.request.url.split("/models/", 2)[1]?.split("?", 1)[0];
|
|
13
12
|
if (!rawId) {
|
|
14
13
|
return toModels(ctx.models);
|
|
15
14
|
}
|
package/dist/errors/gateway.js
CHANGED
|
@@ -7,7 +7,6 @@ export class GatewayError extends Error {
|
|
|
7
7
|
super(msg);
|
|
8
8
|
this.status = status;
|
|
9
9
|
this.code = code ?? STATUS_CODE(status);
|
|
10
|
-
this.cause =
|
|
11
|
-
cause ?? (typeof error === "string" ? undefined : error.cause);
|
|
10
|
+
this.cause = cause ?? (typeof error === "string" ? undefined : error);
|
|
12
11
|
}
|
|
13
12
|
}
|
package/dist/errors/openai.js
CHANGED
|
@@ -17,23 +17,21 @@ export class OpenAIError {
|
|
|
17
17
|
this.error = { message, type, code: code?.toLowerCase(), param };
|
|
18
18
|
}
|
|
19
19
|
}
|
|
20
|
+
const mapType = (status) => (status < 500 ? "invalid_request_error" : "server_error");
|
|
21
|
+
const maybeMaskMessage = (meta, requestId) => {
|
|
22
|
+
if (!(isProduction() && (meta.status >= 500 || meta.code.includes("UPSTREAM")))) {
|
|
23
|
+
return meta.message;
|
|
24
|
+
}
|
|
25
|
+
// FUTURE: always attach requestId to errors (masked and unmasked)
|
|
26
|
+
return `${STATUS_CODE(meta.status)} (${requestId ?? "see requestId in response headers"})`;
|
|
27
|
+
};
|
|
20
28
|
export function toOpenAIError(error) {
|
|
21
29
|
const meta = getErrorMeta(error);
|
|
22
|
-
return new OpenAIError(meta
|
|
30
|
+
return new OpenAIError(maybeMaskMessage(meta), mapType(meta.status), meta.code);
|
|
23
31
|
}
|
|
24
32
|
export function toOpenAIErrorResponse(error, responseInit) {
|
|
25
33
|
const meta = getErrorMeta(error);
|
|
26
|
-
|
|
27
|
-
let message;
|
|
28
|
-
if (shouldMask) {
|
|
29
|
-
const requestId = resolveRequestId(responseInit);
|
|
30
|
-
// FUTURE: always attach requestId to errors (masked and unmasked)
|
|
31
|
-
message = `${STATUS_CODE(meta.status)} (${requestId})`;
|
|
32
|
-
}
|
|
33
|
-
else {
|
|
34
|
-
message = meta.message;
|
|
35
|
-
}
|
|
36
|
-
return toResponse(new OpenAIError(message, meta.type, meta.code), {
|
|
34
|
+
return toResponse(new OpenAIError(maybeMaskMessage(meta, resolveRequestId(responseInit)), mapType(meta.status), meta.code), {
|
|
37
35
|
...responseInit,
|
|
38
36
|
status: meta.status,
|
|
39
37
|
statusText: meta.code,
|
package/dist/errors/utils.d.ts
CHANGED
|
@@ -15,9 +15,7 @@ export declare const STATUS_CODES: {
|
|
|
15
15
|
};
|
|
16
16
|
export declare const STATUS_CODE: (status: number) => "BAD_REQUEST" | "UNAUTHORIZED" | "PAYMENT_REQUIRED" | "FORBIDDEN" | "NOT_FOUND" | "METHOD_NOT_ALLOWED" | "CONFLICT" | "UNPROCESSABLE_ENTITY" | "TOO_MANY_REQUESTS" | "INTERNAL_SERVER_ERROR" | "BAD_GATEWAY" | "SERVICE_UNAVAILABLE" | "GATEWAY_TIMEOUT";
|
|
17
17
|
export declare function getErrorMeta(error: unknown): {
|
|
18
|
-
code: string;
|
|
19
18
|
status: number;
|
|
20
|
-
|
|
21
|
-
type: string;
|
|
19
|
+
code: string;
|
|
22
20
|
message: string;
|
|
23
21
|
};
|
package/dist/errors/utils.js
CHANGED
|
@@ -21,24 +21,23 @@ export const STATUS_CODE = (status) => {
|
|
|
21
21
|
return label;
|
|
22
22
|
return status >= 400 && status < 500 ? STATUS_CODES[400] : STATUS_CODES[500];
|
|
23
23
|
};
|
|
24
|
+
// FUTURE: always return a wrapped GatewayError?
|
|
24
25
|
export function getErrorMeta(error) {
|
|
25
26
|
const message = error instanceof Error ? error.message : String(error);
|
|
26
|
-
let code;
|
|
27
27
|
let status;
|
|
28
|
-
let
|
|
28
|
+
let code;
|
|
29
29
|
if (error instanceof GatewayError) {
|
|
30
|
-
({
|
|
30
|
+
({ status, code } = error);
|
|
31
31
|
}
|
|
32
32
|
else {
|
|
33
33
|
const normalized = normalizeAiSdkError(error);
|
|
34
34
|
if (normalized) {
|
|
35
|
-
({
|
|
35
|
+
({ status, code } = normalized);
|
|
36
36
|
}
|
|
37
37
|
else {
|
|
38
38
|
status = 500;
|
|
39
39
|
code = STATUS_CODE(status);
|
|
40
40
|
}
|
|
41
41
|
}
|
|
42
|
-
|
|
43
|
-
return { code, status, param, type, message };
|
|
42
|
+
return { status, code, message };
|
|
44
43
|
}
|
package/dist/gateway.js
CHANGED
|
@@ -22,7 +22,7 @@ export function gateway(config) {
|
|
|
22
22
|
if (basePath && pathname.startsWith(basePath)) {
|
|
23
23
|
pathname = pathname.slice(basePath.length);
|
|
24
24
|
}
|
|
25
|
-
logger.
|
|
25
|
+
logger.info(`[gateway] ${req.method} ${pathname}`);
|
|
26
26
|
for (const [route, endpoint] of routeEntries) {
|
|
27
27
|
if (pathname === route || pathname.startsWith(route + "/")) {
|
|
28
28
|
return endpoint.handler(req, state);
|