@hebo-ai/gateway 0.4.0-beta.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -5
- package/dist/config.js +21 -7
- package/dist/endpoints/chat-completions/converters.d.ts +3 -3
- package/dist/endpoints/chat-completions/converters.js +16 -8
- package/dist/endpoints/chat-completions/handler.js +34 -27
- package/dist/endpoints/chat-completions/otel.d.ts +6 -0
- package/dist/endpoints/chat-completions/otel.js +127 -0
- package/dist/endpoints/embeddings/handler.js +19 -10
- package/dist/endpoints/embeddings/otel.d.ts +6 -0
- package/dist/endpoints/embeddings/otel.js +35 -0
- package/dist/endpoints/models/handler.js +3 -4
- package/dist/errors/gateway.d.ts +1 -1
- package/dist/errors/gateway.js +3 -4
- package/dist/errors/openai.js +11 -12
- package/dist/errors/utils.d.ts +3 -4
- package/dist/errors/utils.js +6 -6
- package/dist/gateway.js +1 -1
- package/dist/lifecycle.js +71 -29
- package/dist/middleware/matcher.js +1 -1
- package/dist/models/amazon/presets.d.ts +37 -37
- package/dist/models/amazon/presets.js +1 -1
- package/dist/models/anthropic/presets.d.ts +56 -56
- package/dist/models/cohere/presets.d.ts +54 -54
- package/dist/models/cohere/presets.js +2 -2
- package/dist/models/google/presets.d.ts +31 -31
- package/dist/models/google/presets.js +1 -1
- package/dist/models/meta/presets.d.ts +42 -42
- package/dist/models/openai/presets.d.ts +96 -96
- package/dist/models/openai/presets.js +1 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/voyage/presets.d.ts +92 -92
- package/dist/models/voyage/presets.js +1 -1
- package/dist/providers/registry.js +2 -2
- package/dist/telemetry/baggage.d.ts +1 -0
- package/dist/telemetry/baggage.js +24 -0
- package/dist/telemetry/fetch.d.ts +2 -1
- package/dist/telemetry/fetch.js +13 -3
- package/dist/telemetry/gen-ai.d.ts +5 -0
- package/dist/telemetry/gen-ai.js +60 -0
- package/dist/telemetry/http.d.ts +3 -0
- package/dist/telemetry/http.js +57 -0
- package/dist/telemetry/memory.d.ts +2 -0
- package/dist/telemetry/memory.js +27 -0
- package/dist/telemetry/span.d.ts +6 -3
- package/dist/telemetry/span.js +24 -36
- package/dist/telemetry/stream.d.ts +3 -7
- package/dist/telemetry/stream.js +26 -29
- package/dist/types.d.ts +16 -15
- package/dist/utils/headers.d.ts +1 -1
- package/dist/utils/headers.js +7 -9
- package/dist/utils/request.d.ts +0 -4
- package/dist/utils/request.js +0 -9
- package/dist/utils/response.js +1 -1
- package/package.json +5 -2
- package/src/config.ts +28 -7
- package/src/endpoints/chat-completions/converters.ts +18 -11
- package/src/endpoints/chat-completions/handler.ts +46 -28
- package/src/endpoints/chat-completions/otel.ts +161 -0
- package/src/endpoints/embeddings/handler.test.ts +2 -2
- package/src/endpoints/embeddings/handler.ts +28 -10
- package/src/endpoints/embeddings/otel.ts +56 -0
- package/src/endpoints/models/handler.ts +3 -5
- package/src/errors/gateway.ts +5 -5
- package/src/errors/openai.ts +25 -17
- package/src/errors/utils.ts +6 -7
- package/src/gateway.ts +1 -1
- package/src/lifecycle.ts +85 -32
- package/src/middleware/matcher.ts +1 -1
- package/src/models/amazon/presets.ts +1 -1
- package/src/models/cohere/presets.ts +2 -2
- package/src/models/google/presets.ts +1 -1
- package/src/models/openai/presets.ts +1 -1
- package/src/models/types.ts +1 -1
- package/src/models/voyage/presets.ts +1 -1
- package/src/providers/registry.ts +2 -2
- package/src/telemetry/baggage.ts +27 -0
- package/src/telemetry/fetch.ts +15 -3
- package/src/telemetry/gen-ai.ts +88 -0
- package/src/telemetry/http.ts +65 -0
- package/src/telemetry/memory.ts +36 -0
- package/src/telemetry/span.ts +28 -40
- package/src/telemetry/stream.ts +36 -40
- package/src/types.ts +18 -18
- package/src/utils/headers.ts +8 -19
- package/src/utils/request.ts +0 -11
- package/src/utils/response.ts +1 -1
- package/dist/telemetry/otel.d.ts +0 -2
- package/dist/telemetry/otel.js +0 -50
- package/dist/telemetry/utils.d.ts +0 -4
- package/dist/telemetry/utils.js +0 -223
- package/src/telemetry/otel.ts +0 -91
- package/src/telemetry/utils.ts +0 -273
package/README.md
CHANGED
|
@@ -19,6 +19,7 @@ Learn more in our blog post: [Yet Another AI Gateway?](https://hebo.ai/blog/2601
|
|
|
19
19
|
- 🗂️ Model catalog with extensible metadata capabilities.
|
|
20
20
|
- 🪝 Hook system to customize routing, auth, rate limits, and shape responses.
|
|
21
21
|
- 🧰 Low-level OpenAI-compatible schema, converters, and middleware helpers.
|
|
22
|
+
- 👁️ OpenTelemetry support for GenAI semantic conventions (Langfuse-compatible).
|
|
22
23
|
|
|
23
24
|
## 📦 Installation
|
|
24
25
|
|
|
@@ -615,12 +616,20 @@ const gw = gateway({
|
|
|
615
616
|
telemetry: {
|
|
616
617
|
// default: false
|
|
617
618
|
enabled: true,
|
|
618
|
-
// default:
|
|
619
|
+
// default: TracerProvider from the @opentelemetry/api singleton
|
|
619
620
|
tracer: trace.getTracer("my-gateway"),
|
|
620
|
-
//
|
|
621
|
-
// "
|
|
622
|
-
|
|
623
|
-
|
|
621
|
+
// Telemetry levels by namespace:
|
|
622
|
+
// "off" | "required" | "recommended" | "full"
|
|
623
|
+
signals: {
|
|
624
|
+
// gen_ai.* semantic attributes
|
|
625
|
+
gen_ai: "full",
|
|
626
|
+
// http.*, url.*, server.* semantic attributes
|
|
627
|
+
http: "recommended",
|
|
628
|
+
// hebo-specific telemetry:
|
|
629
|
+
// - recommended: hebo.* span events
|
|
630
|
+
// - full: hebo.* span events + fetch instrumentation
|
|
631
|
+
hebo: "recommended",
|
|
632
|
+
},
|
|
624
633
|
},
|
|
625
634
|
});
|
|
626
635
|
```
|
|
@@ -633,6 +642,32 @@ https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/
|
|
|
633
642
|
|
|
634
643
|
For observability integration that is not otel compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.
|
|
635
644
|
|
|
645
|
+
#### Langfuse
|
|
646
|
+
|
|
647
|
+
Hebo telemetry spans are OpenTelemetry-compatible, so you can send them to Langfuse via `@langfuse/otel`.
|
|
648
|
+
|
|
649
|
+
```ts
|
|
650
|
+
import { gateway } from "@hebo-ai/gateway";
|
|
651
|
+
import { LangfuseSpanProcessor } from "@langfuse/otel";
|
|
652
|
+
import { context } from "@opentelemetry/api";
|
|
653
|
+
import { AsyncLocalStorageContextManager } from "@opentelemetry/context-async-hooks";
|
|
654
|
+
import { BasicTracerProvider } from "@opentelemetry/sdk-trace-base";
|
|
655
|
+
|
|
656
|
+
context.setGlobalContextManager(new AsyncLocalStorageContextManager().enable());
|
|
657
|
+
|
|
658
|
+
const gw = gateway({
|
|
659
|
+
// ...
|
|
660
|
+
telemetry: {
|
|
661
|
+
enabled: true,
|
|
662
|
+
tracer = new BasicTracerProvider({
|
|
663
|
+
spanProcessors: [new LangfuseSpanProcessor()],
|
|
664
|
+
}).getTracer("hebo");,
|
|
665
|
+
},
|
|
666
|
+
});
|
|
667
|
+
```
|
|
668
|
+
|
|
669
|
+
Langfuse credentials are read from environment variables by the Langfuse OTel SDK (`LANGFUSE_PUBLIC_KEY`, `LANGFUSE_SECRET_KEY`, `LANGFUSE_BASE_URL`).
|
|
670
|
+
|
|
636
671
|
### Passing Framework State to Hooks
|
|
637
672
|
|
|
638
673
|
You can pass per-request info from your framework into the gateway via the second `state` argument on the handler, then read it in hooks through `ctx.state`.
|
package/dist/config.js
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
import { isLogger, logger, setLoggerInstance } from "./logger";
|
|
2
2
|
import { createDefaultLogger } from "./logger/default";
|
|
3
|
-
import { kParsed } from "./types";
|
|
3
|
+
import { kParsed, } from "./types";
|
|
4
4
|
export const parseConfig = (config) => {
|
|
5
|
-
// If it has been parsed before, just return
|
|
5
|
+
// If it has been parsed before, just return.
|
|
6
6
|
if (kParsed in config)
|
|
7
7
|
return config;
|
|
8
8
|
const providers = config.providers ?? {};
|
|
9
9
|
const parsedProviders = {};
|
|
10
10
|
const models = config.models ?? {};
|
|
11
|
-
// Set the global logger instance
|
|
11
|
+
// Set the global logger instance.
|
|
12
12
|
if (config.logger === undefined) {
|
|
13
13
|
setLoggerInstance(createDefaultLogger({}));
|
|
14
14
|
}
|
|
@@ -18,7 +18,7 @@ export const parseConfig = (config) => {
|
|
|
18
18
|
? `[logger] custom logger configured`
|
|
19
19
|
: `[logger] logger configured: level=${config.logger.level}`);
|
|
20
20
|
}
|
|
21
|
-
// Strip providers that are not configured
|
|
21
|
+
// Strip providers that are not configured.
|
|
22
22
|
for (const id in providers) {
|
|
23
23
|
const provider = providers[id];
|
|
24
24
|
if (provider === undefined) {
|
|
@@ -30,7 +30,7 @@ export const parseConfig = (config) => {
|
|
|
30
30
|
if (Object.keys(parsedProviders).length === 0) {
|
|
31
31
|
throw new Error("No providers configured (config.providers is empty)");
|
|
32
32
|
}
|
|
33
|
-
// Strip providers that are not configured from models
|
|
33
|
+
// Strip providers that are not configured from models.
|
|
34
34
|
const parsedModels = {};
|
|
35
35
|
const warnings = new Set();
|
|
36
36
|
for (const id in models) {
|
|
@@ -51,12 +51,26 @@ export const parseConfig = (config) => {
|
|
|
51
51
|
if (Object.keys(parsedModels).length === 0) {
|
|
52
52
|
throw new Error("No models configured (config.models is empty)");
|
|
53
53
|
}
|
|
54
|
+
// Default for the telemetry settings.
|
|
55
|
+
const telemetryEnabled = config.telemetry?.enabled ?? false;
|
|
56
|
+
const telemetrySignals = telemetryEnabled
|
|
57
|
+
? {
|
|
58
|
+
http: config.telemetry?.signals?.http ?? "recommended",
|
|
59
|
+
gen_ai: config.telemetry?.signals?.gen_ai ?? "full",
|
|
60
|
+
hebo: config.telemetry?.signals?.hebo ?? "off",
|
|
61
|
+
}
|
|
62
|
+
: {
|
|
63
|
+
http: "off",
|
|
64
|
+
gen_ai: "off",
|
|
65
|
+
hebo: "off",
|
|
66
|
+
};
|
|
67
|
+
// Return parsed config.
|
|
54
68
|
return {
|
|
55
69
|
...config,
|
|
56
|
-
logger: config.logger,
|
|
57
70
|
telemetry: {
|
|
58
71
|
...config.telemetry,
|
|
59
|
-
enabled:
|
|
72
|
+
enabled: telemetryEnabled,
|
|
73
|
+
signals: telemetrySignals,
|
|
60
74
|
},
|
|
61
75
|
providers: parsedProviders,
|
|
62
76
|
models: parsedModels,
|
|
@@ -25,10 +25,10 @@ export declare const convertToToolSet: (tools: ChatCompletionsTool[] | undefined
|
|
|
25
25
|
export declare const convertToToolChoice: (toolChoice: ChatCompletionsToolChoice | undefined) => ToolChoice<ToolSet> | undefined;
|
|
26
26
|
export declare function toChatCompletions(result: GenerateTextResult<ToolSet, Output.Output>, model: string): ChatCompletions;
|
|
27
27
|
export declare function toChatCompletionsResponse(result: GenerateTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
|
|
28
|
-
export declare function toChatCompletionsStream(result: StreamTextResult<ToolSet, Output.Output>, model: string): ReadableStream<ChatCompletionsChunk | OpenAIError>;
|
|
28
|
+
export declare function toChatCompletionsStream<E extends boolean = false>(result: StreamTextResult<ToolSet, Output.Output>, model: string, wrapErrors?: E): ReadableStream<ChatCompletionsChunk | (E extends true ? OpenAIError : Error)>;
|
|
29
29
|
export declare function toChatCompletionsStreamResponse(result: StreamTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
|
|
30
|
-
export declare class ChatCompletionsStream extends TransformStream<TextStreamPart<ToolSet>, ChatCompletionsChunk | OpenAIError> {
|
|
31
|
-
constructor(model: string);
|
|
30
|
+
export declare class ChatCompletionsStream<E extends boolean = false> extends TransformStream<TextStreamPart<ToolSet>, ChatCompletionsChunk | (E extends true ? OpenAIError : Error)> {
|
|
31
|
+
constructor(model: string, wrapErrors?: E);
|
|
32
32
|
}
|
|
33
33
|
export declare const toChatCompletionsAssistantMessage: (result: GenerateTextResult<ToolSet, Output.Output>) => ChatCompletionsAssistantMessage;
|
|
34
34
|
export declare function toReasoningDetail(reasoning: ReasoningOutput, id: string, index: number): ChatCompletionsReasoningDetail;
|
|
@@ -94,6 +94,7 @@ export function fromChatCompletionsAssistantMessage(message) {
|
|
|
94
94
|
}
|
|
95
95
|
if (tool_calls?.length) {
|
|
96
96
|
for (const tc of tool_calls) {
|
|
97
|
+
// eslint-disable-next-line no-shadow
|
|
97
98
|
const { id, function: fn, extra_content } = tc;
|
|
98
99
|
const out = {
|
|
99
100
|
type: "tool-call",
|
|
@@ -274,14 +275,14 @@ export function toChatCompletions(result, model) {
|
|
|
274
275
|
export function toChatCompletionsResponse(result, model, responseInit) {
|
|
275
276
|
return toResponse(toChatCompletions(result, model), responseInit);
|
|
276
277
|
}
|
|
277
|
-
export function toChatCompletionsStream(result, model) {
|
|
278
|
-
return result.fullStream.pipeThrough(new ChatCompletionsStream(model));
|
|
278
|
+
export function toChatCompletionsStream(result, model, wrapErrors) {
|
|
279
|
+
return result.fullStream.pipeThrough(new ChatCompletionsStream(model, wrapErrors));
|
|
279
280
|
}
|
|
280
281
|
export function toChatCompletionsStreamResponse(result, model, responseInit) {
|
|
281
|
-
return toResponse(toChatCompletionsStream(result, model), responseInit);
|
|
282
|
+
return toResponse(toChatCompletionsStream(result, model, true), responseInit);
|
|
282
283
|
}
|
|
283
284
|
export class ChatCompletionsStream extends TransformStream {
|
|
284
|
-
constructor(model) {
|
|
285
|
+
constructor(model, wrapErrors) {
|
|
285
286
|
const streamId = `chatcmpl-${crypto.randomUUID()}`;
|
|
286
287
|
const creationTime = Math.floor(Date.now() / 1000);
|
|
287
288
|
let toolCallIndexCounter = 0;
|
|
@@ -347,10 +348,17 @@ export class ChatCompletionsStream extends TransformStream {
|
|
|
347
348
|
break;
|
|
348
349
|
}
|
|
349
350
|
case "error": {
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
351
|
+
let err;
|
|
352
|
+
if (wrapErrors) {
|
|
353
|
+
err = toOpenAIError(part.error);
|
|
354
|
+
}
|
|
355
|
+
else if (part.error instanceof Error) {
|
|
356
|
+
err = part.error;
|
|
357
|
+
}
|
|
358
|
+
else {
|
|
359
|
+
err = new Error(String(part.error));
|
|
360
|
+
}
|
|
361
|
+
controller.enqueue(err);
|
|
354
362
|
}
|
|
355
363
|
}
|
|
356
364
|
},
|
|
@@ -5,14 +5,18 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
|
-
import {
|
|
8
|
+
import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
9
|
+
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
9
10
|
import { resolveRequestId } from "../../utils/headers";
|
|
10
11
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
11
12
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
13
|
+
import { getChatGeneralAttributes, getChatRequestAttributes, getChatResponseAttributes, } from "./otel";
|
|
12
14
|
import { ChatCompletionsBodySchema } from "./schema";
|
|
13
15
|
export const chatCompletions = (config) => {
|
|
14
16
|
const hooks = config.hooks;
|
|
15
17
|
const handler = async (ctx) => {
|
|
18
|
+
const start = performance.now();
|
|
19
|
+
ctx.operation = "chat";
|
|
16
20
|
addSpanEvent("hebo.handler.started");
|
|
17
21
|
// Guard: enforce HTTP method early.
|
|
18
22
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
@@ -29,11 +33,11 @@ export const chatCompletions = (config) => {
|
|
|
29
33
|
addSpanEvent("hebo.request.deserialized");
|
|
30
34
|
const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
|
|
31
35
|
if (!parsed.success) {
|
|
32
|
-
|
|
36
|
+
// FUTURE: consider adding body shape to metadata
|
|
37
|
+
throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
|
|
33
38
|
}
|
|
34
39
|
ctx.body = parsed.data;
|
|
35
40
|
addSpanEvent("hebo.request.parsed");
|
|
36
|
-
ctx.operation = "chat";
|
|
37
41
|
if (hooks?.before) {
|
|
38
42
|
ctx.body = (await hooks.before(ctx)) ?? ctx.body;
|
|
39
43
|
addSpanEvent("hebo.hooks.before.completed");
|
|
@@ -44,10 +48,7 @@ export const chatCompletions = (config) => {
|
|
|
44
48
|
ctx.resolvedModelId =
|
|
45
49
|
(await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
|
|
46
50
|
logger.debug(`[chat] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
|
|
47
|
-
addSpanEvent("hebo.model.resolved"
|
|
48
|
-
"gen_ai.request.model": ctx.modelId ?? "",
|
|
49
|
-
"gen_ai.response.model": ctx.resolvedModelId ?? "",
|
|
50
|
-
});
|
|
51
|
+
addSpanEvent("hebo.model.resolved");
|
|
51
52
|
const override = await hooks?.resolveProvider?.(ctx);
|
|
52
53
|
ctx.provider =
|
|
53
54
|
override ??
|
|
@@ -60,7 +61,10 @@ export const chatCompletions = (config) => {
|
|
|
60
61
|
const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
|
|
61
62
|
ctx.resolvedProviderId = languageModel.provider;
|
|
62
63
|
logger.debug(`[chat] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
|
|
63
|
-
addSpanEvent("hebo.provider.resolved"
|
|
64
|
+
addSpanEvent("hebo.provider.resolved");
|
|
65
|
+
const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
|
|
66
|
+
const genAiGeneralAttrs = getChatGeneralAttributes(ctx, genAiSignalLevel);
|
|
67
|
+
setSpanAttributes(genAiGeneralAttrs);
|
|
64
68
|
// Convert inputs to AI SDK call options.
|
|
65
69
|
const textOptions = convertToTextCallOptions(inputs);
|
|
66
70
|
logger.trace({
|
|
@@ -68,6 +72,7 @@ export const chatCompletions = (config) => {
|
|
|
68
72
|
options: textOptions,
|
|
69
73
|
}, "[chat] AI SDK options");
|
|
70
74
|
addSpanEvent("hebo.options.prepared");
|
|
75
|
+
setSpanAttributes(getChatRequestAttributes(inputs, genAiSignalLevel));
|
|
71
76
|
// Build middleware chain (model -> forward params -> provider).
|
|
72
77
|
const languageModelWithMiddleware = wrapLanguageModel({
|
|
73
78
|
model: languageModel,
|
|
@@ -79,24 +84,23 @@ export const chatCompletions = (config) => {
|
|
|
79
84
|
const result = streamText({
|
|
80
85
|
model: languageModelWithMiddleware,
|
|
81
86
|
headers: prepareForwardHeaders(ctx.request),
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
86
|
-
logger.error({
|
|
87
|
-
requestId,
|
|
88
|
-
err,
|
|
89
|
-
});
|
|
90
|
-
throw error;
|
|
87
|
+
abortSignal: ctx.request.signal,
|
|
88
|
+
timeout: {
|
|
89
|
+
totalMs: 5 * 60 * 1000,
|
|
91
90
|
},
|
|
92
91
|
onAbort: () => {
|
|
93
|
-
throw new DOMException("
|
|
92
|
+
throw new DOMException("The operation was aborted.", "AbortError");
|
|
94
93
|
},
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
94
|
+
onError: () => { },
|
|
95
|
+
onFinish: (res) => {
|
|
96
|
+
addSpanEvent("hebo.ai-sdk.completed");
|
|
97
|
+
const streamResult = toChatCompletions(res, ctx.resolvedModelId);
|
|
98
|
+
addSpanEvent("hebo.result.transformed");
|
|
99
|
+
const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
|
|
100
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
101
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
102
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
103
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
100
104
|
},
|
|
101
105
|
experimental_include: {
|
|
102
106
|
requestBody: false,
|
|
@@ -104,9 +108,7 @@ export const chatCompletions = (config) => {
|
|
|
104
108
|
includeRawChunks: false,
|
|
105
109
|
...textOptions,
|
|
106
110
|
});
|
|
107
|
-
addSpanEvent("hebo.ai-sdk.completed");
|
|
108
111
|
ctx.result = toChatCompletionsStream(result, ctx.resolvedModelId);
|
|
109
|
-
addSpanEvent("hebo.result.transformed");
|
|
110
112
|
if (hooks?.after) {
|
|
111
113
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
112
114
|
addSpanEvent("hebo.hooks.after.completed");
|
|
@@ -117,23 +119,28 @@ export const chatCompletions = (config) => {
|
|
|
117
119
|
const result = await generateText({
|
|
118
120
|
model: languageModelWithMiddleware,
|
|
119
121
|
headers: prepareForwardHeaders(ctx.request),
|
|
120
|
-
// FUTURE: currently can't tell whether upstream or downstream abort
|
|
121
122
|
abortSignal: ctx.request.signal,
|
|
123
|
+
timeout: 5 * 60 * 1000,
|
|
122
124
|
experimental_include: {
|
|
123
125
|
requestBody: false,
|
|
124
126
|
responseBody: false,
|
|
125
127
|
},
|
|
126
|
-
timeout: 5 * 60 * 1000,
|
|
127
128
|
...textOptions,
|
|
128
129
|
});
|
|
129
130
|
logger.trace({ requestId, result }, "[chat] AI SDK result");
|
|
130
131
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
132
|
+
// Transform result.
|
|
131
133
|
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
132
134
|
addSpanEvent("hebo.result.transformed");
|
|
135
|
+
const genAiResponseAttrs = getChatResponseAttributes(ctx.result, genAiSignalLevel);
|
|
136
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
137
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
133
138
|
if (hooks?.after) {
|
|
134
139
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
135
140
|
addSpanEvent("hebo.hooks.after.completed");
|
|
136
141
|
}
|
|
142
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
143
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
137
144
|
return ctx.result;
|
|
138
145
|
};
|
|
139
146
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
+
import type { ChatCompletions, ChatCompletionsBody } from "./schema";
|
|
3
|
+
import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
|
|
4
|
+
export declare const getChatGeneralAttributes: (ctx: GatewayContext, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
5
|
+
export declare const getChatRequestAttributes: (inputs: ChatCompletionsBody, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
6
|
+
export declare const getChatResponseAttributes: (completions: ChatCompletions, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import {} from "../../types";
|
|
2
|
+
const toTextPart = (content) => ({ type: "text", content });
|
|
3
|
+
const toMessageParts = (message) => {
|
|
4
|
+
if (message.role === "assistant") {
|
|
5
|
+
const parts = [];
|
|
6
|
+
if (typeof message.content === "string")
|
|
7
|
+
parts.push(toTextPart(message.content));
|
|
8
|
+
if (Array.isArray(message.tool_calls)) {
|
|
9
|
+
for (const call of message.tool_calls) {
|
|
10
|
+
parts.push({
|
|
11
|
+
type: "tool_call",
|
|
12
|
+
id: call.id,
|
|
13
|
+
name: call.function.name,
|
|
14
|
+
arguments: call.function.arguments,
|
|
15
|
+
});
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
return parts;
|
|
19
|
+
}
|
|
20
|
+
if (message.role === "tool") {
|
|
21
|
+
return [{ type: "tool_call_response", id: message.tool_call_id, content: message.content }];
|
|
22
|
+
}
|
|
23
|
+
if (message.role === "user") {
|
|
24
|
+
const parts = [];
|
|
25
|
+
if (typeof message.content === "string")
|
|
26
|
+
parts.push(toTextPart(message.content));
|
|
27
|
+
if (Array.isArray(message.content)) {
|
|
28
|
+
for (const part of message.content) {
|
|
29
|
+
if (part.type === "text") {
|
|
30
|
+
parts.push(toTextPart(part.text));
|
|
31
|
+
}
|
|
32
|
+
else if (part.type === "image_url") {
|
|
33
|
+
parts.push({ type: "image", content: part.image_url.url });
|
|
34
|
+
}
|
|
35
|
+
else {
|
|
36
|
+
parts.push({
|
|
37
|
+
type: "file",
|
|
38
|
+
// FUTURE: optionally expose safe metadata without raw binary payloads.
|
|
39
|
+
content: part.file.filename ?? "[REDACTED_BINARY_DATA]",
|
|
40
|
+
media_type: part.file.media_type,
|
|
41
|
+
});
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
return parts;
|
|
46
|
+
}
|
|
47
|
+
// FUTURE: remove once Langfuse supports gen_ai.system_instructions
|
|
48
|
+
if (message.role === "system") {
|
|
49
|
+
return [toTextPart(message.content)];
|
|
50
|
+
}
|
|
51
|
+
return [];
|
|
52
|
+
};
|
|
53
|
+
export const getChatGeneralAttributes = (ctx, signalLevel) => {
|
|
54
|
+
if (!signalLevel || signalLevel === "off")
|
|
55
|
+
return {};
|
|
56
|
+
const requestModel = typeof ctx.body?.model === "string" ? ctx.body.model : ctx.modelId;
|
|
57
|
+
return {
|
|
58
|
+
"gen_ai.operation.name": ctx.operation,
|
|
59
|
+
"gen_ai.request.model": requestModel,
|
|
60
|
+
"gen_ai.response.model": ctx.resolvedModelId,
|
|
61
|
+
"gen_ai.provider.name": ctx.resolvedProviderId,
|
|
62
|
+
};
|
|
63
|
+
};
|
|
64
|
+
export const getChatRequestAttributes = (inputs, signalLevel) => {
|
|
65
|
+
if (!signalLevel || signalLevel === "off")
|
|
66
|
+
return {};
|
|
67
|
+
const attrs = {};
|
|
68
|
+
if (inputs.seed !== undefined) {
|
|
69
|
+
Object.assign(attrs, { "gen_ai.request.seed": inputs.seed });
|
|
70
|
+
}
|
|
71
|
+
if (signalLevel !== "required") {
|
|
72
|
+
Object.assign(attrs, {
|
|
73
|
+
"gen_ai.request.stream": inputs.stream,
|
|
74
|
+
"gen_ai.request.frequency_penalty": inputs.frequency_penalty,
|
|
75
|
+
"gen_ai.request.max_tokens": inputs.max_completion_tokens,
|
|
76
|
+
"gen_ai.request.presence_penalty": inputs.presence_penalty,
|
|
77
|
+
"gen_ai.request.stop_sequences": inputs.stop
|
|
78
|
+
? Array.isArray(inputs.stop)
|
|
79
|
+
? inputs.stop
|
|
80
|
+
: [inputs.stop]
|
|
81
|
+
: undefined,
|
|
82
|
+
"gen_ai.request.temperature": inputs.temperature,
|
|
83
|
+
"gen_ai.request.top_p": inputs.top_p,
|
|
84
|
+
});
|
|
85
|
+
}
|
|
86
|
+
if (signalLevel === "full") {
|
|
87
|
+
Object.assign(attrs, {
|
|
88
|
+
// FUTURE: move system instructions from messages to here
|
|
89
|
+
// blocker: https://github.com/langfuse/langfuse/issues/11607
|
|
90
|
+
// "gen_ai.system_instructions": inputs.messages
|
|
91
|
+
// .filter((m) => m.role === "system")
|
|
92
|
+
// .map((m) => JSON.stringify(toTextPart(m.content))),
|
|
93
|
+
"gen_ai.input.messages": inputs.messages
|
|
94
|
+
//.filter((m) => m.role !== "system")
|
|
95
|
+
.map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
|
|
96
|
+
"gen_ai.tool.definitions": JSON.stringify(inputs.tools),
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
return attrs;
|
|
100
|
+
};
|
|
101
|
+
export const getChatResponseAttributes = (completions, signalLevel) => {
|
|
102
|
+
if (!signalLevel || signalLevel === "off")
|
|
103
|
+
return {};
|
|
104
|
+
const attrs = {
|
|
105
|
+
"gen_ai.response.id": completions.id,
|
|
106
|
+
};
|
|
107
|
+
if (signalLevel !== "required") {
|
|
108
|
+
Object.assign(attrs, {
|
|
109
|
+
"gen_ai.response.finish_reasons": completions.choices?.map((c) => c.finish_reason),
|
|
110
|
+
"gen_ai.usage.total_tokens": completions.usage?.total_tokens,
|
|
111
|
+
"gen_ai.usage.input_tokens": completions.usage?.prompt_tokens,
|
|
112
|
+
"gen_ai.usage.cached_tokens": completions.usage?.prompt_tokens_details?.cached_tokens,
|
|
113
|
+
"gen_ai.usage.output_tokens": completions.usage?.completion_tokens,
|
|
114
|
+
"gen_ai.usage.reasoning_tokens": completions.usage?.completion_tokens_details?.reasoning_tokens,
|
|
115
|
+
});
|
|
116
|
+
}
|
|
117
|
+
if (signalLevel === "full") {
|
|
118
|
+
Object.assign(attrs, {
|
|
119
|
+
"gen_ai.output.messages": completions.choices?.map((c) => JSON.stringify({
|
|
120
|
+
role: c.message.role,
|
|
121
|
+
parts: toMessageParts(c.message),
|
|
122
|
+
finish_reason: c.finish_reason,
|
|
123
|
+
})),
|
|
124
|
+
});
|
|
125
|
+
}
|
|
126
|
+
return attrs;
|
|
127
|
+
};
|
|
@@ -5,14 +5,18 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
|
-
import {
|
|
8
|
+
import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
9
|
+
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
9
10
|
import { resolveRequestId } from "../../utils/headers";
|
|
10
11
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
11
12
|
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
13
|
+
import { getEmbeddingsGeneralAttributes, getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes, } from "./otel";
|
|
12
14
|
import { EmbeddingsBodySchema } from "./schema";
|
|
13
15
|
export const embeddings = (config) => {
|
|
14
16
|
const hooks = config.hooks;
|
|
15
17
|
const handler = async (ctx) => {
|
|
18
|
+
const start = performance.now();
|
|
19
|
+
ctx.operation = "embeddings";
|
|
16
20
|
addSpanEvent("hebo.handler.started");
|
|
17
21
|
// Guard: enforce HTTP method early.
|
|
18
22
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
@@ -29,11 +33,11 @@ export const embeddings = (config) => {
|
|
|
29
33
|
addSpanEvent("hebo.request.deserialized");
|
|
30
34
|
const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
|
|
31
35
|
if (!parsed.success) {
|
|
32
|
-
|
|
36
|
+
// FUTURE: consider adding body shape to metadata
|
|
37
|
+
throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
|
|
33
38
|
}
|
|
34
39
|
ctx.body = parsed.data;
|
|
35
40
|
addSpanEvent("hebo.request.parsed");
|
|
36
|
-
ctx.operation = "embeddings";
|
|
37
41
|
if (hooks?.before) {
|
|
38
42
|
ctx.body = (await hooks.before(ctx)) ?? ctx.body;
|
|
39
43
|
addSpanEvent("hebo.hooks.before.completed");
|
|
@@ -44,10 +48,7 @@ export const embeddings = (config) => {
|
|
|
44
48
|
ctx.resolvedModelId =
|
|
45
49
|
(await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
|
|
46
50
|
logger.debug(`[embeddings] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
|
|
47
|
-
addSpanEvent("hebo.model.resolved"
|
|
48
|
-
"gen_ai.request.model": ctx.modelId ?? "",
|
|
49
|
-
"gen_ai.response.model": ctx.resolvedModelId ?? "",
|
|
50
|
-
});
|
|
51
|
+
addSpanEvent("hebo.model.resolved");
|
|
51
52
|
const override = await hooks?.resolveProvider?.(ctx);
|
|
52
53
|
ctx.provider =
|
|
53
54
|
override ??
|
|
@@ -60,13 +61,15 @@ export const embeddings = (config) => {
|
|
|
60
61
|
const embeddingModel = ctx.provider.embeddingModel(ctx.resolvedModelId);
|
|
61
62
|
ctx.resolvedProviderId = embeddingModel.provider;
|
|
62
63
|
logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
|
|
63
|
-
addSpanEvent("hebo.provider.resolved"
|
|
64
|
-
|
|
65
|
-
|
|
64
|
+
addSpanEvent("hebo.provider.resolved");
|
|
65
|
+
const genAiSignalLevel = config.telemetry?.signals?.gen_ai;
|
|
66
|
+
const genAiGeneralAttrs = getEmbeddingsGeneralAttributes(ctx, genAiSignalLevel);
|
|
67
|
+
setSpanAttributes(genAiGeneralAttrs);
|
|
66
68
|
// Convert inputs to AI SDK call options.
|
|
67
69
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
68
70
|
logger.trace({ requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
69
71
|
addSpanEvent("hebo.options.prepared");
|
|
72
|
+
setSpanAttributes(getEmbeddingsRequestAttributes(inputs, genAiSignalLevel));
|
|
70
73
|
// Build middleware chain (model -> forward params -> provider).
|
|
71
74
|
const embeddingModelWithMiddleware = wrapEmbeddingModel({
|
|
72
75
|
model: embeddingModel,
|
|
@@ -82,12 +85,18 @@ export const embeddings = (config) => {
|
|
|
82
85
|
});
|
|
83
86
|
logger.trace({ requestId, result }, "[embeddings] AI SDK result");
|
|
84
87
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
88
|
+
// Transform result.
|
|
85
89
|
ctx.result = toEmbeddings(result, ctx.modelId);
|
|
86
90
|
addSpanEvent("hebo.result.transformed");
|
|
91
|
+
const genAiResponseAttrs = getEmbeddingsResponseAttributes(ctx.result, genAiSignalLevel);
|
|
92
|
+
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
93
|
+
setSpanAttributes(genAiResponseAttrs);
|
|
87
94
|
if (hooks?.after) {
|
|
88
95
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
89
96
|
addSpanEvent("hebo.hooks.after.completed");
|
|
90
97
|
}
|
|
98
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
99
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
91
100
|
return ctx.result;
|
|
92
101
|
};
|
|
93
102
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
+
import type { Embeddings, EmbeddingsInputs } from "./schema";
|
|
3
|
+
import { type GatewayContext, type TelemetrySignalLevel } from "../../types";
|
|
4
|
+
export declare const getEmbeddingsGeneralAttributes: (ctx: GatewayContext, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
5
|
+
export declare const getEmbeddingsRequestAttributes: (inputs: EmbeddingsInputs, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
6
|
+
export declare const getEmbeddingsResponseAttributes: (embeddings: Embeddings, signalLevel?: TelemetrySignalLevel) => Attributes;
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import {} from "../../types";
|
|
2
|
+
// Baseline OpenTelemetry gen_ai attributes shared by every embeddings
// signal (span attributes and metric dimensions). Returns an empty object
// when the gen_ai signal level is unset or explicitly "off".
export const getEmbeddingsGeneralAttributes = (ctx, signalLevel) => {
    const disabled = !signalLevel || signalLevel === "off";
    if (disabled) {
        return {};
    }
    // Prefer the model name exactly as the client sent it in the request
    // body; fall back to the gateway's parsed model id when the body does
    // not carry a string `model` field.
    const bodyModel = ctx.body?.model;
    const requestModel = typeof bodyModel === "string" ? bodyModel : ctx.modelId;
    const attrs = {
        "gen_ai.operation.name": ctx.operation,
        "gen_ai.request.model": requestModel,
        "gen_ai.response.model": ctx.resolvedModelId,
        "gen_ai.provider.name": ctx.resolvedProviderId,
    };
    return attrs;
};
|
|
13
|
+
// Request-side gen_ai attributes for an embeddings call.
// Returns an empty object when the gen_ai signal level is unset or "off";
// opt-in (non-"required") detail attributes are added on top.
//
// Fix: only set "gen_ai.embeddings.dimension.count" when `dimensions` is
// actually present. The previous code always assigned the key, so a request
// without `dimensions` produced an attribute with value `undefined`, which
// the OpenTelemetry JS API treats as invalid (it is dropped with a diag
// warning) and which leaks `undefined` to anyone iterating the object.
export const getEmbeddingsRequestAttributes = (inputs, signalLevel) => {
    if (!signalLevel || signalLevel === "off")
        return {};
    const attrs = {};
    // "required" level carries only the baseline attributes; richer detail
    // is reserved for the more verbose levels.
    if (signalLevel !== "required" && inputs.dimensions !== undefined) {
        attrs["gen_ai.embeddings.dimension.count"] = inputs.dimensions;
    }
    return attrs;
};
|
|
24
|
+
// Response-side gen_ai attributes (token usage) for an embeddings call.
// Returns an empty object when the gen_ai signal level is unset or "off";
// usage counters are added only at the more verbose (non-"required") levels.
//
// Fix: only set the usage attributes when the provider actually reported
// them. The previous code always assigned both keys, so a response without
// `usage` yielded attributes with value `undefined`, which the OpenTelemetry
// JS API treats as invalid (dropped with a diag warning) and which leaks
// `undefined` entries to recordTokenUsage / setSpanAttributes callers.
export const getEmbeddingsResponseAttributes = (embeddings, signalLevel) => {
    if (!signalLevel || signalLevel === "off")
        return {};
    const attrs = {};
    if (signalLevel !== "required") {
        const usage = embeddings.usage;
        if (usage?.prompt_tokens !== undefined) {
            attrs["gen_ai.usage.input_tokens"] = usage.prompt_tokens;
        }
        if (usage?.total_tokens !== undefined) {
            attrs["gen_ai.usage.total_tokens"] = usage.total_tokens;
        }
    }
    return attrs;
};
|
|
@@ -4,12 +4,11 @@ import { toModels, toModel } from "./converters";
|
|
|
4
4
|
export const models = (config) => {
|
|
5
5
|
// eslint-disable-next-line require-await
|
|
6
6
|
const handler = async (ctx) => {
|
|
7
|
-
|
|
8
|
-
if (!request || request.method !== "GET") {
|
|
7
|
+
ctx.operation = "models";
|
|
8
|
+
if (!ctx.request || ctx.request.method !== "GET") {
|
|
9
9
|
throw new GatewayError("Method Not Allowed", 405);
|
|
10
10
|
}
|
|
11
|
-
|
|
12
|
-
const rawId = request.url.split("/models/", 2)[1]?.split("?", 1)[0];
|
|
11
|
+
const rawId = ctx.request.url.split("/models/", 2)[1]?.split("?", 1)[0];
|
|
13
12
|
if (!rawId) {
|
|
14
13
|
return toModels(ctx.models);
|
|
15
14
|
}
|
package/dist/errors/gateway.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export declare class GatewayError extends Error {
|
|
2
2
|
readonly status: number;
|
|
3
3
|
readonly code: string;
|
|
4
|
-
constructor(error:
|
|
4
|
+
constructor(error: unknown, status: number, code?: string, cause?: unknown);
|
|
5
5
|
}
|
package/dist/errors/gateway.js
CHANGED
|
@@ -3,11 +3,10 @@ export class GatewayError extends Error {
|
|
|
3
3
|
status;
|
|
4
4
|
code;
|
|
5
5
|
constructor(error, status, code, cause) {
|
|
6
|
-
const
|
|
7
|
-
super(
|
|
6
|
+
const isError = error instanceof Error;
|
|
7
|
+
super(isError ? error.message : String(error));
|
|
8
|
+
this.cause = cause ?? (isError ? error : undefined);
|
|
8
9
|
this.status = status;
|
|
9
10
|
this.code = code ?? STATUS_CODE(status);
|
|
10
|
-
this.cause =
|
|
11
|
-
cause ?? (typeof error === "string" ? undefined : error.cause);
|
|
12
11
|
}
|
|
13
12
|
}
|