@hebo-ai/gateway 0.9.4 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,142 @@
1
+ import { generateText, Output, streamText, wrapLanguageModel, } from "ai";
2
+ import * as z from "zod";
3
+ import { GatewayError } from "../../errors/gateway";
4
+ import { winterCgHandler } from "../../lifecycle";
5
+ import { logger } from "../../logger";
6
+ import { modelMiddlewareMatcher } from "../../middleware/matcher";
7
+ import { resolveProvider } from "../../providers/registry";
8
+ import { getGenAiGeneralAttributes, recordTimePerOutputToken, recordTimeToFirstToken, recordTokenUsage, } from "../../telemetry/gen-ai";
9
+ import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
10
+ import { parseRequestBody } from "../../utils/body";
11
+ import { prepareForwardHeaders } from "../../utils/request";
12
+ import { convertToTextCallOptions, toMessages, toMessagesStream } from "./converters";
13
+ import { getMessagesRequestAttributes, getMessagesResponseAttributes } from "./otel";
14
+ import { MessagesBodySchema } from "./schema";
15
/**
 * Creates the Anthropic-compatible `/v1/messages` endpoint handler.
 *
 * Pipeline: validate request -> run `before` hook -> resolve model + provider
 * -> call the AI SDK (`streamText` when `stream: true`, else `generateText`)
 * -> convert the result back to the Messages format -> run `after` hook.
 * Telemetry span events and gen_ai metrics are recorded at each stage.
 *
 * @param config - Gateway config; `config.hooks` may supply `before`,
 *   `resolveModelId`, `resolveProvider` and `after` callbacks.
 * @returns `{ handler }` wrapped for WinterCG-style runtimes.
 */
export const messages = (config) => {
    const hooks = config.hooks;
    const handler = async (ctx, cfg) => {
        const start = performance.now();
        ctx.operation = "messages";
        setSpanAttributes({ "gen_ai.operation.name": ctx.operation });
        addSpanEvent("hebo.handler.started");
        // Only POST is accepted on this route.
        if (!ctx.request || ctx.request.method !== "POST") {
            throw new GatewayError("Method Not Allowed", 405);
        }
        // Parse + validate input (handles Content-Encoding decompression + body size limits).
        ctx.body = (await parseRequestBody(ctx.request, cfg.maxBodySize));
        logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[messages] MessagesBody");
        addSpanEvent("hebo.request.deserialized");
        const parsed = MessagesBodySchema.safeParse(ctx.body);
        if (!parsed.success) {
            // FUTURE: consider adding body shape to metadata
            throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
        }
        ctx.body = parsed.data;
        addSpanEvent("hebo.request.parsed");
        // `before` hook may replace the validated body entirely; falling back
        // to the existing body when it returns nullish.
        if (hooks?.before) {
            ctx.body = (await hooks.before(ctx)) ?? ctx.body;
            addSpanEvent("hebo.hooks.before.completed");
        }
        ctx.modelId = ctx.body.model;
        // Hook may map the requested model id to a concrete deployment id.
        ctx.resolvedModelId =
            (await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
        logger.debug(`[messages] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
        addSpanEvent("hebo.model.resolved");
        // A provider returned by the hook wins over the registry lookup.
        const override = await hooks?.resolveProvider?.(ctx);
        ctx.provider =
            override ??
                resolveProvider({
                    providers: ctx.providers,
                    models: ctx.models,
                    modelId: ctx.resolvedModelId,
                    operation: ctx.operation,
                });
        const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
        ctx.resolvedProviderId = languageModel.provider;
        logger.debug(`[messages] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
        addSpanEvent("hebo.provider.resolved");
        const genAiSignalLevel = cfg.telemetry?.signals?.gen_ai;
        const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
        setSpanAttributes(genAiGeneralAttrs);
        // Strip routing-only fields before converting to AI SDK call options.
        const { model: _model, stream, ...inputs } = ctx.body;
        const textOptions = convertToTextCallOptions(inputs);
        logger.trace({ requestId: ctx.requestId, options: textOptions }, "[messages] AI SDK options");
        addSpanEvent("hebo.options.prepared");
        setSpanAttributes(getMessagesRequestAttributes(ctx.body, genAiSignalLevel));
        const languageModelWithMiddleware = wrapLanguageModel({
            model: languageModel,
            middleware: modelMiddlewareMatcher.for(ctx.resolvedModelId, languageModel.provider),
        });
        if (stream) {
            // --- Streaming path ---
            addSpanEvent("hebo.ai-sdk.started");
            // Time-to-first-token in ms; stays 0 until the first chunk arrives.
            let ttft = 0;
            const result = streamText({
                model: languageModelWithMiddleware,
                headers: prepareForwardHeaders(ctx.request),
                abortSignal: ctx.request.signal,
                timeout: {
                    totalMs: cfg.timeouts.normal,
                },
                // NOTE(review): throwing from onAbort relies on the AI SDK
                // surfacing this as the stream's error — confirm it propagates.
                onAbort: () => {
                    throw new DOMException("The operation was aborted.", "AbortError");
                },
                // NOTE(review): stream errors are deliberately swallowed here —
                // presumably they surface to the client via the converted
                // stream; confirm they are not lost silently.
                onError: () => { },
                onChunk: () => {
                    // Record time-to-first-token exactly once.
                    if (!ttft) {
                        ttft = performance.now() - start;
                        recordTimeToFirstToken(ttft, genAiGeneralAttrs, genAiSignalLevel);
                    }
                },
                // Runs after the stream completes: record response span
                // attributes and usage metrics.
                onFinish: (res) => {
                    addSpanEvent("hebo.ai-sdk.completed");
                    const streamResult = toMessages(res, ctx.resolvedModelId);
                    logger.trace({ requestId: ctx.requestId, result: streamResult }, "[messages] Messages");
                    addSpanEvent("hebo.result.transformed");
                    const genAiResponseAttrs = getMessagesResponseAttributes(streamResult, genAiSignalLevel, res.finishReason);
                    setSpanAttributes(genAiResponseAttrs);
                    recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
                    recordTimePerOutputToken(start, ttft, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
                },
                experimental_include: {
                    requestBody: false,
                },
                includeRawChunks: false,
                ...textOptions,
            });
            ctx.result = toMessagesStream(result, ctx.resolvedModelId);
            if (hooks?.after) {
                ctx.result = (await hooks.after(ctx)) ?? ctx.result;
                addSpanEvent("hebo.hooks.after.completed");
            }
            return ctx.result;
        }
        // --- Non-streaming path ---
        addSpanEvent("hebo.ai-sdk.started");
        const result = await generateText({
            model: languageModelWithMiddleware,
            headers: prepareForwardHeaders(ctx.request),
            abortSignal: ctx.request.signal,
            // NOTE(review): shape differs from the streaming call above, which
            // passes `{ totalMs: ... }` — confirm both forms are accepted.
            timeout: cfg.timeouts.normal,
            experimental_include: {
                requestBody: false,
                responseBody: false,
            },
            ...textOptions,
        });
        logger.trace({ requestId: ctx.requestId, result }, "[messages] AI SDK result");
        addSpanEvent("hebo.ai-sdk.completed");
        // Without streaming, TTFT is effectively the total generation latency.
        recordTimeToFirstToken(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
        ctx.result = toMessages(result, ctx.resolvedModelId);
        logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[messages] Messages");
        addSpanEvent("hebo.result.transformed");
        const genAiResponseAttrs = getMessagesResponseAttributes(ctx.result, genAiSignalLevel, result.finishReason);
        setSpanAttributes(genAiResponseAttrs);
        recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
        if (hooks?.after) {
            ctx.result = (await hooks.after(ctx)) ?? ctx.result;
            addSpanEvent("hebo.hooks.after.completed");
        }
        recordTimePerOutputToken(start, 0, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
        return ctx.result;
    };
    return { handler: winterCgHandler(handler, config) };
};
@@ -0,0 +1,4 @@
1
// Barrel file: re-exports the public surface of the messages endpoint module.
export * from "./converters";
export * from "./handler";
export * from "./schema";
export * from "./otel";
@@ -0,0 +1,4 @@
1
// Barrel file: re-exports the public surface of the messages endpoint module.
export * from "./converters";
export * from "./handler";
export * from "./schema";
export * from "./otel";
@@ -0,0 +1,6 @@
1
+ import type { Attributes } from "@opentelemetry/api";
2
+ import type { FinishReason } from "ai";
3
+ import { type TelemetrySignalLevel } from "../../types";
4
+ import type { Messages, MessagesBody } from "./schema";
5
/**
 * Builds OTel span attributes from an incoming Messages request body,
 * gated by the configured telemetry signal level.
 */
export declare const getMessagesRequestAttributes: (body: MessagesBody, signalLevel?: TelemetrySignalLevel) => Attributes;
/**
 * Builds OTel span attributes from a Messages response (id, finish reasons,
 * token usage, optionally full output), gated by the telemetry signal level.
 */
export declare const getMessagesResponseAttributes: (response: Messages, signalLevel?: TelemetrySignalLevel, finishReason?: FinishReason) => Attributes;
@@ -0,0 +1,171 @@
1
+ import {} from "../../types";
2
// Represents a binary payload in telemetry without leaking its contents;
// the optional mime_type is attached only when provided.
const toBlobPart = (modality, mimeType) => {
    const base = {
        type: "blob",
        modality,
        content: "[REDACTED_BINARY_DATA]",
    };
    return mimeType ? { ...base, mime_type: mimeType } : base;
};
12
// Maps one user-authored content block onto a telemetry message part.
// Inline binary data is redacted; remote sources keep their URI.
const toUserBlockParts = (block) => {
    if (block.type === "text") {
        return { type: "text", content: block.text };
    }
    if (block.type === "image") {
        return block.source.type === "base64"
            ? toBlobPart("image", block.source.media_type)
            : { type: "uri", modality: "image", uri: block.source.url };
    }
    if (block.type === "tool_result") {
        // Flatten mixed tool output down to its text pieces only.
        let response = "";
        if (typeof block.content === "string") {
            response = block.content;
        }
        else if (block.content) {
            response = block.content
                .map((p) => (p.type === "text" ? p.text : ""))
                .join("");
        }
        return { type: "tool_call_response", id: block.tool_use_id, response };
    }
    if (block.type === "document") {
        if (block.source.type === "base64") {
            return toBlobPart("file", block.source.media_type);
        }
        if (block.source.type === "url") {
            return { type: "uri", modality: "file", uri: block.source.url };
        }
        // Plain-text document sources carry their data inline.
        return { type: "text", content: block.source.data };
    }
    // Unknown block kinds keep their type tag but hide their payload.
    return { type: block.type, content: "[UNHANDLED_CONTENT_BLOCK]" };
};
43
// Converts one request message into the list of telemetry parts used for
// the gen_ai.input.messages span attribute.
const toMessageParts = (message) => {
    const { role, content } = message;
    // Shorthand string content becomes a single text part.
    if (typeof content === "string") {
        return [{ type: "text", content }];
    }
    const parts = [];
    for (const block of content) {
        if (role === "user") {
            parts.push(toUserBlockParts(block));
            continue;
        }
        // Assistant blocks; unrecognized kinds are dropped on purpose.
        if (block.type === "text") {
            parts.push({ type: "text", content: block.text });
        }
        else if (block.type === "tool_use") {
            const args = typeof block.input === "string"
                ? block.input
                : JSON.stringify(block.input);
            parts.push({
                type: "tool_call",
                id: block.id,
                name: block.name,
                arguments: args,
            });
        }
        else if (block.type === "thinking") {
            parts.push({ type: "reasoning", content: block.thinking });
        }
        else if (block.type === "redacted_thinking") {
            // Provider-encrypted reasoning is never recorded verbatim.
            parts.push({ type: "reasoning", content: "[ENCRYPTED_REASONING]" });
        }
    }
    return parts;
};
79
// Maps one assistant response content block onto a telemetry part used for
// the gen_ai.output.messages span attribute.
const toResponseBlockPart = (block) => {
    if (block.type === "text") {
        return { type: "text", content: block.text };
    }
    if (block.type === "tool_use") {
        const args = typeof block.input === "string"
            ? block.input
            : JSON.stringify(block.input);
        return { type: "tool_call", id: block.id, name: block.name, arguments: args };
    }
    if (block.type === "thinking") {
        return { type: "reasoning", content: block.thinking };
    }
    if (block.type === "redacted_thinking") {
        // Provider-encrypted reasoning is never recorded verbatim.
        return { type: "reasoning", content: "[ENCRYPTED_REASONING]" };
    }
    // Anything unrecognized is recorded as an opaque placeholder.
    return { type: "unknown", content: "[UNHANDLED_RESPONSE_BLOCK]" };
};
98
/**
 * Builds OTel span attributes for an incoming Messages request.
 *
 * @param body - Parsed Messages request body.
 * @param signalLevel - Telemetry verbosity: undefined/"off" emits nothing,
 *   "required" emits only mandatory attributes, any other level adds request
 *   parameters and metadata, and "full" additionally captures the prompt
 *   messages and tool definitions.
 * @returns Attributes to set on the active span.
 */
export const getMessagesRequestAttributes = (body, signalLevel) => {
    if (!signalLevel || signalLevel === "off")
        return {};
    const attrs = {};
    if (signalLevel !== "required") {
        Object.assign(attrs, {
            "gen_ai.request.stream": body.stream,
            "gen_ai.request.service_tier": body.service_tier,
            "gen_ai.request.max_tokens": body.max_tokens,
            "gen_ai.request.temperature": body.temperature,
            "gen_ai.request.top_p": body.top_p,
        });
        if (body.metadata) {
            // FIX: use Object.entries instead of for...in so only own
            // enumerable keys are recorded — for...in would also walk
            // inherited (potentially polluted) prototype keys into telemetry.
            for (const [key, value] of Object.entries(body.metadata)) {
                attrs[`gen_ai.request.metadata.${key}`] = value;
            }
        }
    }
    if (signalLevel === "full") {
        const inputMessages = [];
        // System prompt is reported as a synthetic leading "system" message.
        if (body.system) {
            const systemText = typeof body.system === "string"
                ? body.system
                : body.system.map((b) => b.text).join("");
            inputMessages.push(JSON.stringify({
                role: "system",
                parts: [{ type: "text", content: systemText }],
            }));
        }
        // One JSON-encoded entry per conversation message.
        for (const message of body.messages) {
            inputMessages.push(JSON.stringify({
                role: message.role,
                parts: toMessageParts(message),
            }));
        }
        Object.assign(attrs, {
            "gen_ai.input.messages": inputMessages,
            "gen_ai.tool.definitions": body.tools?.map((toolDef) => JSON.stringify(toolDef)),
        });
    }
    return attrs;
};
140
/**
 * Builds OTel span attributes for a Messages response, gated by signal level.
 * The response id is always present above "off"; finish reasons, service tier
 * and token usage are added unless the level is "required"; full output
 * capture happens only at "full".
 */
export const getMessagesResponseAttributes = (response, signalLevel, finishReason) => {
    if (!signalLevel || signalLevel === "off")
        return {};
    const attrs = { "gen_ai.response.id": response.id };
    if (signalLevel !== "required") {
        // Prefer the AI SDK finish reason; fall back to Anthropic's stop_reason.
        const reason = finishReason || response.stop_reason;
        attrs["gen_ai.response.finish_reasons"] = reason ? [reason] : [];
        attrs["gen_ai.response.service_tier"] = response.service_tier;
        attrs["gen_ai.usage.input_tokens"] = response.usage?.input_tokens;
        attrs["gen_ai.usage.output_tokens"] = response.usage?.output_tokens;
        attrs["gen_ai.usage.cache_read.input_tokens"] = response.usage?.cache_read_input_tokens;
        attrs["gen_ai.usage.cache_creation.input_tokens"] = response.usage?.cache_creation_input_tokens;
    }
    if (signalLevel === "full") {
        attrs["gen_ai.output.messages"] = [
            JSON.stringify({
                role: "assistant",
                parts: response.content.map(toResponseBlockPart),
            }),
        ];
    }
    return attrs;
};