@hebo-ai/gateway 0.8.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -32
- package/dist/endpoints/chat-completions/converters.d.ts +4 -21
- package/dist/endpoints/chat-completions/converters.js +23 -160
- package/dist/endpoints/chat-completions/handler.js +2 -2
- package/dist/endpoints/chat-completions/schema.d.ts +45 -101
- package/dist/endpoints/chat-completions/schema.js +13 -69
- package/dist/endpoints/conversations/converters.js +2 -3
- package/dist/endpoints/conversations/schema.d.ts +506 -644
- package/dist/endpoints/conversations/schema.js +8 -159
- package/dist/endpoints/conversations/storage/dialects/greptime.js +4 -2
- package/dist/endpoints/conversations/storage/dialects/mysql.js +3 -1
- package/dist/endpoints/conversations/storage/dialects/postgres.js +6 -3
- package/dist/endpoints/conversations/storage/dialects/sqlite.js +3 -1
- package/dist/endpoints/conversations/storage/sql.js +11 -6
- package/dist/endpoints/embeddings/handler.js +1 -1
- package/dist/endpoints/responses/converters.d.ts +17 -0
- package/dist/endpoints/responses/converters.js +1034 -0
- package/dist/endpoints/responses/handler.d.ts +2 -0
- package/dist/endpoints/responses/handler.js +137 -0
- package/dist/endpoints/responses/index.d.ts +4 -0
- package/dist/endpoints/responses/index.js +4 -0
- package/dist/endpoints/responses/otel.d.ts +6 -0
- package/dist/endpoints/responses/otel.js +221 -0
- package/dist/endpoints/responses/schema.d.ts +2109 -0
- package/dist/endpoints/responses/schema.js +314 -0
- package/dist/endpoints/shared/converters.d.ts +55 -0
- package/dist/endpoints/shared/converters.js +179 -0
- package/dist/endpoints/shared/schema.d.ts +70 -0
- package/dist/endpoints/shared/schema.js +46 -0
- package/dist/gateway.d.ts +1 -0
- package/dist/gateway.js +2 -0
- package/dist/index.d.ts +0 -4
- package/dist/index.js +0 -4
- package/dist/lifecycle.js +46 -29
- package/dist/models/anthropic/middleware.d.ts +1 -1
- package/dist/models/google/middleware.d.ts +1 -1
- package/dist/providers/registry.d.ts +1 -1
- package/dist/types.d.ts +18 -6
- package/dist/utils/preset.js +0 -1
- package/package.json +5 -1
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { generateText, Output, streamText, wrapLanguageModel, } from "ai";
|
|
2
|
+
import * as z from "zod";
|
|
3
|
+
import { GatewayError } from "../../errors/gateway";
|
|
4
|
+
import { winterCgHandler } from "../../lifecycle";
|
|
5
|
+
import { logger } from "../../logger";
|
|
6
|
+
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
|
+
import { resolveProvider } from "../../providers/registry";
|
|
8
|
+
import { getGenAiGeneralAttributes, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
9
|
+
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
10
|
+
import { prepareForwardHeaders } from "../../utils/request";
|
|
11
|
+
import { convertToTextCallOptions, toResponses, toResponsesStream } from "./converters";
|
|
12
|
+
import { getResponsesRequestAttributes, getResponsesResponseAttributes } from "./otel";
|
|
13
|
+
import { ResponsesBodySchema } from "./schema";
|
|
14
|
+
/**
 * Creates the /v1/responses endpoint handler (OpenAI Responses API shape).
 *
 * The returned handler:
 *   1. rejects non-POST requests (405) and invalid JSON (400),
 *   2. validates the body against ResponsesBodySchema (400 on failure),
 *   3. resolves the model id and provider, letting config hooks override both,
 *   4. dispatches to the AI SDK — streamText when `stream` is set, otherwise
 *      generateText — and converts the result back to the Responses format,
 *   5. records gen_ai.* span attributes, token-usage and time-per-output-token
 *      metrics, gated by the configured telemetry signal level.
 *
 * `config.hooks.before` / `hooks.after` may replace ctx.body / ctx.result by
 * returning a new value; returning undefined keeps the current one.
 */
export const responses = (config) => {
    const hooks = config.hooks;
    // ctx: per-request gateway context; cfg: resolved gateway configuration.
    const handler = async (ctx, cfg) => {
        // Wall-clock start, used later for the time-per-output-token metric.
        const start = performance.now();
        ctx.operation = "responses";
        setSpanAttributes({ "gen_ai.operation.name": ctx.operation });
        addSpanEvent("hebo.handler.started");
        // This endpoint only accepts POST.
        if (!ctx.request || ctx.request.method !== "POST") {
            throw new GatewayError("Method Not Allowed", 405);
        }
        try {
            // oxlint-disable-next-line no-unsafe-assignment
            ctx.body = await ctx.request.json();
        }
        catch {
            throw new GatewayError("Invalid JSON", 400);
        }
        logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[responses] ResponsesBody");
        addSpanEvent("hebo.request.deserialized");
        // Schema validation; the original ZodError is attached for callers.
        const parsed = ResponsesBodySchema.safeParse(ctx.body);
        if (!parsed.success) {
            throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
        }
        ctx.body = parsed.data;
        addSpanEvent("hebo.request.parsed");
        // The `before` hook may rewrite the request body.
        if (hooks?.before) {
            ctx.body = (await hooks.before(ctx)) ?? ctx.body;
            addSpanEvent("hebo.hooks.before.completed");
        }
        // Model resolution: hook override first, else the requested model id.
        ctx.modelId = ctx.body.model;
        ctx.resolvedModelId =
            (await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
        logger.debug(`[responses] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
        addSpanEvent("hebo.model.resolved");
        // Provider resolution: hook override first, else registry lookup.
        const override = await hooks?.resolveProvider?.(ctx);
        ctx.provider =
            override ??
                resolveProvider({
                    providers: ctx.providers,
                    models: ctx.models,
                    modelId: ctx.resolvedModelId,
                    operation: ctx.operation,
                });
        const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
        ctx.resolvedProviderId = languageModel.provider;
        logger.debug(`[responses] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
        addSpanEvent("hebo.provider.resolved");
        // Telemetry signal level gates which gen_ai.* attributes get recorded.
        const genAiSignalLevel = cfg.telemetry?.signals?.gen_ai;
        const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
        setSpanAttributes(genAiGeneralAttrs);
        // Strip `model` and `stream` before converting the remaining fields
        // into AI SDK text-call options.
        const { model: _model, stream, ...inputs } = ctx.body;
        const textOptions = convertToTextCallOptions(inputs);
        logger.trace({ requestId: ctx.requestId, options: textOptions }, "[responses] AI SDK options");
        addSpanEvent("hebo.options.prepared");
        setSpanAttributes(getResponsesRequestAttributes(ctx.body, genAiSignalLevel));
        const languageModelWithMiddleware = wrapLanguageModel({
            model: languageModel,
            middleware: modelMiddlewareMatcher.for(ctx.resolvedModelId, languageModel.provider),
        });
        if (stream) {
            // Streaming path: telemetry for the final result is recorded in
            // onFinish, after the stream has been fully produced.
            addSpanEvent("hebo.ai-sdk.started");
            const result = streamText({
                model: languageModelWithMiddleware,
                headers: prepareForwardHeaders(ctx.request),
                abortSignal: ctx.request.signal,
                timeout: {
                    // "flex" tier gets its own (longer) timeout budget.
                    totalMs: ctx.body.service_tier === "flex" ? cfg.timeouts.flex : cfg.timeouts.normal,
                },
                onAbort: () => {
                    // NOTE(review): rethrown as AbortError — presumably mapped to a
                    // client-abort response by winterCgHandler; confirm.
                    throw new DOMException("The operation was aborted.", "AbortError");
                },
                // NOTE(review): empty onError deliberately suppresses the AI SDK's
                // stream error callback — presumably errors surface through the
                // stream itself; confirm before changing.
                onError: () => { },
                onFinish: (res) => {
                    addSpanEvent("hebo.ai-sdk.completed");
                    const streamResult = toResponses(res, ctx.resolvedModelId, ctx.body.metadata);
                    logger.trace({ requestId: ctx.requestId, result: streamResult }, "[responses] Responses");
                    addSpanEvent("hebo.result.transformed");
                    const genAiResponseAttrs = getResponsesResponseAttributes(streamResult, genAiSignalLevel, res.finishReason);
                    setSpanAttributes(genAiResponseAttrs);
                    recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
                    recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
                },
                experimental_include: {
                    requestBody: false,
                },
                includeRawChunks: false,
                // Spread last so caller-derived options take precedence.
                ...textOptions,
            });
            // Convert the AI SDK stream into Responses SSE events.
            ctx.result = toResponsesStream(result, ctx.resolvedModelId, ctx.body.metadata);
            if (hooks?.after) {
                ctx.result = (await hooks.after(ctx)) ?? ctx.result;
                addSpanEvent("hebo.hooks.after.completed");
            }
            return ctx.result;
        }
        // Non-streaming path.
        addSpanEvent("hebo.ai-sdk.started");
        const result = await generateText({
            model: languageModelWithMiddleware,
            headers: prepareForwardHeaders(ctx.request),
            abortSignal: ctx.request.signal,
            timeout: ctx.body.service_tier === "flex" ? cfg.timeouts.flex : cfg.timeouts.normal,
            experimental_include: {
                requestBody: false,
                responseBody: false,
            },
            // Spread last so caller-derived options take precedence.
            ...textOptions,
        });
        logger.trace({ requestId: ctx.requestId, result }, "[responses] AI SDK result");
        addSpanEvent("hebo.ai-sdk.completed");
        ctx.result = toResponses(result, ctx.resolvedModelId, ctx.body.metadata);
        logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[responses] Responses");
        addSpanEvent("hebo.result.transformed");
        const genAiResponseAttrs = getResponsesResponseAttributes(ctx.result, genAiSignalLevel, result.finishReason);
        setSpanAttributes(genAiResponseAttrs);
        recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
        if (hooks?.after) {
            ctx.result = (await hooks.after(ctx)) ?? ctx.result;
            addSpanEvent("hebo.hooks.after.completed");
        }
        recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
        return ctx.result;
    };
    // Wrap in the WinterCG-style lifecycle adapter shared by all endpoints.
    return { handler: winterCgHandler(handler, config) };
};
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
+
import type { FinishReason } from "ai";
|
|
3
|
+
import type { Responses, ResponsesBody } from "./schema";
|
|
4
|
+
import { type TelemetrySignalLevel } from "../../types";
|
|
5
|
+
/** Maps a validated Responses request body to gen_ai.* span attributes, filtered by the telemetry signal level. */
export declare const getResponsesRequestAttributes: (body: ResponsesBody, signalLevel?: TelemetrySignalLevel) => Attributes;
/** Maps a Responses result (and, when available, the AI SDK finish reason) to gen_ai.* span attributes, filtered by the telemetry signal level. */
export declare const getResponsesResponseAttributes: (responses: Responses, signalLevel?: TelemetrySignalLevel, finishReason?: FinishReason) => Attributes;
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
import {} from "../../types";
|
|
2
|
+
import { parseDataUrl } from "../../utils/url";
|
|
3
|
+
/**
 * Builds a telemetry part for binary payloads without embedding raw bytes:
 * only the modality and, when provided, the MIME type are retained.
 */
const toBlobPart = (modality, mimeType) => {
    const base = { type: "blob", modality, content: "[REDACTED_BINARY_DATA]" };
    return mimeType ? { ...base, mime_type: mimeType } : base;
};
|
|
13
|
+
/**
 * Normalizes Responses-API input `content` (a string or an array of typed
 * parts) into telemetry parts. Inline binary data is redacted via toBlobPart,
 * remote references become `uri` parts, and file ids become pseudo-blob parts.
 * Parts of unrecognized type are skipped.
 */
const toInputParts = (content) => {
    if (typeof content === "string") {
        return [{ type: "text", content }];
    }
    const parts = [];
    for (const part of content) {
        if (part.type === "input_text") {
            parts.push({ type: "text", content: part.text });
        }
        else if (part.type === "input_image") {
            const url = part.image_url;
            if (url && /^data:/i.test(url)) {
                // Inline data URL: drop the payload, keep the MIME type.
                const { mimeType } = parseDataUrl(url);
                parts.push(toBlobPart("image", mimeType || undefined));
            }
            else if (url) {
                parts.push({ type: "uri", modality: "image", uri: url });
            }
            else if (part.file_id) {
                parts.push({ type: "blob", modality: "image", content: `file_id:${part.file_id}` });
            }
        }
        else if (part.type === "input_audio") {
            parts.push(toBlobPart("audio", `audio/${part.input_audio.format}`));
        }
        else if (part.type === "input_file") {
            if (part.file_data) {
                parts.push(toBlobPart("file"));
            }
            else if (part.file_url) {
                parts.push({ type: "uri", modality: "file", uri: part.file_url });
            }
            else if (part.file_id) {
                parts.push({ type: "blob", modality: "file", content: `file_id:${part.file_id}` });
            }
        }
    }
    return parts;
};
|
|
55
|
+
/** Maps assistant output content (plain string or output_text parts) to text parts. */
const toOutputTextParts = (content) =>
    typeof content === "string"
        ? [{ type: "text", content }]
        : content.map(({ text }) => ({ type: "text", content: text }));
|
|
61
|
+
/**
 * Flattens one Responses input item into telemetry parts. Messages delegate
 * to toMessageParts; function calls/results become tool_call and
 * tool_call_response parts; reasoning items become reasoning parts, with
 * encrypted reasoning redacted. Unrecognized item types yield undefined.
 */
const toItemParts = (item) => {
    if (item.type === "message") {
        return toMessageParts(item);
    }
    if (item.type === "function_call") {
        return [{ type: "tool_call", id: item.call_id, name: item.name, arguments: item.arguments }];
    }
    if (item.type === "function_call_output") {
        const response = typeof item.output === "string"
            ? item.output
            : // FUTURE: Use toInputParts() to preserve multimodal content once telemetry
              // backends (like Langfuse) have a standard representation for multimodal
              // tool responses. Currently collapsed to text-only for compatibility.
              item.output.map((p) => (p.type === "input_text" ? p.text : "")).join("");
        return [{ type: "tool_call_response", id: item.call_id, response }];
    }
    if (item.type === "reasoning") {
        const parts = item.summary.map((s) => ({ type: "reasoning", content: s.text }));
        if (item.content) {
            for (const c of item.content) {
                parts.push({ type: "reasoning", content: c.text });
            }
        }
        if (item.encrypted_content) {
            parts.push({ type: "reasoning", content: "[ENCRYPTED_REASONING]" });
        }
        return parts;
    }
    return undefined;
};
|
|
104
|
+
/**
 * Converts a Responses message item into telemetry parts by role:
 * assistant messages carry output text, while user/developer/system messages
 * carry input content. Any other role produces no parts.
 */
const toMessageParts = (item) => {
    const { role, content } = item;
    if (role === "assistant") {
        return toOutputTextParts(content);
    }
    // FUTURE: remove once Langfuse supports gen_ai.system_instructions
    // https://github.com/langfuse/langfuse/issues/11607
    if (role === "user" || role === "developer" || role === "system") {
        return toInputParts(content);
    }
    return [];
};
|
|
118
|
+
/**
 * Derives OpenTelemetry gen_ai.* request attributes from a Responses request
 * body, gated by the configured telemetry signal level:
 *   - "off"/unset: records nothing;
 *   - "required": records no request attributes;
 *   - other levels: records sampling parameters and request metadata;
 *   - "full": additionally records serialized input messages and tool definitions.
 */
export const getResponsesRequestAttributes = (body, signalLevel) => {
    if (!signalLevel || signalLevel === "off") {
        return {};
    }
    const attrs = {};
    if (signalLevel !== "required") {
        attrs["gen_ai.request.stream"] = body.stream;
        attrs["gen_ai.request.service_tier"] = body.service_tier;
        attrs["gen_ai.request.frequency_penalty"] = body.frequency_penalty;
        attrs["gen_ai.request.max_tokens"] = body.max_output_tokens;
        attrs["gen_ai.request.presence_penalty"] = body.presence_penalty;
        attrs["gen_ai.request.temperature"] = body.temperature;
        attrs["gen_ai.request.top_p"] = body.top_p;
        // FUTURE: Support text.verbosity configuration
        if (body.metadata) {
            for (const [key, value] of Object.entries(body.metadata)) {
                attrs[`gen_ai.request.metadata.${key}`] = value;
            }
        }
    }
    if (signalLevel === "full") {
        const inputMessages = [];
        if (body.instructions) {
            // FUTURE: move system instructions from messages to here
            // blocker: https://github.com/langfuse/langfuse/issues/11607
            inputMessages.push(JSON.stringify({
                role: "system",
                parts: [{ type: "text", content: body.instructions }],
            }));
        }
        if (typeof body.input === "string") {
            inputMessages.push(JSON.stringify({
                role: "user",
                parts: [{ type: "text", content: body.input }],
            }));
        }
        else if (Array.isArray(body.input)) {
            for (const item of body.input) {
                // Messages record their role; other items record their type.
                const message = item.type === "message"
                    ? { role: item.role, parts: toItemParts(item) }
                    : { type: item.type, parts: toItemParts(item) };
                inputMessages.push(JSON.stringify(message));
            }
        }
        attrs["gen_ai.input.messages"] = inputMessages;
        attrs["gen_ai.tool.definitions"] = body.tools?.map((toolDef) => JSON.stringify(toolDef));
    }
    return attrs;
};
|
|
178
|
+
/**
 * Derives OpenTelemetry gen_ai.* response attributes from a Responses result,
 * gated by the configured telemetry signal level:
 *   - "off"/unset: records nothing;
 *   - "required": records only the response id;
 *   - other levels: additionally records finish reasons, service tier and usage;
 *   - "full": additionally records serialized output messages.
 * An AI SDK finishReason, when supplied, takes precedence over the response
 * status for gen_ai.response.finish_reasons.
 */
export const getResponsesResponseAttributes = (responses, signalLevel, finishReason) => {
    if (!signalLevel || signalLevel === "off") {
        return {};
    }
    const attrs = { "gen_ai.response.id": responses.id };
    if (signalLevel !== "required") {
        attrs["gen_ai.response.finish_reasons"] = finishReason ? [finishReason] : [responses.status];
        attrs["gen_ai.response.service_tier"] = responses.service_tier;
        attrs["gen_ai.usage.total_tokens"] = responses.usage?.total_tokens;
        attrs["gen_ai.usage.input_tokens"] = responses.usage?.input_tokens;
        attrs["gen_ai.usage.cache_read.input_tokens"] = responses.usage?.input_tokens_details?.cached_tokens;
        attrs["gen_ai.usage.output_tokens"] = responses.usage?.output_tokens;
        attrs["gen_ai.usage.reasoning.output_tokens"] = responses.usage?.output_tokens_details?.reasoning_tokens;
    }
    if (signalLevel === "full") {
        attrs["gen_ai.output.messages"] = responses.output?.map((item) => {
            const entry = { type: item.type, status: item.status, parts: [] };
            if (item.type === "message") {
                entry.role = item.role;
                entry.parts = item.content.map((c) => ({ type: "text", content: c.text }));
            }
            else if (item.type === "function_call") {
                entry.name = item.name;
                entry.arguments = item.arguments;
            }
            else if (item.type === "reasoning") {
                // Reasoning output items share the ResponsesReasoningItem shape
                // with input items, so the input-side conversion is reused.
                entry.parts = toItemParts(item);
            }
            return JSON.stringify(entry);
        });
    }
    return attrs;
};
|