@hebo-ai/gateway 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +131 -32
  2. package/dist/endpoints/chat-completions/converters.d.ts +4 -21
  3. package/dist/endpoints/chat-completions/converters.js +23 -160
  4. package/dist/endpoints/chat-completions/handler.js +2 -2
  5. package/dist/endpoints/chat-completions/schema.d.ts +45 -101
  6. package/dist/endpoints/chat-completions/schema.js +13 -69
  7. package/dist/endpoints/conversations/converters.js +2 -3
  8. package/dist/endpoints/conversations/schema.d.ts +506 -644
  9. package/dist/endpoints/conversations/schema.js +8 -159
  10. package/dist/endpoints/conversations/storage/dialects/greptime.js +4 -2
  11. package/dist/endpoints/conversations/storage/dialects/mysql.js +3 -1
  12. package/dist/endpoints/conversations/storage/dialects/postgres.js +6 -3
  13. package/dist/endpoints/conversations/storage/dialects/sqlite.js +3 -1
  14. package/dist/endpoints/conversations/storage/sql.js +11 -6
  15. package/dist/endpoints/embeddings/handler.js +1 -1
  16. package/dist/endpoints/responses/converters.d.ts +17 -0
  17. package/dist/endpoints/responses/converters.js +1034 -0
  18. package/dist/endpoints/responses/handler.d.ts +2 -0
  19. package/dist/endpoints/responses/handler.js +137 -0
  20. package/dist/endpoints/responses/index.d.ts +4 -0
  21. package/dist/endpoints/responses/index.js +4 -0
  22. package/dist/endpoints/responses/otel.d.ts +6 -0
  23. package/dist/endpoints/responses/otel.js +221 -0
  24. package/dist/endpoints/responses/schema.d.ts +2109 -0
  25. package/dist/endpoints/responses/schema.js +314 -0
  26. package/dist/endpoints/shared/converters.d.ts +55 -0
  27. package/dist/endpoints/shared/converters.js +179 -0
  28. package/dist/endpoints/shared/schema.d.ts +70 -0
  29. package/dist/endpoints/shared/schema.js +46 -0
  30. package/dist/gateway.d.ts +1 -0
  31. package/dist/gateway.js +2 -0
  32. package/dist/index.d.ts +0 -4
  33. package/dist/index.js +0 -4
  34. package/dist/lifecycle.js +46 -29
  35. package/dist/models/anthropic/middleware.d.ts +1 -1
  36. package/dist/models/google/middleware.d.ts +1 -1
  37. package/dist/providers/registry.d.ts +1 -1
  38. package/dist/types.d.ts +18 -6
  39. package/dist/utils/preset.js +0 -1
  40. package/package.json +5 -1
@@ -0,0 +1,2 @@
1
import type { GatewayConfig, Endpoint } from "../../types";
/**
 * Creates the OpenAI-compatible `/responses` endpoint from the gateway
 * configuration. The returned `Endpoint` exposes the WinterCG request handler.
 */
export declare const responses: (config: GatewayConfig) => Endpoint;
@@ -0,0 +1,137 @@
1
+ import { generateText, Output, streamText, wrapLanguageModel, } from "ai";
2
+ import * as z from "zod";
3
+ import { GatewayError } from "../../errors/gateway";
4
+ import { winterCgHandler } from "../../lifecycle";
5
+ import { logger } from "../../logger";
6
+ import { modelMiddlewareMatcher } from "../../middleware/matcher";
7
+ import { resolveProvider } from "../../providers/registry";
8
+ import { getGenAiGeneralAttributes, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
9
+ import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
10
+ import { prepareForwardHeaders } from "../../utils/request";
11
+ import { convertToTextCallOptions, toResponses, toResponsesStream } from "./converters";
12
+ import { getResponsesRequestAttributes, getResponsesResponseAttributes } from "./otel";
13
+ import { ResponsesBodySchema } from "./schema";
14
// Factory for the OpenAI-compatible `/responses` endpoint. The handler
// parses and validates the request, resolves model and provider (with
// optional hook overrides), invokes the AI SDK in streaming or blocking
// mode, converts the result to the Responses wire format, and records
// gen_ai telemetry along the way.
export const responses = (config) => {
    const hooks = config.hooks;
    // ctx: per-request gateway context; cfg: resolved gateway configuration.
    const handler = async (ctx, cfg) => {
        // Start timestamp for the time-per-output-token metric recorded later.
        const start = performance.now();
        ctx.operation = "responses";
        setSpanAttributes({ "gen_ai.operation.name": ctx.operation });
        addSpanEvent("hebo.handler.started");
        // Only POST is accepted on this endpoint.
        if (!ctx.request || ctx.request.method !== "POST") {
            throw new GatewayError("Method Not Allowed", 405);
        }
        try {
            // oxlint-disable-next-line no-unsafe-assignment
            ctx.body = await ctx.request.json();
        }
        catch {
            // Body was not parseable JSON.
            throw new GatewayError("Invalid JSON", 400);
        }
        logger.trace({ requestId: ctx.requestId, body: ctx.body }, "[responses] ResponsesBody");
        addSpanEvent("hebo.request.deserialized");
        // Validate against the Responses schema; reject with a readable zod error.
        const parsed = ResponsesBodySchema.safeParse(ctx.body);
        if (!parsed.success) {
            throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
        }
        ctx.body = parsed.data;
        addSpanEvent("hebo.request.parsed");
        // The `before` hook may replace the parsed body.
        if (hooks?.before) {
            ctx.body = (await hooks.before(ctx)) ?? ctx.body;
            addSpanEvent("hebo.hooks.before.completed");
        }
        ctx.modelId = ctx.body.model;
        // Hooks may map the requested model id to a different concrete model id.
        ctx.resolvedModelId =
            (await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
        logger.debug(`[responses] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
        addSpanEvent("hebo.model.resolved");
        // Provider: a hook override wins; otherwise resolve from the registry.
        const override = await hooks?.resolveProvider?.(ctx);
        ctx.provider =
            override ??
                resolveProvider({
                    providers: ctx.providers,
                    models: ctx.models,
                    modelId: ctx.resolvedModelId,
                    operation: ctx.operation,
                });
        const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
        ctx.resolvedProviderId = languageModel.provider;
        logger.debug(`[responses] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
        addSpanEvent("hebo.provider.resolved");
        const genAiSignalLevel = cfg.telemetry?.signals?.gen_ai;
        const genAiGeneralAttrs = getGenAiGeneralAttributes(ctx, genAiSignalLevel);
        setSpanAttributes(genAiGeneralAttrs);
        // Strip gateway-level fields; the remainder becomes AI SDK call options.
        const { model: _model, stream, ...inputs } = ctx.body;
        const textOptions = convertToTextCallOptions(inputs);
        logger.trace({ requestId: ctx.requestId, options: textOptions }, "[responses] AI SDK options");
        addSpanEvent("hebo.options.prepared");
        setSpanAttributes(getResponsesRequestAttributes(ctx.body, genAiSignalLevel));
        // Attach whatever middleware is registered for this model/provider pair.
        const languageModelWithMiddleware = wrapLanguageModel({
            model: languageModel,
            middleware: modelMiddlewareMatcher.for(ctx.resolvedModelId, languageModel.provider),
        });
        if (stream) {
            addSpanEvent("hebo.ai-sdk.started");
            const result = streamText({
                model: languageModelWithMiddleware,
                headers: prepareForwardHeaders(ctx.request),
                abortSignal: ctx.request.signal,
                timeout: {
                    // service_tier selects which configured timeout budget applies.
                    totalMs: ctx.body.service_tier === "flex" ? cfg.timeouts.flex : cfg.timeouts.normal,
                },
                onAbort: () => {
                    throw new DOMException("The operation was aborted.", "AbortError");
                },
                // NOTE(review): onError is a deliberate no-op here; confirm stream
                // errors are surfaced/handled downstream (e.g. in toResponsesStream).
                onError: () => { },
                onFinish: (res) => {
                    // Final-chunk telemetry: convert the finished stream result and
                    // record response attributes plus token metrics.
                    addSpanEvent("hebo.ai-sdk.completed");
                    const streamResult = toResponses(res, ctx.resolvedModelId, ctx.body.metadata);
                    logger.trace({ requestId: ctx.requestId, result: streamResult }, "[responses] Responses");
                    addSpanEvent("hebo.result.transformed");
                    const genAiResponseAttrs = getResponsesResponseAttributes(streamResult, genAiSignalLevel, res.finishReason);
                    setSpanAttributes(genAiResponseAttrs);
                    recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
                    recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
                },
                experimental_include: {
                    requestBody: false,
                },
                includeRawChunks: false,
                // Caller-derived options are spread last so they take precedence.
                ...textOptions,
            });
            // Convert the AI SDK stream into the Responses streaming format.
            ctx.result = toResponsesStream(result, ctx.resolvedModelId, ctx.body.metadata);
            if (hooks?.after) {
                ctx.result = (await hooks.after(ctx)) ?? ctx.result;
                addSpanEvent("hebo.hooks.after.completed");
            }
            return ctx.result;
        }
        // Non-streaming (blocking) path.
        addSpanEvent("hebo.ai-sdk.started");
        const result = await generateText({
            model: languageModelWithMiddleware,
            headers: prepareForwardHeaders(ctx.request),
            abortSignal: ctx.request.signal,
            timeout: ctx.body.service_tier === "flex" ? cfg.timeouts.flex : cfg.timeouts.normal,
            experimental_include: {
                requestBody: false,
                responseBody: false,
            },
            ...textOptions,
        });
        logger.trace({ requestId: ctx.requestId, result }, "[responses] AI SDK result");
        addSpanEvent("hebo.ai-sdk.completed");
        ctx.result = toResponses(result, ctx.resolvedModelId, ctx.body.metadata);
        logger.trace({ requestId: ctx.requestId, result: ctx.result }, "[responses] Responses");
        addSpanEvent("hebo.result.transformed");
        const genAiResponseAttrs = getResponsesResponseAttributes(ctx.result, genAiSignalLevel, result.finishReason);
        setSpanAttributes(genAiResponseAttrs);
        recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
        // `after` hook runs before the duration metric so hook latency is included.
        if (hooks?.after) {
            ctx.result = (await hooks.after(ctx)) ?? ctx.result;
            addSpanEvent("hebo.hooks.after.completed");
        }
        recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
        return ctx.result;
    };
    return { handler: winterCgHandler(handler, config) };
};
@@ -0,0 +1,4 @@
1
+ export * from "./converters";
2
+ export * from "./handler";
3
+ export * from "./schema";
4
+ export * from "./otel";
@@ -0,0 +1,4 @@
1
+ export * from "./converters";
2
+ export * from "./handler";
3
+ export * from "./schema";
4
+ export * from "./otel";
@@ -0,0 +1,6 @@
1
import type { Attributes } from "@opentelemetry/api";
import type { FinishReason } from "ai";
import type { Responses, ResponsesBody } from "./schema";
import { type TelemetrySignalLevel } from "../../types";
/**
 * Builds `gen_ai.*` span attributes from a parsed /responses request body.
 * Returns an empty object when `signalLevel` is undefined or "off".
 */
export declare const getResponsesRequestAttributes: (body: ResponsesBody, signalLevel?: TelemetrySignalLevel) => Attributes;
/**
 * Builds `gen_ai.*` span attributes from a Responses result. When present,
 * `finishReason` (from the AI SDK) is preferred over the response status
 * for `gen_ai.response.finish_reasons`.
 */
export declare const getResponsesResponseAttributes: (responses: Responses, signalLevel?: TelemetrySignalLevel, finishReason?: FinishReason) => Attributes;
@@ -0,0 +1,221 @@
1
+ import {} from "../../types";
2
+ import { parseDataUrl } from "../../utils/url";
3
// Builds a redacted "blob" telemetry part: binary payloads are never written
// to traces — only the modality and, when known, the MIME type are kept.
const toBlobPart = (modality, mimeType) => ({
    type: "blob",
    modality,
    content: "[REDACTED_BINARY_DATA]",
    // Only attach mime_type when a truthy value was provided.
    ...(mimeType ? { mime_type: mimeType } : {}),
});
13
// Converts Responses-API input content (a plain string or an array of input
// parts) into redacted telemetry parts. Inline binary data becomes a blob
// placeholder, remote URLs become `uri` parts, file ids are recorded as
// text markers, and unrecognized part types are skipped.
const toInputParts = (content) => {
    if (typeof content === "string")
        return [{ type: "text", content }];
    const collected = [];
    for (const piece of content) {
        if (piece.type === "input_text") {
            collected.push({ type: "text", content: piece.text });
        }
        else if (piece.type === "input_image") {
            const url = piece.image_url;
            if (url && url.slice(0, 5).toLowerCase() === "data:") {
                // Inline data URL: redact the bytes but keep the MIME type.
                const { mimeType } = parseDataUrl(url);
                collected.push(toBlobPart("image", mimeType || undefined));
            }
            else if (url) {
                collected.push({ type: "uri", modality: "image", uri: url });
            }
            else if (piece.file_id) {
                collected.push({ type: "blob", modality: "image", content: `file_id:${piece.file_id}` });
            }
        }
        else if (piece.type === "input_audio") {
            collected.push(toBlobPart("audio", `audio/${piece.input_audio.format}`));
        }
        else if (piece.type === "input_file") {
            if (piece.file_data) {
                collected.push(toBlobPart("file"));
            }
            else if (piece.file_url) {
                collected.push({ type: "uri", modality: "file", uri: piece.file_url });
            }
            else if (piece.file_id) {
                collected.push({ type: "blob", modality: "file", content: `file_id:${piece.file_id}` });
            }
        }
    }
    return collected;
};
55
// Normalizes assistant output content (a string or an array of output-text
// parts) into text telemetry parts.
const toOutputTextParts = (content) => typeof content === "string"
    ? [{ type: "text", content }]
    : content.map((chunk) => ({ type: "text", content: chunk.text }));
61
// Converts a single Responses-API item into telemetry parts.
// Fix: the original switch had no default, so unrecognized item types
// returned `undefined`, which callers then used as a `parts` array
// (silently dropping the key from the serialized JSON). Now returns []
// for unknown types, consistent with toMessageParts.
const toItemParts = (item) => {
    switch (item.type) {
        case "message":
            return toMessageParts(item);
        case "function_call":
            return [
                {
                    type: "tool_call",
                    id: item.call_id,
                    name: item.name,
                    arguments: item.arguments,
                },
            ];
        case "function_call_output":
            return [
                {
                    type: "tool_call_response",
                    id: item.call_id,
                    response: typeof item.output === "string"
                        ? item.output
                        : // FUTURE: Use toInputParts() to preserve multimodal content once telemetry
                            // backends (like Langfuse) have a standard representation for multimodal
                            // tool responses. Currently collapsed to text-only for compatibility.
                            item.output.map((p) => (p.type === "input_text" ? p.text : "")).join(""),
                },
            ];
        case "reasoning": {
            const parts = [];
            // Summaries first, then (optional) full reasoning content.
            for (const s of item.summary) {
                parts.push({ type: "reasoning", content: s.text });
            }
            if (item.content) {
                for (const c of item.content) {
                    parts.push({ type: "reasoning", content: c.text });
                }
            }
            // Encrypted reasoning is never decrypted for telemetry.
            if (item.encrypted_content) {
                parts.push({ type: "reasoning", content: "[ENCRYPTED_REASONING]" });
            }
            return parts;
        }
        default:
            // Unknown item type: emit no parts rather than undefined.
            return [];
    }
};
104
// Converts a message item into telemetry parts, dispatching on role:
// assistant content is treated as output text; user/developer/system
// content goes through the input-part converter; anything else yields [].
const toMessageParts = (message) => {
    if (message.role === "assistant") {
        return toOutputTextParts(message.content);
    }
    if (message.role === "user" || message.role === "developer" || message.role === "system") {
        // FUTURE: remove once Langfuse supports gen_ai.system_instructions
        // https://github.com/langfuse/langfuse/issues/11607
        return toInputParts(message.content);
    }
    return [];
};
118
// Maps a parsed /responses request body onto gen_ai.* span attributes.
// `signalLevel` gates the output: undefined/"off" yields nothing,
// "required" yields the minimal set, other levels add sampling parameters
// and metadata, and "full" additionally serializes the input messages and
// tool definitions as JSON strings.
export const getResponsesRequestAttributes = (body, signalLevel) => {
    if (!signalLevel || signalLevel === "off")
        return {};
    const attrs = {};
    if (signalLevel !== "required") {
        attrs["gen_ai.request.stream"] = body.stream;
        attrs["gen_ai.request.service_tier"] = body.service_tier;
        attrs["gen_ai.request.frequency_penalty"] = body.frequency_penalty;
        attrs["gen_ai.request.max_tokens"] = body.max_output_tokens;
        attrs["gen_ai.request.presence_penalty"] = body.presence_penalty;
        attrs["gen_ai.request.temperature"] = body.temperature;
        attrs["gen_ai.request.top_p"] = body.top_p;
        // FUTURE: Support text.verbosity configuration
        if (body.metadata) {
            // Each metadata entry becomes its own namespaced attribute.
            for (const key in body.metadata) {
                attrs[`gen_ai.request.metadata.${key}`] = body.metadata[key];
            }
        }
    }
    if (signalLevel === "full") {
        const serialized = [];
        if (body.instructions) {
            // FUTURE: move system instructions from messages to here
            // blocker: https://github.com/langfuse/langfuse/issues/11607
            serialized.push(JSON.stringify({
                role: "system",
                parts: [{ type: "text", content: body.instructions }],
            }));
        }
        if (typeof body.input === "string") {
            serialized.push(JSON.stringify({
                role: "user",
                parts: [{ type: "text", content: body.input }],
            }));
        }
        else if (Array.isArray(body.input)) {
            for (const item of body.input) {
                // Message items keep their role; other items are keyed by type.
                const shaped = item.type === "message"
                    ? { role: item.role, parts: toItemParts(item) }
                    : { type: item.type, parts: toItemParts(item) };
                serialized.push(JSON.stringify(shaped));
            }
        }
        attrs["gen_ai.input.messages"] = serialized;
        attrs["gen_ai.tool.definitions"] = body.tools?.map((toolDef) => JSON.stringify(toolDef));
    }
    return attrs;
};
178
// Maps a Responses result onto gen_ai.* span attributes, gated by
// `signalLevel` like the request-side counterpart. A finishReason from the
// AI SDK, when given, takes precedence over the response status for
// gen_ai.response.finish_reasons.
export const getResponsesResponseAttributes = (responses, signalLevel, finishReason) => {
    if (!signalLevel || signalLevel === "off")
        return {};
    const attrs = { "gen_ai.response.id": responses.id };
    if (signalLevel !== "required") {
        attrs["gen_ai.response.finish_reasons"] = finishReason ? [finishReason] : [responses.status];
        attrs["gen_ai.response.service_tier"] = responses.service_tier;
        attrs["gen_ai.usage.total_tokens"] = responses.usage?.total_tokens;
        attrs["gen_ai.usage.input_tokens"] = responses.usage?.input_tokens;
        attrs["gen_ai.usage.cache_read.input_tokens"] = responses.usage?.input_tokens_details?.cached_tokens;
        attrs["gen_ai.usage.output_tokens"] = responses.usage?.output_tokens;
        attrs["gen_ai.usage.reasoning.output_tokens"] = responses.usage?.output_tokens_details?.reasoning_tokens;
    }
    if (signalLevel === "full") {
        // Serializes one output item; key insertion order is deliberate since
        // JSON.stringify preserves it in the emitted attribute value.
        const serializeItem = (item) => {
            const shaped = {
                type: item.type,
                status: item.status,
                parts: [],
            };
            if (item.type === "message") {
                shaped.role = item.role;
                shaped.parts = item.content.map((c) => ({ type: "text", content: c.text }));
            }
            else if (item.type === "function_call") {
                shaped.name = item.name;
                shaped.arguments = item.arguments;
            }
            else if (item.type === "reasoning") {
                // Casting needed because ResponsesOutputItem and ResponsesInputItem share
                // the ResponsesReasoningItem definition.
                shaped.parts = toItemParts(item);
            }
            return JSON.stringify(shaped);
        };
        attrs["gen_ai.output.messages"] = responses.output?.map(serializeItem);
    }
    return attrs;
};