@hebo-ai/gateway 0.4.0-beta.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -5
- package/dist/config.js +21 -7
- package/dist/endpoints/chat-completions/converters.d.ts +3 -3
- package/dist/endpoints/chat-completions/converters.js +16 -8
- package/dist/endpoints/chat-completions/handler.js +34 -27
- package/dist/endpoints/chat-completions/otel.d.ts +6 -0
- package/dist/endpoints/chat-completions/otel.js +127 -0
- package/dist/endpoints/embeddings/handler.js +19 -10
- package/dist/endpoints/embeddings/otel.d.ts +6 -0
- package/dist/endpoints/embeddings/otel.js +35 -0
- package/dist/endpoints/models/handler.js +3 -4
- package/dist/errors/gateway.d.ts +1 -1
- package/dist/errors/gateway.js +3 -4
- package/dist/errors/openai.js +11 -12
- package/dist/errors/utils.d.ts +3 -4
- package/dist/errors/utils.js +6 -6
- package/dist/gateway.js +1 -1
- package/dist/lifecycle.js +71 -29
- package/dist/middleware/matcher.js +1 -1
- package/dist/models/amazon/presets.d.ts +37 -37
- package/dist/models/amazon/presets.js +1 -1
- package/dist/models/anthropic/presets.d.ts +56 -56
- package/dist/models/cohere/presets.d.ts +54 -54
- package/dist/models/cohere/presets.js +2 -2
- package/dist/models/google/presets.d.ts +31 -31
- package/dist/models/google/presets.js +1 -1
- package/dist/models/meta/presets.d.ts +42 -42
- package/dist/models/openai/presets.d.ts +96 -96
- package/dist/models/openai/presets.js +1 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/voyage/presets.d.ts +92 -92
- package/dist/models/voyage/presets.js +1 -1
- package/dist/providers/registry.js +2 -2
- package/dist/telemetry/baggage.d.ts +1 -0
- package/dist/telemetry/baggage.js +24 -0
- package/dist/telemetry/fetch.d.ts +2 -1
- package/dist/telemetry/fetch.js +13 -3
- package/dist/telemetry/gen-ai.d.ts +5 -0
- package/dist/telemetry/gen-ai.js +60 -0
- package/dist/telemetry/http.d.ts +3 -0
- package/dist/telemetry/http.js +57 -0
- package/dist/telemetry/memory.d.ts +2 -0
- package/dist/telemetry/memory.js +27 -0
- package/dist/telemetry/span.d.ts +6 -3
- package/dist/telemetry/span.js +24 -36
- package/dist/telemetry/stream.d.ts +3 -7
- package/dist/telemetry/stream.js +26 -29
- package/dist/types.d.ts +16 -15
- package/dist/utils/headers.d.ts +1 -1
- package/dist/utils/headers.js +7 -9
- package/dist/utils/request.d.ts +0 -4
- package/dist/utils/request.js +0 -9
- package/dist/utils/response.js +1 -1
- package/package.json +5 -2
- package/src/config.ts +28 -7
- package/src/endpoints/chat-completions/converters.ts +18 -11
- package/src/endpoints/chat-completions/handler.ts +46 -28
- package/src/endpoints/chat-completions/otel.ts +161 -0
- package/src/endpoints/embeddings/handler.test.ts +2 -2
- package/src/endpoints/embeddings/handler.ts +28 -10
- package/src/endpoints/embeddings/otel.ts +56 -0
- package/src/endpoints/models/handler.ts +3 -5
- package/src/errors/gateway.ts +5 -5
- package/src/errors/openai.ts +25 -17
- package/src/errors/utils.ts +6 -7
- package/src/gateway.ts +1 -1
- package/src/lifecycle.ts +85 -32
- package/src/middleware/matcher.ts +1 -1
- package/src/models/amazon/presets.ts +1 -1
- package/src/models/cohere/presets.ts +2 -2
- package/src/models/google/presets.ts +1 -1
- package/src/models/openai/presets.ts +1 -1
- package/src/models/types.ts +1 -1
- package/src/models/voyage/presets.ts +1 -1
- package/src/providers/registry.ts +2 -2
- package/src/telemetry/baggage.ts +27 -0
- package/src/telemetry/fetch.ts +15 -3
- package/src/telemetry/gen-ai.ts +88 -0
- package/src/telemetry/http.ts +65 -0
- package/src/telemetry/memory.ts +36 -0
- package/src/telemetry/span.ts +28 -40
- package/src/telemetry/stream.ts +36 -40
- package/src/types.ts +18 -18
- package/src/utils/headers.ts +8 -19
- package/src/utils/request.ts +0 -11
- package/src/utils/response.ts +1 -1
- package/dist/telemetry/otel.d.ts +0 -2
- package/dist/telemetry/otel.js +0 -50
- package/dist/telemetry/utils.d.ts +0 -4
- package/dist/telemetry/utils.js +0 -223
- package/src/telemetry/otel.ts +0 -91
- package/src/telemetry/utils.ts +0 -273
package/src/telemetry/otel.ts
DELETED
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
import type { Attributes } from "@opentelemetry/api";
|
|
2
|
-
|
|
3
|
-
import { SpanStatusCode } from "@opentelemetry/api";
|
|
4
|
-
|
|
5
|
-
import type { GatewayConfigParsed, GatewayContext } from "../types";
|
|
6
|
-
|
|
7
|
-
import { initFetch } from "./fetch";
|
|
8
|
-
import { startSpan } from "./span";
|
|
9
|
-
import { instrumentStream } from "./stream";
|
|
10
|
-
import {
|
|
11
|
-
getAIAttributes,
|
|
12
|
-
getBaggageAttributes,
|
|
13
|
-
getRequestAttributes,
|
|
14
|
-
getResponseAttributes,
|
|
15
|
-
} from "./utils";
|
|
16
|
-
|
|
17
|
-
/**
 * Wraps a gateway handler so that every invocation is recorded on a span.
 *
 * The span is started before `run` executes and is finished exactly once:
 * - immediately after `run` for buffered responses,
 * - from the stream's `onComplete` callback for `ReadableStream` bodies,
 * - or right away (status 500) when `run` rejects or leaves `ctx.response` unset,
 *   so a failing request can no longer leak an unfinished span.
 *
 * @param run - The handler to instrument; it is executed inside the span's context.
 * @param config - Parsed gateway config; `config.telemetry` supplies the tracer and
 *   the attribute level ("required" / "recommended" / "full").
 * @returns An async handler with the same `(ctx) => Promise<void>` shape as `run`.
 * @throws Re-throws whatever `run` throws; throws `TypeError` when `run` resolves
 *   without setting `ctx.response`.
 */
export const withOtel =
  (run: (ctx: GatewayContext) => Promise<void>, config: GatewayConfigParsed) =>
  async (ctx: GatewayContext) => {
    const requestStart = performance.now();
    const aiSpan = startSpan(ctx.request.url, undefined, config.telemetry?.tracer);
    initFetch();

    /**
     * Collects all attributes, sets status and name, and finishes the span.
     * `stats.bytes` is only supplied by the stream instrumentation.
     */
    const endAiSpan = (status: number, stats?: { bytes: number }) => {
      const attrs: Attributes = getAIAttributes(
        ctx.body,
        ctx.streamResult ?? ctx.result,
        config.telemetry?.attributes,
        ctx.resolvedProviderId,
      );

      // Duration in seconds, rounded to four decimals.
      attrs["gen_ai.server.request.duration"] = Number(
        ((performance.now() - requestStart) / 1000).toFixed(4),
      );

      // NOTE(review): `isExisting` presumably means the span was created by an outer
      // instrumentation that already carries HTTP attributes — confirm in ./span.
      if (!aiSpan.isExisting) {
        Object.assign(
          attrs,
          getRequestAttributes(ctx.request, config.telemetry?.attributes),
          getResponseAttributes(ctx.response, config.telemetry?.attributes),
        );
      }

      Object.assign(attrs, getBaggageAttributes(ctx.request));

      if (config.telemetry?.attributes !== "required") {
        attrs["http.request.body.size"] = Number(ctx.request.headers.get("content-length") ?? 0);
        // Read the header from the response itself: the header attribute in `attrs`
        // only exists at level "full" on spans we own, which made this value 0 otherwise.
        attrs["http.response.body.size"] =
          stats?.bytes ?? Number(ctx.response?.headers.get("content-length") ?? 0);
      }

      if (config.telemetry?.attributes === "full") {
        attrs["http.request.body"] = JSON.stringify(ctx.body);
      }

      // A completion status of 200 defers to the actual response status; any other
      // completion status (e.g. reported by the stream wrapper) takes precedence.
      const realStatus = status === 200 ? (ctx.response?.status ?? status) : status;
      attrs["http.response.status_code_effective"] = realStatus;
      aiSpan.setStatus({ code: realStatus >= 500 ? SpanStatusCode.ERROR : SpanStatusCode.OK });

      if (ctx.operation && ctx.modelId) {
        aiSpan.updateName(`${ctx.operation} ${ctx.modelId}`);
      } else if (ctx.operation) {
        aiSpan.updateName(`${ctx.operation}`);
      }

      aiSpan.setAttributes(attrs);

      aiSpan.finish();
    };

    try {
      await aiSpan.runWithContext(() => run(ctx));
    } catch (error) {
      // The final HTTP status is unknown here; 500 marks the span as failed.
      // NOTE(review): if callers map thrown errors to 4xx responses, refine this status.
      endAiSpan(500);
      throw error;
    }

    const response = ctx.response;
    if (!response) {
      endAiSpan(500);
      throw new TypeError("withOtel: handler completed without setting ctx.response");
    }

    if (response.body instanceof ReadableStream) {
      // Streaming: the span stays open until the instrumented stream completes.
      const instrumented = instrumentStream(
        response.body,
        {
          onComplete: (status, params) => endAiSpan(status, params),
        },
        ctx.request.signal,
      );

      ctx.response = new Response(instrumented, {
        status: response.status,
        statusText: response.statusText,
        headers: response.headers,
      });
      return;
    }

    endAiSpan(response.status);
  };
|
package/src/telemetry/utils.ts
DELETED
|
@@ -1,273 +0,0 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
ChatCompletions,
|
|
3
|
-
ChatCompletionsBody,
|
|
4
|
-
ChatCompletionsContentPart,
|
|
5
|
-
ChatCompletionsMessage,
|
|
6
|
-
} from "../endpoints/chat-completions/schema";
|
|
7
|
-
import type { Embeddings, EmbeddingsBody } from "../endpoints/embeddings";
|
|
8
|
-
|
|
9
|
-
import { resolveRequestId } from "../utils/headers";
|
|
10
|
-
|
|
11
|
-
/** One message part in the shape that is JSON-serialized into `gen_ai.*` attributes. */
type GenAIPart = Record<string, unknown>;

/** Attribute level used when a caller does not pass one explicitly. */
const DEFAULT_ATTRIBUTES_LEVEL = "recommended";

/** Baggage members whose key starts with this prefix are copied onto the span. */
const HEBO_BAGGAGE_PREFIX = "hebo.";

/**
 * Wraps a plain string as a `text` part.
 *
 * @param content - The text to wrap.
 * @returns `{ type: "text", content }`.
 */
function toTextPart(content: string): GenAIPart {
  const part: GenAIPart = { type: "text", content };
  return part;
}
|
|
16
|
-
|
|
17
|
-
/**
 * Converts one chat message into the list of parts recorded in telemetry.
 *
 * - `assistant`: an optional text part (string content only) followed by one
 *   `tool_call` part per entry in `tool_calls`.
 * - `tool`: a single `tool_call_response` part.
 * - `user`: a text part for string content, or one part per content item
 *   (`text`, `image`, or `file`) for array content.
 * - any other role: no parts.
 *
 * @param message - The chat message to convert.
 * @returns The parts in their original order.
 */
const toMessageParts = (message: ChatCompletionsMessage): GenAIPart[] => {
  switch (message.role) {
    case "assistant": {
      const textParts: GenAIPart[] =
        typeof message.content === "string" ? [toTextPart(message.content)] : [];
      const toolCallParts: GenAIPart[] = Array.isArray(message.tool_calls)
        ? message.tool_calls.map((call) => ({
            type: "tool_call",
            id: call.id,
            name: call.function.name,
            arguments: call.function.arguments,
          }))
        : [];
      return [...textParts, ...toolCallParts];
    }

    case "tool":
      return [{ type: "tool_call_response", id: message.tool_call_id, content: message.content }];

    case "user": {
      if (typeof message.content === "string") return [toTextPart(message.content)];
      if (!Array.isArray(message.content)) return [];

      return (message.content as ChatCompletionsContentPart[]).map((part): GenAIPart => {
        if (part.type === "text") return toTextPart(part.text);
        if (part.type === "image_url") return { type: "image", content: part.image_url.url };
        return {
          type: "file",
          // FUTURE: optionally expose safe metadata without raw binary payloads.
          content: part.file.filename ?? "[REDACTED_BINARY_DATA]",
          media_type: part.file.media_type,
        };
      });
    }

    default:
      return [];
  }
};
|
|
62
|
-
|
|
63
|
-
/**
 * Builds HTTP request span attributes for the given attribute level.
 *
 * - always: method, full URL, and (when the URL parses) path, scheme, host and port;
 * - unless level is "required": request id and user agent;
 * - level "full": `content-type` / `content-length` request headers as
 *   single-element arrays. A header that is absent yields `undefined` rather than
 *   an array containing `undefined`.
 *
 * @param request - The incoming request; when omitted an empty object is returned.
 * @param attributesLevel - "required", "recommended" (default) or "full".
 * @returns A plain object of attribute names to values (values may be `undefined`).
 */
export const getRequestAttributes = (
  request?: Request,
  attributesLevel = DEFAULT_ATTRIBUTES_LEVEL,
) => {
  if (!request) return {};

  let url: URL | undefined;
  try {
    // FUTURE: use URL from lifecycle
    url = new URL(request.url);
  } catch {
    // Best effort: an unparsable URL only drops the URL-derived attributes below.
  }

  const attrs = {
    "http.request.method": request.method,
    "url.full": request.url,
    "url.path": url?.pathname,
    "url.scheme": url?.protocol.replace(":", ""),
    "server.address": url?.hostname,
    // `URL.port` is empty for the scheme's default port, so fall back to 443 / 80.
    "server.port": url
      ? url.port
        ? Number(url.port)
        : url.protocol === "https:"
          ? 443
          : 80
      : undefined,
  };

  if (attributesLevel !== "required") {
    Object.assign(attrs, {
      "http.request.id": resolveRequestId(request),
      "user_agent.original": request.headers.get("user-agent") ?? undefined,
    });
  }

  if (attributesLevel === "full") {
    // Wrap a header value in an array only when the header is actually present.
    const headerValues = (name: string): string[] | undefined => {
      const value = request.headers.get(name);
      return value === null ? undefined : [value];
    };

    Object.assign(attrs, {
      // FUTURE: "url.query"
      "http.request.header.content-type": headerValues("content-type"),
      "http.request.header.content-length": headerValues("content-length"),
      // FUTURE: "client.address"
    });
  }

  return attrs;
};
|
|
108
|
-
|
|
109
|
-
/**
 * Builds `gen_ai.*` span attributes from a request body and, when available,
 * its result.
 *
 * The request kind is detected from the body: a `messages` key means chat, an
 * `input` key means embeddings (embeddings wins for the operation name when both
 * are present). Which attributes are included depends on `attributesLevel`:
 * "required" keeps only the basics, anything else adds request parameters and
 * usage, and "full" additionally serializes messages and tool definitions.
 * A chat result that is a `ReadableStream` is skipped.
 *
 * @param body - Parsed request body.
 * @param result - Parsed result object, or a stream.
 * @param attributesLevel - "required", "recommended" (default) or "full".
 * @param providerName - Recorded as `gen_ai.provider.name`.
 * @returns A plain object of attributes; empty when neither body nor result is given.
 */
export const getAIAttributes = (
  body?: object,
  result?: object,
  attributesLevel = DEFAULT_ATTRIBUTES_LEVEL,
  providerName?: string,
) => {
  if (!body && !result) return {};

  const beyondRequired = attributesLevel !== "required";
  const fullDetail = attributesLevel === "full";
  const isChat = body ? "messages" in body : false;
  const isEmbeddings = body ? "input" in body : false;

  const attrs = {
    "gen_ai.operation.name": isEmbeddings ? "embeddings" : isChat ? "chat" : undefined,
    "gen_ai.output.type": isEmbeddings ? "embedding" : isChat ? "text" : undefined,
    "gen_ai.request.model": body && "model" in body ? body.model : undefined,
    "gen_ai.provider.name": providerName,
  };

  if (isChat) {
    // `isChat` already implies that `body` is set.
    const chatBody = body as ChatCompletionsBody;

    if (chatBody.seed !== undefined) {
      Object.assign(attrs, { "gen_ai.request.seed": chatBody.seed });
    }

    if (beyondRequired) {
      // A single stop string is normalized to a one-element list.
      let stopSequences;
      if (chatBody.stop) {
        stopSequences = Array.isArray(chatBody.stop) ? chatBody.stop : [chatBody.stop];
      }

      Object.assign(attrs, {
        "gen_ai.request.stream": chatBody.stream,
        "gen_ai.request.frequency_penalty": chatBody.frequency_penalty,
        "gen_ai.request.max_tokens": chatBody.max_completion_tokens,
        "gen_ai.request.presence_penalty": chatBody.presence_penalty,
        "gen_ai.request.stop_sequences": stopSequences,
        "gen_ai.request.temperature": chatBody.temperature,
        "gen_ai.request.top_p": chatBody.top_p,
      });
    }

    if (fullDetail) {
      // FUTURE: only construct once
      const systemInstructions: string[] = [];
      const inputMessages: string[] = [];
      for (const message of chatBody.messages) {
        if (message.role === "system") {
          systemInstructions.push(JSON.stringify({ parts: [toTextPart(message.content)] }));
        } else {
          inputMessages.push(
            JSON.stringify({ role: message.role, parts: toMessageParts(message) }),
          );
        }
      }

      Object.assign(attrs, {
        "gen_ai.system_instructions": systemInstructions,
        "gen_ai.input.messages": inputMessages,
        "gen_ai.tool.definitions": JSON.stringify(chatBody.tools),
      });
    }

    // FUTURE: implement streaming
    if (result && !(result instanceof ReadableStream)) {
      const completions = result as ChatCompletions;
      const usage = completions.usage;

      Object.assign(attrs, {
        "gen_ai.response.model": completions.model,
        "gen_ai.response.id": completions.id,
      });

      if (beyondRequired) {
        Object.assign(attrs, {
          "gen_ai.response.finish_reasons": completions.choices?.map(
            (choice) => choice.finish_reason,
          ),
          "gen_ai.usage.total_tokens": usage?.total_tokens,
          "gen_ai.usage.input_tokens": usage?.prompt_tokens,
          "gen_ai.usage.cached_tokens": usage?.prompt_tokens_details?.cached_tokens,
          "gen_ai.usage.output_tokens": usage?.completion_tokens,
          "gen_ai.usage.reasoning_tokens": usage?.completion_tokens_details?.reasoning_tokens,
        });
      }

      if (fullDetail) {
        Object.assign(attrs, {
          "gen_ai.output.messages": completions.choices?.map((choice) =>
            JSON.stringify({
              role: choice.message.role,
              parts: toMessageParts(choice.message),
              finish_reason: choice.finish_reason,
            }),
          ),
        });
      }
    }
  }

  if (isEmbeddings) {
    if (beyondRequired) {
      const embeddingsBody = body as EmbeddingsBody;
      Object.assign(attrs, {
        "gen_ai.embeddings.dimension.count": embeddingsBody.dimensions,
      });
    }

    if (result) {
      const embeddings = result as Embeddings;

      Object.assign(attrs, {
        "gen_ai.response.model": embeddings.model,
      });

      if (beyondRequired) {
        Object.assign(attrs, {
          "gen_ai.usage.input_tokens": embeddings.usage?.prompt_tokens,
          "gen_ai.usage.total_tokens": embeddings.usage?.total_tokens,
        });
      }
    }
  }

  return attrs;
};
|
|
228
|
-
|
|
229
|
-
/**
 * Builds HTTP response span attributes for the given attribute level.
 *
 * The status code is always included. At level "full" the `content-type` and
 * `content-length` response headers are added as single-element arrays; a header
 * that is absent yields `undefined` rather than an array containing `undefined`.
 *
 * @param response - The outgoing response; when omitted an empty object is returned.
 * @param attributesLevel - "required", "recommended" (default) or "full".
 * @returns A plain object of attribute names to values (values may be `undefined`).
 */
export const getResponseAttributes = (
  response?: Response,
  attributesLevel = DEFAULT_ATTRIBUTES_LEVEL,
) => {
  if (!response) return {};

  const attrs = {
    "http.response.status_code": response.status,
  };

  if (attributesLevel === "full") {
    // Wrap a header value in an array only when the header is actually present.
    const headerValues = (name: string): string[] | undefined => {
      const value = response.headers.get(name);
      return value === null ? undefined : [value];
    };

    Object.assign(attrs, {
      "http.response.header.content-type": headerValues("content-type"),
      "http.response.header.content-length": headerValues("content-length"),
    });
  }

  return attrs;
};
|
|
248
|
-
|
|
249
|
-
/**
 * Extracts span attributes from the request's `baggage` header.
 *
 * The header is a comma-separated list of `key=value[;property...]` members.
 * Only members whose key starts with `prefix` are kept; the prefix is stripped
 * from the attribute name and the value is percent-decoded (the raw value is
 * kept when decoding fails). Properties after the first `;` are ignored.
 *
 * Each member is split on its FIRST `=` only, so values that themselves contain
 * `=` (for example base64 padding) are preserved in full. Whitespace around the
 * key and the value is trimmed.
 *
 * @param request - The incoming request; when omitted, or when it has no
 *   `baggage` header, an empty object is returned.
 * @param prefix - Key prefix to select; defaults to `HEBO_BAGGAGE_PREFIX`.
 * @returns Attribute name → decoded value.
 */
export const getBaggageAttributes = (
  request?: Request,
  prefix: string = HEBO_BAGGAGE_PREFIX,
) => {
  const header = request?.headers.get("baggage");
  if (!header) return {};

  const attrs: Record<string, string> = {};

  for (const member of header.split(",")) {
    const separator = member.indexOf("=");
    if (separator === -1) continue;

    const key = member.slice(0, separator).trim();
    if (!key.startsWith(prefix)) continue;

    const name = key.slice(prefix.length);
    if (!name) continue;

    // Everything after the first ";" is member properties, not part of the value.
    const remainder = member.slice(separator + 1);
    const propertiesStart = remainder.indexOf(";");
    const rawValue = (
      propertiesStart === -1 ? remainder : remainder.slice(0, propertiesStart)
    ).trim();
    if (!rawValue) continue;

    let value = rawValue;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      // Malformed percent-encoding: keep the raw value rather than dropping the member.
    }

    attrs[name] = value;
  }

  return attrs;
};
|