@hebo-ai/gateway 0.4.0-beta.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/README.md +40 -5
  2. package/dist/config.js +21 -7
  3. package/dist/endpoints/chat-completions/converters.d.ts +3 -3
  4. package/dist/endpoints/chat-completions/converters.js +16 -8
  5. package/dist/endpoints/chat-completions/handler.js +34 -27
  6. package/dist/endpoints/chat-completions/otel.d.ts +6 -0
  7. package/dist/endpoints/chat-completions/otel.js +127 -0
  8. package/dist/endpoints/embeddings/handler.js +19 -10
  9. package/dist/endpoints/embeddings/otel.d.ts +6 -0
  10. package/dist/endpoints/embeddings/otel.js +35 -0
  11. package/dist/endpoints/models/handler.js +3 -4
  12. package/dist/errors/gateway.d.ts +1 -1
  13. package/dist/errors/gateway.js +3 -4
  14. package/dist/errors/openai.js +11 -12
  15. package/dist/errors/utils.d.ts +3 -4
  16. package/dist/errors/utils.js +6 -6
  17. package/dist/gateway.js +1 -1
  18. package/dist/lifecycle.js +71 -29
  19. package/dist/middleware/matcher.js +1 -1
  20. package/dist/models/amazon/presets.d.ts +37 -37
  21. package/dist/models/amazon/presets.js +1 -1
  22. package/dist/models/anthropic/presets.d.ts +56 -56
  23. package/dist/models/cohere/presets.d.ts +54 -54
  24. package/dist/models/cohere/presets.js +2 -2
  25. package/dist/models/google/presets.d.ts +31 -31
  26. package/dist/models/google/presets.js +1 -1
  27. package/dist/models/meta/presets.d.ts +42 -42
  28. package/dist/models/openai/presets.d.ts +96 -96
  29. package/dist/models/openai/presets.js +1 -1
  30. package/dist/models/types.d.ts +1 -1
  31. package/dist/models/voyage/presets.d.ts +92 -92
  32. package/dist/models/voyage/presets.js +1 -1
  33. package/dist/providers/registry.js +2 -2
  34. package/dist/telemetry/baggage.d.ts +1 -0
  35. package/dist/telemetry/baggage.js +24 -0
  36. package/dist/telemetry/fetch.d.ts +2 -1
  37. package/dist/telemetry/fetch.js +13 -3
  38. package/dist/telemetry/gen-ai.d.ts +5 -0
  39. package/dist/telemetry/gen-ai.js +60 -0
  40. package/dist/telemetry/http.d.ts +3 -0
  41. package/dist/telemetry/http.js +57 -0
  42. package/dist/telemetry/memory.d.ts +2 -0
  43. package/dist/telemetry/memory.js +27 -0
  44. package/dist/telemetry/span.d.ts +6 -3
  45. package/dist/telemetry/span.js +24 -36
  46. package/dist/telemetry/stream.d.ts +3 -7
  47. package/dist/telemetry/stream.js +26 -29
  48. package/dist/types.d.ts +16 -15
  49. package/dist/utils/headers.d.ts +1 -1
  50. package/dist/utils/headers.js +7 -9
  51. package/dist/utils/request.d.ts +0 -4
  52. package/dist/utils/request.js +0 -9
  53. package/dist/utils/response.js +1 -1
  54. package/package.json +5 -2
  55. package/src/config.ts +28 -7
  56. package/src/endpoints/chat-completions/converters.ts +18 -11
  57. package/src/endpoints/chat-completions/handler.ts +46 -28
  58. package/src/endpoints/chat-completions/otel.ts +161 -0
  59. package/src/endpoints/embeddings/handler.test.ts +2 -2
  60. package/src/endpoints/embeddings/handler.ts +28 -10
  61. package/src/endpoints/embeddings/otel.ts +56 -0
  62. package/src/endpoints/models/handler.ts +3 -5
  63. package/src/errors/gateway.ts +5 -5
  64. package/src/errors/openai.ts +25 -17
  65. package/src/errors/utils.ts +6 -7
  66. package/src/gateway.ts +1 -1
  67. package/src/lifecycle.ts +85 -32
  68. package/src/middleware/matcher.ts +1 -1
  69. package/src/models/amazon/presets.ts +1 -1
  70. package/src/models/cohere/presets.ts +2 -2
  71. package/src/models/google/presets.ts +1 -1
  72. package/src/models/openai/presets.ts +1 -1
  73. package/src/models/types.ts +1 -1
  74. package/src/models/voyage/presets.ts +1 -1
  75. package/src/providers/registry.ts +2 -2
  76. package/src/telemetry/baggage.ts +27 -0
  77. package/src/telemetry/fetch.ts +15 -3
  78. package/src/telemetry/gen-ai.ts +88 -0
  79. package/src/telemetry/http.ts +65 -0
  80. package/src/telemetry/memory.ts +36 -0
  81. package/src/telemetry/span.ts +28 -40
  82. package/src/telemetry/stream.ts +36 -40
  83. package/src/types.ts +18 -18
  84. package/src/utils/headers.ts +8 -19
  85. package/src/utils/request.ts +0 -11
  86. package/src/utils/response.ts +1 -1
  87. package/dist/telemetry/otel.d.ts +0 -2
  88. package/dist/telemetry/otel.js +0 -50
  89. package/dist/telemetry/utils.d.ts +0 -4
  90. package/dist/telemetry/utils.js +0 -223
  91. package/src/telemetry/otel.ts +0 -91
  92. package/src/telemetry/utils.ts +0 -273
@@ -1,91 +0,0 @@
1
- import type { Attributes } from "@opentelemetry/api";
2
-
3
- import { SpanStatusCode } from "@opentelemetry/api";
4
-
5
- import type { GatewayConfigParsed, GatewayContext } from "../types";
6
-
7
- import { initFetch } from "./fetch";
8
- import { startSpan } from "./span";
9
- import { instrumentStream } from "./stream";
10
- import {
11
- getAIAttributes,
12
- getBaggageAttributes,
13
- getRequestAttributes,
14
- getResponseAttributes,
15
- } from "./utils";
16
-
17
- export const withOtel =
18
- (run: (ctx: GatewayContext) => Promise<void>, config: GatewayConfigParsed) =>
19
- async (ctx: GatewayContext) => {
20
- const requestStart = performance.now();
21
- const aiSpan = startSpan(ctx.request.url, undefined, config.telemetry?.tracer);
22
- initFetch();
23
-
24
- const endAiSpan = (status: number, stats?: { bytes: number }) => {
25
- const attrs: Attributes = getAIAttributes(
26
- ctx.body,
27
- ctx.streamResult ?? ctx.result,
28
- config.telemetry?.attributes,
29
- ctx.resolvedProviderId,
30
- );
31
-
32
- attrs["gen_ai.server.request.duration"] = Number(
33
- ((performance.now() - requestStart) / 1000).toFixed(4),
34
- );
35
-
36
- if (!aiSpan.isExisting) {
37
- Object.assign(
38
- attrs,
39
- getRequestAttributes(ctx.request, config.telemetry?.attributes),
40
- getResponseAttributes(ctx.response, config.telemetry?.attributes),
41
- );
42
- }
43
-
44
- Object.assign(attrs, getBaggageAttributes(ctx.request));
45
-
46
- if (config.telemetry?.attributes !== "required") {
47
- attrs["http.request.body.size"] = Number(ctx.request.headers.get("content-length") || 0);
48
- attrs["http.response.body.size"] =
49
- stats?.bytes ?? Number(attrs["http.response.header.content-length"] || 0);
50
- }
51
-
52
- if (config.telemetry?.attributes === "full") {
53
- attrs["http.request.body"] = JSON.stringify(ctx.body);
54
- }
55
-
56
- const realStatus = status === 200 ? (ctx.response?.status ?? status) : status;
57
- attrs["http.response.status_code_effective"] = realStatus;
58
- aiSpan.setStatus({ code: realStatus >= 500 ? SpanStatusCode.ERROR : SpanStatusCode.OK });
59
-
60
- if (ctx.operation && ctx.modelId) {
61
- aiSpan.updateName(`${ctx.operation} ${ctx.modelId}`);
62
- } else if (ctx.operation) {
63
- aiSpan.updateName(`${ctx.operation}`);
64
- }
65
-
66
- aiSpan.setAttributes(attrs);
67
-
68
- aiSpan.finish();
69
- };
70
-
71
- await aiSpan.runWithContext(() => run(ctx));
72
-
73
- if (ctx.response!.body instanceof ReadableStream) {
74
- const instrumented = instrumentStream(
75
- ctx.response!.body,
76
- {
77
- onComplete: (status, params) => endAiSpan(status, params),
78
- },
79
- ctx.request.signal,
80
- );
81
-
82
- ctx.response = new Response(instrumented, {
83
- status: ctx.response!.status,
84
- statusText: ctx.response!.statusText,
85
- headers: ctx.response!.headers,
86
- });
87
- return;
88
- }
89
-
90
- endAiSpan(ctx.response!.status);
91
- };
@@ -1,273 +0,0 @@
1
- import type {
2
- ChatCompletions,
3
- ChatCompletionsBody,
4
- ChatCompletionsContentPart,
5
- ChatCompletionsMessage,
6
- } from "../endpoints/chat-completions/schema";
7
- import type { Embeddings, EmbeddingsBody } from "../endpoints/embeddings";
8
-
9
- import { resolveRequestId } from "../utils/headers";
10
-
11
- type GenAIPart = Record<string, unknown>;
12
- const DEFAULT_ATTRIBUTES_LEVEL = "recommended";
13
- const HEBO_BAGGAGE_PREFIX = "hebo.";
14
-
15
- const toTextPart = (content: string): GenAIPart => ({ type: "text", content });
16
-
17
- const toMessageParts = (message: ChatCompletionsMessage): GenAIPart[] => {
18
- if (message.role === "assistant") {
19
- const parts: GenAIPart[] = [];
20
- if (typeof message.content === "string") parts.push(toTextPart(message.content));
21
- if (Array.isArray(message.tool_calls)) {
22
- for (const call of message.tool_calls) {
23
- parts.push({
24
- type: "tool_call",
25
- id: call.id,
26
- name: call.function.name,
27
- arguments: call.function.arguments,
28
- });
29
- }
30
- }
31
- return parts;
32
- }
33
-
34
- if (message.role === "tool") {
35
- return [{ type: "tool_call_response", id: message.tool_call_id, content: message.content }];
36
- }
37
-
38
- if (message.role === "user") {
39
- const parts: GenAIPart[] = [];
40
- if (typeof message.content === "string") parts.push(toTextPart(message.content));
41
- if (Array.isArray(message.content)) {
42
- for (const part of message.content as ChatCompletionsContentPart[]) {
43
- if (part.type === "text") {
44
- parts.push(toTextPart(part.text));
45
- } else if (part.type === "image_url") {
46
- parts.push({ type: "image", content: part.image_url.url });
47
- } else {
48
- parts.push({
49
- type: "file",
50
- // FUTURE: optionally expose safe metadata without raw binary payloads.
51
- content: part.file.filename ?? "[REDACTED_BINARY_DATA]",
52
- media_type: part.file.media_type,
53
- });
54
- }
55
- }
56
- }
57
- return parts;
58
- }
59
-
60
- return [];
61
- };
62
-
63
- export const getRequestAttributes = (
64
- request?: Request,
65
- attributesLevel = DEFAULT_ATTRIBUTES_LEVEL,
66
- ) => {
67
- if (!request) return {};
68
-
69
- let url;
70
- try {
71
- // FUTURE: use URL from lifecycle
72
- url = new URL(request.url);
73
- } catch {}
74
-
75
- const attrs = {
76
- "http.request.method": request.method,
77
- "url.full": request.url,
78
- "url.path": url?.pathname,
79
- "url.scheme": url?.protocol.replace(":", ""),
80
- "server.address": url?.hostname,
81
- "server.port": url
82
- ? url.port
83
- ? Number(url.port)
84
- : url.protocol === "https:"
85
- ? 443
86
- : 80
87
- : undefined,
88
- };
89
-
90
- if (attributesLevel !== "required") {
91
- Object.assign(attrs, {
92
- "http.request.id": resolveRequestId(request),
93
- "user_agent.original": request.headers.get("user-agent") ?? undefined,
94
- });
95
- }
96
-
97
- if (attributesLevel === "full") {
98
- Object.assign(attrs, {
99
- // FUTURE: "url.query"
100
- "http.request.header.content-type": [request.headers.get("content-type") ?? undefined],
101
- "http.request.header.content-length": [request.headers.get("content-length") ?? undefined],
102
- // FUTURE: "client.address"
103
- });
104
- }
105
-
106
- return attrs;
107
- };
108
-
109
- export const getAIAttributes = (
110
- body?: object,
111
- result?: object,
112
- attributesLevel = DEFAULT_ATTRIBUTES_LEVEL,
113
- providerName?: string,
114
- ) => {
115
- if (!body && !result) return {};
116
-
117
- const isChat = !!body && "messages" in body;
118
- const isEmbeddings = !!body && "input" in body;
119
-
120
- const attrs = {
121
- "gen_ai.operation.name": isEmbeddings ? "embeddings" : isChat ? "chat" : undefined,
122
- "gen_ai.output.type": isEmbeddings ? "embedding" : isChat ? "text" : undefined,
123
- "gen_ai.request.model": body && "model" in body ? body.model : undefined,
124
- "gen_ai.provider.name": providerName,
125
- };
126
-
127
- if (isChat) {
128
- if (body) {
129
- const inputs = body as ChatCompletionsBody;
130
-
131
- if (inputs.seed !== undefined) {
132
- Object.assign(attrs, { "gen_ai.request.seed": inputs.seed });
133
- }
134
-
135
- if (attributesLevel !== "required") {
136
- Object.assign(attrs, {
137
- "gen_ai.request.stream": inputs.stream,
138
- "gen_ai.request.frequency_penalty": inputs.frequency_penalty,
139
- "gen_ai.request.max_tokens": inputs.max_completion_tokens,
140
- "gen_ai.request.presence_penalty": inputs.presence_penalty,
141
- "gen_ai.request.stop_sequences": inputs.stop
142
- ? Array.isArray(inputs.stop)
143
- ? inputs.stop
144
- : [inputs.stop]
145
- : undefined,
146
- "gen_ai.request.temperature": inputs.temperature,
147
- "gen_ai.request.top_p": inputs.top_p,
148
- });
149
- }
150
-
151
- if (attributesLevel === "full") {
152
- Object.assign(attrs, {
153
- // FUTURE: only construct once
154
- "gen_ai.system_instructions": inputs.messages
155
- .filter((m) => m.role === "system")
156
- .map((m) => JSON.stringify({ parts: [toTextPart(m.content)] })),
157
- "gen_ai.input.messages": inputs.messages
158
- .filter((m) => m.role !== "system")
159
- .map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
160
- "gen_ai.tool.definitions": JSON.stringify(inputs.tools),
161
- });
162
- }
163
- }
164
-
165
- // FUTURE: implement streaming
166
- if (result && !(result instanceof ReadableStream)) {
167
- const completions = result as ChatCompletions;
168
-
169
- Object.assign(attrs, {
170
- "gen_ai.response.model": completions.model,
171
- "gen_ai.response.id": completions.id,
172
- });
173
-
174
- if (attributesLevel !== "required") {
175
- Object.assign(attrs, {
176
- "gen_ai.response.finish_reasons": completions.choices?.map((c) => c.finish_reason),
177
- "gen_ai.usage.total_tokens": completions.usage?.total_tokens,
178
- "gen_ai.usage.input_tokens": completions.usage?.prompt_tokens,
179
- "gen_ai.usage.cached_tokens": completions.usage?.prompt_tokens_details?.cached_tokens,
180
- "gen_ai.usage.output_tokens": completions.usage?.completion_tokens,
181
- "gen_ai.usage.reasoning_tokens":
182
- completions.usage?.completion_tokens_details?.reasoning_tokens,
183
- });
184
- }
185
-
186
- if (attributesLevel === "full") {
187
- Object.assign(attrs, {
188
- "gen_ai.output.messages": completions.choices?.map((c) =>
189
- JSON.stringify({
190
- role: c.message.role,
191
- parts: toMessageParts(c.message),
192
- finish_reason: c.finish_reason,
193
- }),
194
- ),
195
- });
196
- }
197
- }
198
- }
199
-
200
- if (isEmbeddings) {
201
- if (body) {
202
- const inputs = body as EmbeddingsBody;
203
- if (attributesLevel !== "required") {
204
- Object.assign(attrs, {
205
- "gen_ai.embeddings.dimension.count": inputs.dimensions,
206
- });
207
- }
208
- }
209
-
210
- if (result) {
211
- const embeddings = result as Embeddings;
212
-
213
- Object.assign(attrs, {
214
- "gen_ai.response.model": embeddings.model,
215
- });
216
-
217
- if (attributesLevel !== "required") {
218
- Object.assign(attrs, {
219
- "gen_ai.usage.input_tokens": embeddings.usage?.prompt_tokens,
220
- "gen_ai.usage.total_tokens": embeddings.usage?.total_tokens,
221
- });
222
- }
223
- }
224
- }
225
-
226
- return attrs;
227
- };
228
-
229
- export const getResponseAttributes = (
230
- response?: Response,
231
- attributesLevel = DEFAULT_ATTRIBUTES_LEVEL,
232
- ) => {
233
- if (!response) return {};
234
-
235
- const attrs = {
236
- "http.response.status_code": response.status,
237
- };
238
-
239
- if (attributesLevel === "full") {
240
- Object.assign(attrs, {
241
- "http.response.header.content-type": [response.headers.get("content-type") ?? undefined],
242
- "http.response.header.content-length": [response.headers.get("content-length") ?? undefined],
243
- });
244
- }
245
-
246
- return attrs;
247
- };
248
-
249
- export const getBaggageAttributes = (request?: Request) => {
250
- const h = request?.headers.get("baggage");
251
- if (!h) return {};
252
-
253
- const attrs: Record<string, string> = {};
254
-
255
- for (const part of h.split(",")) {
256
- const [k, v] = part.trim().split("=", 2);
257
- if (!k || !v) continue;
258
-
259
- const [rawValue] = v.split(";", 1);
260
- if (!rawValue) continue;
261
-
262
- let value = rawValue;
263
- try {
264
- value = decodeURIComponent(rawValue);
265
- } catch {}
266
-
267
- if (k.startsWith(HEBO_BAGGAGE_PREFIX)) {
268
- attrs[k.slice(HEBO_BAGGAGE_PREFIX.length)] = value;
269
- }
270
- }
271
-
272
- return attrs;
273
- };