@hebo-ai/gateway 0.4.0-alpha.4 → 0.4.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -4
- package/dist/config.js +1 -1
- package/dist/endpoints/chat-completions/handler.js +39 -15
- package/dist/endpoints/embeddings/handler.js +24 -7
- package/dist/endpoints/models/handler.js +1 -0
- package/dist/gateway.js +7 -9
- package/dist/lifecycle.js +20 -12
- package/dist/middleware/matcher.js +2 -0
- package/dist/providers/registry.d.ts +1 -1
- package/dist/providers/registry.js +2 -1
- package/dist/telemetry/fetch.js +4 -6
- package/dist/telemetry/otel.d.ts +2 -7
- package/dist/telemetry/otel.js +46 -5
- package/dist/telemetry/span.d.ts +9 -0
- package/dist/telemetry/span.js +64 -0
- package/dist/telemetry/utils.d.ts +4 -4
- package/dist/telemetry/utils.js +212 -30
- package/dist/types.d.ts +15 -4
- package/dist/utils/response.js +2 -3
- package/package.json +3 -5
- package/src/config.ts +1 -1
- package/src/endpoints/chat-completions/handler.ts +52 -16
- package/src/endpoints/embeddings/handler.ts +26 -8
- package/src/endpoints/models/handler.ts +2 -0
- package/src/gateway.ts +8 -13
- package/src/lifecycle.ts +21 -11
- package/src/middleware/matcher.ts +2 -0
- package/src/providers/registry.ts +3 -2
- package/src/telemetry/fetch.ts +5 -6
- package/src/telemetry/otel.ts +87 -12
- package/src/telemetry/span.ts +88 -0
- package/src/telemetry/utils.ts +258 -31
- package/src/types.ts +15 -4
- package/src/utils/response.ts +3 -4
- package/dist/telemetry/access-log.d.ts +0 -2
- package/dist/telemetry/access-log.js +0 -49
- package/dist/telemetry/perf.d.ts +0 -11
- package/dist/telemetry/perf.js +0 -60
- package/src/telemetry/access-log.ts +0 -70
- package/src/telemetry/perf.ts +0 -89
package/README.md
CHANGED
|
@@ -301,7 +301,7 @@ const gw = gateway({
|
|
|
301
301
|
*/
|
|
302
302
|
before: async (ctx: {
|
|
303
303
|
body: ChatCompletionsBody | EmbeddingsBody;
|
|
304
|
-
operation: "
|
|
304
|
+
operation: "chat" | "embeddings";
|
|
305
305
|
}): Promise<ChatCompletionsBody | EmbeddingsBody | void> => {
|
|
306
306
|
// Example Use Cases:
|
|
307
307
|
// - Transform request body
|
|
@@ -328,7 +328,7 @@ const gw = gateway({
|
|
|
328
328
|
* @param ctx.models ModelCatalog from config.
|
|
329
329
|
* @param ctx.body The parsed body object with all call parameters.
|
|
330
330
|
* @param ctx.modelId Resolved model ID.
|
|
331
|
-
* @param ctx.operation Operation type ("
|
|
331
|
+
* @param ctx.operation Operation type ("chat" | "embeddings").
|
|
332
332
|
* @returns ProviderV3 to override, or undefined to use default.
|
|
333
333
|
*/
|
|
334
334
|
resolveProvider: async (ctx: {
|
|
@@ -336,7 +336,7 @@ const gw = gateway({
|
|
|
336
336
|
models: ModelCatalog;
|
|
337
337
|
body: ChatCompletionsBody | EmbeddingsBody;
|
|
338
338
|
modelId: ModelId;
|
|
339
|
-
operation: "
|
|
339
|
+
operation: "chat" | "embeddings";
|
|
340
340
|
}): Promise<ProviderV3 | void> => {
|
|
341
341
|
// Example Use Cases:
|
|
342
342
|
// - Routing logic between providers
|
|
@@ -617,12 +617,21 @@ const gw = gateway({
|
|
|
617
617
|
enabled: true,
|
|
618
618
|
// default: TracerProvider from @opentelemetry/api singleton
|
|
619
619
|
tracer: trace.getTracer("my-gateway"),
|
|
620
|
+
// "required" = minimal baseline attributes
|
|
621
|
+
// "recommended" = practical operational attributes (request/response metadata, genai model/usage fields)
|
|
622
|
+
// "full" = also include body fields (e.g. genai input/output messages)
|
|
623
|
+
attributes: "full",
|
|
620
624
|
},
|
|
621
625
|
});
|
|
622
626
|
```
|
|
623
627
|
|
|
628
|
+
Attribute names and span semantics follow OpenTelemetry GenAI semantic conventions:
|
|
629
|
+
https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/
|
|
630
|
+
|
|
624
631
|
> [!TIP]
|
|
625
|
-
>
|
|
632
|
+
> To populate custom span attributes, the inbound W3C `baggage` header is supported. Keys in the `hebo.` namespace are mapped to span attributes, with the namespace stripped. For example: `baggage: hebo.user_id=u-123` becomes span attribute `user_id=u-123`.
|
|
633
|
+
|
|
634
|
+
For observability integrations that are not OTel-compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.
|
|
626
635
|
|
|
627
636
|
### Passing Framework State to Hooks
|
|
628
637
|
|
package/dist/config.js
CHANGED
|
@@ -55,8 +55,8 @@ export const parseConfig = (config) => {
|
|
|
55
55
|
...config,
|
|
56
56
|
logger: config.logger,
|
|
57
57
|
telemetry: {
|
|
58
|
+
...config.telemetry,
|
|
58
59
|
enabled: config.telemetry?.enabled ?? false,
|
|
59
|
-
tracer: config.telemetry?.tracer,
|
|
60
60
|
},
|
|
61
61
|
providers: parsedProviders,
|
|
62
62
|
models: parsedModels,
|
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
import { generateText, streamText, wrapLanguageModel } from "ai";
|
|
1
|
+
import { generateText, Output, streamText, wrapLanguageModel, } from "ai";
|
|
2
2
|
import * as z from "zod/mini";
|
|
3
3
|
import { GatewayError } from "../../errors/gateway";
|
|
4
4
|
import { winterCgHandler } from "../../lifecycle";
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
|
-
import {
|
|
9
|
-
import { markPerf } from "../../telemetry/perf";
|
|
8
|
+
import { addSpanEvent } from "../../telemetry/span";
|
|
10
9
|
import { resolveRequestId } from "../../utils/headers";
|
|
11
10
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
12
11
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
@@ -14,6 +13,7 @@ import { ChatCompletionsBodySchema } from "./schema";
|
|
|
14
13
|
export const chatCompletions = (config) => {
|
|
15
14
|
const hooks = config.hooks;
|
|
16
15
|
const handler = async (ctx) => {
|
|
16
|
+
addSpanEvent("hebo.handler.started");
|
|
17
17
|
// Guard: enforce HTTP method early.
|
|
18
18
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
19
19
|
throw new GatewayError("Method Not Allowed", 405);
|
|
@@ -27,19 +27,28 @@ export const chatCompletions = (config) => {
|
|
|
27
27
|
catch {
|
|
28
28
|
throw new GatewayError("Invalid JSON", 400);
|
|
29
29
|
}
|
|
30
|
+
addSpanEvent("hebo.request.deserialized");
|
|
30
31
|
const parsed = ChatCompletionsBodySchema.safeParse(body);
|
|
31
32
|
if (!parsed.success) {
|
|
32
33
|
throw new GatewayError(z.prettifyError(parsed.error), 400);
|
|
33
34
|
}
|
|
34
35
|
ctx.body = parsed.data;
|
|
35
|
-
|
|
36
|
-
ctx.
|
|
36
|
+
addSpanEvent("hebo.request.parsed");
|
|
37
|
+
ctx.operation = "chat";
|
|
38
|
+
if (hooks?.before) {
|
|
39
|
+
ctx.body = (await hooks.before(ctx)) ?? ctx.body;
|
|
40
|
+
addSpanEvent("hebo.hooks.before.completed");
|
|
41
|
+
}
|
|
37
42
|
// Resolve model + provider (hooks may override defaults).
|
|
38
43
|
let inputs, stream;
|
|
39
44
|
({ model: ctx.modelId, stream, ...inputs } = ctx.body);
|
|
40
45
|
ctx.resolvedModelId =
|
|
41
46
|
(await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
|
|
42
47
|
logger.debug(`[chat] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
|
|
48
|
+
addSpanEvent("hebo.model.resolved", {
|
|
49
|
+
"gen_ai.request.model": ctx.modelId ?? "",
|
|
50
|
+
"gen_ai.response.model": ctx.resolvedModelId ?? "",
|
|
51
|
+
});
|
|
43
52
|
const override = await hooks?.resolveProvider?.(ctx);
|
|
44
53
|
ctx.provider =
|
|
45
54
|
override ??
|
|
@@ -52,36 +61,41 @@ export const chatCompletions = (config) => {
|
|
|
52
61
|
const languageModel = ctx.provider.languageModel(ctx.resolvedModelId);
|
|
53
62
|
ctx.resolvedProviderId = languageModel.provider;
|
|
54
63
|
logger.debug(`[chat] using ${languageModel.provider} for ${ctx.resolvedModelId}`);
|
|
64
|
+
addSpanEvent("hebo.provider.resolved", { "gen_ai.provider.name": ctx.resolvedProviderId });
|
|
55
65
|
// Convert inputs to AI SDK call options.
|
|
56
66
|
const textOptions = convertToTextCallOptions(inputs);
|
|
57
67
|
logger.trace({
|
|
58
68
|
requestId,
|
|
59
69
|
options: textOptions,
|
|
60
70
|
}, "[chat] AI SDK options");
|
|
71
|
+
addSpanEvent("hebo.options.prepared");
|
|
61
72
|
// Build middleware chain (model -> forward params -> provider).
|
|
62
73
|
const languageModelWithMiddleware = wrapLanguageModel({
|
|
63
74
|
model: languageModel,
|
|
64
75
|
middleware: modelMiddlewareMatcher.for(ctx.resolvedModelId, languageModel.provider),
|
|
65
76
|
});
|
|
66
77
|
// Execute request (streaming vs. non-streaming).
|
|
67
|
-
markPerf(ctx.request, "aiSdkStart");
|
|
68
78
|
if (stream) {
|
|
79
|
+
addSpanEvent("hebo.ai-sdk.started");
|
|
69
80
|
const result = streamText({
|
|
70
81
|
model: languageModelWithMiddleware,
|
|
71
82
|
headers: prepareForwardHeaders(ctx.request),
|
|
72
|
-
experimental_telemetry: toAiSdkTelemetry(config, ctx.operation),
|
|
73
83
|
// No abort signal here, otherwise we can't detect upstream from client cancellations
|
|
74
84
|
// abortSignal: ctx.request.signal,
|
|
75
85
|
onError: ({ error }) => {
|
|
86
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
76
87
|
logger.error({
|
|
77
88
|
requestId,
|
|
78
|
-
err
|
|
89
|
+
err,
|
|
79
90
|
});
|
|
80
91
|
throw error;
|
|
81
92
|
},
|
|
82
93
|
onAbort: () => {
|
|
83
94
|
throw new DOMException("Upstream failed", "AbortError");
|
|
84
95
|
},
|
|
96
|
+
onFinish: (result) => {
|
|
97
|
+
ctx.streamResult = toChatCompletions(result, ctx.resolvedModelId);
|
|
98
|
+
},
|
|
85
99
|
timeout: {
|
|
86
100
|
totalMs: 5 * 60 * 1000,
|
|
87
101
|
},
|
|
@@ -91,14 +105,19 @@ export const chatCompletions = (config) => {
|
|
|
91
105
|
includeRawChunks: false,
|
|
92
106
|
...textOptions,
|
|
93
107
|
});
|
|
94
|
-
|
|
95
|
-
ctx.result = toChatCompletionsStream(result, ctx.
|
|
96
|
-
|
|
108
|
+
addSpanEvent("hebo.ai-sdk.completed");
|
|
109
|
+
ctx.result = toChatCompletionsStream(result, ctx.resolvedModelId);
|
|
110
|
+
addSpanEvent("hebo.result.transformed");
|
|
111
|
+
if (hooks?.after) {
|
|
112
|
+
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
113
|
+
addSpanEvent("hebo.hooks.after.completed");
|
|
114
|
+
}
|
|
115
|
+
return ctx.result;
|
|
97
116
|
}
|
|
117
|
+
addSpanEvent("hebo.ai-sdk.started");
|
|
98
118
|
const result = await generateText({
|
|
99
119
|
model: languageModelWithMiddleware,
|
|
100
120
|
headers: prepareForwardHeaders(ctx.request),
|
|
101
|
-
experimental_telemetry: toAiSdkTelemetry(config, ctx.operation),
|
|
102
121
|
// FUTURE: currently can't tell whether upstream or downstream abort
|
|
103
122
|
abortSignal: ctx.request.signal,
|
|
104
123
|
experimental_include: {
|
|
@@ -108,10 +127,15 @@ export const chatCompletions = (config) => {
|
|
|
108
127
|
timeout: 5 * 60 * 1000,
|
|
109
128
|
...textOptions,
|
|
110
129
|
});
|
|
111
|
-
markPerf(ctx.request, "aiSdkEnd");
|
|
112
130
|
logger.trace({ requestId, result }, "[chat] AI SDK result");
|
|
113
|
-
|
|
114
|
-
|
|
131
|
+
addSpanEvent("hebo.ai-sdk.completed");
|
|
132
|
+
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
133
|
+
addSpanEvent("hebo.result.transformed");
|
|
134
|
+
if (hooks?.after) {
|
|
135
|
+
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
136
|
+
addSpanEvent("hebo.hooks.after.completed");
|
|
137
|
+
}
|
|
138
|
+
return ctx.result;
|
|
115
139
|
};
|
|
116
140
|
return { handler: winterCgHandler(handler, config) };
|
|
117
141
|
};
|
|
@@ -5,8 +5,7 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
|
-
import {
|
|
9
|
-
import { markPerf } from "../../telemetry/perf";
|
|
8
|
+
import { addSpanEvent } from "../../telemetry/span";
|
|
10
9
|
import { resolveRequestId } from "../../utils/headers";
|
|
11
10
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
12
11
|
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
@@ -14,6 +13,7 @@ import { EmbeddingsBodySchema } from "./schema";
|
|
|
14
13
|
export const embeddings = (config) => {
|
|
15
14
|
const hooks = config.hooks;
|
|
16
15
|
const handler = async (ctx) => {
|
|
16
|
+
addSpanEvent("hebo.handler.started");
|
|
17
17
|
// Guard: enforce HTTP method early.
|
|
18
18
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
19
19
|
throw new GatewayError("Method Not Allowed", 405);
|
|
@@ -27,19 +27,28 @@ export const embeddings = (config) => {
|
|
|
27
27
|
catch {
|
|
28
28
|
throw new GatewayError("Invalid JSON", 400);
|
|
29
29
|
}
|
|
30
|
+
addSpanEvent("hebo.request.deserialized");
|
|
30
31
|
const parsed = EmbeddingsBodySchema.safeParse(body);
|
|
31
32
|
if (!parsed.success) {
|
|
32
33
|
throw new GatewayError(z.prettifyError(parsed.error), 400);
|
|
33
34
|
}
|
|
34
35
|
ctx.body = parsed.data;
|
|
36
|
+
addSpanEvent("hebo.request.parsed");
|
|
35
37
|
ctx.operation = "embeddings";
|
|
36
|
-
|
|
38
|
+
if (hooks?.before) {
|
|
39
|
+
ctx.body = (await hooks.before(ctx)) ?? ctx.body;
|
|
40
|
+
addSpanEvent("hebo.hooks.before.completed");
|
|
41
|
+
}
|
|
37
42
|
// Resolve model + provider (hooks may override defaults).
|
|
38
43
|
let inputs;
|
|
39
44
|
({ model: ctx.modelId, ...inputs } = ctx.body);
|
|
40
45
|
ctx.resolvedModelId =
|
|
41
46
|
(await hooks?.resolveModelId?.(ctx)) ?? ctx.modelId;
|
|
42
47
|
logger.debug(`[embeddings] resolved ${ctx.modelId} to ${ctx.resolvedModelId}`);
|
|
48
|
+
addSpanEvent("hebo.model.resolved", {
|
|
49
|
+
"gen_ai.request.model": ctx.modelId ?? "",
|
|
50
|
+
"gen_ai.response.model": ctx.resolvedModelId ?? "",
|
|
51
|
+
});
|
|
43
52
|
const override = await hooks?.resolveProvider?.(ctx);
|
|
44
53
|
ctx.provider =
|
|
45
54
|
override ??
|
|
@@ -52,27 +61,35 @@ export const embeddings = (config) => {
|
|
|
52
61
|
const embeddingModel = ctx.provider.embeddingModel(ctx.resolvedModelId);
|
|
53
62
|
ctx.resolvedProviderId = embeddingModel.provider;
|
|
54
63
|
logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
|
|
64
|
+
addSpanEvent("hebo.provider.resolved", {
|
|
65
|
+
"gen_ai.provider.name": ctx.resolvedProviderId,
|
|
66
|
+
});
|
|
55
67
|
// Convert inputs to AI SDK call options.
|
|
56
68
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
57
69
|
logger.trace({ requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
70
|
+
addSpanEvent("hebo.options.prepared");
|
|
58
71
|
// Build middleware chain (model -> forward params -> provider).
|
|
59
72
|
const embeddingModelWithMiddleware = wrapEmbeddingModel({
|
|
60
73
|
model: embeddingModel,
|
|
61
74
|
middleware: modelMiddlewareMatcher.forEmbedding(ctx.resolvedModelId, embeddingModel.provider),
|
|
62
75
|
});
|
|
63
76
|
// Execute request.
|
|
64
|
-
|
|
77
|
+
addSpanEvent("hebo.ai-sdk.started");
|
|
65
78
|
const result = await embedMany({
|
|
66
79
|
model: embeddingModelWithMiddleware,
|
|
67
80
|
headers: prepareForwardHeaders(ctx.request),
|
|
68
|
-
experimental_telemetry: toAiSdkTelemetry(config, ctx.operation),
|
|
69
81
|
abortSignal: ctx.request.signal,
|
|
70
82
|
...embedOptions,
|
|
71
83
|
});
|
|
72
|
-
markPerf(ctx.request, "aiSdkEnd");
|
|
73
84
|
logger.trace({ requestId, result }, "[embeddings] AI SDK result");
|
|
85
|
+
addSpanEvent("hebo.ai-sdk.completed");
|
|
74
86
|
ctx.result = toEmbeddings(result, ctx.modelId);
|
|
75
|
-
|
|
87
|
+
addSpanEvent("hebo.result.transformed");
|
|
88
|
+
if (hooks?.after) {
|
|
89
|
+
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
90
|
+
addSpanEvent("hebo.hooks.after.completed");
|
|
91
|
+
}
|
|
92
|
+
return ctx.result;
|
|
76
93
|
};
|
|
77
94
|
return { handler: winterCgHandler(handler, config) };
|
|
78
95
|
};
|
|
@@ -8,6 +8,7 @@ export const models = (config) => {
|
|
|
8
8
|
if (!request || request.method !== "GET") {
|
|
9
9
|
throw new GatewayError("Method Not Allowed", 405);
|
|
10
10
|
}
|
|
11
|
+
ctx.operation = "models";
|
|
11
12
|
const rawId = request.url.split("/models/", 2)[1]?.split("?", 1)[0];
|
|
12
13
|
if (!rawId) {
|
|
13
14
|
return toModels(ctx.models);
|
package/dist/gateway.js
CHANGED
|
@@ -2,11 +2,15 @@ import { parseConfig } from "./config";
|
|
|
2
2
|
import { chatCompletions } from "./endpoints/chat-completions/handler";
|
|
3
3
|
import { embeddings } from "./endpoints/embeddings/handler";
|
|
4
4
|
import { models } from "./endpoints/models/handler";
|
|
5
|
+
import { GatewayError } from "./errors/gateway";
|
|
6
|
+
import { winterCgHandler } from "./lifecycle";
|
|
5
7
|
import { logger } from "./logger";
|
|
6
|
-
import { getRequestMeta, getResponseMeta } from "./telemetry/utils";
|
|
7
8
|
export function gateway(config) {
|
|
8
9
|
const basePath = (config.basePath ?? "").replace(/\/+$/, "");
|
|
9
10
|
const parsedConfig = parseConfig(config);
|
|
11
|
+
const notFoundHandler = winterCgHandler(() => {
|
|
12
|
+
throw new GatewayError("Not Found", 404);
|
|
13
|
+
}, parsedConfig);
|
|
10
14
|
const routes = {
|
|
11
15
|
["/chat/completions"]: chatCompletions(parsedConfig),
|
|
12
16
|
["/embeddings"]: embeddings(parsedConfig),
|
|
@@ -14,23 +18,17 @@ export function gateway(config) {
|
|
|
14
18
|
};
|
|
15
19
|
const routeEntries = Object.entries(routes);
|
|
16
20
|
const handler = (req, state) => {
|
|
17
|
-
const start = performance.now();
|
|
18
21
|
let pathname = new URL(req.url).pathname;
|
|
19
22
|
if (basePath && pathname.startsWith(basePath)) {
|
|
20
23
|
pathname = pathname.slice(basePath.length);
|
|
21
24
|
}
|
|
25
|
+
logger.debug(`[gateway] ${req.method} ${pathname}`);
|
|
22
26
|
for (const [route, endpoint] of routeEntries) {
|
|
23
27
|
if (pathname === route || pathname.startsWith(route + "/")) {
|
|
24
28
|
return endpoint.handler(req, state);
|
|
25
29
|
}
|
|
26
30
|
}
|
|
27
|
-
|
|
28
|
-
const durationMs = +(performance.now() - start).toFixed(2);
|
|
29
|
-
logger.warn({
|
|
30
|
-
req: getRequestMeta(req),
|
|
31
|
-
res: { ...getResponseMeta(response), durationMs, ttfbMs: durationMs },
|
|
32
|
-
}, `${req.method} ${pathname} 404`);
|
|
33
|
-
return Promise.resolve(response);
|
|
31
|
+
return notFoundHandler(req, state);
|
|
34
32
|
};
|
|
35
33
|
return { handler, routes };
|
|
36
34
|
}
|
package/dist/lifecycle.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { parseConfig } from "./config";
|
|
2
2
|
import { toOpenAIErrorResponse } from "./errors/openai";
|
|
3
|
-
import {
|
|
4
|
-
import {
|
|
3
|
+
import { logger } from "./logger";
|
|
4
|
+
import { withOtel } from "./telemetry/otel";
|
|
5
|
+
import { addSpanEvent } from "./telemetry/span";
|
|
5
6
|
import { resolveRequestId } from "./utils/headers";
|
|
6
7
|
import { maybeApplyRequestPatch, prepareRequestHeaders } from "./utils/request";
|
|
7
8
|
import { prepareResponseInit, toResponse } from "./utils/response";
|
|
@@ -9,19 +10,26 @@ export const winterCgHandler = (run, config) => {
|
|
|
9
10
|
const parsedConfig = parseConfig(config);
|
|
10
11
|
const core = async (ctx) => {
|
|
11
12
|
try {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
if (parsedConfig.hooks?.onRequest) {
|
|
14
|
+
const onRequest = await parsedConfig.hooks.onRequest(ctx);
|
|
15
|
+
addSpanEvent("hebo.hooks.on_request.completed");
|
|
16
|
+
if (onRequest) {
|
|
17
|
+
if (onRequest instanceof Response) {
|
|
18
|
+
ctx.response = onRequest;
|
|
19
|
+
return;
|
|
20
|
+
}
|
|
21
|
+
ctx.request = maybeApplyRequestPatch(ctx.request, onRequest);
|
|
17
22
|
}
|
|
18
|
-
ctx.request = maybeApplyRequestPatch(ctx.request, onRequest);
|
|
19
23
|
}
|
|
20
24
|
ctx.result = (await run(ctx));
|
|
21
25
|
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.request));
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
26
|
+
if (parsedConfig.hooks?.onResponse) {
|
|
27
|
+
const onResponse = await parsedConfig.hooks.onResponse(ctx);
|
|
28
|
+
addSpanEvent("hebo.hooks.on_response.completed");
|
|
29
|
+
if (onResponse) {
|
|
30
|
+
ctx.response = onResponse;
|
|
31
|
+
}
|
|
32
|
+
}
|
|
25
33
|
}
|
|
26
34
|
catch (error) {
|
|
27
35
|
logger.error({
|
|
@@ -31,7 +39,7 @@ export const winterCgHandler = (run, config) => {
|
|
|
31
39
|
ctx.response = toOpenAIErrorResponse(error, prepareResponseInit(ctx.request));
|
|
32
40
|
}
|
|
33
41
|
};
|
|
34
|
-
const handler =
|
|
42
|
+
const handler = parsedConfig.telemetry?.enabled ? withOtel(core, parsedConfig) : core;
|
|
35
43
|
return async (request, state) => {
|
|
36
44
|
const ctx = {
|
|
37
45
|
request,
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { logger } from "../logger";
|
|
2
|
+
import { addSpanEvent } from "../telemetry/span";
|
|
2
3
|
import { forwardParamsEmbeddingMiddleware, forwardParamsMiddleware } from "./common";
|
|
3
4
|
class SimpleMatcher {
|
|
4
5
|
rules = [];
|
|
@@ -82,6 +83,7 @@ class ModelMiddlewareMatcher {
|
|
|
82
83
|
break;
|
|
83
84
|
}
|
|
84
85
|
logger.warn(`[middleware] cache eviction`);
|
|
86
|
+
addSpanEvent("hebo.middelware.cache.evicted");
|
|
85
87
|
}
|
|
86
88
|
this.cache.set(key, out);
|
|
87
89
|
return out;
|
|
@@ -5,7 +5,7 @@ export declare const resolveProvider: (args: {
|
|
|
5
5
|
providers: ProviderRegistry;
|
|
6
6
|
models: ModelCatalog;
|
|
7
7
|
modelId: ModelId;
|
|
8
|
-
operation: "
|
|
8
|
+
operation: "chat" | "embeddings";
|
|
9
9
|
}) => ProviderV3;
|
|
10
10
|
export type CanonicalIdsOptions = {
|
|
11
11
|
mapping?: Partial<Record<ModelId, string>>;
|
|
@@ -7,7 +7,8 @@ export const resolveProvider = (args) => {
|
|
|
7
7
|
if (!catalogModel) {
|
|
8
8
|
throw new GatewayError(`Model '${modelId}' not found in catalog`, 422, "MODEL_NOT_FOUND");
|
|
9
9
|
}
|
|
10
|
-
|
|
10
|
+
const modality = operation === "embeddings" ? "embeddings" : "text";
|
|
11
|
+
if (catalogModel.modalities && !catalogModel.modalities.output.includes(modality)) {
|
|
11
12
|
throw new GatewayError(`Model '${modelId}' does not support '${operation}' output`, 422, "MODEL_UNSUPPORTED_OPERATION");
|
|
12
13
|
}
|
|
13
14
|
// FUTURE: implement fallback logic [e.g. runtime config invalid]
|
package/dist/telemetry/fetch.js
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { SpanKind } from "@opentelemetry/api";
|
|
2
|
+
import { withSpan } from "./span";
|
|
2
3
|
const ORIGINAL_FETCH_KEY = Symbol.for("@hebo/fetch/original-fetch");
|
|
3
4
|
const g = globalThis;
|
|
4
|
-
const perfFetch =
|
|
5
|
+
const perfFetch = (input, init) => {
|
|
5
6
|
const original = g[ORIGINAL_FETCH_KEY];
|
|
6
|
-
|
|
7
|
-
const response = await original(input, init);
|
|
8
|
-
markPerf(init ?? input, "fetchEnd");
|
|
9
|
-
return response;
|
|
7
|
+
return withSpan("fetch", () => original(input, init), { kind: SpanKind.CLIENT });
|
|
10
8
|
};
|
|
11
9
|
export const initFetch = () => {
|
|
12
10
|
if (g[ORIGINAL_FETCH_KEY])
|
package/dist/telemetry/otel.d.ts
CHANGED
|
@@ -1,7 +1,2 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
|
|
3
|
-
export declare const toAiSdkTelemetry: (config: GatewayConfig, functionId: string) => {
|
|
4
|
-
isEnabled: boolean;
|
|
5
|
-
tracer?: Tracer;
|
|
6
|
-
functionId?: string;
|
|
7
|
-
};
|
|
1
|
+
import type { GatewayConfigParsed, GatewayContext } from "../types";
|
|
2
|
+
export declare const withOtel: (run: (ctx: GatewayContext) => Promise<void>, config: GatewayConfigParsed) => (ctx: GatewayContext) => Promise<void>;
|
package/dist/telemetry/otel.js
CHANGED
|
@@ -1,5 +1,46 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
}
|
|
1
|
+
import { SpanStatusCode } from "@opentelemetry/api";
|
|
2
|
+
import { initFetch } from "./fetch";
|
|
3
|
+
import { startSpan } from "./span";
|
|
4
|
+
import { instrumentStream } from "./stream";
|
|
5
|
+
import { getAIAttributes, getBaggageAttributes, getRequestAttributes, getResponseAttributes, } from "./utils";
|
|
6
|
+
export const withOtel = (run, config) => async (ctx) => {
|
|
7
|
+
const requestStart = performance.now();
|
|
8
|
+
const aiSpan = startSpan(ctx.request.url, undefined, config.telemetry?.tracer);
|
|
9
|
+
initFetch();
|
|
10
|
+
const endAiSpan = (status, stats) => {
|
|
11
|
+
const attrs = getAIAttributes(ctx.body, ctx.streamResult ?? ctx.result, config.telemetry?.attributes, ctx.resolvedProviderId);
|
|
12
|
+
attrs["gen_ai.server.request.duration"] = Number(((performance.now() - requestStart) / 1000).toFixed(4));
|
|
13
|
+
if (!aiSpan.isExisting) {
|
|
14
|
+
Object.assign(attrs, getRequestAttributes(ctx.request, config.telemetry?.attributes), getResponseAttributes(ctx.response, config.telemetry?.attributes));
|
|
15
|
+
}
|
|
16
|
+
Object.assign(attrs, getBaggageAttributes(ctx.request));
|
|
17
|
+
if (config.telemetry?.attributes === "full") {
|
|
18
|
+
attrs["http.request.body.size"] = Number(ctx.request.headers.get("content-length") || 0);
|
|
19
|
+
attrs["http.response.body.size"] =
|
|
20
|
+
stats?.bytes ?? Number(attrs["http.response.header.content-length"] || 0);
|
|
21
|
+
}
|
|
22
|
+
attrs["http.response.status_code_effective"] = status;
|
|
23
|
+
aiSpan.setStatus({ code: status >= 500 ? SpanStatusCode.ERROR : SpanStatusCode.OK });
|
|
24
|
+
if (ctx.operation && ctx.modelId) {
|
|
25
|
+
aiSpan.updateName(`${ctx.operation} ${ctx.modelId}`);
|
|
26
|
+
}
|
|
27
|
+
else if (ctx.operation) {
|
|
28
|
+
aiSpan.updateName(`${ctx.operation}`);
|
|
29
|
+
}
|
|
30
|
+
aiSpan.setAttributes(attrs);
|
|
31
|
+
aiSpan.finish();
|
|
32
|
+
};
|
|
33
|
+
await aiSpan.runWithContext(() => run(ctx));
|
|
34
|
+
if (ctx.response.body instanceof ReadableStream) {
|
|
35
|
+
const instrumented = instrumentStream(ctx.response.body, {
|
|
36
|
+
onComplete: (status, params) => endAiSpan(status, params),
|
|
37
|
+
}, ctx.request.signal);
|
|
38
|
+
ctx.response = new Response(instrumented, {
|
|
39
|
+
status: ctx.response.status,
|
|
40
|
+
statusText: ctx.response.statusText,
|
|
41
|
+
headers: ctx.response.headers,
|
|
42
|
+
});
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
45
|
+
endAiSpan(ctx.response.status);
|
|
46
|
+
};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { Attributes, Span, SpanOptions, Tracer } from "@opentelemetry/api";
|
|
2
|
+
export declare const startSpan: (name: string, options?: SpanOptions, customTracer?: Tracer) => Span & {
|
|
3
|
+
runWithContext: <T>(fn: () => Promise<T> | T) => T | Promise<T>;
|
|
4
|
+
recordError: (_error: unknown) => void;
|
|
5
|
+
finish: () => void;
|
|
6
|
+
isExisting: boolean;
|
|
7
|
+
};
|
|
8
|
+
export declare const withSpan: <T>(name: string, run: () => Promise<T> | T, options?: SpanOptions) => Promise<T>;
|
|
9
|
+
export declare const addSpanEvent: (name: string, attributes?: Attributes) => void;
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { INVALID_SPAN_CONTEXT, SpanKind, SpanStatusCode, context, trace } from "@opentelemetry/api";
|
|
2
|
+
const DEFAULT_TRACER_NAME = "@hebo-ai/gateway";
|
|
3
|
+
const mem = () => process?.memoryUsage?.();
|
|
4
|
+
const toError = (error) => (error instanceof Error ? error : new Error(String(error)));
|
|
5
|
+
const maybeSetDynamicAttributes = (span, getAttributes) => {
|
|
6
|
+
const attrs = getAttributes();
|
|
7
|
+
if (Object.keys(attrs).length === 0)
|
|
8
|
+
return;
|
|
9
|
+
span.setAttributes(attrs);
|
|
10
|
+
};
|
|
11
|
+
const getMemoryAttributes = () => {
|
|
12
|
+
const memory = mem();
|
|
13
|
+
if (!memory)
|
|
14
|
+
return {};
|
|
15
|
+
return {
|
|
16
|
+
"process.memory.usage": memory.rss,
|
|
17
|
+
"process.memory.heap.used": memory.heapUsed,
|
|
18
|
+
"process.memory.heap.total": memory.heapTotal,
|
|
19
|
+
};
|
|
20
|
+
};
|
|
21
|
+
const NOOP_SPAN = {
|
|
22
|
+
runWithContext: (fn) => fn(),
|
|
23
|
+
recordError: (_error) => { },
|
|
24
|
+
finish: () => { },
|
|
25
|
+
isExisting: true,
|
|
26
|
+
};
|
|
27
|
+
export const startSpan = (name, options, customTracer) => {
|
|
28
|
+
const tracer = customTracer ?? trace.getTracer(DEFAULT_TRACER_NAME);
|
|
29
|
+
const parentContext = context.active();
|
|
30
|
+
const activeSpan = trace.getActiveSpan();
|
|
31
|
+
const span = tracer.startSpan(name, { kind: activeSpan ? SpanKind.INTERNAL : SpanKind.SERVER, ...options }, parentContext);
|
|
32
|
+
if (!span.isRecording()) {
|
|
33
|
+
return Object.assign(trace.wrapSpanContext(INVALID_SPAN_CONTEXT), NOOP_SPAN);
|
|
34
|
+
}
|
|
35
|
+
maybeSetDynamicAttributes(span, getMemoryAttributes);
|
|
36
|
+
const runWithContext = (fn) => context.with(trace.setSpan(parentContext, span), fn);
|
|
37
|
+
const recordError = (error) => {
|
|
38
|
+
const err = toError(error);
|
|
39
|
+
span.recordException(err);
|
|
40
|
+
span.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
|
|
41
|
+
};
|
|
42
|
+
const finish = () => {
|
|
43
|
+
maybeSetDynamicAttributes(span, getMemoryAttributes);
|
|
44
|
+
span.end();
|
|
45
|
+
};
|
|
46
|
+
return Object.assign(span, { runWithContext, recordError, finish, isExisting: !!activeSpan });
|
|
47
|
+
};
|
|
48
|
+
export const withSpan = async (name, run, options) => {
|
|
49
|
+
const started = startSpan(name, options);
|
|
50
|
+
try {
|
|
51
|
+
return await started.runWithContext(run);
|
|
52
|
+
}
|
|
53
|
+
catch (error) {
|
|
54
|
+
started.recordError(error);
|
|
55
|
+
throw error;
|
|
56
|
+
}
|
|
57
|
+
finally {
|
|
58
|
+
started.finish();
|
|
59
|
+
}
|
|
60
|
+
};
|
|
61
|
+
export const addSpanEvent = (name, attributes) => {
|
|
62
|
+
const allAttributes = Object.assign(attributes ?? {}, getMemoryAttributes());
|
|
63
|
+
trace.getActiveSpan()?.addEvent(name, allAttributes);
|
|
64
|
+
};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
export declare const
|
|
3
|
-
export declare const
|
|
4
|
-
export declare const
|
|
1
|
+
export declare const getRequestAttributes: (request?: Request, attributesLevel?: string) => {};
|
|
2
|
+
export declare const getAIAttributes: (body?: object, result?: object, attributesLevel?: string, providerName?: string) => {};
|
|
3
|
+
export declare const getResponseAttributes: (response?: Response, attributesLevel?: string) => {};
|
|
4
|
+
export declare const getBaggageAttributes: (request?: Request) => Record<string, string>;
|