@hebo-ai/gateway 0.4.0-beta.2 → 0.4.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -5
- package/dist/config.js +21 -7
- package/dist/endpoints/chat-completions/converters.js +2 -2
- package/dist/endpoints/chat-completions/handler.js +31 -25
- package/dist/endpoints/chat-completions/otel.d.ts +6 -0
- package/dist/endpoints/chat-completions/otel.js +121 -0
- package/dist/endpoints/embeddings/handler.js +19 -12
- package/dist/endpoints/embeddings/otel.d.ts +6 -0
- package/dist/endpoints/embeddings/otel.js +35 -0
- package/dist/endpoints/models/handler.js +3 -4
- package/dist/errors/gateway.js +1 -2
- package/dist/errors/openai.js +10 -12
- package/dist/errors/utils.d.ts +1 -3
- package/dist/errors/utils.js +5 -6
- package/dist/gateway.js +1 -1
- package/dist/lifecycle.js +62 -28
- package/dist/middleware/matcher.js +1 -1
- package/dist/models/amazon/presets.d.ts +37 -37
- package/dist/models/amazon/presets.js +1 -1
- package/dist/models/anthropic/presets.d.ts +56 -56
- package/dist/models/cohere/presets.d.ts +54 -54
- package/dist/models/cohere/presets.js +2 -2
- package/dist/models/google/presets.d.ts +31 -31
- package/dist/models/google/presets.js +1 -1
- package/dist/models/meta/presets.d.ts +42 -42
- package/dist/models/openai/presets.d.ts +96 -96
- package/dist/models/openai/presets.js +1 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/voyage/presets.d.ts +92 -92
- package/dist/models/voyage/presets.js +1 -1
- package/dist/providers/registry.js +2 -2
- package/dist/telemetry/baggage.d.ts +1 -0
- package/dist/telemetry/baggage.js +24 -0
- package/dist/telemetry/fetch.d.ts +2 -1
- package/dist/telemetry/fetch.js +13 -3
- package/dist/telemetry/gen-ai.d.ts +4 -0
- package/dist/telemetry/gen-ai.js +42 -0
- package/dist/telemetry/http.d.ts +3 -0
- package/dist/telemetry/http.js +57 -0
- package/dist/telemetry/span.d.ts +6 -3
- package/dist/telemetry/span.js +23 -35
- package/dist/telemetry/stream.d.ts +3 -7
- package/dist/telemetry/stream.js +18 -18
- package/dist/types.d.ts +14 -12
- package/dist/utils/headers.d.ts +1 -1
- package/dist/utils/headers.js +7 -9
- package/dist/utils/request.d.ts +0 -4
- package/dist/utils/request.js +0 -9
- package/dist/utils/response.js +1 -1
- package/package.json +4 -2
- package/src/config.ts +28 -7
- package/src/endpoints/chat-completions/converters.ts +2 -2
- package/src/endpoints/chat-completions/handler.ts +39 -26
- package/src/endpoints/chat-completions/otel.ts +154 -0
- package/src/endpoints/embeddings/handler.test.ts +2 -2
- package/src/endpoints/embeddings/handler.ts +24 -12
- package/src/endpoints/embeddings/otel.ts +56 -0
- package/src/endpoints/models/handler.ts +3 -5
- package/src/errors/gateway.ts +1 -2
- package/src/errors/openai.ts +24 -17
- package/src/errors/utils.ts +5 -7
- package/src/gateway.ts +1 -1
- package/src/lifecycle.ts +73 -31
- package/src/middleware/matcher.ts +1 -1
- package/src/models/amazon/presets.ts +1 -1
- package/src/models/cohere/presets.ts +2 -2
- package/src/models/google/presets.ts +1 -1
- package/src/models/openai/presets.ts +1 -1
- package/src/models/types.ts +1 -1
- package/src/models/voyage/presets.ts +1 -1
- package/src/providers/registry.ts +2 -2
- package/src/telemetry/baggage.ts +27 -0
- package/src/telemetry/fetch.ts +15 -3
- package/src/telemetry/gen-ai.ts +60 -0
- package/src/telemetry/http.ts +65 -0
- package/src/telemetry/span.ts +28 -40
- package/src/telemetry/stream.ts +26 -30
- package/src/types.ts +15 -12
- package/src/utils/headers.ts +8 -19
- package/src/utils/request.ts +0 -11
- package/src/utils/response.ts +1 -1
- package/dist/telemetry/otel.d.ts +0 -2
- package/dist/telemetry/otel.js +0 -46
- package/dist/telemetry/utils.d.ts +0 -4
- package/dist/telemetry/utils.js +0 -223
- package/src/telemetry/otel.ts +0 -87
- package/src/telemetry/utils.ts +0 -273
package/src/lifecycle.ts
CHANGED
|
@@ -8,8 +8,11 @@ import type {
|
|
|
8
8
|
import { parseConfig } from "./config";
|
|
9
9
|
import { toOpenAIErrorResponse } from "./errors/openai";
|
|
10
10
|
import { logger } from "./logger";
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
11
|
+
import { getBaggageAttributes } from "./telemetry/baggage";
|
|
12
|
+
import { initFetch } from "./telemetry/fetch";
|
|
13
|
+
import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
|
|
14
|
+
import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
|
|
15
|
+
import { wrapStream } from "./telemetry/stream";
|
|
13
16
|
import { resolveRequestId } from "./utils/headers";
|
|
14
17
|
import { maybeApplyRequestPatch, prepareRequestHeaders } from "./utils/request";
|
|
15
18
|
import { prepareResponseInit, toResponse } from "./utils/response";
|
|
@@ -20,23 +23,77 @@ export const winterCgHandler = (
|
|
|
20
23
|
) => {
|
|
21
24
|
const parsedConfig = parseConfig(config);
|
|
22
25
|
|
|
23
|
-
|
|
26
|
+
if (parsedConfig.telemetry!.enabled) {
|
|
27
|
+
setSpanTracer(parsedConfig.telemetry?.tracer);
|
|
28
|
+
setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
|
|
29
|
+
initFetch(parsedConfig.telemetry?.signals?.hebo);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
return async (request: Request, state?: Record<string, unknown>): Promise<Response> => {
|
|
33
|
+
const ctx: GatewayContext = {
|
|
34
|
+
request,
|
|
35
|
+
state: state ?? {},
|
|
36
|
+
providers: parsedConfig.providers,
|
|
37
|
+
models: parsedConfig.models,
|
|
38
|
+
};
|
|
39
|
+
|
|
40
|
+
const headers = prepareRequestHeaders(ctx.request);
|
|
41
|
+
if (headers) ctx.request = new Request(ctx.request, { headers });
|
|
42
|
+
|
|
43
|
+
const span = startSpan(ctx.request.url);
|
|
44
|
+
span.setAttributes(getBaggageAttributes(ctx.request));
|
|
45
|
+
if (!span.isExisting) {
|
|
46
|
+
span.setAttributes(getRequestAttributes(ctx.request, parsedConfig.telemetry?.signals?.http));
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
const finalize = (status: number, reason?: unknown) => {
|
|
50
|
+
if (ctx.operation) {
|
|
51
|
+
span.updateName(`${ctx.operation}${ctx.modelId ? ` ${ctx.modelId}` : ""}`);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
if (!span.isExisting) {
|
|
55
|
+
// FUTURE add http.server.request.duration
|
|
56
|
+
span.setAttributes(
|
|
57
|
+
getResponseAttributes(ctx.response!, parsedConfig.telemetry?.signals?.http),
|
|
58
|
+
);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
const realStatus = status === 200 ? (ctx.response?.status ?? status) : status;
|
|
62
|
+
if (realStatus !== 200) {
|
|
63
|
+
// FUTURE: in-stream errors are redacted in prod
|
|
64
|
+
(realStatus >= 500 ? logger.error : logger.warn)({
|
|
65
|
+
requestId: resolveRequestId(ctx.request),
|
|
66
|
+
err: reason,
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
if (realStatus >= 500) span.recordError(reason);
|
|
70
|
+
}
|
|
71
|
+
span.setAttributes({ "http.response.status_code_effective": realStatus });
|
|
72
|
+
|
|
73
|
+
span.finish();
|
|
74
|
+
};
|
|
75
|
+
|
|
24
76
|
try {
|
|
25
77
|
if (parsedConfig.hooks?.onRequest) {
|
|
26
78
|
const onRequest = await parsedConfig.hooks.onRequest(ctx as OnRequestHookContext);
|
|
27
79
|
addSpanEvent("hebo.hooks.on_request.completed");
|
|
28
80
|
|
|
29
|
-
if (onRequest) {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
return;
|
|
33
|
-
}
|
|
81
|
+
if (onRequest instanceof Response) {
|
|
82
|
+
ctx.response = onRequest;
|
|
83
|
+
} else if (onRequest) {
|
|
34
84
|
ctx.request = maybeApplyRequestPatch(ctx.request, onRequest);
|
|
35
85
|
}
|
|
36
86
|
}
|
|
37
87
|
|
|
38
|
-
|
|
39
|
-
|
|
88
|
+
if (!ctx.response) {
|
|
89
|
+
ctx.result = (await span.runWithContext(() => run(ctx))) as typeof ctx.result;
|
|
90
|
+
|
|
91
|
+
if (ctx.result instanceof ReadableStream) {
|
|
92
|
+
ctx.result = wrapStream(ctx.result, { onDone: finalize }, ctx.request.signal);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
ctx.response = toResponse(ctx.result!, prepareResponseInit(ctx.request));
|
|
96
|
+
}
|
|
40
97
|
|
|
41
98
|
if (parsedConfig.hooks?.onResponse) {
|
|
42
99
|
const onResponse = await parsedConfig.hooks.onResponse(ctx as OnResponseHookContext);
|
|
@@ -45,30 +102,15 @@ export const winterCgHandler = (
|
|
|
45
102
|
ctx.response = onResponse;
|
|
46
103
|
}
|
|
47
104
|
}
|
|
105
|
+
|
|
106
|
+
// FUTURE: this can leak if onResponse removed wrapper from response.body
|
|
107
|
+
if (!(ctx.result instanceof ReadableStream)) {
|
|
108
|
+
finalize(ctx.response.status);
|
|
109
|
+
}
|
|
48
110
|
} catch (error) {
|
|
49
|
-
recordSpanError(error);
|
|
50
|
-
logger.error({
|
|
51
|
-
requestId: resolveRequestId(ctx.request),
|
|
52
|
-
err: error instanceof Error ? error : new Error(String(error)),
|
|
53
|
-
});
|
|
54
111
|
ctx.response = toOpenAIErrorResponse(error, prepareResponseInit(ctx.request));
|
|
112
|
+
finalize(ctx.response.status, error);
|
|
55
113
|
}
|
|
56
|
-
};
|
|
57
|
-
|
|
58
|
-
const handler = parsedConfig.telemetry?.enabled ? withOtel(core, parsedConfig) : core;
|
|
59
|
-
|
|
60
|
-
return async (request: Request, state?: Record<string, unknown>): Promise<Response> => {
|
|
61
|
-
const ctx: GatewayContext = {
|
|
62
|
-
request,
|
|
63
|
-
state: state ?? {},
|
|
64
|
-
providers: parsedConfig.providers,
|
|
65
|
-
models: parsedConfig.models,
|
|
66
|
-
};
|
|
67
|
-
|
|
68
|
-
const headers = prepareRequestHeaders(ctx.request);
|
|
69
|
-
if (headers) ctx.request = new Request(ctx.request, { headers });
|
|
70
|
-
|
|
71
|
-
await handler(ctx);
|
|
72
114
|
|
|
73
115
|
return ctx.response ?? new Response("Internal Server Error", { status: 500 });
|
|
74
116
|
};
|
|
@@ -118,7 +118,7 @@ class ModelMiddlewareMatcher {
|
|
|
118
118
|
if (--n === 0) break;
|
|
119
119
|
}
|
|
120
120
|
logger.warn(`[middleware] cache eviction`);
|
|
121
|
-
addSpanEvent("hebo.
|
|
121
|
+
addSpanEvent("hebo.middleware.cache.evicted");
|
|
122
122
|
}
|
|
123
123
|
|
|
124
124
|
this.cache.set(key, out);
|
|
@@ -24,7 +24,7 @@ const NOVA_MULTIMODAL_BASE = {
|
|
|
24
24
|
const NOVA_EMBEDDINGS_BASE = {
|
|
25
25
|
modalities: {
|
|
26
26
|
input: ["text", "image", "audio", "video", "pdf"] as const,
|
|
27
|
-
output: ["
|
|
27
|
+
output: ["embedding"] as const,
|
|
28
28
|
},
|
|
29
29
|
providers: ["bedrock"] as const satisfies readonly CanonicalProviderId[],
|
|
30
30
|
} satisfies DeepPartial<CatalogModel>;
|
|
@@ -26,7 +26,7 @@ const COMMAND_VISION_BASE = {
|
|
|
26
26
|
const EMBED_V3_BASE = {
|
|
27
27
|
modalities: {
|
|
28
28
|
input: ["text", "image"] as const,
|
|
29
|
-
output: ["
|
|
29
|
+
output: ["embedding"] as const,
|
|
30
30
|
},
|
|
31
31
|
providers: ["cohere"] as const satisfies readonly CanonicalProviderId[],
|
|
32
32
|
} satisfies DeepPartial<CatalogModel>;
|
|
@@ -34,7 +34,7 @@ const EMBED_V3_BASE = {
|
|
|
34
34
|
const EMBED_V4_BASE = {
|
|
35
35
|
modalities: {
|
|
36
36
|
input: ["text", "image", "pdf"] as const,
|
|
37
|
-
output: ["
|
|
37
|
+
output: ["embedding"] as const,
|
|
38
38
|
},
|
|
39
39
|
providers: ["cohere"] as const satisfies readonly CanonicalProviderId[],
|
|
40
40
|
} satisfies DeepPartial<CatalogModel>;
|
|
@@ -22,7 +22,7 @@ const GEMINI_BASE = {
|
|
|
22
22
|
const GEMINI_EMBEDDINGS_BASE = {
|
|
23
23
|
modalities: {
|
|
24
24
|
input: ["text"] as const,
|
|
25
|
-
output: ["
|
|
25
|
+
output: ["embedding"] as const,
|
|
26
26
|
},
|
|
27
27
|
providers: ["vertex"] as const satisfies readonly CanonicalProviderId[],
|
|
28
28
|
} satisfies DeepPartial<CatalogModel>;
|
|
@@ -52,7 +52,7 @@ const GPT_PRO_BASE = {
|
|
|
52
52
|
const EMBEDDINGS_BASE = {
|
|
53
53
|
modalities: {
|
|
54
54
|
input: ["text"] as const,
|
|
55
|
-
output: ["
|
|
55
|
+
output: ["embedding"] as const,
|
|
56
56
|
},
|
|
57
57
|
providers: ["openai", "azure"] as const satisfies readonly CanonicalProviderId[],
|
|
58
58
|
} satisfies DeepPartial<CatalogModel>;
|
package/src/models/types.ts
CHANGED
|
@@ -93,7 +93,7 @@ export type CatalogModel = {
|
|
|
93
93
|
knowledge?: string;
|
|
94
94
|
modalities?: {
|
|
95
95
|
input: readonly ("text" | "image" | "file" | "audio" | "video" | "pdf")[];
|
|
96
|
-
output: readonly ("text" | "image" | "audio" | "video" | "
|
|
96
|
+
output: readonly ("text" | "image" | "audio" | "video" | "embedding")[];
|
|
97
97
|
};
|
|
98
98
|
context?: number;
|
|
99
99
|
capabilities?: readonly (
|
|
@@ -6,7 +6,7 @@ import { presetFor, type DeepPartial } from "../../utils/preset";
|
|
|
6
6
|
const VOYAGE_BASE = {
|
|
7
7
|
modalities: {
|
|
8
8
|
input: ["text"] as const,
|
|
9
|
-
output: ["
|
|
9
|
+
output: ["embedding"] as const,
|
|
10
10
|
},
|
|
11
11
|
providers: ["voyage"] as const satisfies readonly CanonicalProviderId[],
|
|
12
12
|
} satisfies DeepPartial<CatalogModel>;
|
|
@@ -22,10 +22,10 @@ export const resolveProvider = (args: {
|
|
|
22
22
|
throw new GatewayError(`Model '${modelId}' not found in catalog`, 422, "MODEL_NOT_FOUND");
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
-
const modality = operation === "embeddings" ? "
|
|
25
|
+
const modality = operation === "embeddings" ? "embedding" : "text";
|
|
26
26
|
if (catalogModel.modalities && !catalogModel.modalities.output.includes(modality)) {
|
|
27
27
|
throw new GatewayError(
|
|
28
|
-
`Model '${modelId}' does not support '${
|
|
28
|
+
`Model '${modelId}' does not support '${modality}' output`,
|
|
29
29
|
422,
|
|
30
30
|
"MODEL_UNSUPPORTED_OPERATION",
|
|
31
31
|
);
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
const HEBO_BAGGAGE_PREFIX = "hebo.";

/**
 * Extracts Hebo-specific entries from the request's W3C `baggage` header.
 *
 * Only list members whose key starts with `hebo.` are returned, with that
 * prefix stripped from the attribute name. Member properties (everything from
 * the first `;` on) are dropped. Values are percent-decoded when possible;
 * if decoding throws, the raw value is kept.
 *
 * @param request - Incoming request. When omitted, or when it has no
 *   `baggage` header, an empty object is returned.
 * @returns Map of attribute name (without the `hebo.` prefix) to value.
 */
export const getBaggageAttributes = (request?: Request) => {
  const header = request?.headers.get("baggage");
  if (!header) return {};

  const attrs: Record<string, string> = {};

  for (const member of header.split(",")) {
    // Split on the FIRST "=" only. `split("=", 2)` would discard everything
    // after a second "=", truncating values such as base64 padding.
    const eq = member.indexOf("=");
    if (eq === -1) continue;

    // Optional whitespace is allowed around the key and the value.
    const key = member.slice(0, eq).trim();
    if (!key.startsWith(HEBO_BAGGAGE_PREFIX)) continue;

    // Keep only the value itself; properties after ";" are ignored.
    const [valuePart = ""] = member.slice(eq + 1).split(";", 1);
    const rawValue = valuePart.trim();
    if (!rawValue) continue;

    let value = rawValue;
    try {
      value = decodeURIComponent(rawValue);
    } catch {
      // Malformed percent-encoding: fall back to the raw value.
    }

    attrs[key.slice(HEBO_BAGGAGE_PREFIX.length)] = value;
  }

  return attrs;
};
|
package/src/telemetry/fetch.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import { SpanKind } from "@opentelemetry/api";
|
|
2
2
|
|
|
3
|
+
import type { TelemetrySignalLevel } from "../types";
|
|
4
|
+
|
|
3
5
|
import { withSpan } from "./span";
|
|
4
6
|
|
|
5
7
|
const ORIGINAL_FETCH_KEY = Symbol.for("@hebo/fetch/original-fetch");
|
|
@@ -9,15 +11,25 @@ type GlobalFetchState = typeof globalThis & {
|
|
|
9
11
|
};
|
|
10
12
|
|
|
11
13
|
const g = globalThis as GlobalFetchState;
|
|
14
|
+
let fetchTracingEnabled = false;
|
|
15
|
+
|
|
16
|
+
/**
 * Decides whether an outgoing fetch should be wrapped in a span.
 *
 * A call is traced only when its `user-agent` request header contains
 * `ai-sdk/provider-utils`. The header is looked up on any `HeadersInit`
 * shape (plain record, `Headers` instance, or tuple array) and
 * case-insensitively, so the decision does not depend on how the caller
 * happened to build its headers.
 *
 * @param init - The `RequestInit` passed to `fetch`, if any.
 * @returns `true` when the user agent carries the AI SDK marker.
 */
const shouldTraceFetch = (init?: RequestInit): boolean => {
  const headers = init?.headers;
  if (!headers) return false;

  let userAgent: unknown;

  // Fast path: plain record with an already lower-cased key.
  if (!(headers instanceof Headers) && !Array.isArray(headers)) {
    userAgent = headers["user-agent"];
  }

  // Fallback: normalise through Headers for case-insensitive lookup.
  if (typeof userAgent !== "string") {
    try {
      userAgent = new Headers(headers).get("user-agent");
    } catch {
      // Invalid header names/values: do not trace; the real fetch call will surface the error.
      return false;
    }
  }

  return typeof userAgent === "string" && userAgent.includes("ai-sdk/provider-utils");
};
|
|
12
19
|
|
|
13
|
-
const
|
|
20
|
+
const otelFetch = (input: RequestInfo | URL, init?: RequestInit) => {
|
|
14
21
|
const original = g[ORIGINAL_FETCH_KEY]!;
|
|
22
|
+
|
|
23
|
+
if (!fetchTracingEnabled) return original(input, init);
|
|
24
|
+
if (!shouldTraceFetch(init)) return original(input, init);
|
|
15
25
|
return withSpan("fetch", () => original(input, init), { kind: SpanKind.CLIENT });
|
|
16
26
|
};
|
|
17
27
|
|
|
18
|
-
export const initFetch = () => {
|
|
28
|
+
export const initFetch = (level?: TelemetrySignalLevel) => {
|
|
29
|
+
fetchTracingEnabled = level === "full";
|
|
30
|
+
if (!fetchTracingEnabled) return;
|
|
19
31
|
if (g[ORIGINAL_FETCH_KEY]) return;
|
|
20
32
|
|
|
21
33
|
g[ORIGINAL_FETCH_KEY] = globalThis.fetch.bind(globalThis);
|
|
22
|
-
globalThis.fetch =
|
|
34
|
+
globalThis.fetch = otelFetch as typeof fetch;
|
|
23
35
|
};
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { metrics, type Attributes } from "@opentelemetry/api";
|
|
2
|
+
|
|
3
|
+
import type { TelemetrySignalLevel } from "../types";
|
|
4
|
+
|
|
5
|
+
// Meter used by every instrument created in this module.
const meter = metrics.getMeter("@hebo-ai/gateway");

// Explicit bucket boundaries for the duration histogram (its unit is "s").
const REQUEST_DURATION_BUCKETS = [
  0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 240,
];

// Explicit bucket boundaries for the token usage histogram (its unit is "{token}").
const TOKEN_USAGE_BUCKETS = [
  1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
  524288, 1048576,
];

// Histogram of gateway request durations.
const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.duration", {
  description: "End-to-end gateway request duration",
  unit: "s",
  advice: { explicitBucketBoundaries: REQUEST_DURATION_BUCKETS },
});

// Histogram of token counts, one observation per token type.
const tokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
  description: "Token usage reported by upstream model responses",
  unit: "{token}",
  advice: { explicitBucketBoundaries: TOKEN_USAGE_BUCKETS },
});
|
|
27
|
+
|
|
28
|
+
// FUTURE: record unsuccessful calls
/**
 * Records one observation on the request duration histogram.
 *
 * @param duration - Elapsed time. It is divided by 1000 before recording and
 *   the histogram unit is "s", so this is presumably milliseconds — confirm
 *   against callers.
 * @param attrs - Attributes attached to the observation.
 * @param signalLevel - Nothing is recorded when this is missing or "off".
 */
export const recordRequestDuration = (
  duration: number,
  attrs: Attributes,
  signalLevel?: TelemetrySignalLevel,
) => {
  if (signalLevel && signalLevel !== "off") {
    const seconds = duration / 1000;
    requestDurationHistogram.record(seconds, attrs);
  }
};
|
|
38
|
+
|
|
39
|
+
// FUTURE: record unsuccessful calls
/**
 * Records token counts on the token usage histogram, one observation per
 * token type that is present as a number in `tokenAttrs`.
 *
 * @param tokenAttrs - Attributes holding the `gen_ai.usage.*_tokens` values to read.
 * @param metricAttrs - Attributes copied onto every observation; a
 *   `gen_ai.token.type` entry is added to each copy.
 * @param signalLevel - Observations are recorded only for "recommended" or "full".
 */
export const recordTokenUsage = (
  tokenAttrs: Attributes,
  metricAttrs: Attributes,
  signalLevel?: TelemetrySignalLevel,
) => {
  if (signalLevel !== "recommended" && signalLevel !== "full") return;

  // [source attribute key, value for "gen_ai.token.type"], in recording order.
  const sources: ReadonlyArray<readonly [string, string]> = [
    ["gen_ai.usage.input_tokens", "input"],
    ["gen_ai.usage.output_tokens", "output"],
    ["gen_ai.usage.total_tokens", "total"],
    ["gen_ai.usage.cached_tokens", "cached"],
    ["gen_ai.usage.reasoning_tokens", "reasoning"],
  ];

  for (const [attrKey, tokenType] of sources) {
    const count = tokenAttrs[attrKey];
    // Absent or non-numeric usage values are skipped.
    if (typeof count !== "number") continue;

    tokenUsageHistogram.record(count, { ...metricAttrs, "gen_ai.token.type": tokenType });
  }
};
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { type TelemetrySignalLevel } from "../types";
|
|
2
|
+
import { resolveRequestId } from "../utils/headers";
|
|
3
|
+
|
|
4
|
+
/**
 * Returns the value of header `k` wrapped in a single-element array, or
 * `undefined` when the header is not present.
 */
const headerArr = (h: Headers, k: string) => {
  if (!h.has(k)) return undefined;
  return [h.get(k)!];
};
|
|
5
|
+
|
|
6
|
+
/**
 * Builds HTTP request span attributes for the given signal level.
 *
 * - missing / "off": returns an empty object.
 * - any other level: method, full URL, and the path, scheme, address and port
 *   parsed from the URL (these are `undefined` when the URL cannot be parsed).
 * - every level except "required": also request id and user agent.
 * - "full": also the content-type and content-length request headers.
 *
 * @param request - Request to describe.
 * @param signalLevel - Controls how many attributes are produced.
 * @returns The attribute object for the span.
 */
export const getRequestAttributes = (request: Request, signalLevel?: TelemetrySignalLevel) => {
  if (!signalLevel || signalLevel === "off") return {};

  let parsed: URL | undefined;
  try {
    // FUTURE: reuse URL from lifecycle
    parsed = new URL(request.url);
  } catch {
    // Unparseable URL: URL-derived attributes stay undefined.
  }

  // An explicit port wins; otherwise default by scheme (443 for https, else 80).
  let port: number | undefined;
  if (parsed) {
    if (parsed.port) {
      port = Number(parsed.port);
    } else {
      port = parsed.protocol === "https:" ? 443 : 80;
    }
  }

  const attrs = {
    "http.request.method": request.method,
    "url.full": request.url,
    "url.path": parsed?.pathname,
    "url.scheme": parsed?.protocol.replace(":", ""),
    "server.address": parsed?.hostname,
    "server.port": port,
  };

  const includeIdentity = signalLevel !== "required";
  if (includeIdentity) {
    // FUTURE: does ElysiaJS and other frameworks attach request id?
    const requestId = resolveRequestId(request);
    const userAgent = request.headers.get("user-agent") ?? undefined;
    Object.assign(attrs, {
      "http.request.id": requestId,
      "user_agent.original": userAgent,
    });
  }

  if (signalLevel === "full") {
    // FUTURE: "url.query"
    // FUTURE: "client.address"
    const contentType = headerArr(request.headers, "content-type");
    const contentLength = headerArr(request.headers, "content-length");
    Object.assign(attrs, {
      "http.request.header.content-type": contentType,
      "http.request.header.content-length": contentLength,
    });
  }

  return attrs;
};
|
|
49
|
+
|
|
50
|
+
/**
 * Builds HTTP response span attributes for the given signal level.
 *
 * - missing / "off": returns an empty object.
 * - any other level: the response status code.
 * - "full": also the content-type and content-length response headers, each
 *   as a single-element string array, or `undefined` when the header is absent.
 *
 * @param response - Response to describe.
 * @param signalLevel - Controls how many attributes are produced.
 * @returns The attribute object for the span.
 */
export const getResponseAttributes = (response: Response, signalLevel?: TelemetrySignalLevel) => {
  if (!signalLevel || signalLevel === "off") return {};

  const attrs = {
    "http.response.status_code": response.status,
  };

  if (signalLevel === "full") {
    Object.assign(attrs, {
      // headerArr already returns `[value] | undefined`. Wrapping it again
      // produced `[[value]]` / `[undefined]`, which is not a valid attribute
      // value and differed from the request-side attributes.
      "http.response.header.content-type": headerArr(response.headers, "content-type"),
      "http.response.header.content-length": headerArr(response.headers, "content-length"),
    });
  }

  return attrs;
};
|
package/src/telemetry/span.ts
CHANGED
|
@@ -1,28 +1,13 @@
|
|
|
1
|
-
import type { Attributes,
|
|
1
|
+
import type { Attributes, SpanOptions, Tracer } from "@opentelemetry/api";
|
|
2
2
|
|
|
3
3
|
import { INVALID_SPAN_CONTEXT, SpanKind, SpanStatusCode, context, trace } from "@opentelemetry/api";
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
const mem = () => process?.memoryUsage?.();
|
|
7
|
-
|
|
8
|
-
const toError = (error: unknown) => (error instanceof Error ? error : new Error(String(error)));
|
|
5
|
+
import type { TelemetrySignalLevel } from "../types";
|
|
9
6
|
|
|
10
|
-
const
|
|
11
|
-
const attrs = getAttributes();
|
|
12
|
-
if (Object.keys(attrs).length === 0) return;
|
|
13
|
-
span.setAttributes(attrs);
|
|
14
|
-
};
|
|
15
|
-
|
|
16
|
-
const getMemoryAttributes = (): Attributes => {
|
|
17
|
-
const memory = mem();
|
|
18
|
-
if (!memory) return {};
|
|
7
|
+
const DEFAULT_TRACER_NAME = "@hebo-ai/gateway";
|
|
19
8
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
"process.memory.heap.used": memory.heapUsed,
|
|
23
|
-
"process.memory.heap.total": memory.heapTotal,
|
|
24
|
-
};
|
|
25
|
-
};
|
|
9
|
+
let spanTracer: Tracer | undefined;
|
|
10
|
+
let spanEventsEnabled = false;
|
|
26
11
|
|
|
27
12
|
const NOOP_SPAN = {
|
|
28
13
|
runWithContext: <T>(fn: () => Promise<T> | T) => fn(),
|
|
@@ -31,35 +16,38 @@ const NOOP_SPAN = {
|
|
|
31
16
|
isExisting: true,
|
|
32
17
|
};
|
|
33
18
|
|
|
34
|
-
export const
|
|
35
|
-
|
|
19
|
+
/**
 * Sets the module-level tracer used to start spans.
 *
 * @param tracer - Tracer to use. When omitted, a tracer named by
 *   `DEFAULT_TRACER_NAME` is obtained from the global trace API.
 */
export const setSpanTracer = (tracer?: Tracer) => {
  if (tracer == null) {
    spanTracer = trace.getTracer(DEFAULT_TRACER_NAME);
    return;
  }

  spanTracer = tracer;
};
|
|
22
|
+
|
|
23
|
+
/**
 * Turns span events on or off based on the signal level.
 *
 * @param level - Events are enabled for "recommended" and "full", and
 *   disabled for every other value, including `undefined`.
 */
export const setSpanEventsEnabled = (level?: TelemetrySignalLevel) => {
  switch (level) {
    case "recommended":
    case "full":
      spanEventsEnabled = true;
      break;
    default:
      spanEventsEnabled = false;
  }
};
|
|
26
|
+
|
|
27
|
+
export const startSpan = (name: string, options?: SpanOptions) => {
|
|
28
|
+
if (!spanTracer) {
|
|
29
|
+
return Object.assign(trace.wrapSpanContext(INVALID_SPAN_CONTEXT), NOOP_SPAN);
|
|
30
|
+
}
|
|
36
31
|
|
|
37
32
|
const parentContext = context.active();
|
|
38
33
|
const activeSpan = trace.getActiveSpan();
|
|
39
34
|
|
|
40
|
-
const span =
|
|
35
|
+
const span = spanTracer.startSpan(
|
|
41
36
|
name,
|
|
42
37
|
{ kind: activeSpan ? SpanKind.INTERNAL : SpanKind.SERVER, ...options },
|
|
43
38
|
parentContext,
|
|
44
39
|
);
|
|
45
40
|
|
|
46
|
-
if (!span.isRecording()) {
|
|
47
|
-
return Object.assign(trace.wrapSpanContext(INVALID_SPAN_CONTEXT), NOOP_SPAN);
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
maybeSetDynamicAttributes(span, getMemoryAttributes);
|
|
51
|
-
|
|
52
41
|
const runWithContext = <T>(fn: () => Promise<T> | T) =>
|
|
53
42
|
context.with(trace.setSpan(parentContext, span), fn);
|
|
54
43
|
|
|
55
44
|
const recordError = (error: unknown) => {
|
|
56
|
-
const err =
|
|
45
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
57
46
|
span.recordException(err);
|
|
58
47
|
span.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
|
|
59
48
|
};
|
|
60
49
|
|
|
61
50
|
const finish = () => {
|
|
62
|
-
maybeSetDynamicAttributes(span, getMemoryAttributes);
|
|
63
51
|
span.end();
|
|
64
52
|
};
|
|
65
53
|
|
|
@@ -71,6 +59,10 @@ export const withSpan = async <T>(
|
|
|
71
59
|
run: () => Promise<T> | T,
|
|
72
60
|
options?: SpanOptions,
|
|
73
61
|
): Promise<T> => {
|
|
62
|
+
if (!spanTracer) {
|
|
63
|
+
return await run();
|
|
64
|
+
}
|
|
65
|
+
|
|
74
66
|
const started = startSpan(name, options);
|
|
75
67
|
try {
|
|
76
68
|
return await started.runWithContext(run);
|
|
@@ -83,15 +75,11 @@ export const withSpan = async <T>(
|
|
|
83
75
|
};
|
|
84
76
|
|
|
85
77
|
export const addSpanEvent = (name: string, attributes?: Attributes) => {
|
|
86
|
-
|
|
87
|
-
trace.getActiveSpan()?.addEvent(name,
|
|
78
|
+
if (!spanEventsEnabled) return;
|
|
79
|
+
trace.getActiveSpan()?.addEvent(name, attributes);
|
|
88
80
|
};
|
|
89
81
|
|
|
90
|
-
export const
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
const err = toError(error);
|
|
95
|
-
span.recordException(err);
|
|
96
|
-
span.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
|
|
82
|
+
export const setSpanAttributes = (attributes?: Attributes) => {
|
|
83
|
+
if (!attributes) return;
|
|
84
|
+
trace.getActiveSpan()?.setAttributes(attributes);
|
|
97
85
|
};
|
package/src/telemetry/stream.ts
CHANGED
|
@@ -1,39 +1,33 @@
|
|
|
1
|
-
|
|
2
|
-
onComplete?: (status: number, stats: { bytes: number }) => void;
|
|
3
|
-
onError?: (error: unknown, status: number) => void;
|
|
4
|
-
};
|
|
1
|
+
/**
 * Returns `true` when the given stream chunk has a truthy `error` property.
 * `null`, `undefined` and values without that property yield `false`.
 */
const isErrorChunk = (v: unknown) => {
  const candidate = v as { error?: unknown } | null | undefined;
  return Boolean(candidate?.error);
};
|
|
5
2
|
|
|
6
|
-
export const
|
|
7
|
-
src: ReadableStream
|
|
8
|
-
hooks:
|
|
3
|
+
export const wrapStream = (
|
|
4
|
+
src: ReadableStream,
|
|
5
|
+
hooks: { onDone?: (status: number, reason: unknown) => void },
|
|
9
6
|
signal?: AbortSignal,
|
|
10
|
-
): ReadableStream
|
|
11
|
-
|
|
12
|
-
let done = false;
|
|
7
|
+
): ReadableStream => {
|
|
8
|
+
let finishOnce = false;
|
|
13
9
|
|
|
14
10
|
const finish = (status: number, reason?: unknown) => {
|
|
15
|
-
if (
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
if (!reason) reason = signal?.reason;
|
|
19
|
-
|
|
20
|
-
if (status >= 400) {
|
|
21
|
-
hooks.onError?.(reason, status);
|
|
22
|
-
}
|
|
11
|
+
if (finishOnce) return;
|
|
12
|
+
finishOnce = true;
|
|
23
13
|
|
|
24
|
-
hooks.
|
|
14
|
+
hooks.onDone?.(status, reason ?? signal?.reason);
|
|
25
15
|
};
|
|
26
16
|
|
|
27
|
-
return new ReadableStream
|
|
17
|
+
return new ReadableStream({
|
|
28
18
|
async start(controller) {
|
|
29
19
|
const reader = src.getReader();
|
|
30
20
|
|
|
21
|
+
const close = (status: number, reason?: unknown) => {
|
|
22
|
+
finish(status, reason);
|
|
23
|
+
reader.cancel(reason).catch(() => {});
|
|
24
|
+
controller.close();
|
|
25
|
+
};
|
|
26
|
+
|
|
31
27
|
try {
|
|
32
28
|
for (;;) {
|
|
33
29
|
if (signal?.aborted) {
|
|
34
|
-
|
|
35
|
-
reader.cancel(signal.reason).catch(() => {});
|
|
36
|
-
controller.close();
|
|
30
|
+
close(499, signal.reason);
|
|
37
31
|
return;
|
|
38
32
|
}
|
|
39
33
|
|
|
@@ -41,18 +35,20 @@ export const instrumentStream = (
|
|
|
41
35
|
const { value, done } = await reader.read();
|
|
42
36
|
if (done) break;
|
|
43
37
|
|
|
44
|
-
|
|
45
|
-
|
|
38
|
+
controller.enqueue(value);
|
|
39
|
+
|
|
40
|
+
if (isErrorChunk(value)) {
|
|
41
|
+
const status = value.error.type === "invalid_request_error" ? 422 : 502;
|
|
42
|
+
close(status, value.error.message);
|
|
43
|
+
return;
|
|
44
|
+
}
|
|
46
45
|
}
|
|
47
46
|
|
|
48
47
|
finish(200);
|
|
49
48
|
controller.close();
|
|
50
49
|
} catch (err) {
|
|
51
50
|
const status = signal?.aborted ? 499 : (err as any)?.name === "AbortError" ? 503 : 502;
|
|
52
|
-
|
|
53
|
-
finish(status, err);
|
|
54
|
-
reader.cancel(err).catch(() => {});
|
|
55
|
-
controller.close();
|
|
51
|
+
close(status, err);
|
|
56
52
|
} finally {
|
|
57
53
|
try {
|
|
58
54
|
reader.releaseLock();
|
|
@@ -60,7 +56,7 @@ export const instrumentStream = (
|
|
|
60
56
|
}
|
|
61
57
|
},
|
|
62
58
|
|
|
63
|
-
cancel(reason) {
|
|
59
|
+
cancel(reason?: unknown) {
|
|
64
60
|
finish(499, reason);
|
|
65
61
|
src.cancel(reason).catch(() => {});
|
|
66
62
|
},
|