@hebo-ai/gateway 0.4.0-beta.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/README.md +40 -5
  2. package/dist/config.js +21 -7
  3. package/dist/endpoints/chat-completions/converters.d.ts +3 -3
  4. package/dist/endpoints/chat-completions/converters.js +16 -8
  5. package/dist/endpoints/chat-completions/handler.js +34 -27
  6. package/dist/endpoints/chat-completions/otel.d.ts +6 -0
  7. package/dist/endpoints/chat-completions/otel.js +127 -0
  8. package/dist/endpoints/embeddings/handler.js +19 -10
  9. package/dist/endpoints/embeddings/otel.d.ts +6 -0
  10. package/dist/endpoints/embeddings/otel.js +35 -0
  11. package/dist/endpoints/models/handler.js +3 -4
  12. package/dist/errors/gateway.d.ts +1 -1
  13. package/dist/errors/gateway.js +3 -4
  14. package/dist/errors/openai.js +11 -12
  15. package/dist/errors/utils.d.ts +3 -4
  16. package/dist/errors/utils.js +6 -6
  17. package/dist/gateway.js +1 -1
  18. package/dist/lifecycle.js +71 -29
  19. package/dist/middleware/matcher.js +1 -1
  20. package/dist/models/amazon/presets.d.ts +37 -37
  21. package/dist/models/amazon/presets.js +1 -1
  22. package/dist/models/anthropic/presets.d.ts +56 -56
  23. package/dist/models/cohere/presets.d.ts +54 -54
  24. package/dist/models/cohere/presets.js +2 -2
  25. package/dist/models/google/presets.d.ts +31 -31
  26. package/dist/models/google/presets.js +1 -1
  27. package/dist/models/meta/presets.d.ts +42 -42
  28. package/dist/models/openai/presets.d.ts +96 -96
  29. package/dist/models/openai/presets.js +1 -1
  30. package/dist/models/types.d.ts +1 -1
  31. package/dist/models/voyage/presets.d.ts +92 -92
  32. package/dist/models/voyage/presets.js +1 -1
  33. package/dist/providers/registry.js +2 -2
  34. package/dist/telemetry/baggage.d.ts +1 -0
  35. package/dist/telemetry/baggage.js +24 -0
  36. package/dist/telemetry/fetch.d.ts +2 -1
  37. package/dist/telemetry/fetch.js +13 -3
  38. package/dist/telemetry/gen-ai.d.ts +5 -0
  39. package/dist/telemetry/gen-ai.js +60 -0
  40. package/dist/telemetry/http.d.ts +3 -0
  41. package/dist/telemetry/http.js +57 -0
  42. package/dist/telemetry/memory.d.ts +2 -0
  43. package/dist/telemetry/memory.js +27 -0
  44. package/dist/telemetry/span.d.ts +6 -3
  45. package/dist/telemetry/span.js +24 -36
  46. package/dist/telemetry/stream.d.ts +3 -7
  47. package/dist/telemetry/stream.js +26 -29
  48. package/dist/types.d.ts +16 -15
  49. package/dist/utils/headers.d.ts +1 -1
  50. package/dist/utils/headers.js +7 -9
  51. package/dist/utils/request.d.ts +0 -4
  52. package/dist/utils/request.js +0 -9
  53. package/dist/utils/response.js +1 -1
  54. package/package.json +5 -2
  55. package/src/config.ts +28 -7
  56. package/src/endpoints/chat-completions/converters.ts +18 -11
  57. package/src/endpoints/chat-completions/handler.ts +46 -28
  58. package/src/endpoints/chat-completions/otel.ts +161 -0
  59. package/src/endpoints/embeddings/handler.test.ts +2 -2
  60. package/src/endpoints/embeddings/handler.ts +28 -10
  61. package/src/endpoints/embeddings/otel.ts +56 -0
  62. package/src/endpoints/models/handler.ts +3 -5
  63. package/src/errors/gateway.ts +5 -5
  64. package/src/errors/openai.ts +25 -17
  65. package/src/errors/utils.ts +6 -7
  66. package/src/gateway.ts +1 -1
  67. package/src/lifecycle.ts +85 -32
  68. package/src/middleware/matcher.ts +1 -1
  69. package/src/models/amazon/presets.ts +1 -1
  70. package/src/models/cohere/presets.ts +2 -2
  71. package/src/models/google/presets.ts +1 -1
  72. package/src/models/openai/presets.ts +1 -1
  73. package/src/models/types.ts +1 -1
  74. package/src/models/voyage/presets.ts +1 -1
  75. package/src/providers/registry.ts +2 -2
  76. package/src/telemetry/baggage.ts +27 -0
  77. package/src/telemetry/fetch.ts +15 -3
  78. package/src/telemetry/gen-ai.ts +88 -0
  79. package/src/telemetry/http.ts +65 -0
  80. package/src/telemetry/memory.ts +36 -0
  81. package/src/telemetry/span.ts +28 -40
  82. package/src/telemetry/stream.ts +36 -40
  83. package/src/types.ts +18 -18
  84. package/src/utils/headers.ts +8 -19
  85. package/src/utils/request.ts +0 -11
  86. package/src/utils/response.ts +1 -1
  87. package/dist/telemetry/otel.d.ts +0 -2
  88. package/dist/telemetry/otel.js +0 -50
  89. package/dist/telemetry/utils.d.ts +0 -4
  90. package/dist/telemetry/utils.js +0 -223
  91. package/src/telemetry/otel.ts +0 -91
  92. package/src/telemetry/utils.ts +0 -273
@@ -22,28 +22,36 @@ export class OpenAIError {
22
22
  }
23
23
  }
24
24
 
25
+ const mapType = (status: number) => (status < 500 ? "invalid_request_error" : "server_error");
26
+
27
+ const maybeMaskMessage = (meta: ReturnType<typeof getErrorMeta>, requestId?: string) => {
28
+ // FUTURE: consider masking all upstream errors, also 4xx
29
+ if (!(isProduction() && meta.status >= 500)) {
30
+ return meta.message;
31
+ }
32
+ // FUTURE: always attach requestId to errors (masked and unmasked)
33
+ return `${STATUS_CODE(meta.status)} (${requestId ?? "see requestId in response headers"})`;
34
+ };
35
+
25
36
  export function toOpenAIError(error: unknown): OpenAIError {
26
37
  const meta = getErrorMeta(error);
27
- return new OpenAIError(meta.message, meta.type, meta.code);
38
+
39
+ return new OpenAIError(maybeMaskMessage(meta), mapType(meta.status), meta.code);
28
40
  }
29
41
 
30
42
  export function toOpenAIErrorResponse(error: unknown, responseInit?: ResponseInit) {
31
43
  const meta = getErrorMeta(error);
32
44
 
33
- const shouldMask = isProduction() && (meta.status >= 500 || meta.code.includes("UPSTREAM"));
34
-
35
- let message;
36
- if (shouldMask) {
37
- const requestId = resolveRequestId(responseInit);
38
- // FUTURE: always attach requestId to errors (masked and unmasked)
39
- message = `${STATUS_CODE(meta.status)} (${requestId})`;
40
- } else {
41
- message = meta.message;
42
- }
43
-
44
- return toResponse(new OpenAIError(message, meta.type, meta.code), {
45
- ...responseInit,
46
- status: meta.status,
47
- statusText: meta.code,
48
- });
45
+ return toResponse(
46
+ new OpenAIError(
47
+ maybeMaskMessage(meta, resolveRequestId(responseInit)),
48
+ mapType(meta.status),
49
+ meta.code,
50
+ ),
51
+ {
52
+ ...responseInit,
53
+ status: meta.status,
54
+ statusText: meta.code,
55
+ },
56
+ );
49
57
  }
@@ -11,6 +11,7 @@ export const STATUS_CODES = {
11
11
  409: "CONFLICT",
12
12
  422: "UNPROCESSABLE_ENTITY",
13
13
  429: "TOO_MANY_REQUESTS",
14
+ 499: "CLIENT_CLOSED_REQUEST",
14
15
  500: "INTERNAL_SERVER_ERROR",
15
16
  502: "BAD_GATEWAY",
16
17
  503: "SERVICE_UNAVAILABLE",
@@ -23,26 +24,24 @@ export const STATUS_CODE = (status: number) => {
23
24
  return status >= 400 && status < 500 ? STATUS_CODES[400] : STATUS_CODES[500];
24
25
  };
25
26
 
27
+ // FUTURE: always return a wrapped GatewayError?
26
28
  export function getErrorMeta(error: unknown) {
27
29
  const message = error instanceof Error ? error.message : String(error);
28
30
 
29
- let code: string;
30
31
  let status: number;
31
- let param = "";
32
+ let code: string;
32
33
 
33
34
  if (error instanceof GatewayError) {
34
- ({ code, status } = error);
35
+ ({ status, code } = error);
35
36
  } else {
36
37
  const normalized = normalizeAiSdkError(error);
37
38
  if (normalized) {
38
- ({ code, status } = normalized);
39
+ ({ status, code } = normalized);
39
40
  } else {
40
41
  status = 500;
41
42
  code = STATUS_CODE(status);
42
43
  }
43
44
  }
44
45
 
45
- const type = status < 500 ? "invalid_request_error" : "server_error";
46
-
47
- return { code, status, param, type, message };
46
+ return { status, code, message };
48
47
  }
package/src/gateway.ts CHANGED
@@ -30,7 +30,7 @@ export function gateway(config: GatewayConfig) {
30
30
  pathname = pathname.slice(basePath.length);
31
31
  }
32
32
 
33
- logger.debug(`[gateway] ${req.method} ${pathname}`);
33
+ logger.info(`[gateway] ${req.method} ${pathname}`);
34
34
  for (const [route, endpoint] of routeEntries) {
35
35
  if (pathname === route || pathname.startsWith(route + "/")) {
36
36
  return endpoint.handler(req, state);
package/src/lifecycle.ts CHANGED
@@ -6,10 +6,15 @@ import type {
6
6
  } from "./types";
7
7
 
8
8
  import { parseConfig } from "./config";
9
+ import { GatewayError } from "./errors/gateway";
9
10
  import { toOpenAIErrorResponse } from "./errors/openai";
10
11
  import { logger } from "./logger";
11
- import { withOtel } from "./telemetry/otel";
12
- import { addSpanEvent, recordSpanError } from "./telemetry/span";
12
+ import { getBaggageAttributes } from "./telemetry/baggage";
13
+ import { initFetch } from "./telemetry/fetch";
14
+ import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
15
+ import { recordV8jsMemory } from "./telemetry/memory";
16
+ import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
17
+ import { wrapStream } from "./telemetry/stream";
13
18
  import { resolveRequestId } from "./utils/headers";
14
19
  import { maybeApplyRequestPatch, prepareRequestHeaders } from "./utils/request";
15
20
  import { prepareResponseInit, toResponse } from "./utils/response";
@@ -20,23 +25,81 @@ export const winterCgHandler = (
20
25
  ) => {
21
26
  const parsedConfig = parseConfig(config);
22
27
 
23
- const core = async (ctx: GatewayContext): Promise<void> => {
28
+ if (parsedConfig.telemetry?.enabled) {
29
+ setSpanTracer(parsedConfig.telemetry?.tracer);
30
+ setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
31
+ initFetch(parsedConfig.telemetry?.signals?.hebo);
32
+ }
33
+
34
+ return async (request: Request, state?: Record<string, unknown>): Promise<Response> => {
35
+ const ctx: GatewayContext = {
36
+ request,
37
+ state: state ?? {},
38
+ providers: parsedConfig.providers,
39
+ models: parsedConfig.models,
40
+ };
41
+
42
+ const headers = prepareRequestHeaders(ctx.request);
43
+ if (headers) ctx.request = new Request(ctx.request, { headers });
44
+
45
+ const span = startSpan(ctx.request.url);
46
+ span.setAttributes(getBaggageAttributes(ctx.request));
47
+ if (!span.isExisting) {
48
+ span.setAttributes(getRequestAttributes(ctx.request, parsedConfig.telemetry?.signals?.http));
49
+ }
50
+
51
+ const finalize = (status: number, reason?: unknown) => {
52
+ if (ctx.operation) {
53
+ span.updateName(`${ctx.operation}${ctx.modelId ? ` ${ctx.modelId}` : ""}`);
54
+ }
55
+
56
+ if (!span.isExisting) {
57
+ // FUTURE add http.server.request.duration
58
+ span.setAttributes(
59
+ getResponseAttributes(ctx.response!, parsedConfig.telemetry?.signals?.http),
60
+ );
61
+ }
62
+
63
+ let realStatus = status;
64
+ if (ctx.request.signal.aborted) realStatus = 499;
65
+ else if (status === 200 && ctx.response?.status) realStatus = ctx.response.status;
66
+
67
+ if (realStatus !== 200) {
68
+ (realStatus >= 500 ? logger.error : logger.warn)({
69
+ requestId: resolveRequestId(ctx.request),
70
+ err: reason ?? ctx.request.signal.reason,
71
+ });
72
+
73
+ if (realStatus >= 500) span.recordError(reason);
74
+ }
75
+ span.setAttributes({ "http.response.status_code_effective": realStatus });
76
+
77
+ recordV8jsMemory(parsedConfig.telemetry?.signals?.hebo);
78
+
79
+ span.finish();
80
+ };
81
+
24
82
  try {
25
83
  if (parsedConfig.hooks?.onRequest) {
26
84
  const onRequest = await parsedConfig.hooks.onRequest(ctx as OnRequestHookContext);
27
85
  addSpanEvent("hebo.hooks.on_request.completed");
28
86
 
29
- if (onRequest) {
30
- if (onRequest instanceof Response) {
31
- ctx.response = onRequest;
32
- return;
33
- }
87
+ if (onRequest instanceof Response) {
88
+ ctx.response = onRequest;
89
+ } else if (onRequest) {
34
90
  ctx.request = maybeApplyRequestPatch(ctx.request, onRequest);
35
91
  }
36
92
  }
37
93
 
38
- ctx.result = (await run(ctx)) as typeof ctx.result;
39
- ctx.response = toResponse(ctx.result!, prepareResponseInit(ctx.request));
94
+ if (!ctx.response) {
95
+ ctx.result = (await span.runWithContext(() => run(ctx))) as typeof ctx.result;
96
+
97
+ if (ctx.result instanceof ReadableStream) {
98
+ ctx.result = wrapStream(ctx.result, { onDone: finalize });
99
+ }
100
+
101
+ ctx.response = toResponse(ctx.result!, prepareResponseInit(ctx.request));
102
+ }
40
103
 
41
104
  if (parsedConfig.hooks?.onResponse) {
42
105
  const onResponse = await parsedConfig.hooks.onResponse(ctx as OnResponseHookContext);
@@ -45,30 +108,20 @@ export const winterCgHandler = (
45
108
  ctx.response = onResponse;
46
109
  }
47
110
  }
111
+
112
+ // FUTURE: this can leak if onResponse removed wrapper from response.body
113
+ if (!(ctx.result instanceof ReadableStream)) {
114
+ finalize(ctx.response.status);
115
+ }
48
116
  } catch (error) {
49
- recordSpanError(error);
50
- logger.error({
51
- requestId: resolveRequestId(ctx.request),
52
- err: error instanceof Error ? error : new Error(String(error)),
53
- });
54
- ctx.response = toOpenAIErrorResponse(error, prepareResponseInit(ctx.request));
117
+ ctx.response = toOpenAIErrorResponse(
118
+ ctx.request.signal.aborted
119
+ ? new GatewayError(error ?? ctx.request.signal.reason, 499)
120
+ : error,
121
+ prepareResponseInit(ctx.request),
122
+ );
123
+ finalize(ctx.response.status, error);
55
124
  }
56
- };
57
-
58
- const handler = parsedConfig.telemetry?.enabled ? withOtel(core, parsedConfig) : core;
59
-
60
- return async (request: Request, state?: Record<string, unknown>): Promise<Response> => {
61
- const ctx: GatewayContext = {
62
- request,
63
- state: state ?? {},
64
- providers: parsedConfig.providers,
65
- models: parsedConfig.models,
66
- };
67
-
68
- const headers = prepareRequestHeaders(ctx.request);
69
- if (headers) ctx.request = new Request(ctx.request, { headers });
70
-
71
- await handler(ctx);
72
125
 
73
126
  return ctx.response ?? new Response("Internal Server Error", { status: 500 });
74
127
  };
@@ -118,7 +118,7 @@ class ModelMiddlewareMatcher {
118
118
  if (--n === 0) break;
119
119
  }
120
120
  logger.warn(`[middleware] cache eviction`);
121
- addSpanEvent("hebo.middelware.cache.evicted");
121
+ addSpanEvent("hebo.middleware.cache.evicted");
122
122
  }
123
123
 
124
124
  this.cache.set(key, out);
@@ -24,7 +24,7 @@ const NOVA_MULTIMODAL_BASE = {
24
24
  const NOVA_EMBEDDINGS_BASE = {
25
25
  modalities: {
26
26
  input: ["text", "image", "audio", "video", "pdf"] as const,
27
- output: ["embeddings"] as const,
27
+ output: ["embedding"] as const,
28
28
  },
29
29
  providers: ["bedrock"] as const satisfies readonly CanonicalProviderId[],
30
30
  } satisfies DeepPartial<CatalogModel>;
@@ -26,7 +26,7 @@ const COMMAND_VISION_BASE = {
26
26
  const EMBED_V3_BASE = {
27
27
  modalities: {
28
28
  input: ["text", "image"] as const,
29
- output: ["embeddings"] as const,
29
+ output: ["embedding"] as const,
30
30
  },
31
31
  providers: ["cohere"] as const satisfies readonly CanonicalProviderId[],
32
32
  } satisfies DeepPartial<CatalogModel>;
@@ -34,7 +34,7 @@ const EMBED_V3_BASE = {
34
34
  const EMBED_V4_BASE = {
35
35
  modalities: {
36
36
  input: ["text", "image", "pdf"] as const,
37
- output: ["embeddings"] as const,
37
+ output: ["embedding"] as const,
38
38
  },
39
39
  providers: ["cohere"] as const satisfies readonly CanonicalProviderId[],
40
40
  } satisfies DeepPartial<CatalogModel>;
@@ -22,7 +22,7 @@ const GEMINI_BASE = {
22
22
  const GEMINI_EMBEDDINGS_BASE = {
23
23
  modalities: {
24
24
  input: ["text"] as const,
25
- output: ["embeddings"] as const,
25
+ output: ["embedding"] as const,
26
26
  },
27
27
  providers: ["vertex"] as const satisfies readonly CanonicalProviderId[],
28
28
  } satisfies DeepPartial<CatalogModel>;
@@ -52,7 +52,7 @@ const GPT_PRO_BASE = {
52
52
  const EMBEDDINGS_BASE = {
53
53
  modalities: {
54
54
  input: ["text"] as const,
55
- output: ["embeddings"] as const,
55
+ output: ["embedding"] as const,
56
56
  },
57
57
  providers: ["openai", "azure"] as const satisfies readonly CanonicalProviderId[],
58
58
  } satisfies DeepPartial<CatalogModel>;
@@ -93,7 +93,7 @@ export type CatalogModel = {
93
93
  knowledge?: string;
94
94
  modalities?: {
95
95
  input: readonly ("text" | "image" | "file" | "audio" | "video" | "pdf")[];
96
- output: readonly ("text" | "image" | "audio" | "video" | "embeddings")[];
96
+ output: readonly ("text" | "image" | "audio" | "video" | "embedding")[];
97
97
  };
98
98
  context?: number;
99
99
  capabilities?: readonly (
@@ -6,7 +6,7 @@ import { presetFor, type DeepPartial } from "../../utils/preset";
6
6
  const VOYAGE_BASE = {
7
7
  modalities: {
8
8
  input: ["text"] as const,
9
- output: ["embeddings"] as const,
9
+ output: ["embedding"] as const,
10
10
  },
11
11
  providers: ["voyage"] as const satisfies readonly CanonicalProviderId[],
12
12
  } satisfies DeepPartial<CatalogModel>;
@@ -22,10 +22,10 @@ export const resolveProvider = (args: {
22
22
  throw new GatewayError(`Model '${modelId}' not found in catalog`, 422, "MODEL_NOT_FOUND");
23
23
  }
24
24
 
25
- const modality = operation === "embeddings" ? "embeddings" : "text";
25
+ const modality = operation === "embeddings" ? "embedding" : "text";
26
26
  if (catalogModel.modalities && !catalogModel.modalities.output.includes(modality)) {
27
27
  throw new GatewayError(
28
- `Model '${modelId}' does not support '${operation}' output`,
28
+ `Model '${modelId}' does not support '${modality}' output`,
29
29
  422,
30
30
  "MODEL_UNSUPPORTED_OPERATION",
31
31
  );
@@ -0,0 +1,27 @@
1
+ const HEBO_BAGGAGE_PREFIX = "hebo.";
2
+
3
+ export const getBaggageAttributes = (request?: Request) => {
4
+ const h = request?.headers.get("baggage");
5
+ if (!h) return {};
6
+
7
+ const attrs: Record<string, string> = {};
8
+
9
+ for (const part of h.split(",")) {
10
+ const [k, v] = part.trim().split("=", 2);
11
+ if (!k || !v) continue;
12
+
13
+ const [rawValue] = v.split(";", 1);
14
+ if (!rawValue) continue;
15
+
16
+ let value = rawValue;
17
+ try {
18
+ value = decodeURIComponent(rawValue);
19
+ } catch {}
20
+
21
+ if (k.startsWith(HEBO_BAGGAGE_PREFIX)) {
22
+ attrs[k.slice(HEBO_BAGGAGE_PREFIX.length)] = value;
23
+ }
24
+ }
25
+
26
+ return attrs;
27
+ };
@@ -1,5 +1,7 @@
1
1
  import { SpanKind } from "@opentelemetry/api";
2
2
 
3
+ import type { TelemetrySignalLevel } from "../types";
4
+
3
5
  import { withSpan } from "./span";
4
6
 
5
7
  const ORIGINAL_FETCH_KEY = Symbol.for("@hebo/fetch/original-fetch");
@@ -9,15 +11,25 @@ type GlobalFetchState = typeof globalThis & {
9
11
  };
10
12
 
11
13
  const g = globalThis as GlobalFetchState;
14
+ let fetchTracingEnabled = false;
15
+
16
+ const shouldTraceFetch = (init?: RequestInit): boolean =>
17
+ typeof (init?.headers as any)?.["user-agent"] === "string" &&
18
+ (init!.headers as any)["user-agent"].indexOf("ai-sdk/provider-utils") !== -1;
12
19
 
13
- const perfFetch = (input: RequestInfo | URL, init?: RequestInit) => {
20
+ const otelFetch = (input: RequestInfo | URL, init?: RequestInit) => {
14
21
  const original = g[ORIGINAL_FETCH_KEY]!;
22
+
23
+ if (!fetchTracingEnabled) return original(input, init);
24
+ if (!shouldTraceFetch(init)) return original(input, init);
15
25
  return withSpan("fetch", () => original(input, init), { kind: SpanKind.CLIENT });
16
26
  };
17
27
 
18
- export const initFetch = () => {
28
+ export const initFetch = (level?: TelemetrySignalLevel) => {
29
+ fetchTracingEnabled = level === "full";
30
+ if (!fetchTracingEnabled) return;
19
31
  if (g[ORIGINAL_FETCH_KEY]) return;
20
32
 
21
33
  g[ORIGINAL_FETCH_KEY] = globalThis.fetch.bind(globalThis);
22
- globalThis.fetch = perfFetch as typeof fetch;
34
+ globalThis.fetch = otelFetch as typeof fetch;
23
35
  };
@@ -0,0 +1,88 @@
1
+ import { metrics, type Attributes } from "@opentelemetry/api";
2
+
3
+ import type { TelemetrySignalLevel } from "../types";
4
+
5
+ const meter = metrics.getMeter("@hebo/gateway");
6
+
7
+ const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.duration", {
8
+ description: "End-to-end gateway request duration",
9
+ unit: "s",
10
+ advice: {
11
+ explicitBucketBoundaries: [
12
+ 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 240,
13
+ ],
14
+ },
15
+ });
16
+
17
+ const timePerOutputTokenHistogram = meter.createHistogram("gen_ai.server.time_per_output_token", {
18
+ description: "End-to-end gateway request duration per output token",
19
+ unit: "s",
20
+ advice: {
21
+ explicitBucketBoundaries: [
22
+ 0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5,
23
+ ],
24
+ },
25
+ });
26
+
27
+ const tokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
28
+ description: "Token usage reported by upstream model responses",
29
+ unit: "{token}",
30
+ advice: {
31
+ explicitBucketBoundaries: [
32
+ 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
33
+ 524288, 1048576,
34
+ ],
35
+ },
36
+ });
37
+
38
+ // FUTURE: record unsuccessful calls
39
+ export const recordRequestDuration = (
40
+ start: number,
41
+ attrs: Attributes,
42
+ signalLevel?: TelemetrySignalLevel,
43
+ ) => {
44
+ if (!signalLevel || signalLevel === "off") return;
45
+
46
+ requestDurationHistogram.record((performance.now() - start) / 1000, attrs);
47
+ };
48
+
49
+ // FUTURE: record unsuccessful calls
50
+ export const recordTimePerOutputToken = (
51
+ start: number,
52
+ tokenAttrs: Attributes,
53
+ metricAttrs: Attributes,
54
+ signalLevel?: TelemetrySignalLevel,
55
+ ) => {
56
+ if (!signalLevel || (signalLevel !== "recommended" && signalLevel !== "full")) return;
57
+
58
+ const outputTokens = tokenAttrs["gen_ai.usage.output_tokens"];
59
+ if (typeof outputTokens !== "number" || outputTokens <= 0) return;
60
+
61
+ timePerOutputTokenHistogram.record(
62
+ (performance.now() - start) / 1000 / outputTokens,
63
+ metricAttrs,
64
+ );
65
+ };
66
+
67
+ // FUTURE: record unsuccessful calls
68
+ export const recordTokenUsage = (
69
+ tokenAttrs: Attributes,
70
+ metricAttrs: Attributes,
71
+ signalLevel?: TelemetrySignalLevel,
72
+ ) => {
73
+ if (!signalLevel || (signalLevel !== "recommended" && signalLevel !== "full")) return;
74
+
75
+ const record = (value: unknown, tokenType: string) => {
76
+ if (typeof value !== "number") return;
77
+ tokenUsageHistogram.record(
78
+ value,
79
+ Object.assign({}, metricAttrs, { "gen_ai.token.type": tokenType }),
80
+ );
81
+ };
82
+
83
+ record(tokenAttrs["gen_ai.usage.input_tokens"], "input");
84
+ record(tokenAttrs["gen_ai.usage.output_tokens"], "output");
85
+ record(tokenAttrs["gen_ai.usage.total_tokens"], "total");
86
+ record(tokenAttrs["gen_ai.usage.cached_tokens"], "cached");
87
+ record(tokenAttrs["gen_ai.usage.reasoning_tokens"], "reasoning");
88
+ };
@@ -0,0 +1,65 @@
1
+ import { type TelemetrySignalLevel } from "../types";
2
+ import { resolveRequestId } from "../utils/headers";
3
+
4
+ const headerArr = (h: Headers, k: string) => (h.has(k) ? [h.get(k)!] : undefined);
5
+
6
+ export const getRequestAttributes = (request: Request, signalLevel?: TelemetrySignalLevel) => {
7
+ if (!signalLevel || signalLevel === "off") return {};
8
+
9
+ let url;
10
+ try {
11
+ // FUTURE: reuse URL from lifecycle
12
+ url = new URL(request.url);
13
+ } catch {}
14
+
15
+ const attrs = {
16
+ "http.request.method": request.method,
17
+ "url.full": request.url,
18
+ "url.path": url?.pathname,
19
+ "url.scheme": url?.protocol.replace(":", ""),
20
+ "server.address": url?.hostname,
21
+ "server.port": url
22
+ ? url.port
23
+ ? Number(url.port)
24
+ : url.protocol === "https:"
25
+ ? 443
26
+ : 80
27
+ : undefined,
28
+ };
29
+
30
+ if (signalLevel !== "required") {
31
+ Object.assign(attrs, {
32
+ // FUTURE: does ElysiaJS and other frameworks attach request id?
33
+ "http.request.id": resolveRequestId(request),
34
+ "user_agent.original": request.headers.get("user-agent") ?? undefined,
35
+ });
36
+ }
37
+
38
+ if (signalLevel === "full") {
39
+ Object.assign(attrs, {
40
+ // FUTURE: "url.query"
41
+ "http.request.header.content-type": headerArr(request.headers, "content-type"),
42
+ "http.request.header.content-length": headerArr(request.headers, "content-length"),
43
+ // FUTURE: "client.address"
44
+ });
45
+ }
46
+
47
+ return attrs;
48
+ };
49
+
50
+ export const getResponseAttributes = (response: Response, signalLevel?: TelemetrySignalLevel) => {
51
+ if (!signalLevel || signalLevel === "off") return {};
52
+
53
+ const attrs = {
54
+ "http.response.status_code": response.status,
55
+ };
56
+
57
+ if (signalLevel === "full") {
58
+ Object.assign(attrs, {
59
+ "http.response.header.content-type": [headerArr(response.headers, "content-type")],
60
+ "http.response.header.content-length": [headerArr(response.headers, "content-length")],
61
+ });
62
+ }
63
+
64
+ return attrs;
65
+ };
@@ -0,0 +1,36 @@
1
+ import { metrics } from "@opentelemetry/api";
2
+
3
+ import type { TelemetrySignalLevel } from "../types";
4
+
5
+ const meter = metrics.getMeter("@hebo/gateway");
6
+ const defaultHeapSpaceAttrs = { "v8js.heap.space.name": "total" } as const;
7
+
8
+ const heapUsedCounter = meter.createUpDownCounter("v8js.memory.heap.used", {
9
+ description: "Used bytes in the V8 heap",
10
+ unit: "By",
11
+ });
12
+
13
+ const heapSpacePhysicalSizeCounter = meter.createUpDownCounter(
14
+ "v8js.memory.heap.space.physical_size",
15
+ {
16
+ description: "Physical bytes allocated for the V8 heap space",
17
+ unit: "By",
18
+ },
19
+ );
20
+
21
+ const isEnabled = (level?: TelemetrySignalLevel) => level === "recommended" || level === "full";
22
+
23
+ export const recordV8jsMemory = (level?: TelemetrySignalLevel) => {
24
+ if (!isEnabled(level)) return;
25
+
26
+ let usage;
27
+ try {
28
+ usage = globalThis.process?.memoryUsage?.();
29
+ } catch {
30
+ return;
31
+ }
32
+ if (!usage) return;
33
+
34
+ heapUsedCounter.add(usage.heapUsed, defaultHeapSpaceAttrs);
35
+ heapSpacePhysicalSizeCounter.add(usage.rss, defaultHeapSpaceAttrs);
36
+ };