@hebo-ai/gateway 0.4.0-beta.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. package/README.md +40 -5
  2. package/dist/config.js +21 -7
  3. package/dist/endpoints/chat-completions/converters.d.ts +3 -3
  4. package/dist/endpoints/chat-completions/converters.js +16 -8
  5. package/dist/endpoints/chat-completions/handler.js +34 -27
  6. package/dist/endpoints/chat-completions/otel.d.ts +6 -0
  7. package/dist/endpoints/chat-completions/otel.js +127 -0
  8. package/dist/endpoints/embeddings/handler.js +19 -10
  9. package/dist/endpoints/embeddings/otel.d.ts +6 -0
  10. package/dist/endpoints/embeddings/otel.js +35 -0
  11. package/dist/endpoints/models/handler.js +3 -4
  12. package/dist/errors/gateway.d.ts +1 -1
  13. package/dist/errors/gateway.js +3 -4
  14. package/dist/errors/openai.js +11 -12
  15. package/dist/errors/utils.d.ts +3 -4
  16. package/dist/errors/utils.js +6 -6
  17. package/dist/gateway.js +1 -1
  18. package/dist/lifecycle.js +71 -29
  19. package/dist/middleware/matcher.js +1 -1
  20. package/dist/models/amazon/presets.d.ts +37 -37
  21. package/dist/models/amazon/presets.js +1 -1
  22. package/dist/models/anthropic/presets.d.ts +56 -56
  23. package/dist/models/cohere/presets.d.ts +54 -54
  24. package/dist/models/cohere/presets.js +2 -2
  25. package/dist/models/google/presets.d.ts +31 -31
  26. package/dist/models/google/presets.js +1 -1
  27. package/dist/models/meta/presets.d.ts +42 -42
  28. package/dist/models/openai/presets.d.ts +96 -96
  29. package/dist/models/openai/presets.js +1 -1
  30. package/dist/models/types.d.ts +1 -1
  31. package/dist/models/voyage/presets.d.ts +92 -92
  32. package/dist/models/voyage/presets.js +1 -1
  33. package/dist/providers/registry.js +2 -2
  34. package/dist/telemetry/baggage.d.ts +1 -0
  35. package/dist/telemetry/baggage.js +24 -0
  36. package/dist/telemetry/fetch.d.ts +2 -1
  37. package/dist/telemetry/fetch.js +13 -3
  38. package/dist/telemetry/gen-ai.d.ts +5 -0
  39. package/dist/telemetry/gen-ai.js +60 -0
  40. package/dist/telemetry/http.d.ts +3 -0
  41. package/dist/telemetry/http.js +57 -0
  42. package/dist/telemetry/memory.d.ts +2 -0
  43. package/dist/telemetry/memory.js +27 -0
  44. package/dist/telemetry/span.d.ts +6 -3
  45. package/dist/telemetry/span.js +24 -36
  46. package/dist/telemetry/stream.d.ts +3 -7
  47. package/dist/telemetry/stream.js +26 -29
  48. package/dist/types.d.ts +16 -15
  49. package/dist/utils/headers.d.ts +1 -1
  50. package/dist/utils/headers.js +7 -9
  51. package/dist/utils/request.d.ts +0 -4
  52. package/dist/utils/request.js +0 -9
  53. package/dist/utils/response.js +1 -1
  54. package/package.json +5 -2
  55. package/src/config.ts +28 -7
  56. package/src/endpoints/chat-completions/converters.ts +18 -11
  57. package/src/endpoints/chat-completions/handler.ts +46 -28
  58. package/src/endpoints/chat-completions/otel.ts +161 -0
  59. package/src/endpoints/embeddings/handler.test.ts +2 -2
  60. package/src/endpoints/embeddings/handler.ts +28 -10
  61. package/src/endpoints/embeddings/otel.ts +56 -0
  62. package/src/endpoints/models/handler.ts +3 -5
  63. package/src/errors/gateway.ts +5 -5
  64. package/src/errors/openai.ts +25 -17
  65. package/src/errors/utils.ts +6 -7
  66. package/src/gateway.ts +1 -1
  67. package/src/lifecycle.ts +85 -32
  68. package/src/middleware/matcher.ts +1 -1
  69. package/src/models/amazon/presets.ts +1 -1
  70. package/src/models/cohere/presets.ts +2 -2
  71. package/src/models/google/presets.ts +1 -1
  72. package/src/models/openai/presets.ts +1 -1
  73. package/src/models/types.ts +1 -1
  74. package/src/models/voyage/presets.ts +1 -1
  75. package/src/providers/registry.ts +2 -2
  76. package/src/telemetry/baggage.ts +27 -0
  77. package/src/telemetry/fetch.ts +15 -3
  78. package/src/telemetry/gen-ai.ts +88 -0
  79. package/src/telemetry/http.ts +65 -0
  80. package/src/telemetry/memory.ts +36 -0
  81. package/src/telemetry/span.ts +28 -40
  82. package/src/telemetry/stream.ts +36 -40
  83. package/src/types.ts +18 -18
  84. package/src/utils/headers.ts +8 -19
  85. package/src/utils/request.ts +0 -11
  86. package/src/utils/response.ts +1 -1
  87. package/dist/telemetry/otel.d.ts +0 -2
  88. package/dist/telemetry/otel.js +0 -50
  89. package/dist/telemetry/utils.d.ts +0 -4
  90. package/dist/telemetry/utils.js +0 -223
  91. package/src/telemetry/otel.ts +0 -91
  92. package/src/telemetry/utils.ts +0 -273
@@ -2,7 +2,7 @@ import { presetFor } from "../../utils/preset";
2
2
  const VOYAGE_BASE = {
3
3
  modalities: {
4
4
  input: ["text"],
5
- output: ["embeddings"],
5
+ output: ["embedding"],
6
6
  },
7
7
  providers: ["voyage"],
8
8
  };
@@ -7,9 +7,9 @@ export const resolveProvider = (args) => {
7
7
  if (!catalogModel) {
8
8
  throw new GatewayError(`Model '${modelId}' not found in catalog`, 422, "MODEL_NOT_FOUND");
9
9
  }
10
- const modality = operation === "embeddings" ? "embeddings" : "text";
10
+ const modality = operation === "embeddings" ? "embedding" : "text";
11
11
  if (catalogModel.modalities && !catalogModel.modalities.output.includes(modality)) {
12
- throw new GatewayError(`Model '${modelId}' does not support '${operation}' output`, 422, "MODEL_UNSUPPORTED_OPERATION");
12
+ throw new GatewayError(`Model '${modelId}' does not support '${modality}' output`, 422, "MODEL_UNSUPPORTED_OPERATION");
13
13
  }
14
14
  // FUTURE: implement fallback logic [e.g. runtime config invalid]
15
15
  const resolvedProviderId = catalogModel.providers[0];
@@ -0,0 +1 @@
1
+ export declare const getBaggageAttributes: (request?: Request) => Record<string, string>;
@@ -0,0 +1,24 @@
1
+ const HEBO_BAGGAGE_PREFIX = "hebo.";
2
+ export const getBaggageAttributes = (request) => {
3
+ const h = request?.headers.get("baggage");
4
+ if (!h)
5
+ return {};
6
+ const attrs = {};
7
+ for (const part of h.split(",")) {
8
+ const [k, v] = part.trim().split("=", 2);
9
+ if (!k || !v)
10
+ continue;
11
+ const [rawValue] = v.split(";", 1);
12
+ if (!rawValue)
13
+ continue;
14
+ let value = rawValue;
15
+ try {
16
+ value = decodeURIComponent(rawValue);
17
+ }
18
+ catch { }
19
+ if (k.startsWith(HEBO_BAGGAGE_PREFIX)) {
20
+ attrs[k.slice(HEBO_BAGGAGE_PREFIX.length)] = value;
21
+ }
22
+ }
23
+ return attrs;
24
+ };
@@ -1 +1,2 @@
1
- export declare const initFetch: () => void;
1
+ import type { TelemetrySignalLevel } from "../types";
2
+ export declare const initFetch: (level?: TelemetrySignalLevel) => void;
@@ -2,13 +2,23 @@ import { SpanKind } from "@opentelemetry/api";
2
2
  import { withSpan } from "./span";
3
3
  const ORIGINAL_FETCH_KEY = Symbol.for("@hebo/fetch/original-fetch");
4
4
  const g = globalThis;
5
- const perfFetch = (input, init) => {
5
+ let fetchTracingEnabled = false;
6
+ const shouldTraceFetch = (init) => typeof init?.headers?.["user-agent"] === "string" &&
7
+ init.headers["user-agent"].indexOf("ai-sdk/provider-utils") !== -1;
8
+ const otelFetch = (input, init) => {
6
9
  const original = g[ORIGINAL_FETCH_KEY];
10
+ if (!fetchTracingEnabled)
11
+ return original(input, init);
12
+ if (!shouldTraceFetch(init))
13
+ return original(input, init);
7
14
  return withSpan("fetch", () => original(input, init), { kind: SpanKind.CLIENT });
8
15
  };
9
- export const initFetch = () => {
16
+ export const initFetch = (level) => {
17
+ fetchTracingEnabled = level === "full";
18
+ if (!fetchTracingEnabled)
19
+ return;
10
20
  if (g[ORIGINAL_FETCH_KEY])
11
21
  return;
12
22
  g[ORIGINAL_FETCH_KEY] = globalThis.fetch.bind(globalThis);
13
- globalThis.fetch = perfFetch;
23
+ globalThis.fetch = otelFetch;
14
24
  };
@@ -0,0 +1,5 @@
1
+ import { type Attributes } from "@opentelemetry/api";
2
+ import type { TelemetrySignalLevel } from "../types";
3
+ export declare const recordRequestDuration: (start: number, attrs: Attributes, signalLevel?: TelemetrySignalLevel) => void;
4
+ export declare const recordTimePerOutputToken: (start: number, tokenAttrs: Attributes, metricAttrs: Attributes, signalLevel?: TelemetrySignalLevel) => void;
5
+ export declare const recordTokenUsage: (tokenAttrs: Attributes, metricAttrs: Attributes, signalLevel?: TelemetrySignalLevel) => void;
@@ -0,0 +1,60 @@
1
+ import { metrics } from "@opentelemetry/api";
2
+ const meter = metrics.getMeter("@hebo/gateway");
3
+ const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.duration", {
4
+ description: "End-to-end gateway request duration",
5
+ unit: "s",
6
+ advice: {
7
+ explicitBucketBoundaries: [
8
+ 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 240,
9
+ ],
10
+ },
11
+ });
12
+ const timePerOutputTokenHistogram = meter.createHistogram("gen_ai.server.time_per_output_token", {
13
+ description: "End-to-end gateway request duration per output token",
14
+ unit: "s",
15
+ advice: {
16
+ explicitBucketBoundaries: [
17
+ 0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5,
18
+ ],
19
+ },
20
+ });
21
+ const tokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
22
+ description: "Token usage reported by upstream model responses",
23
+ unit: "{token}",
24
+ advice: {
25
+ explicitBucketBoundaries: [
26
+ 1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
27
+ 524288, 1048576,
28
+ ],
29
+ },
30
+ });
31
+ // FUTURE: record unsuccessful calls
32
+ export const recordRequestDuration = (start, attrs, signalLevel) => {
33
+ if (!signalLevel || signalLevel === "off")
34
+ return;
35
+ requestDurationHistogram.record((performance.now() - start) / 1000, attrs);
36
+ };
37
+ // FUTURE: record unsuccessful calls
38
+ export const recordTimePerOutputToken = (start, tokenAttrs, metricAttrs, signalLevel) => {
39
+ if (!signalLevel || (signalLevel !== "recommended" && signalLevel !== "full"))
40
+ return;
41
+ const outputTokens = tokenAttrs["gen_ai.usage.output_tokens"];
42
+ if (typeof outputTokens !== "number" || outputTokens <= 0)
43
+ return;
44
+ timePerOutputTokenHistogram.record((performance.now() - start) / 1000 / outputTokens, metricAttrs);
45
+ };
46
+ // FUTURE: record unsuccessful calls
47
+ export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
48
+ if (!signalLevel || (signalLevel !== "recommended" && signalLevel !== "full"))
49
+ return;
50
+ const record = (value, tokenType) => {
51
+ if (typeof value !== "number")
52
+ return;
53
+ tokenUsageHistogram.record(value, Object.assign({}, metricAttrs, { "gen_ai.token.type": tokenType }));
54
+ };
55
+ record(tokenAttrs["gen_ai.usage.input_tokens"], "input");
56
+ record(tokenAttrs["gen_ai.usage.output_tokens"], "output");
57
+ record(tokenAttrs["gen_ai.usage.total_tokens"], "total");
58
+ record(tokenAttrs["gen_ai.usage.cached_tokens"], "cached");
59
+ record(tokenAttrs["gen_ai.usage.reasoning_tokens"], "reasoning");
60
+ };
@@ -0,0 +1,3 @@
1
+ import { type TelemetrySignalLevel } from "../types";
2
+ export declare const getRequestAttributes: (request: Request, signalLevel?: TelemetrySignalLevel) => {};
3
+ export declare const getResponseAttributes: (response: Response, signalLevel?: TelemetrySignalLevel) => {};
@@ -0,0 +1,57 @@
1
+ import {} from "../types";
2
+ import { resolveRequestId } from "../utils/headers";
3
+ const headerArr = (h, k) => (h.has(k) ? [h.get(k)] : undefined);
4
+ export const getRequestAttributes = (request, signalLevel) => {
5
+ if (!signalLevel || signalLevel === "off")
6
+ return {};
7
+ let url;
8
+ try {
9
+ // FUTURE: reuse URL from lifecycle
10
+ url = new URL(request.url);
11
+ }
12
+ catch { }
13
+ const attrs = {
14
+ "http.request.method": request.method,
15
+ "url.full": request.url,
16
+ "url.path": url?.pathname,
17
+ "url.scheme": url?.protocol.replace(":", ""),
18
+ "server.address": url?.hostname,
19
+ "server.port": url
20
+ ? url.port
21
+ ? Number(url.port)
22
+ : url.protocol === "https:"
23
+ ? 443
24
+ : 80
25
+ : undefined,
26
+ };
27
+ if (signalLevel !== "required") {
28
+ Object.assign(attrs, {
29
+ // FUTURE: does ElysiaJS and other frameworks attach request id?
30
+ "http.request.id": resolveRequestId(request),
31
+ "user_agent.original": request.headers.get("user-agent") ?? undefined,
32
+ });
33
+ }
34
+ if (signalLevel === "full") {
35
+ Object.assign(attrs, {
36
+ // FUTURE: "url.query"
37
+ "http.request.header.content-type": headerArr(request.headers, "content-type"),
38
+ "http.request.header.content-length": headerArr(request.headers, "content-length"),
39
+ // FUTURE: "client.address"
40
+ });
41
+ }
42
+ return attrs;
43
+ };
44
+ export const getResponseAttributes = (response, signalLevel) => {
45
+ if (!signalLevel || signalLevel === "off")
46
+ return {};
47
+ const attrs = {
48
+ "http.response.status_code": response.status,
49
+ };
50
+ if (signalLevel === "full") {
51
+ Object.assign(attrs, {
52
+ "http.response.header.content-type": [headerArr(response.headers, "content-type")],
53
+ "http.response.header.content-length": [headerArr(response.headers, "content-length")],
54
+ });
55
+ }
56
+ return attrs;
57
+ };
@@ -0,0 +1,2 @@
1
+ import type { TelemetrySignalLevel } from "../types";
2
+ export declare const recordV8jsMemory: (level?: TelemetrySignalLevel) => void;
@@ -0,0 +1,27 @@
1
+ import { metrics } from "@opentelemetry/api";
2
+ const meter = metrics.getMeter("@hebo/gateway");
3
+ const defaultHeapSpaceAttrs = { "v8js.heap.space.name": "total" };
4
+ const heapUsedCounter = meter.createUpDownCounter("v8js.memory.heap.used", {
5
+ description: "Used bytes in the V8 heap",
6
+ unit: "By",
7
+ });
8
+ const heapSpacePhysicalSizeCounter = meter.createUpDownCounter("v8js.memory.heap.space.physical_size", {
9
+ description: "Physical bytes allocated for the V8 heap space",
10
+ unit: "By",
11
+ });
12
+ const isEnabled = (level) => level === "recommended" || level === "full";
13
+ export const recordV8jsMemory = (level) => {
14
+ if (!isEnabled(level))
15
+ return;
16
+ let usage;
17
+ try {
18
+ usage = globalThis.process?.memoryUsage?.();
19
+ }
20
+ catch {
21
+ return;
22
+ }
23
+ if (!usage)
24
+ return;
25
+ heapUsedCounter.add(usage.heapUsed, defaultHeapSpaceAttrs);
26
+ heapSpacePhysicalSizeCounter.add(usage.rss, defaultHeapSpaceAttrs);
27
+ };
@@ -1,5 +1,8 @@
1
- import type { Attributes, Span, SpanOptions, Tracer } from "@opentelemetry/api";
2
- export declare const startSpan: (name: string, options?: SpanOptions, customTracer?: Tracer) => Span & {
1
+ import type { Attributes, SpanOptions, Tracer } from "@opentelemetry/api";
2
+ import type { TelemetrySignalLevel } from "../types";
3
+ export declare const setSpanTracer: (tracer?: Tracer) => void;
4
+ export declare const setSpanEventsEnabled: (level?: TelemetrySignalLevel) => void;
5
+ export declare const startSpan: (name: string, options?: SpanOptions) => import("@opentelemetry/api").Span & {
3
6
  runWithContext: <T>(fn: () => Promise<T> | T) => T | Promise<T>;
4
7
  recordError: (_error: unknown) => void;
5
8
  finish: () => void;
@@ -7,4 +10,4 @@ export declare const startSpan: (name: string, options?: SpanOptions, customTrac
7
10
  };
8
11
  export declare const withSpan: <T>(name: string, run: () => Promise<T> | T, options?: SpanOptions) => Promise<T>;
9
12
  export declare const addSpanEvent: (name: string, attributes?: Attributes) => void;
10
- export declare const recordSpanError: (error: unknown) => void;
13
+ export declare const setSpanAttributes: (attributes?: Attributes) => void;
@@ -1,51 +1,41 @@
1
1
  import { INVALID_SPAN_CONTEXT, SpanKind, SpanStatusCode, context, trace } from "@opentelemetry/api";
2
- const DEFAULT_TRACER_NAME = "@hebo-ai/gateway";
3
- const mem = () => process?.memoryUsage?.();
4
- const toError = (error) => (error instanceof Error ? error : new Error(String(error)));
5
- const maybeSetDynamicAttributes = (span, getAttributes) => {
6
- const attrs = getAttributes();
7
- if (Object.keys(attrs).length === 0)
8
- return;
9
- span.setAttributes(attrs);
10
- };
11
- const getMemoryAttributes = () => {
12
- const memory = mem();
13
- if (!memory)
14
- return {};
15
- return {
16
- "process.memory.usage": memory.rss,
17
- "process.memory.heap.used": memory.heapUsed,
18
- "process.memory.heap.total": memory.heapTotal,
19
- };
20
- };
2
+ const DEFAULT_TRACER_NAME = "@hebo/gateway";
3
+ let spanTracer;
4
+ let spanEventsEnabled = false;
21
5
  const NOOP_SPAN = {
22
6
  runWithContext: (fn) => fn(),
23
7
  recordError: (_error) => { },
24
8
  finish: () => { },
25
9
  isExisting: true,
26
10
  };
27
- export const startSpan = (name, options, customTracer) => {
28
- const tracer = customTracer ?? trace.getTracer(DEFAULT_TRACER_NAME);
29
- const parentContext = context.active();
30
- const activeSpan = trace.getActiveSpan();
31
- const span = tracer.startSpan(name, { kind: activeSpan ? SpanKind.INTERNAL : SpanKind.SERVER, ...options }, parentContext);
32
- if (!span.isRecording()) {
11
+ export const setSpanTracer = (tracer) => {
12
+ spanTracer = tracer ?? trace.getTracer(DEFAULT_TRACER_NAME);
13
+ };
14
+ export const setSpanEventsEnabled = (level) => {
15
+ spanEventsEnabled = level === "recommended" || level === "full";
16
+ };
17
+ export const startSpan = (name, options) => {
18
+ if (!spanTracer) {
33
19
  return Object.assign(trace.wrapSpanContext(INVALID_SPAN_CONTEXT), NOOP_SPAN);
34
20
  }
35
- maybeSetDynamicAttributes(span, getMemoryAttributes);
21
+ const parentContext = context.active();
22
+ const activeSpan = trace.getActiveSpan();
23
+ const span = spanTracer.startSpan(name, { kind: activeSpan ? SpanKind.INTERNAL : SpanKind.SERVER, ...options }, parentContext);
36
24
  const runWithContext = (fn) => context.with(trace.setSpan(parentContext, span), fn);
37
25
  const recordError = (error) => {
38
- const err = toError(error);
26
+ const err = error instanceof Error ? error : new Error(String(error));
39
27
  span.recordException(err);
40
28
  span.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
41
29
  };
42
30
  const finish = () => {
43
- maybeSetDynamicAttributes(span, getMemoryAttributes);
44
31
  span.end();
45
32
  };
46
33
  return Object.assign(span, { runWithContext, recordError, finish, isExisting: !!activeSpan });
47
34
  };
48
35
  export const withSpan = async (name, run, options) => {
36
+ if (!spanTracer) {
37
+ return await run();
38
+ }
49
39
  const started = startSpan(name, options);
50
40
  try {
51
41
  return await started.runWithContext(run);
@@ -59,14 +49,12 @@ export const withSpan = async (name, run, options) => {
59
49
  }
60
50
  };
61
51
  export const addSpanEvent = (name, attributes) => {
62
- const allAttributes = Object.assign(attributes ?? {}, getMemoryAttributes());
63
- trace.getActiveSpan()?.addEvent(name, allAttributes);
52
+ if (!spanEventsEnabled)
53
+ return;
54
+ trace.getActiveSpan()?.addEvent(name, attributes);
64
55
  };
65
- export const recordSpanError = (error) => {
66
- const span = trace.getActiveSpan();
67
- if (!span)
56
+ export const setSpanAttributes = (attributes) => {
57
+ if (!attributes)
68
58
  return;
69
- const err = toError(error);
70
- span.recordException(err);
71
- span.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
59
+ trace.getActiveSpan()?.setAttributes(attributes);
72
60
  };
@@ -1,7 +1,3 @@
1
- export type InstrumentStreamHooks = {
2
- onComplete?: (status: number, stats: {
3
- bytes: number;
4
- }) => void;
5
- onError?: (error: unknown, status: number) => void;
6
- };
7
- export declare const instrumentStream: (src: ReadableStream<Uint8Array>, hooks: InstrumentStreamHooks, signal?: AbortSignal) => ReadableStream<Uint8Array>;
1
+ export declare const wrapStream: (src: ReadableStream, hooks: {
2
+ onDone?: (status: number, reason: unknown) => void;
3
+ }) => ReadableStream;
@@ -1,43 +1,37 @@
1
- export const instrumentStream = (src, hooks, signal) => {
2
- const stats = { bytes: 0 };
3
- let done = false;
4
- const finish = (status, reason) => {
5
- if (done)
6
- return;
7
- done = true;
8
- if (!reason)
9
- reason = signal?.reason;
10
- if (status >= 400) {
11
- hooks.onError?.(reason, status);
1
+ import { toOpenAIError } from "#/errors/openai";
2
+ const isErrorChunk = (v) => v instanceof Error || !!v?.error;
3
+ export const wrapStream = (src, hooks) => {
4
+ let finished = false;
5
+ const done = (reader, controller, status, reason) => {
6
+ if (!finished) {
7
+ finished = true;
8
+ hooks.onDone?.(status, reason);
12
9
  }
13
- hooks.onComplete?.(status, stats);
10
+ reader.cancel(reason).catch(() => { });
11
+ controller.close();
14
12
  };
15
13
  return new ReadableStream({
16
14
  async start(controller) {
17
15
  const reader = src.getReader();
18
16
  try {
19
17
  for (;;) {
20
- if (signal?.aborted) {
21
- finish(499, signal.reason);
22
- reader.cancel(signal.reason).catch(() => { });
23
- controller.close();
24
- return;
25
- }
26
18
  // eslint-disable-next-line no-await-in-loop
27
- const { value, done } = await reader.read();
28
- if (done)
19
+ const { value, done: eof } = await reader.read();
20
+ if (eof)
29
21
  break;
30
- stats.bytes += value.byteLength;
31
- controller.enqueue(value);
22
+ const out = isErrorChunk(value) ? toOpenAIError(value) : value;
23
+ controller.enqueue(out);
24
+ if (out !== value) {
25
+ const status = out.error?.type === "invalid_request_error" ? 422 : 502;
26
+ done(reader, controller, status, value);
27
+ return;
28
+ }
32
29
  }
33
- finish(200);
34
- controller.close();
30
+ done(reader, controller, 200);
35
31
  }
36
32
  catch (err) {
37
- const status = signal?.aborted ? 499 : err?.name === "AbortError" ? 503 : 502;
38
- finish(status, err);
39
- reader.cancel(err).catch(() => { });
40
- controller.close();
33
+ controller.enqueue(toOpenAIError(err));
34
+ done(reader, controller, 502, err);
41
35
  }
42
36
  finally {
43
37
  try {
@@ -47,7 +41,10 @@ export const instrumentStream = (src, hooks, signal) => {
47
41
  }
48
42
  },
49
43
  cancel(reason) {
50
- finish(499, reason);
44
+ if (!finished) {
45
+ finished = true;
46
+ hooks.onDone?.(499, reason);
47
+ }
51
48
  src.cancel(reason).catch(() => { });
52
49
  },
53
50
  });
package/dist/types.d.ts CHANGED
@@ -3,7 +3,6 @@ import type { Tracer } from "@opentelemetry/api";
3
3
  import type { ChatCompletions, ChatCompletionsBody, ChatCompletionsChunk } from "./endpoints/chat-completions/schema";
4
4
  import type { Embeddings, EmbeddingsBody } from "./endpoints/embeddings/schema";
5
5
  import type { Model, ModelList } from "./endpoints/models";
6
- import type { OpenAIError } from "./errors/openai";
7
6
  import type { Logger, LoggerConfig } from "./logger";
8
7
  import type { ModelCatalog, ModelId } from "./models/types";
9
8
  import type { ProviderId, ProviderRegistry } from "./providers/types";
@@ -67,15 +66,11 @@ export type GatewayContext = {
67
66
  /**
68
67
  * Result returned by the handler (pre-response).
69
68
  */
70
- result?: ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | Model | ModelList;
69
+ result?: ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | Model | ModelList;
71
70
  /**
72
71
  * Response object returned by the handler.
73
72
  */
74
73
  response?: Response;
75
- /**
76
- * Structured object result for streaming requests. Only available at the end of the stream.
77
- */
78
- streamResult?: ChatCompletions;
79
74
  };
80
75
  /**
81
76
  * Hook context: all fields readonly except `state`.
@@ -119,13 +114,14 @@ export type GatewayHooks = {
119
114
  * Runs after the endpoint handler.
120
115
  * @returns Result to replace, or undefined to keep original.
121
116
  */
122
- after?: (ctx: AfterHookContext) => void | ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | Promise<void | ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings>;
117
+ after?: (ctx: AfterHookContext) => void | ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | Promise<void | ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings>;
123
118
  /**
124
119
  * Runs after the lifecycle has produced the final Response.
125
120
  * @returns Replacement Response, or undefined to keep original.
126
121
  */
127
122
  onResponse?: (ctx: OnResponseHookContext) => void | Response | Promise<void | Response>;
128
123
  };
124
+ export type TelemetrySignalLevel = "off" | "required" | "recommended" | "full";
129
125
  /**
130
126
  * Main configuration object for the gateway.
131
127
  */
@@ -146,6 +142,10 @@ export type GatewayConfig = {
146
142
  * Optional lifecycle hooks for routing, auth, and response shaping.
147
143
  */
148
144
  hooks?: GatewayHooks;
145
+ /**
146
+ * Preferred logger configuration: custom logger or default logger settings.
147
+ */
148
+ logger?: Logger | LoggerConfig | null;
149
149
  /**
150
150
  * Optional AI SDK telemetry configuration.
151
151
  */
@@ -160,17 +160,18 @@ export type GatewayConfig = {
160
160
  */
161
161
  tracer?: Tracer;
162
162
  /**
163
- * Controls how many telemetry attributes are attached to spans.
164
- * - required: minimal safe baseline
163
+ * Telemetry signal levels by namespace.
164
+ * - off: disable the namespace
165
+ * - required: minimal baseline
165
166
  * - recommended: practical defaults
166
- * - full: include all available attributes
167
+ * - full: include all available details
167
168
  */
168
- attributes?: "required" | "recommended" | "full";
169
+ signals?: {
170
+ gen_ai?: TelemetrySignalLevel;
171
+ http?: TelemetrySignalLevel;
172
+ hebo?: TelemetrySignalLevel;
173
+ };
169
174
  };
170
- /**
171
- * Preferred logger configuration: custom logger or default logger settings.
172
- */
173
- logger?: Logger | LoggerConfig | null;
174
175
  };
175
176
  export declare const kParsed: unique symbol;
176
177
  export type GatewayConfigParsed = GatewayConfig & {
@@ -1,4 +1,4 @@
1
1
  export declare const REQUEST_ID_HEADER = "x-request-id";
2
- type HeaderSource = string | URL | Headers | Request | Response | RequestInit | ResponseInit | HeadersInit | undefined;
2
+ type HeaderSource = Request | ResponseInit | undefined;
3
3
  export declare const resolveRequestId: (source: HeaderSource) => string | undefined;
4
4
  export {};
@@ -1,18 +1,16 @@
1
1
  export const REQUEST_ID_HEADER = "x-request-id";
2
2
  export const resolveRequestId = (source) => {
3
- if (!source || typeof source === "string" || source instanceof URL)
3
+ if (!source)
4
4
  return undefined;
5
- if (source instanceof Request || source instanceof Response) {
5
+ if (source instanceof Request) {
6
6
  return source.headers.get(REQUEST_ID_HEADER) ?? undefined;
7
7
  }
8
- const headers = "headers" in source ? source.headers : source;
9
- if (!headers || typeof headers === "string")
8
+ const headers = source.headers;
9
+ if (!headers)
10
10
  return undefined;
11
- if (Object.getPrototypeOf(headers) === Object.prototype) {
12
- return headers[REQUEST_ID_HEADER] ?? undefined;
13
- }
14
- if (headers instanceof Headers)
11
+ if (headers instanceof Headers) {
15
12
  return headers.get(REQUEST_ID_HEADER) ?? undefined;
13
+ }
16
14
  if (Array.isArray(headers)) {
17
15
  for (const [key, value] of headers) {
18
16
  if (key.toLowerCase() === REQUEST_ID_HEADER)
@@ -20,5 +18,5 @@ export const resolveRequestId = (source) => {
20
18
  }
21
19
  return undefined;
22
20
  }
23
- return undefined;
21
+ return headers[REQUEST_ID_HEADER];
24
22
  };
@@ -1,8 +1,4 @@
1
1
  import type { RequestPatch } from "../types";
2
2
  export declare const prepareRequestHeaders: (request: Request) => Headers | undefined;
3
- export declare const prepareRequestBody: (request: Request) => Promise<{
4
- body: ArrayBuffer | undefined;
5
- requestBytes: number;
6
- }>;
7
3
  export declare const prepareForwardHeaders: (request: Request) => Record<string, string>;
8
4
  export declare const maybeApplyRequestPatch: (request: Request, patch: RequestPatch) => Request;
@@ -10,15 +10,6 @@ export const prepareRequestHeaders = (request) => {
10
10
  headers.set(REQUEST_ID_HEADER, requestId);
11
11
  return headers;
12
12
  };
13
- export const prepareRequestBody = async (request) => {
14
- let requestBytes = 0;
15
- let body;
16
- if (request.body) {
17
- body = await request.arrayBuffer();
18
- requestBytes = body.byteLength;
19
- }
20
- return { body, requestBytes };
21
- };
22
13
  export const prepareForwardHeaders = (request) => {
23
14
  const userAgent = request.headers.get("user-agent");
24
15
  const appendedUserAgent = userAgent
@@ -13,7 +13,7 @@ class JsonToSseTransformStream extends TransformStream {
13
13
  }
14
14
  }
15
15
  export const prepareResponseInit = (request) => ({
16
- headers: { [REQUEST_ID_HEADER]: resolveRequestId(request.headers) },
16
+ headers: { [REQUEST_ID_HEADER]: resolveRequestId(request) },
17
17
  });
18
18
  export const mergeResponseInit = (defaultHeaders, responseInit) => {
19
19
  const headers = new Headers(defaultHeaders);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hebo-ai/gateway",
3
- "version": "0.4.0-beta.3",
3
+ "version": "0.4.0",
4
4
  "description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
5
5
  "keywords": [
6
6
  "ai",
@@ -156,7 +156,6 @@
156
156
  "dependencies": {
157
157
  "@ai-sdk/provider": "^3.0.7",
158
158
  "@ai-sdk/provider-utils": "^4.0.13",
159
- "@opentelemetry/api": "^1.9.0",
160
159
  "ai": "^6.0.67",
161
160
  "serialize-error": "^13.0.1",
162
161
  "zod": "^4.3.6"
@@ -169,7 +168,10 @@
169
168
  "@ai-sdk/groq": "^3.0.19",
170
169
  "@ai-sdk/openai": "^3.0.23",
171
170
  "@aws-sdk/credential-providers": "^3.981.0",
171
+ "@langfuse/otel": "^4.6.1",
172
172
  "@mjackson/node-fetch-server": "^0.7.0",
173
+ "@opentelemetry/api": "^1.9.0",
174
+ "@opentelemetry/context-async-hooks": "^2.5.1",
173
175
  "@opentelemetry/sdk-trace-base": "^2.5.1",
174
176
  "@tanstack/react-router": "^1.157.16",
175
177
  "@tanstack/react-start": "^1.157.16",
@@ -195,6 +197,7 @@
195
197
  "@ai-sdk/google-vertex": "^4.0.37",
196
198
  "@ai-sdk/groq": "^3.0.19",
197
199
  "@ai-sdk/openai": "^3.0.23",
200
+ "@opentelemetry/api": "^1.9.0",
198
201
  "typescript": "^5.9.3",
199
202
  "voyage-ai-provider": "^3.0.0"
200
203
  },