@hebo-ai/gateway 0.4.0-beta.4 → 0.4.1

Files changed (42)
  1. package/README.md +34 -7
  2. package/dist/endpoints/chat-completions/converters.d.ts +3 -3
  3. package/dist/endpoints/chat-completions/converters.js +15 -7
  4. package/dist/endpoints/chat-completions/handler.js +9 -9
  5. package/dist/endpoints/chat-completions/otel.js +10 -4
  6. package/dist/endpoints/embeddings/handler.js +5 -4
  7. package/dist/errors/gateway.d.ts +1 -1
  8. package/dist/errors/gateway.js +3 -3
  9. package/dist/errors/openai.js +2 -1
  10. package/dist/errors/utils.d.ts +2 -1
  11. package/dist/errors/utils.js +1 -0
  12. package/dist/lifecycle.js +14 -6
  13. package/dist/models/anthropic/presets.d.ts +463 -0
  14. package/dist/models/anthropic/presets.js +10 -2
  15. package/dist/models/types.d.ts +1 -1
  16. package/dist/models/types.js +1 -0
  17. package/dist/providers/bedrock/canonical.js +1 -0
  18. package/dist/telemetry/gen-ai.d.ts +2 -1
  19. package/dist/telemetry/gen-ai.js +21 -3
  20. package/dist/telemetry/memory.d.ts +2 -0
  21. package/dist/telemetry/memory.js +27 -0
  22. package/dist/telemetry/span.js +1 -1
  23. package/dist/telemetry/stream.d.ts +1 -1
  24. package/dist/telemetry/stream.js +25 -28
  25. package/dist/types.d.ts +2 -3
  26. package/package.json +2 -1
  27. package/src/endpoints/chat-completions/converters.ts +17 -10
  28. package/src/endpoints/chat-completions/handler.ts +13 -9
  29. package/src/endpoints/chat-completions/otel.ts +11 -4
  30. package/src/endpoints/embeddings/handler.ts +9 -4
  31. package/src/errors/gateway.ts +5 -4
  32. package/src/errors/openai.ts +2 -1
  33. package/src/errors/utils.ts +1 -0
  34. package/src/lifecycle.ts +17 -6
  35. package/src/models/anthropic/presets.ts +14 -2
  36. package/src/models/types.ts +1 -0
  37. package/src/providers/bedrock/canonical.ts +1 -0
  38. package/src/telemetry/gen-ai.ts +31 -3
  39. package/src/telemetry/memory.ts +36 -0
  40. package/src/telemetry/span.ts +1 -1
  41. package/src/telemetry/stream.ts +31 -31
  42. package/src/types.ts +3 -6
package/README.md CHANGED
@@ -19,6 +19,7 @@ Learn more in our blog post: [Yet Another AI Gateway?](https://hebo.ai/blog/2601
 - 🗂️ Model catalog with extensible metadata capabilities.
 - 🪝 Hook system to customize routing, auth, rate limits, and shape responses.
 - 🧰 Low-level OpenAI-compatible schema, converters, and middleware helpers.
+- 👁️ Observability via OTel GenAI semantic conventions (Langfuse-compatible).
 
 ## 📦 Installation
 
@@ -271,7 +272,7 @@ const gw = gateway({
 
 ### Hooks
 
-Hooks allow you to plug-into the lifecycle of the gateway and enrich it with additional functionality, like your actual routing logic. All hooks are available as async and non-async.
+Hooks allow you to plug into the lifecycle of the gateway and enrich it with additional functionality, like your actual routing logic. All hooks are available as async and non-async.
 
 ```ts
 const gw = gateway({
@@ -314,10 +315,10 @@ const gw = gateway({
    * @param ctx.modelId Incoming model ID.
    * @returns Canonical model ID or undefined to keep original.
    */
-  resolveModelId?: (ctx: {
+  resolveModelId: async (ctx: {
     body: ChatCompletionsBody | EmbeddingsBody;
     modelId: ModelId;
-  }) => ModelId | void | Promise<ModelId | void> {
+  }): Promise<ModelId | void> => {
     // Example Use Cases:
     // - Resolve modelAlias to modelId
     return undefined;
@@ -327,7 +328,7 @@ const gw = gateway({
    * @param ctx.providers ProviderRegistry from config.
    * @param ctx.models ModelCatalog from config.
    * @param ctx.body The parsed body object with all call parameters.
-   * @param ctx.modelId Resolved model ID.
+   * @param ctx.resolvedModelId Resolved model ID.
    * @param ctx.operation Operation type ("chat" | "embeddings").
    * @returns ProviderV3 to override, or undefined to use default.
    */
@@ -335,7 +336,7 @@ const gw = gateway({
     providers: ProviderRegistry;
     models: ModelCatalog;
     body: ChatCompletionsBody | EmbeddingsBody;
-    modelId: ModelId;
+    resolvedModelId: ModelId;
     operation: "chat" | "embeddings";
   }): Promise<ProviderV3 | void> => {
     // Example Use Cases:
@@ -349,8 +350,8 @@ const gw = gateway({
    * @returns Modified result, or undefined to keep original.
    */
   after: async (ctx: {
-    result: ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings
-  }): Promise<ChatCompletions | ReadableStream<ChatCompletionsChunk | OpenAIError> | Embeddings | void> => {
+    result: ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings;
+  }): Promise<ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | void> => {
     // Example Use Cases:
     // - Transform result
     // - Result logging
@@ -641,6 +642,32 @@ https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/
 
 For observability integration that is not otel compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.
 
+#### Langfuse
+
+Hebo telemetry spans are OpenTelemetry-compatible, so you can send them to Langfuse via `@langfuse/otel`.
+
+```ts
+import { gateway } from "@hebo-ai/gateway";
+import { LangfuseSpanProcessor } from "@langfuse/otel";
+import { context } from "@opentelemetry/api";
+import { AsyncLocalStorageContextManager } from "@opentelemetry/context-async-hooks";
+import { BasicTracerProvider } from "@opentelemetry/sdk-trace-base";
+
+context.setGlobalContextManager(new AsyncLocalStorageContextManager().enable());
+
+const gw = gateway({
+  // ...
+  telemetry: {
+    enabled: true,
+    tracer: new BasicTracerProvider({
+      spanProcessors: [new LangfuseSpanProcessor()],
+    }).getTracer("hebo"),
+  },
+});
+```
+
+Langfuse credentials are read from environment variables by the Langfuse OTel SDK (`LANGFUSE_PUBLIC_KEY`, `LANGFUSE_SECRET_KEY`, `LANGFUSE_BASE_URL`).
+
 ### Passing Framework State to Hooks
 
 You can pass per-request info from your framework into the gateway via the second `state` argument on the handler, then read it in hooks through `ctx.state`.
package/dist/endpoints/chat-completions/converters.d.ts CHANGED
@@ -25,10 +25,10 @@ export declare const convertToToolSet: (tools: ChatCompletionsTool[] | undefined
 export declare const convertToToolChoice: (toolChoice: ChatCompletionsToolChoice | undefined) => ToolChoice<ToolSet> | undefined;
 export declare function toChatCompletions(result: GenerateTextResult<ToolSet, Output.Output>, model: string): ChatCompletions;
 export declare function toChatCompletionsResponse(result: GenerateTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
-export declare function toChatCompletionsStream(result: StreamTextResult<ToolSet, Output.Output>, model: string): ReadableStream<ChatCompletionsChunk | OpenAIError>;
+export declare function toChatCompletionsStream<E extends boolean = false>(result: StreamTextResult<ToolSet, Output.Output>, model: string, wrapErrors?: E): ReadableStream<ChatCompletionsChunk | (E extends true ? OpenAIError : Error)>;
 export declare function toChatCompletionsStreamResponse(result: StreamTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
-export declare class ChatCompletionsStream extends TransformStream<TextStreamPart<ToolSet>, ChatCompletionsChunk | OpenAIError> {
-    constructor(model: string);
+export declare class ChatCompletionsStream<E extends boolean = false> extends TransformStream<TextStreamPart<ToolSet>, ChatCompletionsChunk | (E extends true ? OpenAIError : Error)> {
+    constructor(model: string, wrapErrors?: E);
 }
 export declare const toChatCompletionsAssistantMessage: (result: GenerateTextResult<ToolSet, Output.Output>) => ChatCompletionsAssistantMessage;
 export declare function toReasoningDetail(reasoning: ReasoningOutput, id: string, index: number): ChatCompletionsReasoningDetail;
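The new `E extends boolean = false` generic ties the runtime `wrapErrors` flag to the stream's element type: callers that omit the flag get `Error` in the union, callers that pass `true` get `OpenAIError`. A minimal self-contained sketch of the pattern (the stub types below are placeholders, not the package's real definitions):

```ts
// Placeholder shapes standing in for the package's real types.
type ChatCompletionsChunk = { id: string; choices: unknown[] };
type OpenAIError = { error: { message: string; type: string } };

// Same conditional-type trick as the declarations above: the boolean
// literal type inferred for E selects which error representation the
// stream's element union carries.
declare function toStream<E extends boolean = false>(
  wrapErrors?: E,
): ReadableStream<ChatCompletionsChunk | (E extends true ? OpenAIError : Error)>;

const internal = toStream(); // ReadableStream<ChatCompletionsChunk | Error>
const wire = toStream(true); // ReadableStream<ChatCompletionsChunk | OpenAIError>
```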
package/dist/endpoints/chat-completions/converters.js CHANGED
@@ -275,14 +275,14 @@ export function toChatCompletions(result, model) {
 export function toChatCompletionsResponse(result, model, responseInit) {
     return toResponse(toChatCompletions(result, model), responseInit);
 }
-export function toChatCompletionsStream(result, model) {
-    return result.fullStream.pipeThrough(new ChatCompletionsStream(model));
+export function toChatCompletionsStream(result, model, wrapErrors) {
+    return result.fullStream.pipeThrough(new ChatCompletionsStream(model, wrapErrors));
 }
 export function toChatCompletionsStreamResponse(result, model, responseInit) {
-    return toResponse(toChatCompletionsStream(result, model), responseInit);
+    return toResponse(toChatCompletionsStream(result, model, true), responseInit);
 }
 export class ChatCompletionsStream extends TransformStream {
-    constructor(model) {
+    constructor(model, wrapErrors) {
         const streamId = `chatcmpl-${crypto.randomUUID()}`;
         const creationTime = Math.floor(Date.now() / 1000);
         let toolCallIndexCounter = 0;
@@ -348,9 +348,17 @@ export class ChatCompletionsStream extends TransformStream {
                     break;
                 }
                 case "error": {
-                    const error = part.error;
-                    controller.enqueue(toOpenAIError(error));
-                    controller.terminate();
+                    let err;
+                    if (wrapErrors) {
+                        err = toOpenAIError(part.error);
+                    }
+                    else if (part.error instanceof Error) {
+                        err = part.error;
+                    }
+                    else {
+                        err = new Error(String(part.error));
+                    }
+                    controller.enqueue(err);
                 }
             }
         },
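Behavioral note on the hunks above: with `wrapErrors` unset, failures are now enqueued in-band as plain `Error` values (and the stream is no longer terminated at the first error), while `toChatCompletionsStreamResponse` passes `true` to keep the wire format's `OpenAIError` payloads. A hedged sketch of what an in-process consumer, such as an `after` hook, might do with such a stream (the chunk type is a stand-in):

```ts
// Stand-in chunk shape; the real ChatCompletionsChunk carries more fields.
type Chunk = { id: string };

// Drain a ReadableStream<Chunk | Error> produced with wrapErrors unset.
async function drain(stream: ReadableStream<Chunk | Error>): Promise<void> {
  const reader = stream.getReader();
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    if (value instanceof Error) {
      // In-band failure: log it, rethrow, or map it before forwarding.
      console.error("stream error:", value.message);
      continue;
    }
    console.log("chunk:", value.id);
  }
}
```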
package/dist/endpoints/chat-completions/handler.js CHANGED
@@ -5,7 +5,7 @@ import { winterCgHandler } from "../../lifecycle";
 import { logger } from "../../logger";
 import { modelMiddlewareMatcher } from "../../middleware/matcher";
 import { resolveProvider } from "../../providers/registry";
-import { recordRequestDuration, recordTokenUsage } from "../../telemetry/gen-ai";
+import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
 import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
 import { resolveRequestId } from "../../utils/headers";
 import { prepareForwardHeaders } from "../../utils/request";
@@ -33,8 +33,8 @@ export const chatCompletions = (config) => {
     addSpanEvent("hebo.request.deserialized");
     const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
     if (!parsed.success) {
-        // FUTURE: add body shape to error message
-        throw new GatewayError(z.prettifyError(parsed.error), 400);
+        // FUTURE: consider adding body shape to metadata
+        throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
     }
     ctx.body = parsed.data;
     addSpanEvent("hebo.request.parsed");
@@ -84,13 +84,12 @@ export const chatCompletions = (config) => {
         const result = streamText({
             model: languageModelWithMiddleware,
             headers: prepareForwardHeaders(ctx.request),
-            // No abort signal here, otherwise we can't detect upstream from client cancellations
-            // abortSignal: ctx.request.signal,
+            abortSignal: ctx.request.signal,
             timeout: {
                 totalMs: 5 * 60 * 1000,
             },
             onAbort: () => {
-                throw new DOMException("Upstream failed", "AbortError");
+                throw new DOMException("The operation was aborted.", "AbortError");
             },
             onError: () => { },
             onFinish: (res) => {
@@ -100,7 +99,8 @@ export const chatCompletions = (config) => {
                 const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
                 setSpanAttributes(genAiResponseAttrs);
                 recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
-                recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
+                recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
+                recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
             },
             experimental_include: {
                 requestBody: false,
@@ -119,7 +119,6 @@ export const chatCompletions = (config) => {
         const result = await generateText({
             model: languageModelWithMiddleware,
             headers: prepareForwardHeaders(ctx.request),
-            // FUTURE: currently can't tell whether upstream or downstream abort
             abortSignal: ctx.request.signal,
             timeout: 5 * 60 * 1000,
             experimental_include: {
@@ -140,7 +139,8 @@ export const chatCompletions = (config) => {
         ctx.result = (await hooks.after(ctx)) ?? ctx.result;
         addSpanEvent("hebo.hooks.after.completed");
     }
-    recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
+    recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
+    recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
     return ctx.result;
 };
 return { handler: winterCgHandler(handler, config) };
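Both metric call sites now pass the raw `start` timestamp instead of a precomputed duration, so the telemetry module can derive durations itself. A hedged sketch of the arithmetic `recordTimePerOutputToken` presumably performs; the real implementation lives in `dist/telemetry/gen-ai.js` and may differ:

```ts
// Assumed helper, not the package's actual code: derive milliseconds per
// generated token from the request start time and the response attributes.
function timePerOutputTokenMs(
  start: number, // performance.now() captured when the request began
  responseAttrs: { "gen_ai.usage.output_tokens"?: number },
): number | undefined {
  const outputTokens = responseAttrs["gen_ai.usage.output_tokens"];
  if (!outputTokens) return undefined; // no usage reported: record nothing
  return (performance.now() - start) / outputTokens;
}
```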
package/dist/endpoints/chat-completions/otel.js CHANGED
@@ -44,6 +44,10 @@ const toMessageParts = (message) => {
         }
         return parts;
     }
+    // FUTURE: remove once Langfuse supports gen_ai.system_instructions
+    if (message.role === "system") {
+        return [toTextPart(message.content)];
+    }
     return [];
 };
 export const getChatGeneralAttributes = (ctx, signalLevel) => {
@@ -81,11 +85,13 @@ export const getChatRequestAttributes = (inputs, signalLevel) => {
     }
     if (signalLevel === "full") {
         Object.assign(attrs, {
-            "gen_ai.system_instructions": inputs.messages
-                .filter((m) => m.role === "system")
-                .map((m) => JSON.stringify({ parts: [toTextPart(m.content)] })),
+            // FUTURE: move system instructions from messages to here
+            // blocker: https://github.com/langfuse/langfuse/issues/11607
+            // "gen_ai.system_instructions": inputs.messages
+            //     .filter((m) => m.role === "system")
+            //     .map((m) => JSON.stringify(toTextPart(m.content))),
             "gen_ai.input.messages": inputs.messages
-                .filter((m) => m.role !== "system")
+                //.filter((m) => m.role !== "system")
                 .map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
             "gen_ai.tool.definitions": JSON.stringify(inputs.tools),
         });
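Net effect of the two hunks above: until langfuse/langfuse#11607 is resolved, system prompts travel inside `gen_ai.input.messages` rather than the spec-preferred `gen_ai.system_instructions`. An illustrative sketch of the resulting attribute value (the text-part shape is an assumption based on the code above):

```ts
// Assumed text-part shape; the package's toTextPart may differ slightly.
const toTextPart = (content: string) => ({ type: "text", content });

const messages = [
  { role: "system", content: "You are terse." },
  { role: "user", content: "Hi" },
];

// Each gen_ai.input.messages entry is a JSON string, system turns included:
const inputMessages = messages.map((m) =>
  JSON.stringify({ role: m.role, parts: [toTextPart(m.content)] }),
);
// inputMessages[0] === '{"role":"system","parts":[{"type":"text","content":"You are terse."}]}'
```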
package/dist/endpoints/embeddings/handler.js CHANGED
@@ -5,7 +5,7 @@ import { winterCgHandler } from "../../lifecycle";
 import { logger } from "../../logger";
 import { modelMiddlewareMatcher } from "../../middleware/matcher";
 import { resolveProvider } from "../../providers/registry";
-import { recordRequestDuration, recordTokenUsage } from "../../telemetry/gen-ai";
+import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
 import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
 import { resolveRequestId } from "../../utils/headers";
 import { prepareForwardHeaders } from "../../utils/request";
@@ -33,8 +33,8 @@ export const embeddings = (config) => {
     addSpanEvent("hebo.request.deserialized");
     const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
     if (!parsed.success) {
-        // FUTURE: add body shape to error message
-        throw new GatewayError(z.prettifyError(parsed.error), 400);
+        // FUTURE: consider adding body shape to metadata
+        throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
     }
     ctx.body = parsed.data;
     addSpanEvent("hebo.request.parsed");
@@ -95,7 +95,8 @@ export const embeddings = (config) => {
         ctx.result = (await hooks.after(ctx)) ?? ctx.result;
         addSpanEvent("hebo.hooks.after.completed");
     }
-    recordRequestDuration(performance.now() - start, genAiGeneralAttrs, genAiSignalLevel);
+    recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
+    recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
     return ctx.result;
 };
 return { handler: winterCgHandler(handler, config) };
package/dist/errors/gateway.d.ts CHANGED
@@ -1,5 +1,5 @@
 export declare class GatewayError extends Error {
     readonly status: number;
     readonly code: string;
-    constructor(error: string | Error, status: number, code?: string, cause?: unknown);
+    constructor(error: unknown, status: number, code?: string, cause?: unknown);
 }
package/dist/errors/gateway.js CHANGED
@@ -3,10 +3,10 @@ export class GatewayError extends Error {
     status;
     code;
     constructor(error, status, code, cause) {
-        const msg = typeof error === "string" ? error : error.message;
-        super(msg);
+        const isError = error instanceof Error;
+        super(isError ? error.message : String(error));
+        this.cause = cause ?? (isError ? error : undefined);
         this.status = status;
         this.code = code ?? STATUS_CODE(status);
-        this.cause = cause ?? (typeof error === "string" ? undefined : error);
     }
 }
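This widening (mirrored in the `.d.ts` hunk above) is what lets the handlers pass a `ZodError` straight through, and it makes an `Error` argument double as the default `cause`. A condensed sketch of the new contract, simplified in that the `code` parameter is omitted:

```ts
// Simplified re-implementation for illustration; not the package's code.
class GatewayErrorSketch extends Error {
  readonly status: number;
  constructor(error: unknown, status: number, cause?: unknown) {
    const isError = error instanceof Error;
    super(isError ? error.message : String(error)); // stringify non-Error inputs
    this.cause = cause ?? (isError ? error : undefined); // Error inputs become the cause
    this.status = status;
  }
}

new GatewayErrorSketch("bad input", 400); // message "bad input", no cause
new GatewayErrorSketch(new Error("boom"), 502); // message "boom", cause: the original Error
```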
package/dist/errors/openai.js CHANGED
@@ -19,7 +19,8 @@ export class OpenAIError {
 }
 const mapType = (status) => (status < 500 ? "invalid_request_error" : "server_error");
 const maybeMaskMessage = (meta, requestId) => {
-    if (!(isProduction() && (meta.status >= 500 || meta.code.includes("UPSTREAM")))) {
+    // FUTURE: consider masking all upstream errors, also 4xx
+    if (!(isProduction() && meta.status >= 500)) {
         return meta.message;
     }
     // FUTURE: always attach requestId to errors (masked and unmasked)
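With the `meta.code.includes("UPSTREAM")` clause dropped, production masking now keys on status alone, so upstream 4xx messages pass through to clients. A quick hedged illustration (the masked placeholder text is an assumption, not the package's actual wording):

```ts
// Sketch of the new masking rule, not the package's exported function.
const maybeMask = (meta: { status: number; message: string }, isProduction: boolean) =>
  isProduction && meta.status >= 500 ? "Internal server error" : meta.message;

maybeMask({ status: 400, message: "unknown model id" }, true); // passes through unmasked
maybeMask({ status: 502, message: "upstream stack trace" }, true); // masked
```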
package/dist/errors/utils.d.ts CHANGED
@@ -8,12 +8,13 @@ export declare const STATUS_CODES: {
     readonly 409: "CONFLICT";
     readonly 422: "UNPROCESSABLE_ENTITY";
     readonly 429: "TOO_MANY_REQUESTS";
+    readonly 499: "CLIENT_CLOSED_REQUEST";
     readonly 500: "INTERNAL_SERVER_ERROR";
     readonly 502: "BAD_GATEWAY";
     readonly 503: "SERVICE_UNAVAILABLE";
     readonly 504: "GATEWAY_TIMEOUT";
 };
-export declare const STATUS_CODE: (status: number) => "BAD_REQUEST" | "UNAUTHORIZED" | "PAYMENT_REQUIRED" | "FORBIDDEN" | "NOT_FOUND" | "METHOD_NOT_ALLOWED" | "CONFLICT" | "UNPROCESSABLE_ENTITY" | "TOO_MANY_REQUESTS" | "INTERNAL_SERVER_ERROR" | "BAD_GATEWAY" | "SERVICE_UNAVAILABLE" | "GATEWAY_TIMEOUT";
+export declare const STATUS_CODE: (status: number) => "BAD_REQUEST" | "UNAUTHORIZED" | "PAYMENT_REQUIRED" | "FORBIDDEN" | "NOT_FOUND" | "METHOD_NOT_ALLOWED" | "CONFLICT" | "UNPROCESSABLE_ENTITY" | "TOO_MANY_REQUESTS" | "CLIENT_CLOSED_REQUEST" | "INTERNAL_SERVER_ERROR" | "BAD_GATEWAY" | "SERVICE_UNAVAILABLE" | "GATEWAY_TIMEOUT";
 export declare function getErrorMeta(error: unknown): {
     status: number;
     code: string;
package/dist/errors/utils.js CHANGED
@@ -10,6 +10,7 @@ export const STATUS_CODES = {
     409: "CONFLICT",
     422: "UNPROCESSABLE_ENTITY",
     429: "TOO_MANY_REQUESTS",
+    499: "CLIENT_CLOSED_REQUEST",
     500: "INTERNAL_SERVER_ERROR",
     502: "BAD_GATEWAY",
     503: "SERVICE_UNAVAILABLE",
package/dist/lifecycle.js CHANGED
@@ -1,9 +1,11 @@
 import { parseConfig } from "./config";
+import { GatewayError } from "./errors/gateway";
 import { toOpenAIErrorResponse } from "./errors/openai";
 import { logger } from "./logger";
 import { getBaggageAttributes } from "./telemetry/baggage";
 import { initFetch } from "./telemetry/fetch";
 import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
+import { recordV8jsMemory } from "./telemetry/memory";
 import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
 import { wrapStream } from "./telemetry/stream";
 import { resolveRequestId } from "./utils/headers";
@@ -11,7 +13,7 @@ import { maybeApplyRequestPatch, prepareRequestHeaders } from "./utils/request";
 import { prepareResponseInit, toResponse } from "./utils/response";
 export const winterCgHandler = (run, config) => {
     const parsedConfig = parseConfig(config);
-    if (parsedConfig.telemetry.enabled) {
+    if (parsedConfig.telemetry?.enabled) {
         setSpanTracer(parsedConfig.telemetry?.tracer);
         setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
         initFetch(parsedConfig.telemetry?.signals?.hebo);
@@ -39,17 +41,21 @@ export const winterCgHandler = (run, config) => {
             // FUTURE add http.server.request.duration
             span.setAttributes(getResponseAttributes(ctx.response, parsedConfig.telemetry?.signals?.http));
         }
-        const realStatus = status === 200 ? (ctx.response?.status ?? status) : status;
+        let realStatus = status;
+        if (ctx.request.signal.aborted)
+            realStatus = 499;
+        else if (status === 200 && ctx.response?.status)
+            realStatus = ctx.response.status;
         if (realStatus !== 200) {
-            // FUTURE: in-stream errors are redacted in prod
            (realStatus >= 500 ? logger.error : logger.warn)({
                 requestId: resolveRequestId(ctx.request),
-                err: reason,
+                err: reason ?? ctx.request.signal.reason,
             });
             if (realStatus >= 500)
                 span.recordError(reason);
         }
         span.setAttributes({ "http.response.status_code_effective": realStatus });
+        recordV8jsMemory(parsedConfig.telemetry?.signals?.hebo);
         span.finish();
     };
     try {
@@ -66,7 +72,7 @@ export const winterCgHandler = (run, config) => {
         if (!ctx.response) {
             ctx.result = (await span.runWithContext(() => run(ctx)));
             if (ctx.result instanceof ReadableStream) {
-                ctx.result = wrapStream(ctx.result, { onDone: finalize }, ctx.request.signal);
+                ctx.result = wrapStream(ctx.result, { onDone: finalize });
             }
             ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.request));
         }
@@ -83,7 +89,9 @@ export const winterCgHandler = (run, config) => {
         }
     }
     catch (error) {
-        ctx.response = toOpenAIErrorResponse(error, prepareResponseInit(ctx.request));
+        ctx.response = toOpenAIErrorResponse(ctx.request.signal.aborted
+            ? new GatewayError(error ?? ctx.request.signal.reason, 499)
+            : error, prepareResponseInit(ctx.request));
         finalize(ctx.response.status, error);
     }
     return ctx.response ?? new Response("Internal Server Error", { status: 500 });
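Taken together with the new 499 entry in `errors/utils` and the `abortSignal` wiring in the chat-completions handler, a client-side cancellation now surfaces as `CLIENT_CLOSED_REQUEST` instead of a generic 500. A hedged end-to-end sketch; the handler wiring and the exact logged attributes are assumptions, not the package's documented API:

```ts
// Assumed setup: `handler` is the WinterCG-style (request) => Promise<Response>
// function returned by the gateway's chat-completions endpoint.
declare const handler: (request: Request) => Promise<Response>;

async function demoClientAbort(): Promise<void> {
  const controller = new AbortController();
  const pending = handler(
    new Request("http://localhost/v1/chat/completions", {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({
        model: "some-model",
        messages: [{ role: "user", content: "Hi" }],
      }),
      signal: controller.signal,
    }),
  );
  controller.abort(); // client disconnects mid-request
  const res = await pending;
  // Per the hunks above, finalize() sees signal.aborted and records
  // http.response.status_code_effective = 499, and the catch block wraps
  // the failure in new GatewayError(..., 499) → code CLIENT_CLOSED_REQUEST.
  console.log(res.status);
}
```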