@hebo-ai/gateway 0.4.0-beta.4 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -7
- package/dist/endpoints/chat-completions/converters.d.ts +3 -3
- package/dist/endpoints/chat-completions/converters.js +15 -7
- package/dist/endpoints/chat-completions/handler.js +9 -9
- package/dist/endpoints/chat-completions/otel.js +10 -4
- package/dist/endpoints/embeddings/handler.js +5 -4
- package/dist/errors/gateway.d.ts +1 -1
- package/dist/errors/gateway.js +3 -3
- package/dist/errors/openai.js +2 -1
- package/dist/errors/utils.d.ts +2 -1
- package/dist/errors/utils.js +1 -0
- package/dist/lifecycle.js +14 -6
- package/dist/models/anthropic/presets.d.ts +463 -0
- package/dist/models/anthropic/presets.js +10 -2
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +1 -0
- package/dist/providers/bedrock/canonical.js +1 -0
- package/dist/telemetry/gen-ai.d.ts +2 -1
- package/dist/telemetry/gen-ai.js +21 -3
- package/dist/telemetry/memory.d.ts +2 -0
- package/dist/telemetry/memory.js +27 -0
- package/dist/telemetry/span.js +1 -1
- package/dist/telemetry/stream.d.ts +1 -1
- package/dist/telemetry/stream.js +25 -28
- package/dist/types.d.ts +2 -3
- package/package.json +2 -1
- package/src/endpoints/chat-completions/converters.ts +17 -10
- package/src/endpoints/chat-completions/handler.ts +13 -9
- package/src/endpoints/chat-completions/otel.ts +11 -4
- package/src/endpoints/embeddings/handler.ts +9 -4
- package/src/errors/gateway.ts +5 -4
- package/src/errors/openai.ts +2 -1
- package/src/errors/utils.ts +1 -0
- package/src/lifecycle.ts +17 -6
- package/src/models/anthropic/presets.ts +14 -2
- package/src/models/types.ts +1 -0
- package/src/providers/bedrock/canonical.ts +1 -0
- package/src/telemetry/gen-ai.ts +31 -3
- package/src/telemetry/memory.ts +36 -0
- package/src/telemetry/span.ts +1 -1
- package/src/telemetry/stream.ts +31 -31
- package/src/types.ts +3 -6
package/README.md
CHANGED
|
@@ -19,6 +19,7 @@ Learn more in our blog post: [Yet Another AI Gateway?](https://hebo.ai/blog/2601
|
|
|
19
19
|
- 🗂️ Model catalog with extensible metadata capabilities.
|
|
20
20
|
- 🪝 Hook system to customize routing, auth, rate limits, and shape responses.
|
|
21
21
|
- 🧰 Low-level OpenAI-compatible schema, converters, and middleware helpers.
|
|
22
|
+
- 👁️ Observability via OTel GenAI semantic conventions (Langfuse-compatible).
|
|
22
23
|
|
|
23
24
|
## 📦 Installation
|
|
24
25
|
|
|
@@ -271,7 +272,7 @@ const gw = gateway({
|
|
|
271
272
|
|
|
272
273
|
### Hooks
|
|
273
274
|
|
|
274
|
-
Hooks allow you to plug
|
|
275
|
+
Hooks allow you to plug into the lifecycle of the gateway and enrich it with additional functionality, like your actual routing logic. All hooks are available as async and non-async.
|
|
275
276
|
|
|
276
277
|
```ts
|
|
277
278
|
const gw = gateway({
|
|
@@ -314,10 +315,10 @@ const gw = gateway({
|
|
|
314
315
|
* @param ctx.modelId Incoming model ID.
|
|
315
316
|
* @returns Canonical model ID or undefined to keep original.
|
|
316
317
|
*/
|
|
317
|
-
resolveModelId
|
|
318
|
+
resolveModelId: async (ctx: {
|
|
318
319
|
body: ChatCompletionsBody | EmbeddingsBody;
|
|
319
320
|
modelId: ModelId;
|
|
320
|
-
})
|
|
321
|
+
}): Promise<ModelId | void> => {
|
|
321
322
|
// Example Use Cases:
|
|
322
323
|
// - Resolve modelAlias to modelId
|
|
323
324
|
return undefined;
|
|
@@ -327,7 +328,7 @@ const gw = gateway({
|
|
|
327
328
|
* @param ctx.providers ProviderRegistry from config.
|
|
328
329
|
* @param ctx.models ModelCatalog from config.
|
|
329
330
|
* @param ctx.body The parsed body object with all call parameters.
|
|
330
|
-
* @param ctx.
|
|
331
|
+
* @param ctx.resolvedModelId Resolved model ID.
|
|
331
332
|
* @param ctx.operation Operation type ("chat" | "embeddings").
|
|
332
333
|
* @returns ProviderV3 to override, or undefined to use default.
|
|
333
334
|
*/
|
|
@@ -335,7 +336,7 @@ const gw = gateway({
|
|
|
335
336
|
providers: ProviderRegistry;
|
|
336
337
|
models: ModelCatalog;
|
|
337
338
|
body: ChatCompletionsBody | EmbeddingsBody;
|
|
338
|
-
|
|
339
|
+
resolvedModelId: ModelId;
|
|
339
340
|
operation: "chat" | "embeddings";
|
|
340
341
|
}): Promise<ProviderV3 | void> => {
|
|
341
342
|
// Example Use Cases:
|
|
@@ -349,8 +350,8 @@ const gw = gateway({
|
|
|
349
350
|
* @returns Modified result, or undefined to keep original.
|
|
350
351
|
*/
|
|
351
352
|
after: async (ctx: {
|
|
352
|
-
result: ChatCompletions
|
|
353
|
-
}): Promise<ChatCompletions
|
|
353
|
+
result: ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings;
|
|
354
|
+
}): Promise<ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | void> => {
|
|
354
355
|
// Example Use Cases:
|
|
355
356
|
// - Transform result
|
|
356
357
|
// - Result logging
|
|
@@ -641,6 +642,32 @@ https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/
|
|
|
641
642
|
|
|
642
643
|
For observability integrations that are not OTel-compliant, you can disable built-in telemetry and manually instrument requests during `before` / `after` hooks.
|
|
643
644
|
|
|
645
|
+
#### Langfuse
|
|
646
|
+
|
|
647
|
+
Hebo telemetry spans are OpenTelemetry-compatible, so you can send them to Langfuse via `@langfuse/otel`.
|
|
648
|
+
|
|
649
|
+
```ts
|
|
650
|
+
import { gateway } from "@hebo-ai/gateway";
|
|
651
|
+
import { LangfuseSpanProcessor } from "@langfuse/otel";
|
|
652
|
+
import { context } from "@opentelemetry/api";
|
|
653
|
+
import { AsyncLocalStorageContextManager } from "@opentelemetry/context-async-hooks";
|
|
654
|
+
import { BasicTracerProvider } from "@opentelemetry/sdk-trace-base";
|
|
655
|
+
|
|
656
|
+
context.setGlobalContextManager(new AsyncLocalStorageContextManager().enable());
|
|
657
|
+
|
|
658
|
+
const gw = gateway({
|
|
659
|
+
// ...
|
|
660
|
+
telemetry: {
|
|
661
|
+
enabled: true,
|
|
662
|
+
tracer: new BasicTracerProvider({
|
|
663
|
+
spanProcessors: [new LangfuseSpanProcessor()],
|
|
664
|
+
}).getTracer("hebo"),
|
|
665
|
+
},
|
|
666
|
+
});
|
|
667
|
+
```
|
|
668
|
+
|
|
669
|
+
Langfuse credentials are read from environment variables by the Langfuse OTel SDK (`LANGFUSE_PUBLIC_KEY`, `LANGFUSE_SECRET_KEY`, `LANGFUSE_BASE_URL`).
|
|
670
|
+
|
|
644
671
|
### Passing Framework State to Hooks
|
|
645
672
|
|
|
646
673
|
You can pass per-request info from your framework into the gateway via the second `state` argument on the handler, then read it in hooks through `ctx.state`.
|
|
@@ -25,10 +25,10 @@ export declare const convertToToolSet: (tools: ChatCompletionsTool[] | undefined
|
|
|
25
25
|
export declare const convertToToolChoice: (toolChoice: ChatCompletionsToolChoice | undefined) => ToolChoice<ToolSet> | undefined;
|
|
26
26
|
export declare function toChatCompletions(result: GenerateTextResult<ToolSet, Output.Output>, model: string): ChatCompletions;
|
|
27
27
|
export declare function toChatCompletionsResponse(result: GenerateTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
|
|
28
|
-
export declare function toChatCompletionsStream(result: StreamTextResult<ToolSet, Output.Output>, model: string): ReadableStream<ChatCompletionsChunk | OpenAIError>;
|
|
28
|
+
export declare function toChatCompletionsStream<E extends boolean = false>(result: StreamTextResult<ToolSet, Output.Output>, model: string, wrapErrors?: E): ReadableStream<ChatCompletionsChunk | (E extends true ? OpenAIError : Error)>;
|
|
29
29
|
export declare function toChatCompletionsStreamResponse(result: StreamTextResult<ToolSet, Output.Output>, model: string, responseInit?: ResponseInit): Response;
|
|
30
|
-
export declare class ChatCompletionsStream extends TransformStream<TextStreamPart<ToolSet>, ChatCompletionsChunk | OpenAIError> {
|
|
31
|
-
constructor(model: string);
|
|
30
|
+
export declare class ChatCompletionsStream<E extends boolean = false> extends TransformStream<TextStreamPart<ToolSet>, ChatCompletionsChunk | (E extends true ? OpenAIError : Error)> {
|
|
31
|
+
constructor(model: string, wrapErrors?: E);
|
|
32
32
|
}
|
|
33
33
|
export declare const toChatCompletionsAssistantMessage: (result: GenerateTextResult<ToolSet, Output.Output>) => ChatCompletionsAssistantMessage;
|
|
34
34
|
export declare function toReasoningDetail(reasoning: ReasoningOutput, id: string, index: number): ChatCompletionsReasoningDetail;
|
|
@@ -275,14 +275,14 @@ export function toChatCompletions(result, model) {
|
|
|
275
275
|
export function toChatCompletionsResponse(result, model, responseInit) {
|
|
276
276
|
return toResponse(toChatCompletions(result, model), responseInit);
|
|
277
277
|
}
|
|
278
|
-
export function toChatCompletionsStream(result, model) {
|
|
279
|
-
return result.fullStream.pipeThrough(new ChatCompletionsStream(model));
|
|
278
|
+
export function toChatCompletionsStream(result, model, wrapErrors) {
|
|
279
|
+
return result.fullStream.pipeThrough(new ChatCompletionsStream(model, wrapErrors));
|
|
280
280
|
}
|
|
281
281
|
export function toChatCompletionsStreamResponse(result, model, responseInit) {
|
|
282
|
-
return toResponse(toChatCompletionsStream(result, model), responseInit);
|
|
282
|
+
return toResponse(toChatCompletionsStream(result, model, true), responseInit);
|
|
283
283
|
}
|
|
284
284
|
export class ChatCompletionsStream extends TransformStream {
|
|
285
|
-
constructor(model) {
|
|
285
|
+
constructor(model, wrapErrors) {
|
|
286
286
|
const streamId = `chatcmpl-${crypto.randomUUID()}`;
|
|
287
287
|
const creationTime = Math.floor(Date.now() / 1000);
|
|
288
288
|
let toolCallIndexCounter = 0;
|
|
@@ -348,9 +348,17 @@ export class ChatCompletionsStream extends TransformStream {
|
|
|
348
348
|
break;
|
|
349
349
|
}
|
|
350
350
|
case "error": {
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
351
|
+
let err;
|
|
352
|
+
if (wrapErrors) {
|
|
353
|
+
err = toOpenAIError(part.error);
|
|
354
|
+
}
|
|
355
|
+
else if (part.error instanceof Error) {
|
|
356
|
+
err = part.error;
|
|
357
|
+
}
|
|
358
|
+
else {
|
|
359
|
+
err = new Error(String(part.error));
|
|
360
|
+
}
|
|
361
|
+
controller.enqueue(err);
|
|
354
362
|
}
|
|
355
363
|
}
|
|
356
364
|
},
|
|
@@ -5,7 +5,7 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
|
-
import { recordRequestDuration, recordTokenUsage } from "../../telemetry/gen-ai";
|
|
8
|
+
import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
9
9
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
10
10
|
import { resolveRequestId } from "../../utils/headers";
|
|
11
11
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
@@ -33,8 +33,8 @@ export const chatCompletions = (config) => {
|
|
|
33
33
|
addSpanEvent("hebo.request.deserialized");
|
|
34
34
|
const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
|
|
35
35
|
if (!parsed.success) {
|
|
36
|
-
// FUTURE:
|
|
37
|
-
throw new GatewayError(z.prettifyError(parsed.error), 400);
|
|
36
|
+
// FUTURE: consider adding body shape to metadata
|
|
37
|
+
throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
|
|
38
38
|
}
|
|
39
39
|
ctx.body = parsed.data;
|
|
40
40
|
addSpanEvent("hebo.request.parsed");
|
|
@@ -84,13 +84,12 @@ export const chatCompletions = (config) => {
|
|
|
84
84
|
const result = streamText({
|
|
85
85
|
model: languageModelWithMiddleware,
|
|
86
86
|
headers: prepareForwardHeaders(ctx.request),
|
|
87
|
-
|
|
88
|
-
// abortSignal: ctx.request.signal,
|
|
87
|
+
abortSignal: ctx.request.signal,
|
|
89
88
|
timeout: {
|
|
90
89
|
totalMs: 5 * 60 * 1000,
|
|
91
90
|
},
|
|
92
91
|
onAbort: () => {
|
|
93
|
-
throw new DOMException("
|
|
92
|
+
throw new DOMException("The operation was aborted.", "AbortError");
|
|
94
93
|
},
|
|
95
94
|
onError: () => { },
|
|
96
95
|
onFinish: (res) => {
|
|
@@ -100,7 +99,8 @@ export const chatCompletions = (config) => {
|
|
|
100
99
|
const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
|
|
101
100
|
setSpanAttributes(genAiResponseAttrs);
|
|
102
101
|
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
103
|
-
|
|
102
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
103
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
104
104
|
},
|
|
105
105
|
experimental_include: {
|
|
106
106
|
requestBody: false,
|
|
@@ -119,7 +119,6 @@ export const chatCompletions = (config) => {
|
|
|
119
119
|
const result = await generateText({
|
|
120
120
|
model: languageModelWithMiddleware,
|
|
121
121
|
headers: prepareForwardHeaders(ctx.request),
|
|
122
|
-
// FUTURE: currently can't tell whether upstream or downstream abort
|
|
123
122
|
abortSignal: ctx.request.signal,
|
|
124
123
|
timeout: 5 * 60 * 1000,
|
|
125
124
|
experimental_include: {
|
|
@@ -140,7 +139,8 @@ export const chatCompletions = (config) => {
|
|
|
140
139
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
141
140
|
addSpanEvent("hebo.hooks.after.completed");
|
|
142
141
|
}
|
|
143
|
-
|
|
142
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
143
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
144
144
|
return ctx.result;
|
|
145
145
|
};
|
|
146
146
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -44,6 +44,10 @@ const toMessageParts = (message) => {
|
|
|
44
44
|
}
|
|
45
45
|
return parts;
|
|
46
46
|
}
|
|
47
|
+
// FUTURE: remove once Langfuse supports gen_ai.system_instructions
|
|
48
|
+
if (message.role === "system") {
|
|
49
|
+
return [toTextPart(message.content)];
|
|
50
|
+
}
|
|
47
51
|
return [];
|
|
48
52
|
};
|
|
49
53
|
export const getChatGeneralAttributes = (ctx, signalLevel) => {
|
|
@@ -81,11 +85,13 @@ export const getChatRequestAttributes = (inputs, signalLevel) => {
|
|
|
81
85
|
}
|
|
82
86
|
if (signalLevel === "full") {
|
|
83
87
|
Object.assign(attrs, {
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
88
|
+
// FUTURE: move system instructions from messages to here
|
|
89
|
+
// blocker: https://github.com/langfuse/langfuse/issues/11607
|
|
90
|
+
// "gen_ai.system_instructions": inputs.messages
|
|
91
|
+
// .filter((m) => m.role === "system")
|
|
92
|
+
// .map((m) => JSON.stringify(toTextPart(m.content))),
|
|
87
93
|
"gen_ai.input.messages": inputs.messages
|
|
88
|
-
|
|
94
|
+
//.filter((m) => m.role !== "system")
|
|
89
95
|
.map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
|
|
90
96
|
"gen_ai.tool.definitions": JSON.stringify(inputs.tools),
|
|
91
97
|
});
|
|
@@ -5,7 +5,7 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
|
-
import { recordRequestDuration, recordTokenUsage } from "../../telemetry/gen-ai";
|
|
8
|
+
import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
9
9
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
10
10
|
import { resolveRequestId } from "../../utils/headers";
|
|
11
11
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
@@ -33,8 +33,8 @@ export const embeddings = (config) => {
|
|
|
33
33
|
addSpanEvent("hebo.request.deserialized");
|
|
34
34
|
const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
|
|
35
35
|
if (!parsed.success) {
|
|
36
|
-
// FUTURE:
|
|
37
|
-
throw new GatewayError(z.prettifyError(parsed.error), 400);
|
|
36
|
+
// FUTURE: consider adding body shape to metadata
|
|
37
|
+
throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
|
|
38
38
|
}
|
|
39
39
|
ctx.body = parsed.data;
|
|
40
40
|
addSpanEvent("hebo.request.parsed");
|
|
@@ -95,7 +95,8 @@ export const embeddings = (config) => {
|
|
|
95
95
|
ctx.result = (await hooks.after(ctx)) ?? ctx.result;
|
|
96
96
|
addSpanEvent("hebo.hooks.after.completed");
|
|
97
97
|
}
|
|
98
|
-
|
|
98
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
99
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
99
100
|
return ctx.result;
|
|
100
101
|
};
|
|
101
102
|
return { handler: winterCgHandler(handler, config) };
|
package/dist/errors/gateway.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export declare class GatewayError extends Error {
|
|
2
2
|
readonly status: number;
|
|
3
3
|
readonly code: string;
|
|
4
|
-
constructor(error:
|
|
4
|
+
constructor(error: unknown, status: number, code?: string, cause?: unknown);
|
|
5
5
|
}
|
package/dist/errors/gateway.js
CHANGED
|
@@ -3,10 +3,10 @@ export class GatewayError extends Error {
|
|
|
3
3
|
status;
|
|
4
4
|
code;
|
|
5
5
|
constructor(error, status, code, cause) {
|
|
6
|
-
const
|
|
7
|
-
super(
|
|
6
|
+
const isError = error instanceof Error;
|
|
7
|
+
super(isError ? error.message : String(error));
|
|
8
|
+
this.cause = cause ?? (isError ? error : undefined);
|
|
8
9
|
this.status = status;
|
|
9
10
|
this.code = code ?? STATUS_CODE(status);
|
|
10
|
-
this.cause = cause ?? (typeof error === "string" ? undefined : error);
|
|
11
11
|
}
|
|
12
12
|
}
|
package/dist/errors/openai.js
CHANGED
|
@@ -19,7 +19,8 @@ export class OpenAIError {
|
|
|
19
19
|
}
|
|
20
20
|
const mapType = (status) => (status < 500 ? "invalid_request_error" : "server_error");
|
|
21
21
|
const maybeMaskMessage = (meta, requestId) => {
|
|
22
|
-
|
|
22
|
+
// FUTURE: consider masking all upstream errors, also 4xx
|
|
23
|
+
if (!(isProduction() && meta.status >= 500)) {
|
|
23
24
|
return meta.message;
|
|
24
25
|
}
|
|
25
26
|
// FUTURE: always attach requestId to errors (masked and unmasked)
|
package/dist/errors/utils.d.ts
CHANGED
|
@@ -8,12 +8,13 @@ export declare const STATUS_CODES: {
|
|
|
8
8
|
readonly 409: "CONFLICT";
|
|
9
9
|
readonly 422: "UNPROCESSABLE_ENTITY";
|
|
10
10
|
readonly 429: "TOO_MANY_REQUESTS";
|
|
11
|
+
readonly 499: "CLIENT_CLOSED_REQUEST";
|
|
11
12
|
readonly 500: "INTERNAL_SERVER_ERROR";
|
|
12
13
|
readonly 502: "BAD_GATEWAY";
|
|
13
14
|
readonly 503: "SERVICE_UNAVAILABLE";
|
|
14
15
|
readonly 504: "GATEWAY_TIMEOUT";
|
|
15
16
|
};
|
|
16
|
-
export declare const STATUS_CODE: (status: number) => "BAD_REQUEST" | "UNAUTHORIZED" | "PAYMENT_REQUIRED" | "FORBIDDEN" | "NOT_FOUND" | "METHOD_NOT_ALLOWED" | "CONFLICT" | "UNPROCESSABLE_ENTITY" | "TOO_MANY_REQUESTS" | "INTERNAL_SERVER_ERROR" | "BAD_GATEWAY" | "SERVICE_UNAVAILABLE" | "GATEWAY_TIMEOUT";
|
|
17
|
+
export declare const STATUS_CODE: (status: number) => "BAD_REQUEST" | "UNAUTHORIZED" | "PAYMENT_REQUIRED" | "FORBIDDEN" | "NOT_FOUND" | "METHOD_NOT_ALLOWED" | "CONFLICT" | "UNPROCESSABLE_ENTITY" | "TOO_MANY_REQUESTS" | "CLIENT_CLOSED_REQUEST" | "INTERNAL_SERVER_ERROR" | "BAD_GATEWAY" | "SERVICE_UNAVAILABLE" | "GATEWAY_TIMEOUT";
|
|
17
18
|
export declare function getErrorMeta(error: unknown): {
|
|
18
19
|
status: number;
|
|
19
20
|
code: string;
|
package/dist/errors/utils.js
CHANGED
package/dist/lifecycle.js
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import { parseConfig } from "./config";
|
|
2
|
+
import { GatewayError } from "./errors/gateway";
|
|
2
3
|
import { toOpenAIErrorResponse } from "./errors/openai";
|
|
3
4
|
import { logger } from "./logger";
|
|
4
5
|
import { getBaggageAttributes } from "./telemetry/baggage";
|
|
5
6
|
import { initFetch } from "./telemetry/fetch";
|
|
6
7
|
import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
|
|
8
|
+
import { recordV8jsMemory } from "./telemetry/memory";
|
|
7
9
|
import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
|
|
8
10
|
import { wrapStream } from "./telemetry/stream";
|
|
9
11
|
import { resolveRequestId } from "./utils/headers";
|
|
@@ -11,7 +13,7 @@ import { maybeApplyRequestPatch, prepareRequestHeaders } from "./utils/request";
|
|
|
11
13
|
import { prepareResponseInit, toResponse } from "./utils/response";
|
|
12
14
|
export const winterCgHandler = (run, config) => {
|
|
13
15
|
const parsedConfig = parseConfig(config);
|
|
14
|
-
if (parsedConfig.telemetry
|
|
16
|
+
if (parsedConfig.telemetry?.enabled) {
|
|
15
17
|
setSpanTracer(parsedConfig.telemetry?.tracer);
|
|
16
18
|
setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
|
|
17
19
|
initFetch(parsedConfig.telemetry?.signals?.hebo);
|
|
@@ -39,17 +41,21 @@ export const winterCgHandler = (run, config) => {
|
|
|
39
41
|
// FUTURE add http.server.request.duration
|
|
40
42
|
span.setAttributes(getResponseAttributes(ctx.response, parsedConfig.telemetry?.signals?.http));
|
|
41
43
|
}
|
|
42
|
-
|
|
44
|
+
let realStatus = status;
|
|
45
|
+
if (ctx.request.signal.aborted)
|
|
46
|
+
realStatus = 499;
|
|
47
|
+
else if (status === 200 && ctx.response?.status)
|
|
48
|
+
realStatus = ctx.response.status;
|
|
43
49
|
if (realStatus !== 200) {
|
|
44
|
-
// FUTURE: in-stream errors are redacted in prod
|
|
45
50
|
(realStatus >= 500 ? logger.error : logger.warn)({
|
|
46
51
|
requestId: resolveRequestId(ctx.request),
|
|
47
|
-
err: reason,
|
|
52
|
+
err: reason ?? ctx.request.signal.reason,
|
|
48
53
|
});
|
|
49
54
|
if (realStatus >= 500)
|
|
50
55
|
span.recordError(reason);
|
|
51
56
|
}
|
|
52
57
|
span.setAttributes({ "http.response.status_code_effective": realStatus });
|
|
58
|
+
recordV8jsMemory(parsedConfig.telemetry?.signals?.hebo);
|
|
53
59
|
span.finish();
|
|
54
60
|
};
|
|
55
61
|
try {
|
|
@@ -66,7 +72,7 @@ export const winterCgHandler = (run, config) => {
|
|
|
66
72
|
if (!ctx.response) {
|
|
67
73
|
ctx.result = (await span.runWithContext(() => run(ctx)));
|
|
68
74
|
if (ctx.result instanceof ReadableStream) {
|
|
69
|
-
ctx.result = wrapStream(ctx.result, { onDone: finalize }
|
|
75
|
+
ctx.result = wrapStream(ctx.result, { onDone: finalize });
|
|
70
76
|
}
|
|
71
77
|
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.request));
|
|
72
78
|
}
|
|
@@ -83,7 +89,9 @@ export const winterCgHandler = (run, config) => {
|
|
|
83
89
|
}
|
|
84
90
|
}
|
|
85
91
|
catch (error) {
|
|
86
|
-
ctx.response = toOpenAIErrorResponse(
|
|
92
|
+
ctx.response = toOpenAIErrorResponse(ctx.request.signal.aborted
|
|
93
|
+
? new GatewayError(error ?? ctx.request.signal.reason, 499)
|
|
94
|
+
: error, prepareResponseInit(ctx.request));
|
|
87
95
|
finalize(ctx.response.status, error);
|
|
88
96
|
}
|
|
89
97
|
return ctx.response ?? new Response("Internal Server Error", { status: 500 });
|