@hebo-ai/gateway 0.4.0-beta.4 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -7
- package/dist/endpoints/chat-completions/converters.d.ts +3 -3
- package/dist/endpoints/chat-completions/converters.js +15 -7
- package/dist/endpoints/chat-completions/handler.js +9 -9
- package/dist/endpoints/chat-completions/otel.js +10 -4
- package/dist/endpoints/embeddings/handler.js +5 -4
- package/dist/errors/gateway.d.ts +1 -1
- package/dist/errors/gateway.js +3 -3
- package/dist/errors/openai.js +2 -1
- package/dist/errors/utils.d.ts +2 -1
- package/dist/errors/utils.js +1 -0
- package/dist/lifecycle.js +14 -6
- package/dist/models/anthropic/presets.d.ts +463 -0
- package/dist/models/anthropic/presets.js +10 -2
- package/dist/models/types.d.ts +1 -1
- package/dist/models/types.js +1 -0
- package/dist/providers/bedrock/canonical.js +1 -0
- package/dist/telemetry/gen-ai.d.ts +2 -1
- package/dist/telemetry/gen-ai.js +21 -3
- package/dist/telemetry/memory.d.ts +2 -0
- package/dist/telemetry/memory.js +27 -0
- package/dist/telemetry/span.js +1 -1
- package/dist/telemetry/stream.d.ts +1 -1
- package/dist/telemetry/stream.js +25 -28
- package/dist/types.d.ts +2 -3
- package/package.json +2 -1
- package/src/endpoints/chat-completions/converters.ts +17 -10
- package/src/endpoints/chat-completions/handler.ts +13 -9
- package/src/endpoints/chat-completions/otel.ts +11 -4
- package/src/endpoints/embeddings/handler.ts +9 -4
- package/src/errors/gateway.ts +5 -4
- package/src/errors/openai.ts +2 -1
- package/src/errors/utils.ts +1 -0
- package/src/lifecycle.ts +17 -6
- package/src/models/anthropic/presets.ts +14 -2
- package/src/models/types.ts +1 -0
- package/src/providers/bedrock/canonical.ts +1 -0
- package/src/telemetry/gen-ai.ts +31 -3
- package/src/telemetry/memory.ts +36 -0
- package/src/telemetry/span.ts +1 -1
- package/src/telemetry/stream.ts +31 -31
- package/src/types.ts +3 -6
package/dist/telemetry/stream.js
CHANGED
|
@@ -1,43 +1,37 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
import { toOpenAIError } from "../errors/openai";
|
|
2
|
+
const isErrorChunk = (v) => v instanceof Error || !!v?.error;
|
|
3
|
+
export const wrapStream = (src, hooks) => {
|
|
4
|
+
let finished = false;
|
|
5
|
+
const done = (reader, controller, status, reason) => {
|
|
6
|
+
if (!finished) {
|
|
7
|
+
finished = true;
|
|
8
|
+
hooks.onDone?.(status, reason);
|
|
9
|
+
}
|
|
10
|
+
reader.cancel(reason).catch(() => { });
|
|
11
|
+
controller.close();
|
|
9
12
|
};
|
|
10
13
|
return new ReadableStream({
|
|
11
14
|
async start(controller) {
|
|
12
15
|
const reader = src.getReader();
|
|
13
|
-
const close = (status, reason) => {
|
|
14
|
-
finish(status, reason);
|
|
15
|
-
reader.cancel(reason).catch(() => { });
|
|
16
|
-
controller.close();
|
|
17
|
-
};
|
|
18
16
|
try {
|
|
19
17
|
for (;;) {
|
|
20
|
-
if (signal?.aborted) {
|
|
21
|
-
close(499, signal.reason);
|
|
22
|
-
return;
|
|
23
|
-
}
|
|
24
18
|
// eslint-disable-next-line no-await-in-loop
|
|
25
|
-
const { value, done } = await reader.read();
|
|
26
|
-
if (
|
|
19
|
+
const { value, done: eof } = await reader.read();
|
|
20
|
+
if (eof)
|
|
27
21
|
break;
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
22
|
+
const out = isErrorChunk(value) ? toOpenAIError(value) : value;
|
|
23
|
+
controller.enqueue(out);
|
|
24
|
+
if (out !== value) {
|
|
25
|
+
const status = out.error?.type === "invalid_request_error" ? 422 : 502;
|
|
26
|
+
done(reader, controller, status, value);
|
|
32
27
|
return;
|
|
33
28
|
}
|
|
34
29
|
}
|
|
35
|
-
|
|
36
|
-
controller.close();
|
|
30
|
+
done(reader, controller, 200);
|
|
37
31
|
}
|
|
38
32
|
catch (err) {
|
|
39
|
-
|
|
40
|
-
|
|
33
|
+
controller.enqueue(toOpenAIError(err));
|
|
34
|
+
done(reader, controller, 502, err);
|
|
41
35
|
}
|
|
42
36
|
finally {
|
|
43
37
|
try {
|
|
@@ -47,7 +41,10 @@ export const wrapStream = (src, hooks, signal) => {
|
|
|
47
41
|
}
|
|
48
42
|
},
|
|
49
43
|
cancel(reason) {
|
|
50
|
-
|
|
44
|
+
if (!finished) {
|
|
45
|
+
finished = true;
|
|
46
|
+
hooks.onDone?.(499, reason);
|
|
47
|
+
}
|
|
51
48
|
src.cancel(reason).catch(() => { });
|
|
52
49
|
},
|
|
53
50
|
});
|
package/dist/types.d.ts
CHANGED
|
@@ -3,7 +3,6 @@ import type { Tracer } from "@opentelemetry/api";
|
|
|
3
3
|
import type { ChatCompletions, ChatCompletionsBody, ChatCompletionsChunk } from "./endpoints/chat-completions/schema";
|
|
4
4
|
import type { Embeddings, EmbeddingsBody } from "./endpoints/embeddings/schema";
|
|
5
5
|
import type { Model, ModelList } from "./endpoints/models";
|
|
6
|
-
import type { OpenAIError } from "./errors/openai";
|
|
7
6
|
import type { Logger, LoggerConfig } from "./logger";
|
|
8
7
|
import type { ModelCatalog, ModelId } from "./models/types";
|
|
9
8
|
import type { ProviderId, ProviderRegistry } from "./providers/types";
|
|
@@ -67,7 +66,7 @@ export type GatewayContext = {
|
|
|
67
66
|
/**
|
|
68
67
|
* Result returned by the handler (pre-response).
|
|
69
68
|
*/
|
|
70
|
-
result?: ChatCompletions | ReadableStream<ChatCompletionsChunk |
|
|
69
|
+
result?: ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | Model | ModelList;
|
|
71
70
|
/**
|
|
72
71
|
* Response object returned by the handler.
|
|
73
72
|
*/
|
|
@@ -115,7 +114,7 @@ export type GatewayHooks = {
|
|
|
115
114
|
* Runs after the endpoint handler.
|
|
116
115
|
* @returns Result to replace, or undefined to keep original.
|
|
117
116
|
*/
|
|
118
|
-
after?: (ctx: AfterHookContext) => void | ChatCompletions | ReadableStream<ChatCompletionsChunk |
|
|
117
|
+
after?: (ctx: AfterHookContext) => void | ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | Promise<void | ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings>;
|
|
119
118
|
/**
|
|
120
119
|
* Runs after the lifecycle has produced the final Response.
|
|
121
120
|
* @returns Replacement Response, or undefined to keep original.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hebo-ai/gateway",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.1",
|
|
4
4
|
"description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
@@ -168,6 +168,7 @@
|
|
|
168
168
|
"@ai-sdk/groq": "^3.0.19",
|
|
169
169
|
"@ai-sdk/openai": "^3.0.23",
|
|
170
170
|
"@aws-sdk/credential-providers": "^3.981.0",
|
|
171
|
+
"@langfuse/otel": "^4.6.1",
|
|
171
172
|
"@mjackson/node-fetch-server": "^0.7.0",
|
|
172
173
|
"@opentelemetry/api": "^1.9.0",
|
|
173
174
|
"@opentelemetry/context-async-hooks": "^2.5.1",
|
|
@@ -405,11 +405,12 @@ export function toChatCompletionsResponse(
|
|
|
405
405
|
return toResponse(toChatCompletions(result, model), responseInit);
|
|
406
406
|
}
|
|
407
407
|
|
|
408
|
-
export function toChatCompletionsStream(
|
|
408
|
+
export function toChatCompletionsStream<E extends boolean = false>(
|
|
409
409
|
result: StreamTextResult<ToolSet, Output.Output>,
|
|
410
410
|
model: string,
|
|
411
|
-
|
|
412
|
-
|
|
411
|
+
wrapErrors?: E,
|
|
412
|
+
): ReadableStream<ChatCompletionsChunk | (E extends true ? OpenAIError : Error)> {
|
|
413
|
+
return result.fullStream.pipeThrough(new ChatCompletionsStream(model, wrapErrors));
|
|
413
414
|
}
|
|
414
415
|
|
|
415
416
|
export function toChatCompletionsStreamResponse(
|
|
@@ -417,14 +418,14 @@ export function toChatCompletionsStreamResponse(
|
|
|
417
418
|
model: string,
|
|
418
419
|
responseInit?: ResponseInit,
|
|
419
420
|
): Response {
|
|
420
|
-
return toResponse(toChatCompletionsStream(result, model), responseInit);
|
|
421
|
+
return toResponse(toChatCompletionsStream(result, model, true), responseInit);
|
|
421
422
|
}
|
|
422
423
|
|
|
423
|
-
export class ChatCompletionsStream extends TransformStream<
|
|
424
|
+
export class ChatCompletionsStream<E extends boolean = false> extends TransformStream<
|
|
424
425
|
TextStreamPart<ToolSet>,
|
|
425
|
-
ChatCompletionsChunk | OpenAIError
|
|
426
|
+
ChatCompletionsChunk | (E extends true ? OpenAIError : Error)
|
|
426
427
|
> {
|
|
427
|
-
constructor(model: string) {
|
|
428
|
+
constructor(model: string, wrapErrors?: E) {
|
|
428
429
|
const streamId = `chatcmpl-${crypto.randomUUID()}`;
|
|
429
430
|
const creationTime = Math.floor(Date.now() / 1000);
|
|
430
431
|
let toolCallIndexCounter = 0;
|
|
@@ -535,9 +536,15 @@ export class ChatCompletionsStream extends TransformStream<
|
|
|
535
536
|
}
|
|
536
537
|
|
|
537
538
|
case "error": {
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
539
|
+
let err: Error | OpenAIError;
|
|
540
|
+
if (wrapErrors) {
|
|
541
|
+
err = toOpenAIError(part.error);
|
|
542
|
+
} else if (part.error instanceof Error) {
|
|
543
|
+
err = part.error;
|
|
544
|
+
} else {
|
|
545
|
+
err = new Error(String(part.error));
|
|
546
|
+
}
|
|
547
|
+
controller.enqueue(err as E extends true ? OpenAIError : Error);
|
|
541
548
|
}
|
|
542
549
|
}
|
|
543
550
|
},
|
|
@@ -23,7 +23,11 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
23
23
|
import { logger } from "../../logger";
|
|
24
24
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
25
25
|
import { resolveProvider } from "../../providers/registry";
|
|
26
|
-
import {
|
|
26
|
+
import {
|
|
27
|
+
recordRequestDuration,
|
|
28
|
+
recordTimePerOutputToken,
|
|
29
|
+
recordTokenUsage,
|
|
30
|
+
} from "../../telemetry/gen-ai";
|
|
27
31
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
28
32
|
import { resolveRequestId } from "../../utils/headers";
|
|
29
33
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
@@ -60,8 +64,8 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
60
64
|
|
|
61
65
|
const parsed = ChatCompletionsBodySchema.safeParse(ctx.body);
|
|
62
66
|
if (!parsed.success) {
|
|
63
|
-
// FUTURE:
|
|
64
|
-
throw new GatewayError(z.prettifyError(parsed.error), 400);
|
|
67
|
+
// FUTURE: consider adding body shape to metadata
|
|
68
|
+
throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
|
|
65
69
|
}
|
|
66
70
|
ctx.body = parsed.data;
|
|
67
71
|
addSpanEvent("hebo.request.parsed");
|
|
@@ -123,13 +127,12 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
123
127
|
const result = streamText({
|
|
124
128
|
model: languageModelWithMiddleware,
|
|
125
129
|
headers: prepareForwardHeaders(ctx.request),
|
|
126
|
-
|
|
127
|
-
// abortSignal: ctx.request.signal,
|
|
130
|
+
abortSignal: ctx.request.signal,
|
|
128
131
|
timeout: {
|
|
129
132
|
totalMs: 5 * 60 * 1000,
|
|
130
133
|
},
|
|
131
134
|
onAbort: () => {
|
|
132
|
-
throw new DOMException("
|
|
135
|
+
throw new DOMException("The operation was aborted.", "AbortError");
|
|
133
136
|
},
|
|
134
137
|
onError: () => {},
|
|
135
138
|
onFinish: (res) => {
|
|
@@ -143,7 +146,8 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
143
146
|
const genAiResponseAttrs = getChatResponseAttributes(streamResult, genAiSignalLevel);
|
|
144
147
|
setSpanAttributes(genAiResponseAttrs);
|
|
145
148
|
recordTokenUsage(genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
146
|
-
|
|
149
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
150
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
147
151
|
},
|
|
148
152
|
experimental_include: {
|
|
149
153
|
requestBody: false,
|
|
@@ -166,7 +170,6 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
166
170
|
const result = await generateText({
|
|
167
171
|
model: languageModelWithMiddleware,
|
|
168
172
|
headers: prepareForwardHeaders(ctx.request),
|
|
169
|
-
// FUTURE: currently can't tell whether upstream or downstream abort
|
|
170
173
|
abortSignal: ctx.request.signal,
|
|
171
174
|
timeout: 5 * 60 * 1000,
|
|
172
175
|
experimental_include: {
|
|
@@ -191,7 +194,8 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
191
194
|
addSpanEvent("hebo.hooks.after.completed");
|
|
192
195
|
}
|
|
193
196
|
|
|
194
|
-
|
|
197
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
198
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
195
199
|
return ctx.result;
|
|
196
200
|
};
|
|
197
201
|
|
|
@@ -54,6 +54,11 @@ const toMessageParts = (message: ChatCompletionsMessage): Record<string, unknown
|
|
|
54
54
|
return parts;
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
// FUTURE: remove once Langfuse supports gen_ai.system_instructions
|
|
58
|
+
if (message.role === "system") {
|
|
59
|
+
return [toTextPart(message.content)];
|
|
60
|
+
}
|
|
61
|
+
|
|
57
62
|
return [];
|
|
58
63
|
};
|
|
59
64
|
|
|
@@ -103,11 +108,13 @@ export const getChatRequestAttributes = (
|
|
|
103
108
|
|
|
104
109
|
if (signalLevel === "full") {
|
|
105
110
|
Object.assign(attrs, {
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
111
|
+
// FUTURE: move system instructions from messages to here
|
|
112
|
+
// blocker: https://github.com/langfuse/langfuse/issues/11607
|
|
113
|
+
// "gen_ai.system_instructions": inputs.messages
|
|
114
|
+
// .filter((m) => m.role === "system")
|
|
115
|
+
// .map((m) => JSON.stringify(toTextPart(m.content))),
|
|
109
116
|
"gen_ai.input.messages": inputs.messages
|
|
110
|
-
|
|
117
|
+
//.filter((m) => m.role !== "system")
|
|
111
118
|
.map((m) => JSON.stringify({ role: m.role, parts: toMessageParts(m) })),
|
|
112
119
|
"gen_ai.tool.definitions": JSON.stringify(inputs.tools),
|
|
113
120
|
});
|
|
@@ -16,7 +16,11 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
16
16
|
import { logger } from "../../logger";
|
|
17
17
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
18
18
|
import { resolveProvider } from "../../providers/registry";
|
|
19
|
-
import {
|
|
19
|
+
import {
|
|
20
|
+
recordRequestDuration,
|
|
21
|
+
recordTimePerOutputToken,
|
|
22
|
+
recordTokenUsage,
|
|
23
|
+
} from "../../telemetry/gen-ai";
|
|
20
24
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
21
25
|
import { resolveRequestId } from "../../utils/headers";
|
|
22
26
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
@@ -53,8 +57,8 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
53
57
|
|
|
54
58
|
const parsed = EmbeddingsBodySchema.safeParse(ctx.body);
|
|
55
59
|
if (!parsed.success) {
|
|
56
|
-
// FUTURE:
|
|
57
|
-
throw new GatewayError(z.prettifyError(parsed.error), 400);
|
|
60
|
+
// FUTURE: consider adding body shape to metadata
|
|
61
|
+
throw new GatewayError(z.prettifyError(parsed.error), 400, undefined, parsed.error);
|
|
58
62
|
}
|
|
59
63
|
ctx.body = parsed.data;
|
|
60
64
|
addSpanEvent("hebo.request.parsed");
|
|
@@ -127,7 +131,8 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
127
131
|
addSpanEvent("hebo.hooks.after.completed");
|
|
128
132
|
}
|
|
129
133
|
|
|
130
|
-
|
|
134
|
+
recordTimePerOutputToken(start, genAiResponseAttrs, genAiGeneralAttrs, genAiSignalLevel);
|
|
135
|
+
recordRequestDuration(start, genAiGeneralAttrs, genAiSignalLevel);
|
|
131
136
|
return ctx.result;
|
|
132
137
|
};
|
|
133
138
|
|
package/src/errors/gateway.ts
CHANGED
|
@@ -4,11 +4,12 @@ export class GatewayError extends Error {
|
|
|
4
4
|
readonly status: number;
|
|
5
5
|
readonly code: string;
|
|
6
6
|
|
|
7
|
-
constructor(error:
|
|
8
|
-
const
|
|
9
|
-
super(
|
|
7
|
+
constructor(error: unknown, status: number, code?: string, cause?: unknown) {
|
|
8
|
+
const isError = error instanceof Error;
|
|
9
|
+
super(isError ? error.message : String(error));
|
|
10
|
+
this.cause = cause ?? (isError ? error : undefined);
|
|
11
|
+
|
|
10
12
|
this.status = status;
|
|
11
13
|
this.code = code ?? STATUS_CODE(status);
|
|
12
|
-
this.cause = cause ?? (typeof error === "string" ? undefined : error);
|
|
13
14
|
}
|
|
14
15
|
}
|
package/src/errors/openai.ts
CHANGED
|
@@ -25,7 +25,8 @@ export class OpenAIError {
|
|
|
25
25
|
const mapType = (status: number) => (status < 500 ? "invalid_request_error" : "server_error");
|
|
26
26
|
|
|
27
27
|
const maybeMaskMessage = (meta: ReturnType<typeof getErrorMeta>, requestId?: string) => {
|
|
28
|
-
|
|
28
|
+
// FUTURE: consider masking all upstream errors, also 4xx
|
|
29
|
+
if (!(isProduction() && meta.status >= 500)) {
|
|
29
30
|
return meta.message;
|
|
30
31
|
}
|
|
31
32
|
// FUTURE: always attach requestId to errors (masked and unmasked)
|
package/src/errors/utils.ts
CHANGED
package/src/lifecycle.ts
CHANGED
|
@@ -6,11 +6,13 @@ import type {
|
|
|
6
6
|
} from "./types";
|
|
7
7
|
|
|
8
8
|
import { parseConfig } from "./config";
|
|
9
|
+
import { GatewayError } from "./errors/gateway";
|
|
9
10
|
import { toOpenAIErrorResponse } from "./errors/openai";
|
|
10
11
|
import { logger } from "./logger";
|
|
11
12
|
import { getBaggageAttributes } from "./telemetry/baggage";
|
|
12
13
|
import { initFetch } from "./telemetry/fetch";
|
|
13
14
|
import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
|
|
15
|
+
import { recordV8jsMemory } from "./telemetry/memory";
|
|
14
16
|
import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
|
|
15
17
|
import { wrapStream } from "./telemetry/stream";
|
|
16
18
|
import { resolveRequestId } from "./utils/headers";
|
|
@@ -23,7 +25,7 @@ export const winterCgHandler = (
|
|
|
23
25
|
) => {
|
|
24
26
|
const parsedConfig = parseConfig(config);
|
|
25
27
|
|
|
26
|
-
if (parsedConfig.telemetry
|
|
28
|
+
if (parsedConfig.telemetry?.enabled) {
|
|
27
29
|
setSpanTracer(parsedConfig.telemetry?.tracer);
|
|
28
30
|
setSpanEventsEnabled(parsedConfig.telemetry?.signals?.hebo);
|
|
29
31
|
initFetch(parsedConfig.telemetry?.signals?.hebo);
|
|
@@ -58,18 +60,22 @@ export const winterCgHandler = (
|
|
|
58
60
|
);
|
|
59
61
|
}
|
|
60
62
|
|
|
61
|
-
|
|
63
|
+
let realStatus = status;
|
|
64
|
+
if (ctx.request.signal.aborted) realStatus = 499;
|
|
65
|
+
else if (status === 200 && ctx.response?.status) realStatus = ctx.response.status;
|
|
66
|
+
|
|
62
67
|
if (realStatus !== 200) {
|
|
63
|
-
// FUTURE: in-stream errors are redacted in prod
|
|
64
68
|
(realStatus >= 500 ? logger.error : logger.warn)({
|
|
65
69
|
requestId: resolveRequestId(ctx.request),
|
|
66
|
-
err: reason,
|
|
70
|
+
err: reason ?? ctx.request.signal.reason,
|
|
67
71
|
});
|
|
68
72
|
|
|
69
73
|
if (realStatus >= 500) span.recordError(reason);
|
|
70
74
|
}
|
|
71
75
|
span.setAttributes({ "http.response.status_code_effective": realStatus });
|
|
72
76
|
|
|
77
|
+
recordV8jsMemory(parsedConfig.telemetry?.signals?.hebo);
|
|
78
|
+
|
|
73
79
|
span.finish();
|
|
74
80
|
};
|
|
75
81
|
|
|
@@ -89,7 +95,7 @@ export const winterCgHandler = (
|
|
|
89
95
|
ctx.result = (await span.runWithContext(() => run(ctx))) as typeof ctx.result;
|
|
90
96
|
|
|
91
97
|
if (ctx.result instanceof ReadableStream) {
|
|
92
|
-
ctx.result = wrapStream(ctx.result, { onDone: finalize }
|
|
98
|
+
ctx.result = wrapStream(ctx.result, { onDone: finalize });
|
|
93
99
|
}
|
|
94
100
|
|
|
95
101
|
ctx.response = toResponse(ctx.result!, prepareResponseInit(ctx.request));
|
|
@@ -108,7 +114,12 @@ export const winterCgHandler = (
|
|
|
108
114
|
finalize(ctx.response.status);
|
|
109
115
|
}
|
|
110
116
|
} catch (error) {
|
|
111
|
-
ctx.response = toOpenAIErrorResponse(
|
|
117
|
+
ctx.response = toOpenAIErrorResponse(
|
|
118
|
+
ctx.request.signal.aborted
|
|
119
|
+
? new GatewayError(error ?? ctx.request.signal.reason, 499)
|
|
120
|
+
: error,
|
|
121
|
+
prepareResponseInit(ctx.request),
|
|
122
|
+
);
|
|
112
123
|
finalize(ctx.response.status, error);
|
|
113
124
|
}
|
|
114
125
|
|
|
@@ -65,6 +65,18 @@ export const claudeSonnet45 = presetFor<CanonicalModelId, CatalogModel>()(
|
|
|
65
65
|
} satisfies DeepPartial<CatalogModel>,
|
|
66
66
|
);
|
|
67
67
|
|
|
68
|
+
export const claudeSonnet46 = presetFor<CanonicalModelId, CatalogModel>()(
|
|
69
|
+
"anthropic/claude-sonnet-4.6" as const,
|
|
70
|
+
{
|
|
71
|
+
...CLAUDE_BASE,
|
|
72
|
+
...CLAUDE_PDF_MODALITIES,
|
|
73
|
+
name: "Claude Sonnet 4.6",
|
|
74
|
+
capabilities: [...CLAUDE_BASE.capabilities, "reasoning"],
|
|
75
|
+
created: "2026-02-17",
|
|
76
|
+
knowledge: "2025-08",
|
|
77
|
+
} satisfies DeepPartial<CatalogModel>,
|
|
78
|
+
);
|
|
79
|
+
|
|
68
80
|
export const claudeSonnet4 = presetFor<CanonicalModelId, CatalogModel>()(
|
|
69
81
|
"anthropic/claude-sonnet-4" as const,
|
|
70
82
|
{
|
|
@@ -149,7 +161,7 @@ export const claudeOpus4 = presetFor<CanonicalModelId, CatalogModel>()(
|
|
|
149
161
|
);
|
|
150
162
|
|
|
151
163
|
const claudeAtomic = {
|
|
152
|
-
"v4.6": [claudeOpus46],
|
|
164
|
+
"v4.6": [claudeSonnet46, claudeOpus46],
|
|
153
165
|
"v4.5": [claudeHaiku45, claudeSonnet45, claudeOpus45],
|
|
154
166
|
"v4.1": [claudeOpus41],
|
|
155
167
|
v4: [claudeSonnet4, claudeOpus4],
|
|
@@ -157,7 +169,7 @@ const claudeAtomic = {
|
|
|
157
169
|
"v3.5": [claudeSonnet35, claudeHaiku35],
|
|
158
170
|
v3: [claudeHaiku3],
|
|
159
171
|
haiku: [claudeHaiku45, claudeHaiku35, claudeHaiku3],
|
|
160
|
-
sonnet: [claudeSonnet45, claudeSonnet4, claudeSonnet37, claudeSonnet35],
|
|
172
|
+
sonnet: [claudeSonnet46, claudeSonnet45, claudeSonnet4, claudeSonnet37, claudeSonnet35],
|
|
161
173
|
opus: [claudeOpus46, claudeOpus45, claudeOpus41, claudeOpus4],
|
|
162
174
|
} as const;
|
|
163
175
|
|
package/src/models/types.ts
CHANGED
|
@@ -13,6 +13,7 @@ import { withCanonicalIds } from "../registry";
|
|
|
13
13
|
const MAPPING = {
|
|
14
14
|
// Require Inference Profiles and can't be resolved from standard name mapping
|
|
15
15
|
"anthropic/claude-haiku-4.5": "{ip}anthropic.claude-haiku-4-5-20251001-v1:0",
|
|
16
|
+
"anthropic/claude-sonnet-4.6": "{ip}anthropic.claude-sonnet-4-6",
|
|
16
17
|
"anthropic/claude-sonnet-4.5": "{ip}anthropic.claude-sonnet-4-5-20250929-v1:0",
|
|
17
18
|
"anthropic/claude-opus-4.6": "{ip}anthropic.claude-opus-4-6-v1",
|
|
18
19
|
"anthropic/claude-opus-4.5": "{ip}anthropic.claude-opus-4-5-20251101-v1:0",
|
package/src/telemetry/gen-ai.ts
CHANGED
|
@@ -2,7 +2,7 @@ import { metrics, type Attributes } from "@opentelemetry/api";
|
|
|
2
2
|
|
|
3
3
|
import type { TelemetrySignalLevel } from "../types";
|
|
4
4
|
|
|
5
|
-
const meter = metrics.getMeter("@hebo
|
|
5
|
+
const meter = metrics.getMeter("@hebo/gateway");
|
|
6
6
|
|
|
7
7
|
const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.duration", {
|
|
8
8
|
description: "End-to-end gateway request duration",
|
|
@@ -14,6 +14,16 @@ const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.du
|
|
|
14
14
|
},
|
|
15
15
|
});
|
|
16
16
|
|
|
17
|
+
const timePerOutputTokenHistogram = meter.createHistogram("gen_ai.server.time_per_output_token", {
|
|
18
|
+
description: "End-to-end gateway request duration per output token",
|
|
19
|
+
unit: "s",
|
|
20
|
+
advice: {
|
|
21
|
+
explicitBucketBoundaries: [
|
|
22
|
+
0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5,
|
|
23
|
+
],
|
|
24
|
+
},
|
|
25
|
+
});
|
|
26
|
+
|
|
17
27
|
const tokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
|
|
18
28
|
description: "Token usage reported by upstream model responses",
|
|
19
29
|
unit: "{token}",
|
|
@@ -27,13 +37,31 @@ const tokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
|
|
|
27
37
|
|
|
28
38
|
// FUTURE: record unsuccessful calls
|
|
29
39
|
export const recordRequestDuration = (
|
|
30
|
-
|
|
40
|
+
start: number,
|
|
31
41
|
attrs: Attributes,
|
|
32
42
|
signalLevel?: TelemetrySignalLevel,
|
|
33
43
|
) => {
|
|
34
44
|
if (!signalLevel || signalLevel === "off") return;
|
|
35
45
|
|
|
36
|
-
requestDurationHistogram.record(
|
|
46
|
+
requestDurationHistogram.record((performance.now() - start) / 1000, attrs);
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
// FUTURE: record unsuccessful calls
|
|
50
|
+
export const recordTimePerOutputToken = (
|
|
51
|
+
start: number,
|
|
52
|
+
tokenAttrs: Attributes,
|
|
53
|
+
metricAttrs: Attributes,
|
|
54
|
+
signalLevel?: TelemetrySignalLevel,
|
|
55
|
+
) => {
|
|
56
|
+
if (!signalLevel || (signalLevel !== "recommended" && signalLevel !== "full")) return;
|
|
57
|
+
|
|
58
|
+
const outputTokens = tokenAttrs["gen_ai.usage.output_tokens"];
|
|
59
|
+
if (typeof outputTokens !== "number" || outputTokens <= 0) return;
|
|
60
|
+
|
|
61
|
+
timePerOutputTokenHistogram.record(
|
|
62
|
+
(performance.now() - start) / 1000 / outputTokens,
|
|
63
|
+
metricAttrs,
|
|
64
|
+
);
|
|
37
65
|
};
|
|
38
66
|
|
|
39
67
|
// FUTURE: record unsuccessful calls
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { metrics } from "@opentelemetry/api";
|
|
2
|
+
|
|
3
|
+
import type { TelemetrySignalLevel } from "../types";
|
|
4
|
+
|
|
5
|
+
const meter = metrics.getMeter("@hebo/gateway");
|
|
6
|
+
const defaultHeapSpaceAttrs = { "v8js.heap.space.name": "total" } as const;
|
|
7
|
+
|
|
8
|
+
const heapUsedCounter = meter.createUpDownCounter("v8js.memory.heap.used", {
|
|
9
|
+
description: "Used bytes in the V8 heap",
|
|
10
|
+
unit: "By",
|
|
11
|
+
});
|
|
12
|
+
|
|
13
|
+
const heapSpacePhysicalSizeCounter = meter.createUpDownCounter(
|
|
14
|
+
"v8js.memory.heap.space.physical_size",
|
|
15
|
+
{
|
|
16
|
+
description: "Physical bytes allocated for the V8 heap space",
|
|
17
|
+
unit: "By",
|
|
18
|
+
},
|
|
19
|
+
);
|
|
20
|
+
|
|
21
|
+
const isEnabled = (level?: TelemetrySignalLevel) => level === "recommended" || level === "full";
|
|
22
|
+
|
|
23
|
+
export const recordV8jsMemory = (level?: TelemetrySignalLevel) => {
|
|
24
|
+
if (!isEnabled(level)) return;
|
|
25
|
+
|
|
26
|
+
let usage;
|
|
27
|
+
try {
|
|
28
|
+
usage = globalThis.process?.memoryUsage?.();
|
|
29
|
+
} catch {
|
|
30
|
+
return;
|
|
31
|
+
}
|
|
32
|
+
if (!usage) return;
|
|
33
|
+
|
|
34
|
+
heapUsedCounter.add(usage.heapUsed, defaultHeapSpaceAttrs);
|
|
35
|
+
heapSpacePhysicalSizeCounter.add(usage.rss, defaultHeapSpaceAttrs);
|
|
36
|
+
};
|
package/src/telemetry/span.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { INVALID_SPAN_CONTEXT, SpanKind, SpanStatusCode, context, trace } from "
|
|
|
4
4
|
|
|
5
5
|
import type { TelemetrySignalLevel } from "../types";
|
|
6
6
|
|
|
7
|
-
const DEFAULT_TRACER_NAME = "@hebo
|
|
7
|
+
const DEFAULT_TRACER_NAME = "@hebo/gateway";
|
|
8
8
|
|
|
9
9
|
let spanTracer: Tracer | undefined;
|
|
10
10
|
let spanEventsEnabled = false;
|