@hebo-ai/gateway 0.4.0-beta.2 → 0.4.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -5
- package/dist/config.js +21 -7
- package/dist/endpoints/chat-completions/converters.js +2 -2
- package/dist/endpoints/chat-completions/handler.js +31 -25
- package/dist/endpoints/chat-completions/otel.d.ts +6 -0
- package/dist/endpoints/chat-completions/otel.js +121 -0
- package/dist/endpoints/embeddings/handler.js +19 -12
- package/dist/endpoints/embeddings/otel.d.ts +6 -0
- package/dist/endpoints/embeddings/otel.js +35 -0
- package/dist/endpoints/models/handler.js +3 -4
- package/dist/errors/gateway.js +1 -2
- package/dist/errors/openai.js +10 -12
- package/dist/errors/utils.d.ts +1 -3
- package/dist/errors/utils.js +5 -6
- package/dist/gateway.js +1 -1
- package/dist/lifecycle.js +62 -28
- package/dist/middleware/matcher.js +1 -1
- package/dist/models/amazon/presets.d.ts +37 -37
- package/dist/models/amazon/presets.js +1 -1
- package/dist/models/anthropic/presets.d.ts +56 -56
- package/dist/models/cohere/presets.d.ts +54 -54
- package/dist/models/cohere/presets.js +2 -2
- package/dist/models/google/presets.d.ts +31 -31
- package/dist/models/google/presets.js +1 -1
- package/dist/models/meta/presets.d.ts +42 -42
- package/dist/models/openai/presets.d.ts +96 -96
- package/dist/models/openai/presets.js +1 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/voyage/presets.d.ts +92 -92
- package/dist/models/voyage/presets.js +1 -1
- package/dist/providers/registry.js +2 -2
- package/dist/telemetry/baggage.d.ts +1 -0
- package/dist/telemetry/baggage.js +24 -0
- package/dist/telemetry/fetch.d.ts +2 -1
- package/dist/telemetry/fetch.js +13 -3
- package/dist/telemetry/gen-ai.d.ts +4 -0
- package/dist/telemetry/gen-ai.js +42 -0
- package/dist/telemetry/http.d.ts +3 -0
- package/dist/telemetry/http.js +57 -0
- package/dist/telemetry/span.d.ts +6 -3
- package/dist/telemetry/span.js +23 -35
- package/dist/telemetry/stream.d.ts +3 -7
- package/dist/telemetry/stream.js +18 -18
- package/dist/types.d.ts +14 -12
- package/dist/utils/headers.d.ts +1 -1
- package/dist/utils/headers.js +7 -9
- package/dist/utils/request.d.ts +0 -4
- package/dist/utils/request.js +0 -9
- package/dist/utils/response.js +1 -1
- package/package.json +4 -2
- package/src/config.ts +28 -7
- package/src/endpoints/chat-completions/converters.ts +2 -2
- package/src/endpoints/chat-completions/handler.ts +39 -26
- package/src/endpoints/chat-completions/otel.ts +154 -0
- package/src/endpoints/embeddings/handler.test.ts +2 -2
- package/src/endpoints/embeddings/handler.ts +24 -12
- package/src/endpoints/embeddings/otel.ts +56 -0
- package/src/endpoints/models/handler.ts +3 -5
- package/src/errors/gateway.ts +1 -2
- package/src/errors/openai.ts +24 -17
- package/src/errors/utils.ts +5 -7
- package/src/gateway.ts +1 -1
- package/src/lifecycle.ts +73 -31
- package/src/middleware/matcher.ts +1 -1
- package/src/models/amazon/presets.ts +1 -1
- package/src/models/cohere/presets.ts +2 -2
- package/src/models/google/presets.ts +1 -1
- package/src/models/openai/presets.ts +1 -1
- package/src/models/types.ts +1 -1
- package/src/models/voyage/presets.ts +1 -1
- package/src/providers/registry.ts +2 -2
- package/src/telemetry/baggage.ts +27 -0
- package/src/telemetry/fetch.ts +15 -3
- package/src/telemetry/gen-ai.ts +60 -0
- package/src/telemetry/http.ts +65 -0
- package/src/telemetry/span.ts +28 -40
- package/src/telemetry/stream.ts +26 -30
- package/src/types.ts +15 -12
- package/src/utils/headers.ts +8 -19
- package/src/utils/request.ts +0 -11
- package/src/utils/response.ts +1 -1
- package/dist/telemetry/otel.d.ts +0 -2
- package/dist/telemetry/otel.js +0 -46
- package/dist/telemetry/utils.d.ts +0 -4
- package/dist/telemetry/utils.js +0 -223
- package/src/telemetry/otel.ts +0 -87
- package/src/telemetry/utils.ts +0 -273
|
@@ -7,9 +7,9 @@ export const resolveProvider = (args) => {
|
|
|
7
7
|
if (!catalogModel) {
|
|
8
8
|
throw new GatewayError(`Model '${modelId}' not found in catalog`, 422, "MODEL_NOT_FOUND");
|
|
9
9
|
}
|
|
10
|
-
const modality = operation === "embeddings" ? "
|
|
10
|
+
const modality = operation === "embeddings" ? "embedding" : "text";
|
|
11
11
|
if (catalogModel.modalities && !catalogModel.modalities.output.includes(modality)) {
|
|
12
|
-
throw new GatewayError(`Model '${modelId}' does not support '${
|
|
12
|
+
throw new GatewayError(`Model '${modelId}' does not support '${modality}' output`, 422, "MODEL_UNSUPPORTED_OPERATION");
|
|
13
13
|
}
|
|
14
14
|
// FUTURE: implement fallback logic [e.g. runtime config invalid]
|
|
15
15
|
const resolvedProviderId = catalogModel.providers[0];
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const getBaggageAttributes: (request?: Request) => Record<string, string>;
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
const HEBO_BAGGAGE_PREFIX = "hebo.";
|
|
2
|
+
export const getBaggageAttributes = (request) => {
|
|
3
|
+
const h = request?.headers.get("baggage");
|
|
4
|
+
if (!h)
|
|
5
|
+
return {};
|
|
6
|
+
const attrs = {};
|
|
7
|
+
for (const part of h.split(",")) {
|
|
8
|
+
const [k, v] = part.trim().split("=", 2);
|
|
9
|
+
if (!k || !v)
|
|
10
|
+
continue;
|
|
11
|
+
const [rawValue] = v.split(";", 1);
|
|
12
|
+
if (!rawValue)
|
|
13
|
+
continue;
|
|
14
|
+
let value = rawValue;
|
|
15
|
+
try {
|
|
16
|
+
value = decodeURIComponent(rawValue);
|
|
17
|
+
}
|
|
18
|
+
catch { }
|
|
19
|
+
if (k.startsWith(HEBO_BAGGAGE_PREFIX)) {
|
|
20
|
+
attrs[k.slice(HEBO_BAGGAGE_PREFIX.length)] = value;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
return attrs;
|
|
24
|
+
};
|
|
@@ -1 +1,2 @@
|
|
|
1
|
-
|
|
1
|
+
import type { TelemetrySignalLevel } from "../types";
|
|
2
|
+
export declare const initFetch: (level?: TelemetrySignalLevel) => void;
|
package/dist/telemetry/fetch.js
CHANGED
|
@@ -2,13 +2,23 @@ import { SpanKind } from "@opentelemetry/api";
|
|
|
2
2
|
import { withSpan } from "./span";
|
|
3
3
|
const ORIGINAL_FETCH_KEY = Symbol.for("@hebo/fetch/original-fetch");
|
|
4
4
|
const g = globalThis;
|
|
5
|
-
|
|
5
|
+
let fetchTracingEnabled = false;
|
|
6
|
+
const shouldTraceFetch = (init) => typeof init?.headers?.["user-agent"] === "string" &&
|
|
7
|
+
init.headers["user-agent"].indexOf("ai-sdk/provider-utils") !== -1;
|
|
8
|
+
const otelFetch = (input, init) => {
|
|
6
9
|
const original = g[ORIGINAL_FETCH_KEY];
|
|
10
|
+
if (!fetchTracingEnabled)
|
|
11
|
+
return original(input, init);
|
|
12
|
+
if (!shouldTraceFetch(init))
|
|
13
|
+
return original(input, init);
|
|
7
14
|
return withSpan("fetch", () => original(input, init), { kind: SpanKind.CLIENT });
|
|
8
15
|
};
|
|
9
|
-
export const initFetch = () => {
|
|
16
|
+
export const initFetch = (level) => {
|
|
17
|
+
fetchTracingEnabled = level === "full";
|
|
18
|
+
if (!fetchTracingEnabled)
|
|
19
|
+
return;
|
|
10
20
|
if (g[ORIGINAL_FETCH_KEY])
|
|
11
21
|
return;
|
|
12
22
|
g[ORIGINAL_FETCH_KEY] = globalThis.fetch.bind(globalThis);
|
|
13
|
-
globalThis.fetch =
|
|
23
|
+
globalThis.fetch = otelFetch;
|
|
14
24
|
};
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import { type Attributes } from "@opentelemetry/api";
|
|
2
|
+
import type { TelemetrySignalLevel } from "../types";
|
|
3
|
+
export declare const recordRequestDuration: (duration: number, attrs: Attributes, signalLevel?: TelemetrySignalLevel) => void;
|
|
4
|
+
export declare const recordTokenUsage: (tokenAttrs: Attributes, metricAttrs: Attributes, signalLevel?: TelemetrySignalLevel) => void;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { metrics } from "@opentelemetry/api";
|
|
2
|
+
const meter = metrics.getMeter("@hebo-ai/gateway");
|
|
3
|
+
const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.duration", {
|
|
4
|
+
description: "End-to-end gateway request duration",
|
|
5
|
+
unit: "s",
|
|
6
|
+
advice: {
|
|
7
|
+
explicitBucketBoundaries: [
|
|
8
|
+
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 240,
|
|
9
|
+
],
|
|
10
|
+
},
|
|
11
|
+
});
|
|
12
|
+
const tokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
|
|
13
|
+
description: "Token usage reported by upstream model responses",
|
|
14
|
+
unit: "{token}",
|
|
15
|
+
advice: {
|
|
16
|
+
explicitBucketBoundaries: [
|
|
17
|
+
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
|
|
18
|
+
524288, 1048576,
|
|
19
|
+
],
|
|
20
|
+
},
|
|
21
|
+
});
|
|
22
|
+
// FUTURE: record unsuccessful calls
/**
 * Records one request duration on the request-duration histogram.
 *
 * Nothing is recorded when `signalLevel` is missing or `"off"`. The value is
 * divided by 1000 before recording, so `duration` is presumably given in
 * milliseconds (the histogram is declared with unit "s") — confirm at callers.
 *
 * @param duration Request duration, before conversion.
 * @param attrs Metric attributes attached to the data point.
 * @param signalLevel Telemetry signal level controlling whether to record.
 */
export const recordRequestDuration = (duration, attrs, signalLevel) => {
    const enabled = Boolean(signalLevel) && signalLevel !== "off";
    if (!enabled) {
        return;
    }
    const seconds = duration / 1000;
    requestDurationHistogram.record(seconds, attrs);
};
|
|
28
|
+
// FUTURE: record unsuccessful calls
/**
 * Records token counts on the token-usage histogram, one data point per
 * token type.
 *
 * Only runs when `signalLevel` is `"recommended"` or `"full"`. Each usage
 * attribute is recorded only if its value is a number, and every data point
 * carries `metricAttrs` plus a `gen_ai.token.type` label.
 *
 * @param tokenAttrs Attributes holding the `gen_ai.usage.*` token counts.
 * @param metricAttrs Base metric attributes copied onto every data point.
 * @param signalLevel Telemetry signal level controlling whether to record.
 */
export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
    if (signalLevel !== "recommended" && signalLevel !== "full") {
        return;
    }
    // Usage attribute key paired with the token-type label it is recorded under.
    const usageKeys = [
        ["gen_ai.usage.input_tokens", "input"],
        ["gen_ai.usage.output_tokens", "output"],
        ["gen_ai.usage.total_tokens", "total"],
        ["gen_ai.usage.cached_tokens", "cached"],
        ["gen_ai.usage.reasoning_tokens", "reasoning"],
    ];
    for (const [usageKey, tokenType] of usageKeys) {
        const count = tokenAttrs[usageKey];
        if (typeof count !== "number") {
            continue;
        }
        tokenUsageHistogram.record(count, Object.assign({}, metricAttrs, { "gen_ai.token.type": tokenType }));
    }
};
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import {} from "../types";
|
|
2
|
+
import { resolveRequestId } from "../utils/headers";
|
|
3
|
+
const headerArr = (h, k) => (h.has(k) ? [h.get(k)] : undefined);
|
|
4
|
+
/**
 * Builds HTTP request attributes for the given telemetry signal level.
 *
 * - missing / `"off"`: returns an empty object.
 * - any other level: method, full URL and, when the URL parses, path, scheme,
 *   server address and port (falling back to 443 for `https:` and 80
 *   otherwise when the URL carries no explicit port).
 * - any level other than `"required"`: additionally the request id and the
 *   original user agent.
 * - `"full"`: additionally the content-type and content-length request headers.
 *
 * @param request Incoming request.
 * @param signalLevel Telemetry signal level for the HTTP namespace.
 * @returns Attribute map; entries may be `undefined` when unavailable.
 */
export const getRequestAttributes = (request, signalLevel) => {
    if (!signalLevel || signalLevel === "off") {
        return {};
    }
    // FUTURE: reuse URL from lifecycle
    let parsed;
    try {
        parsed = new URL(request.url);
    }
    catch { }
    // Resolve the port up front: explicit port wins, otherwise scheme default.
    let serverPort;
    if (parsed) {
        if (parsed.port) {
            serverPort = Number(parsed.port);
        }
        else {
            serverPort = parsed.protocol === "https:" ? 443 : 80;
        }
    }
    const attrs = {
        "http.request.method": request.method,
        "url.full": request.url,
        "url.path": parsed?.pathname,
        "url.scheme": parsed?.protocol.replace(":", ""),
        "server.address": parsed?.hostname,
        "server.port": serverPort,
    };
    if (signalLevel !== "required") {
        // FUTURE: does ElysiaJS and other frameworks attach request id?
        const requestId = resolveRequestId(request);
        const userAgent = request.headers.get("user-agent") ?? undefined;
        Object.assign(attrs, {
            "http.request.id": requestId,
            "user_agent.original": userAgent,
        });
    }
    if (signalLevel === "full") {
        // FUTURE: "url.query"
        // FUTURE: "client.address"
        const contentType = headerArr(request.headers, "content-type");
        const contentLength = headerArr(request.headers, "content-length");
        Object.assign(attrs, {
            "http.request.header.content-type": contentType,
            "http.request.header.content-length": contentLength,
        });
    }
    return attrs;
};
|
|
44
|
+
export const getResponseAttributes = (response, signalLevel) => {
|
|
45
|
+
if (!signalLevel || signalLevel === "off")
|
|
46
|
+
return {};
|
|
47
|
+
const attrs = {
|
|
48
|
+
"http.response.status_code": response.status,
|
|
49
|
+
};
|
|
50
|
+
if (signalLevel === "full") {
|
|
51
|
+
Object.assign(attrs, {
|
|
52
|
+
"http.response.header.content-type": [headerArr(response.headers, "content-type")],
|
|
53
|
+
"http.response.header.content-length": [headerArr(response.headers, "content-length")],
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
return attrs;
|
|
57
|
+
};
|
package/dist/telemetry/span.d.ts
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
|
-
import type { Attributes,
|
|
2
|
-
|
|
1
|
+
import type { Attributes, SpanOptions, Tracer } from "@opentelemetry/api";
|
|
2
|
+
import type { TelemetrySignalLevel } from "../types";
|
|
3
|
+
export declare const setSpanTracer: (tracer?: Tracer) => void;
|
|
4
|
+
export declare const setSpanEventsEnabled: (level?: TelemetrySignalLevel) => void;
|
|
5
|
+
export declare const startSpan: (name: string, options?: SpanOptions) => import("@opentelemetry/api").Span & {
|
|
3
6
|
runWithContext: <T>(fn: () => Promise<T> | T) => T | Promise<T>;
|
|
4
7
|
recordError: (_error: unknown) => void;
|
|
5
8
|
finish: () => void;
|
|
@@ -7,4 +10,4 @@ export declare const startSpan: (name: string, options?: SpanOptions, customTrac
|
|
|
7
10
|
};
|
|
8
11
|
export declare const withSpan: <T>(name: string, run: () => Promise<T> | T, options?: SpanOptions) => Promise<T>;
|
|
9
12
|
export declare const addSpanEvent: (name: string, attributes?: Attributes) => void;
|
|
10
|
-
export declare const
|
|
13
|
+
export declare const setSpanAttributes: (attributes?: Attributes) => void;
|
package/dist/telemetry/span.js
CHANGED
|
@@ -1,51 +1,41 @@
|
|
|
1
1
|
import { INVALID_SPAN_CONTEXT, SpanKind, SpanStatusCode, context, trace } from "@opentelemetry/api";
|
|
2
2
|
const DEFAULT_TRACER_NAME = "@hebo-ai/gateway";
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
const maybeSetDynamicAttributes = (span, getAttributes) => {
|
|
6
|
-
const attrs = getAttributes();
|
|
7
|
-
if (Object.keys(attrs).length === 0)
|
|
8
|
-
return;
|
|
9
|
-
span.setAttributes(attrs);
|
|
10
|
-
};
|
|
11
|
-
const getMemoryAttributes = () => {
|
|
12
|
-
const memory = mem();
|
|
13
|
-
if (!memory)
|
|
14
|
-
return {};
|
|
15
|
-
return {
|
|
16
|
-
"process.memory.usage": memory.rss,
|
|
17
|
-
"process.memory.heap.used": memory.heapUsed,
|
|
18
|
-
"process.memory.heap.total": memory.heapTotal,
|
|
19
|
-
};
|
|
20
|
-
};
|
|
3
|
+
let spanTracer;
|
|
4
|
+
let spanEventsEnabled = false;
|
|
21
5
|
const NOOP_SPAN = {
|
|
22
6
|
runWithContext: (fn) => fn(),
|
|
23
7
|
recordError: (_error) => { },
|
|
24
8
|
finish: () => { },
|
|
25
9
|
isExisting: true,
|
|
26
10
|
};
|
|
27
|
-
export const
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
11
|
+
export const setSpanTracer = (tracer) => {
|
|
12
|
+
spanTracer = tracer ?? trace.getTracer(DEFAULT_TRACER_NAME);
|
|
13
|
+
};
|
|
14
|
+
export const setSpanEventsEnabled = (level) => {
|
|
15
|
+
spanEventsEnabled = level === "recommended" || level === "full";
|
|
16
|
+
};
|
|
17
|
+
export const startSpan = (name, options) => {
|
|
18
|
+
if (!spanTracer) {
|
|
33
19
|
return Object.assign(trace.wrapSpanContext(INVALID_SPAN_CONTEXT), NOOP_SPAN);
|
|
34
20
|
}
|
|
35
|
-
|
|
21
|
+
const parentContext = context.active();
|
|
22
|
+
const activeSpan = trace.getActiveSpan();
|
|
23
|
+
const span = spanTracer.startSpan(name, { kind: activeSpan ? SpanKind.INTERNAL : SpanKind.SERVER, ...options }, parentContext);
|
|
36
24
|
const runWithContext = (fn) => context.with(trace.setSpan(parentContext, span), fn);
|
|
37
25
|
const recordError = (error) => {
|
|
38
|
-
const err =
|
|
26
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
39
27
|
span.recordException(err);
|
|
40
28
|
span.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
|
|
41
29
|
};
|
|
42
30
|
const finish = () => {
|
|
43
|
-
maybeSetDynamicAttributes(span, getMemoryAttributes);
|
|
44
31
|
span.end();
|
|
45
32
|
};
|
|
46
33
|
return Object.assign(span, { runWithContext, recordError, finish, isExisting: !!activeSpan });
|
|
47
34
|
};
|
|
48
35
|
export const withSpan = async (name, run, options) => {
|
|
36
|
+
if (!spanTracer) {
|
|
37
|
+
return await run();
|
|
38
|
+
}
|
|
49
39
|
const started = startSpan(name, options);
|
|
50
40
|
try {
|
|
51
41
|
return await started.runWithContext(run);
|
|
@@ -59,14 +49,12 @@ export const withSpan = async (name, run, options) => {
|
|
|
59
49
|
}
|
|
60
50
|
};
|
|
61
51
|
export const addSpanEvent = (name, attributes) => {
|
|
62
|
-
|
|
63
|
-
|
|
52
|
+
if (!spanEventsEnabled)
|
|
53
|
+
return;
|
|
54
|
+
trace.getActiveSpan()?.addEvent(name, attributes);
|
|
64
55
|
};
|
|
65
|
-
export const
|
|
66
|
-
|
|
67
|
-
if (!span)
|
|
56
|
+
export const setSpanAttributes = (attributes) => {
|
|
57
|
+
if (!attributes)
|
|
68
58
|
return;
|
|
69
|
-
|
|
70
|
-
span.recordException(err);
|
|
71
|
-
span.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
|
|
59
|
+
trace.getActiveSpan()?.setAttributes(attributes);
|
|
72
60
|
};
|
|
@@ -1,7 +1,3 @@
|
|
|
1
|
-
export
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
}) => void;
|
|
5
|
-
onError?: (error: unknown, status: number) => void;
|
|
6
|
-
};
|
|
7
|
-
export declare const instrumentStream: (src: ReadableStream<Uint8Array>, hooks: InstrumentStreamHooks, signal?: AbortSignal) => ReadableStream<Uint8Array>;
|
|
1
|
+
export declare const wrapStream: (src: ReadableStream, hooks: {
|
|
2
|
+
onDone?: (status: number, reason: unknown) => void;
|
|
3
|
+
}, signal?: AbortSignal) => ReadableStream;
|
package/dist/telemetry/stream.js
CHANGED
|
@@ -1,43 +1,43 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
let
|
|
1
|
+
const isErrorChunk = (v) => !!v?.error;
|
|
2
|
+
export const wrapStream = (src, hooks, signal) => {
|
|
3
|
+
let finishOnce = false;
|
|
4
4
|
const finish = (status, reason) => {
|
|
5
|
-
if (
|
|
5
|
+
if (finishOnce)
|
|
6
6
|
return;
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
reason = signal?.reason;
|
|
10
|
-
if (status >= 400) {
|
|
11
|
-
hooks.onError?.(reason, status);
|
|
12
|
-
}
|
|
13
|
-
hooks.onComplete?.(status, stats);
|
|
7
|
+
finishOnce = true;
|
|
8
|
+
hooks.onDone?.(status, reason ?? signal?.reason);
|
|
14
9
|
};
|
|
15
10
|
return new ReadableStream({
|
|
16
11
|
async start(controller) {
|
|
17
12
|
const reader = src.getReader();
|
|
13
|
+
const close = (status, reason) => {
|
|
14
|
+
finish(status, reason);
|
|
15
|
+
reader.cancel(reason).catch(() => { });
|
|
16
|
+
controller.close();
|
|
17
|
+
};
|
|
18
18
|
try {
|
|
19
19
|
for (;;) {
|
|
20
20
|
if (signal?.aborted) {
|
|
21
|
-
|
|
22
|
-
reader.cancel(signal.reason).catch(() => { });
|
|
23
|
-
controller.close();
|
|
21
|
+
close(499, signal.reason);
|
|
24
22
|
return;
|
|
25
23
|
}
|
|
26
24
|
// eslint-disable-next-line no-await-in-loop
|
|
27
25
|
const { value, done } = await reader.read();
|
|
28
26
|
if (done)
|
|
29
27
|
break;
|
|
30
|
-
stats.bytes += value.byteLength;
|
|
31
28
|
controller.enqueue(value);
|
|
29
|
+
if (isErrorChunk(value)) {
|
|
30
|
+
const status = value.error.type === "invalid_request_error" ? 422 : 502;
|
|
31
|
+
close(status, value.error.message);
|
|
32
|
+
return;
|
|
33
|
+
}
|
|
32
34
|
}
|
|
33
35
|
finish(200);
|
|
34
36
|
controller.close();
|
|
35
37
|
}
|
|
36
38
|
catch (err) {
|
|
37
39
|
const status = signal?.aborted ? 499 : err?.name === "AbortError" ? 503 : 502;
|
|
38
|
-
|
|
39
|
-
reader.cancel(err).catch(() => { });
|
|
40
|
-
controller.close();
|
|
40
|
+
close(status, err);
|
|
41
41
|
}
|
|
42
42
|
finally {
|
|
43
43
|
try {
|
package/dist/types.d.ts
CHANGED
|
@@ -72,10 +72,6 @@ export type GatewayContext = {
|
|
|
72
72
|
* Response object returned by the handler.
|
|
73
73
|
*/
|
|
74
74
|
response?: Response;
|
|
75
|
-
/**
|
|
76
|
-
* Structured object result for streaming requests. Only available at the end of the stream.
|
|
77
|
-
*/
|
|
78
|
-
streamResult?: ChatCompletions;
|
|
79
75
|
};
|
|
80
76
|
/**
|
|
81
77
|
* Hook context: all fields readonly except `state`.
|
|
@@ -126,6 +122,7 @@ export type GatewayHooks = {
|
|
|
126
122
|
*/
|
|
127
123
|
onResponse?: (ctx: OnResponseHookContext) => void | Response | Promise<void | Response>;
|
|
128
124
|
};
|
|
125
|
+
export type TelemetrySignalLevel = "off" | "required" | "recommended" | "full";
|
|
129
126
|
/**
|
|
130
127
|
* Main configuration object for the gateway.
|
|
131
128
|
*/
|
|
@@ -146,6 +143,10 @@ export type GatewayConfig = {
|
|
|
146
143
|
* Optional lifecycle hooks for routing, auth, and response shaping.
|
|
147
144
|
*/
|
|
148
145
|
hooks?: GatewayHooks;
|
|
146
|
+
/**
|
|
147
|
+
* Preferred logger configuration: custom logger or default logger settings.
|
|
148
|
+
*/
|
|
149
|
+
logger?: Logger | LoggerConfig | null;
|
|
149
150
|
/**
|
|
150
151
|
* Optional AI SDK telemetry configuration.
|
|
151
152
|
*/
|
|
@@ -160,17 +161,18 @@ export type GatewayConfig = {
|
|
|
160
161
|
*/
|
|
161
162
|
tracer?: Tracer;
|
|
162
163
|
/**
|
|
163
|
-
*
|
|
164
|
-
* -
|
|
164
|
+
* Telemetry signal levels by namespace.
|
|
165
|
+
* - off: disable the namespace
|
|
166
|
+
* - required: minimal baseline
|
|
165
167
|
* - recommended: practical defaults
|
|
166
|
-
* - full: include all available
|
|
168
|
+
* - full: include all available details
|
|
167
169
|
*/
|
|
168
|
-
|
|
170
|
+
signals?: {
|
|
171
|
+
gen_ai?: TelemetrySignalLevel;
|
|
172
|
+
http?: TelemetrySignalLevel;
|
|
173
|
+
hebo?: TelemetrySignalLevel;
|
|
174
|
+
};
|
|
169
175
|
};
|
|
170
|
-
/**
|
|
171
|
-
* Preferred logger configuration: custom logger or default logger settings.
|
|
172
|
-
*/
|
|
173
|
-
logger?: Logger | LoggerConfig | null;
|
|
174
176
|
};
|
|
175
177
|
export declare const kParsed: unique symbol;
|
|
176
178
|
export type GatewayConfigParsed = GatewayConfig & {
|
package/dist/utils/headers.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
export declare const REQUEST_ID_HEADER = "x-request-id";
|
|
2
|
-
type HeaderSource =
|
|
2
|
+
type HeaderSource = Request | ResponseInit | undefined;
|
|
3
3
|
export declare const resolveRequestId: (source: HeaderSource) => string | undefined;
|
|
4
4
|
export {};
|
package/dist/utils/headers.js
CHANGED
|
@@ -1,18 +1,16 @@
|
|
|
1
1
|
export const REQUEST_ID_HEADER = "x-request-id";
|
|
2
2
|
export const resolveRequestId = (source) => {
|
|
3
|
-
if (!source
|
|
3
|
+
if (!source)
|
|
4
4
|
return undefined;
|
|
5
|
-
if (source instanceof Request
|
|
5
|
+
if (source instanceof Request) {
|
|
6
6
|
return source.headers.get(REQUEST_ID_HEADER) ?? undefined;
|
|
7
7
|
}
|
|
8
|
-
const headers =
|
|
9
|
-
if (!headers
|
|
8
|
+
const headers = source.headers;
|
|
9
|
+
if (!headers)
|
|
10
10
|
return undefined;
|
|
11
|
-
if (
|
|
12
|
-
return headers[REQUEST_ID_HEADER] ?? undefined;
|
|
13
|
-
}
|
|
14
|
-
if (headers instanceof Headers)
|
|
11
|
+
if (headers instanceof Headers) {
|
|
15
12
|
return headers.get(REQUEST_ID_HEADER) ?? undefined;
|
|
13
|
+
}
|
|
16
14
|
if (Array.isArray(headers)) {
|
|
17
15
|
for (const [key, value] of headers) {
|
|
18
16
|
if (key.toLowerCase() === REQUEST_ID_HEADER)
|
|
@@ -20,5 +18,5 @@ export const resolveRequestId = (source) => {
|
|
|
20
18
|
}
|
|
21
19
|
return undefined;
|
|
22
20
|
}
|
|
23
|
-
return
|
|
21
|
+
return headers[REQUEST_ID_HEADER];
|
|
24
22
|
};
|
package/dist/utils/request.d.ts
CHANGED
|
@@ -1,8 +1,4 @@
|
|
|
1
1
|
import type { RequestPatch } from "../types";
|
|
2
2
|
export declare const prepareRequestHeaders: (request: Request) => Headers | undefined;
|
|
3
|
-
export declare const prepareRequestBody: (request: Request) => Promise<{
|
|
4
|
-
body: ArrayBuffer | undefined;
|
|
5
|
-
requestBytes: number;
|
|
6
|
-
}>;
|
|
7
3
|
export declare const prepareForwardHeaders: (request: Request) => Record<string, string>;
|
|
8
4
|
export declare const maybeApplyRequestPatch: (request: Request, patch: RequestPatch) => Request;
|
package/dist/utils/request.js
CHANGED
|
@@ -10,15 +10,6 @@ export const prepareRequestHeaders = (request) => {
|
|
|
10
10
|
headers.set(REQUEST_ID_HEADER, requestId);
|
|
11
11
|
return headers;
|
|
12
12
|
};
|
|
13
|
-
export const prepareRequestBody = async (request) => {
|
|
14
|
-
let requestBytes = 0;
|
|
15
|
-
let body;
|
|
16
|
-
if (request.body) {
|
|
17
|
-
body = await request.arrayBuffer();
|
|
18
|
-
requestBytes = body.byteLength;
|
|
19
|
-
}
|
|
20
|
-
return { body, requestBytes };
|
|
21
|
-
};
|
|
22
13
|
export const prepareForwardHeaders = (request) => {
|
|
23
14
|
const userAgent = request.headers.get("user-agent");
|
|
24
15
|
const appendedUserAgent = userAgent
|
package/dist/utils/response.js
CHANGED
|
@@ -13,7 +13,7 @@ class JsonToSseTransformStream extends TransformStream {
|
|
|
13
13
|
}
|
|
14
14
|
}
|
|
15
15
|
export const prepareResponseInit = (request) => ({
|
|
16
|
-
headers: { [REQUEST_ID_HEADER]: resolveRequestId(request
|
|
16
|
+
headers: { [REQUEST_ID_HEADER]: resolveRequestId(request) },
|
|
17
17
|
});
|
|
18
18
|
export const mergeResponseInit = (defaultHeaders, responseInit) => {
|
|
19
19
|
const headers = new Headers(defaultHeaders);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hebo-ai/gateway",
|
|
3
|
-
"version": "0.4.0-beta.
|
|
3
|
+
"version": "0.4.0-beta.4",
|
|
4
4
|
"description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
@@ -156,7 +156,6 @@
|
|
|
156
156
|
"dependencies": {
|
|
157
157
|
"@ai-sdk/provider": "^3.0.7",
|
|
158
158
|
"@ai-sdk/provider-utils": "^4.0.13",
|
|
159
|
-
"@opentelemetry/api": "^1.9.0",
|
|
160
159
|
"ai": "^6.0.67",
|
|
161
160
|
"serialize-error": "^13.0.1",
|
|
162
161
|
"zod": "^4.3.6"
|
|
@@ -170,6 +169,8 @@
|
|
|
170
169
|
"@ai-sdk/openai": "^3.0.23",
|
|
171
170
|
"@aws-sdk/credential-providers": "^3.981.0",
|
|
172
171
|
"@mjackson/node-fetch-server": "^0.7.0",
|
|
172
|
+
"@opentelemetry/api": "^1.9.0",
|
|
173
|
+
"@opentelemetry/context-async-hooks": "^2.5.1",
|
|
173
174
|
"@opentelemetry/sdk-trace-base": "^2.5.1",
|
|
174
175
|
"@tanstack/react-router": "^1.157.16",
|
|
175
176
|
"@tanstack/react-start": "^1.157.16",
|
|
@@ -195,6 +196,7 @@
|
|
|
195
196
|
"@ai-sdk/google-vertex": "^4.0.37",
|
|
196
197
|
"@ai-sdk/groq": "^3.0.19",
|
|
197
198
|
"@ai-sdk/openai": "^3.0.23",
|
|
199
|
+
"@opentelemetry/api": "^1.9.0",
|
|
198
200
|
"typescript": "^5.9.3",
|
|
199
201
|
"voyage-ai-provider": "^3.0.0"
|
|
200
202
|
},
|
package/src/config.ts
CHANGED
|
@@ -1,16 +1,21 @@
|
|
|
1
1
|
import { isLogger, logger, setLoggerInstance } from "./logger";
|
|
2
2
|
import { createDefaultLogger } from "./logger/default";
|
|
3
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
kParsed,
|
|
5
|
+
type GatewayConfig,
|
|
6
|
+
type GatewayConfigParsed,
|
|
7
|
+
type TelemetrySignalLevel,
|
|
8
|
+
} from "./types";
|
|
4
9
|
|
|
5
10
|
export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
|
|
6
|
-
// If it has been parsed before, just return
|
|
11
|
+
// If it has been parsed before, just return.
|
|
7
12
|
if (kParsed in config) return config as GatewayConfigParsed;
|
|
8
13
|
|
|
9
14
|
const providers = config.providers ?? {};
|
|
10
15
|
const parsedProviders = {} as typeof providers;
|
|
11
16
|
const models = config.models ?? {};
|
|
12
17
|
|
|
13
|
-
// Set the global logger instance
|
|
18
|
+
// Set the global logger instance.
|
|
14
19
|
if (config.logger === undefined) {
|
|
15
20
|
setLoggerInstance(createDefaultLogger({}));
|
|
16
21
|
} else if (config.logger !== null) {
|
|
@@ -23,7 +28,7 @@ export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
|
|
|
23
28
|
);
|
|
24
29
|
}
|
|
25
30
|
|
|
26
|
-
// Strip providers that are not configured
|
|
31
|
+
// Strip providers that are not configured.
|
|
27
32
|
for (const id in providers) {
|
|
28
33
|
const provider = providers[id];
|
|
29
34
|
if (provider === undefined) {
|
|
@@ -37,7 +42,7 @@ export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
|
|
|
37
42
|
throw new Error("No providers configured (config.providers is empty)");
|
|
38
43
|
}
|
|
39
44
|
|
|
40
|
-
// Strip providers that are not configured from models
|
|
45
|
+
// Strip providers that are not configured from models.
|
|
41
46
|
const parsedModels = {} as typeof models;
|
|
42
47
|
const warnings = new Set<string>();
|
|
43
48
|
for (const id in models) {
|
|
@@ -60,12 +65,28 @@ export const parseConfig = (config: GatewayConfig): GatewayConfigParsed => {
|
|
|
60
65
|
throw new Error("No models configured (config.models is empty)");
|
|
61
66
|
}
|
|
62
67
|
|
|
68
|
+
// Default for the telemetry settings.
|
|
69
|
+
const telemetryEnabled = config.telemetry?.enabled ?? false;
|
|
70
|
+
const telemetrySignals: Record<"http" | "gen_ai" | "hebo", TelemetrySignalLevel> =
|
|
71
|
+
telemetryEnabled
|
|
72
|
+
? {
|
|
73
|
+
http: config.telemetry?.signals?.http ?? "recommended",
|
|
74
|
+
gen_ai: config.telemetry?.signals?.gen_ai ?? "full",
|
|
75
|
+
hebo: config.telemetry?.signals?.hebo ?? "off",
|
|
76
|
+
}
|
|
77
|
+
: {
|
|
78
|
+
http: "off",
|
|
79
|
+
gen_ai: "off",
|
|
80
|
+
hebo: "off",
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
// Return parsed config.
|
|
63
84
|
return {
|
|
64
85
|
...config,
|
|
65
|
-
logger: config.logger,
|
|
66
86
|
telemetry: {
|
|
67
87
|
...config.telemetry,
|
|
68
|
-
enabled:
|
|
88
|
+
enabled: telemetryEnabled,
|
|
89
|
+
signals: telemetrySignals,
|
|
69
90
|
},
|
|
70
91
|
providers: parsedProviders,
|
|
71
92
|
models: parsedModels,
|
|
@@ -185,6 +185,7 @@ export function fromChatCompletionsAssistantMessage(
|
|
|
185
185
|
|
|
186
186
|
if (tool_calls?.length) {
|
|
187
187
|
for (const tc of tool_calls) {
|
|
188
|
+
// eslint-disable-next-line no-shadow
|
|
188
189
|
const { id, function: fn, extra_content } = tc;
|
|
189
190
|
const out: ToolCallPart = {
|
|
190
191
|
type: "tool-call",
|
|
@@ -535,9 +536,8 @@ export class ChatCompletionsStream extends TransformStream<
|
|
|
535
536
|
|
|
536
537
|
case "error": {
|
|
537
538
|
const error = part.error;
|
|
538
|
-
// FUTURE mask in production mode and return responseID
|
|
539
539
|
controller.enqueue(toOpenAIError(error));
|
|
540
|
-
|
|
540
|
+
controller.terminate();
|
|
541
541
|
}
|
|
542
542
|
}
|
|
543
543
|
},
|