@hebo-ai/gateway 0.4.0-beta.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -5
- package/dist/config.js +21 -7
- package/dist/endpoints/chat-completions/converters.d.ts +3 -3
- package/dist/endpoints/chat-completions/converters.js +16 -8
- package/dist/endpoints/chat-completions/handler.js +34 -27
- package/dist/endpoints/chat-completions/otel.d.ts +6 -0
- package/dist/endpoints/chat-completions/otel.js +127 -0
- package/dist/endpoints/embeddings/handler.js +19 -10
- package/dist/endpoints/embeddings/otel.d.ts +6 -0
- package/dist/endpoints/embeddings/otel.js +35 -0
- package/dist/endpoints/models/handler.js +3 -4
- package/dist/errors/gateway.d.ts +1 -1
- package/dist/errors/gateway.js +3 -4
- package/dist/errors/openai.js +11 -12
- package/dist/errors/utils.d.ts +3 -4
- package/dist/errors/utils.js +6 -6
- package/dist/gateway.js +1 -1
- package/dist/lifecycle.js +71 -29
- package/dist/middleware/matcher.js +1 -1
- package/dist/models/amazon/presets.d.ts +37 -37
- package/dist/models/amazon/presets.js +1 -1
- package/dist/models/anthropic/presets.d.ts +56 -56
- package/dist/models/cohere/presets.d.ts +54 -54
- package/dist/models/cohere/presets.js +2 -2
- package/dist/models/google/presets.d.ts +31 -31
- package/dist/models/google/presets.js +1 -1
- package/dist/models/meta/presets.d.ts +42 -42
- package/dist/models/openai/presets.d.ts +96 -96
- package/dist/models/openai/presets.js +1 -1
- package/dist/models/types.d.ts +1 -1
- package/dist/models/voyage/presets.d.ts +92 -92
- package/dist/models/voyage/presets.js +1 -1
- package/dist/providers/registry.js +2 -2
- package/dist/telemetry/baggage.d.ts +1 -0
- package/dist/telemetry/baggage.js +24 -0
- package/dist/telemetry/fetch.d.ts +2 -1
- package/dist/telemetry/fetch.js +13 -3
- package/dist/telemetry/gen-ai.d.ts +5 -0
- package/dist/telemetry/gen-ai.js +60 -0
- package/dist/telemetry/http.d.ts +3 -0
- package/dist/telemetry/http.js +57 -0
- package/dist/telemetry/memory.d.ts +2 -0
- package/dist/telemetry/memory.js +27 -0
- package/dist/telemetry/span.d.ts +6 -3
- package/dist/telemetry/span.js +24 -36
- package/dist/telemetry/stream.d.ts +3 -7
- package/dist/telemetry/stream.js +26 -29
- package/dist/types.d.ts +16 -15
- package/dist/utils/headers.d.ts +1 -1
- package/dist/utils/headers.js +7 -9
- package/dist/utils/request.d.ts +0 -4
- package/dist/utils/request.js +0 -9
- package/dist/utils/response.js +1 -1
- package/package.json +5 -2
- package/src/config.ts +28 -7
- package/src/endpoints/chat-completions/converters.ts +18 -11
- package/src/endpoints/chat-completions/handler.ts +46 -28
- package/src/endpoints/chat-completions/otel.ts +161 -0
- package/src/endpoints/embeddings/handler.test.ts +2 -2
- package/src/endpoints/embeddings/handler.ts +28 -10
- package/src/endpoints/embeddings/otel.ts +56 -0
- package/src/endpoints/models/handler.ts +3 -5
- package/src/errors/gateway.ts +5 -5
- package/src/errors/openai.ts +25 -17
- package/src/errors/utils.ts +6 -7
- package/src/gateway.ts +1 -1
- package/src/lifecycle.ts +85 -32
- package/src/middleware/matcher.ts +1 -1
- package/src/models/amazon/presets.ts +1 -1
- package/src/models/cohere/presets.ts +2 -2
- package/src/models/google/presets.ts +1 -1
- package/src/models/openai/presets.ts +1 -1
- package/src/models/types.ts +1 -1
- package/src/models/voyage/presets.ts +1 -1
- package/src/providers/registry.ts +2 -2
- package/src/telemetry/baggage.ts +27 -0
- package/src/telemetry/fetch.ts +15 -3
- package/src/telemetry/gen-ai.ts +88 -0
- package/src/telemetry/http.ts +65 -0
- package/src/telemetry/memory.ts +36 -0
- package/src/telemetry/span.ts +28 -40
- package/src/telemetry/stream.ts +36 -40
- package/src/types.ts +18 -18
- package/src/utils/headers.ts +8 -19
- package/src/utils/request.ts +0 -11
- package/src/utils/response.ts +1 -1
- package/dist/telemetry/otel.d.ts +0 -2
- package/dist/telemetry/otel.js +0 -50
- package/dist/telemetry/utils.d.ts +0 -4
- package/dist/telemetry/utils.js +0 -223
- package/src/telemetry/otel.ts +0 -91
- package/src/telemetry/utils.ts +0 -273
|
@@ -7,9 +7,9 @@ export const resolveProvider = (args) => {
|
|
|
7
7
|
if (!catalogModel) {
|
|
8
8
|
throw new GatewayError(`Model '${modelId}' not found in catalog`, 422, "MODEL_NOT_FOUND");
|
|
9
9
|
}
|
|
10
|
-
const modality = operation === "embeddings" ? "
|
|
10
|
+
const modality = operation === "embeddings" ? "embedding" : "text";
|
|
11
11
|
if (catalogModel.modalities && !catalogModel.modalities.output.includes(modality)) {
|
|
12
|
-
throw new GatewayError(`Model '${modelId}' does not support '${
|
|
12
|
+
throw new GatewayError(`Model '${modelId}' does not support '${modality}' output`, 422, "MODEL_UNSUPPORTED_OPERATION");
|
|
13
13
|
}
|
|
14
14
|
// FUTURE: implement fallback logic [e.g. runtime config invalid]
|
|
15
15
|
const resolvedProviderId = catalogModel.providers[0];
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const getBaggageAttributes: (request?: Request) => Record<string, string>;
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
const HEBO_BAGGAGE_PREFIX = "hebo.";
|
|
2
|
+
export const getBaggageAttributes = (request) => {
|
|
3
|
+
const h = request?.headers.get("baggage");
|
|
4
|
+
if (!h)
|
|
5
|
+
return {};
|
|
6
|
+
const attrs = {};
|
|
7
|
+
for (const part of h.split(",")) {
|
|
8
|
+
const [k, v] = part.trim().split("=", 2);
|
|
9
|
+
if (!k || !v)
|
|
10
|
+
continue;
|
|
11
|
+
const [rawValue] = v.split(";", 1);
|
|
12
|
+
if (!rawValue)
|
|
13
|
+
continue;
|
|
14
|
+
let value = rawValue;
|
|
15
|
+
try {
|
|
16
|
+
value = decodeURIComponent(rawValue);
|
|
17
|
+
}
|
|
18
|
+
catch { }
|
|
19
|
+
if (k.startsWith(HEBO_BAGGAGE_PREFIX)) {
|
|
20
|
+
attrs[k.slice(HEBO_BAGGAGE_PREFIX.length)] = value;
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
return attrs;
|
|
24
|
+
};
|
|
@@ -1 +1,2 @@
|
|
|
1
|
-
|
|
1
|
+
import type { TelemetrySignalLevel } from "../types";
|
|
2
|
+
export declare const initFetch: (level?: TelemetrySignalLevel) => void;
|
package/dist/telemetry/fetch.js
CHANGED
|
@@ -2,13 +2,23 @@ import { SpanKind } from "@opentelemetry/api";
|
|
|
2
2
|
import { withSpan } from "./span";
|
|
3
3
|
const ORIGINAL_FETCH_KEY = Symbol.for("@hebo/fetch/original-fetch");
|
|
4
4
|
const g = globalThis;
|
|
5
|
-
|
|
5
|
+
let fetchTracingEnabled = false;
|
|
6
|
+
const shouldTraceFetch = (init) => typeof init?.headers?.["user-agent"] === "string" &&
|
|
7
|
+
init.headers["user-agent"].indexOf("ai-sdk/provider-utils") !== -1;
|
|
8
|
+
const otelFetch = (input, init) => {
|
|
6
9
|
const original = g[ORIGINAL_FETCH_KEY];
|
|
10
|
+
if (!fetchTracingEnabled)
|
|
11
|
+
return original(input, init);
|
|
12
|
+
if (!shouldTraceFetch(init))
|
|
13
|
+
return original(input, init);
|
|
7
14
|
return withSpan("fetch", () => original(input, init), { kind: SpanKind.CLIENT });
|
|
8
15
|
};
|
|
9
|
-
export const initFetch = () => {
|
|
16
|
+
export const initFetch = (level) => {
|
|
17
|
+
fetchTracingEnabled = level === "full";
|
|
18
|
+
if (!fetchTracingEnabled)
|
|
19
|
+
return;
|
|
10
20
|
if (g[ORIGINAL_FETCH_KEY])
|
|
11
21
|
return;
|
|
12
22
|
g[ORIGINAL_FETCH_KEY] = globalThis.fetch.bind(globalThis);
|
|
13
|
-
globalThis.fetch =
|
|
23
|
+
globalThis.fetch = otelFetch;
|
|
14
24
|
};
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import { type Attributes } from "@opentelemetry/api";
|
|
2
|
+
import type { TelemetrySignalLevel } from "../types";
|
|
3
|
+
export declare const recordRequestDuration: (start: number, attrs: Attributes, signalLevel?: TelemetrySignalLevel) => void;
|
|
4
|
+
export declare const recordTimePerOutputToken: (start: number, tokenAttrs: Attributes, metricAttrs: Attributes, signalLevel?: TelemetrySignalLevel) => void;
|
|
5
|
+
export declare const recordTokenUsage: (tokenAttrs: Attributes, metricAttrs: Attributes, signalLevel?: TelemetrySignalLevel) => void;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { metrics } from "@opentelemetry/api";
|
|
2
|
+
const meter = metrics.getMeter("@hebo/gateway");
|
|
3
|
+
const requestDurationHistogram = meter.createHistogram("gen_ai.server.request.duration", {
|
|
4
|
+
description: "End-to-end gateway request duration",
|
|
5
|
+
unit: "s",
|
|
6
|
+
advice: {
|
|
7
|
+
explicitBucketBoundaries: [
|
|
8
|
+
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120, 240,
|
|
9
|
+
],
|
|
10
|
+
},
|
|
11
|
+
});
|
|
12
|
+
const timePerOutputTokenHistogram = meter.createHistogram("gen_ai.server.time_per_output_token", {
|
|
13
|
+
description: "End-to-end gateway request duration per output token",
|
|
14
|
+
unit: "s",
|
|
15
|
+
advice: {
|
|
16
|
+
explicitBucketBoundaries: [
|
|
17
|
+
0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, 1.0, 2.5,
|
|
18
|
+
],
|
|
19
|
+
},
|
|
20
|
+
});
|
|
21
|
+
const tokenUsageHistogram = meter.createHistogram("gen_ai.client.token.usage", {
|
|
22
|
+
description: "Token usage reported by upstream model responses",
|
|
23
|
+
unit: "{token}",
|
|
24
|
+
advice: {
|
|
25
|
+
explicitBucketBoundaries: [
|
|
26
|
+
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144,
|
|
27
|
+
524288, 1048576,
|
|
28
|
+
],
|
|
29
|
+
},
|
|
30
|
+
});
|
|
31
|
+
// FUTURE: record unsuccessful calls
|
|
32
|
+
export const recordRequestDuration = (start, attrs, signalLevel) => {
|
|
33
|
+
if (!signalLevel || signalLevel === "off")
|
|
34
|
+
return;
|
|
35
|
+
requestDurationHistogram.record((performance.now() - start) / 1000, attrs);
|
|
36
|
+
};
|
|
37
|
+
// FUTURE: record unsuccessful calls
|
|
38
|
+
export const recordTimePerOutputToken = (start, tokenAttrs, metricAttrs, signalLevel) => {
|
|
39
|
+
if (!signalLevel || (signalLevel !== "recommended" && signalLevel !== "full"))
|
|
40
|
+
return;
|
|
41
|
+
const outputTokens = tokenAttrs["gen_ai.usage.output_tokens"];
|
|
42
|
+
if (typeof outputTokens !== "number" || outputTokens <= 0)
|
|
43
|
+
return;
|
|
44
|
+
timePerOutputTokenHistogram.record((performance.now() - start) / 1000 / outputTokens, metricAttrs);
|
|
45
|
+
};
|
|
46
|
+
// FUTURE: record unsuccessful calls
|
|
47
|
+
export const recordTokenUsage = (tokenAttrs, metricAttrs, signalLevel) => {
|
|
48
|
+
if (!signalLevel || (signalLevel !== "recommended" && signalLevel !== "full"))
|
|
49
|
+
return;
|
|
50
|
+
const record = (value, tokenType) => {
|
|
51
|
+
if (typeof value !== "number")
|
|
52
|
+
return;
|
|
53
|
+
tokenUsageHistogram.record(value, Object.assign({}, metricAttrs, { "gen_ai.token.type": tokenType }));
|
|
54
|
+
};
|
|
55
|
+
record(tokenAttrs["gen_ai.usage.input_tokens"], "input");
|
|
56
|
+
record(tokenAttrs["gen_ai.usage.output_tokens"], "output");
|
|
57
|
+
record(tokenAttrs["gen_ai.usage.total_tokens"], "total");
|
|
58
|
+
record(tokenAttrs["gen_ai.usage.cached_tokens"], "cached");
|
|
59
|
+
record(tokenAttrs["gen_ai.usage.reasoning_tokens"], "reasoning");
|
|
60
|
+
};
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import {} from "../types";
|
|
2
|
+
import { resolveRequestId } from "../utils/headers";
|
|
3
|
+
const headerArr = (h, k) => (h.has(k) ? [h.get(k)] : undefined);
|
|
4
|
+
export const getRequestAttributes = (request, signalLevel) => {
|
|
5
|
+
if (!signalLevel || signalLevel === "off")
|
|
6
|
+
return {};
|
|
7
|
+
let url;
|
|
8
|
+
try {
|
|
9
|
+
// FUTURE: reuse URL from lifecycle
|
|
10
|
+
url = new URL(request.url);
|
|
11
|
+
}
|
|
12
|
+
catch { }
|
|
13
|
+
const attrs = {
|
|
14
|
+
"http.request.method": request.method,
|
|
15
|
+
"url.full": request.url,
|
|
16
|
+
"url.path": url?.pathname,
|
|
17
|
+
"url.scheme": url?.protocol.replace(":", ""),
|
|
18
|
+
"server.address": url?.hostname,
|
|
19
|
+
"server.port": url
|
|
20
|
+
? url.port
|
|
21
|
+
? Number(url.port)
|
|
22
|
+
: url.protocol === "https:"
|
|
23
|
+
? 443
|
|
24
|
+
: 80
|
|
25
|
+
: undefined,
|
|
26
|
+
};
|
|
27
|
+
if (signalLevel !== "required") {
|
|
28
|
+
Object.assign(attrs, {
|
|
29
|
+
// FUTURE: does ElysiaJS and other frameworks attach request id?
|
|
30
|
+
"http.request.id": resolveRequestId(request),
|
|
31
|
+
"user_agent.original": request.headers.get("user-agent") ?? undefined,
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
if (signalLevel === "full") {
|
|
35
|
+
Object.assign(attrs, {
|
|
36
|
+
// FUTURE: "url.query"
|
|
37
|
+
"http.request.header.content-type": headerArr(request.headers, "content-type"),
|
|
38
|
+
"http.request.header.content-length": headerArr(request.headers, "content-length"),
|
|
39
|
+
// FUTURE: "client.address"
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
return attrs;
|
|
43
|
+
};
|
|
44
|
+
export const getResponseAttributes = (response, signalLevel) => {
|
|
45
|
+
if (!signalLevel || signalLevel === "off")
|
|
46
|
+
return {};
|
|
47
|
+
const attrs = {
|
|
48
|
+
"http.response.status_code": response.status,
|
|
49
|
+
};
|
|
50
|
+
if (signalLevel === "full") {
|
|
51
|
+
Object.assign(attrs, {
|
|
52
|
+
"http.response.header.content-type": [headerArr(response.headers, "content-type")],
|
|
53
|
+
"http.response.header.content-length": [headerArr(response.headers, "content-length")],
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
return attrs;
|
|
57
|
+
};
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { metrics } from "@opentelemetry/api";
|
|
2
|
+
const meter = metrics.getMeter("@hebo/gateway");
|
|
3
|
+
const defaultHeapSpaceAttrs = { "v8js.heap.space.name": "total" };
|
|
4
|
+
const heapUsedCounter = meter.createUpDownCounter("v8js.memory.heap.used", {
|
|
5
|
+
description: "Used bytes in the V8 heap",
|
|
6
|
+
unit: "By",
|
|
7
|
+
});
|
|
8
|
+
const heapSpacePhysicalSizeCounter = meter.createUpDownCounter("v8js.memory.heap.space.physical_size", {
|
|
9
|
+
description: "Physical bytes allocated for the V8 heap space",
|
|
10
|
+
unit: "By",
|
|
11
|
+
});
|
|
12
|
+
const isEnabled = (level) => level === "recommended" || level === "full";
|
|
13
|
+
export const recordV8jsMemory = (level) => {
|
|
14
|
+
if (!isEnabled(level))
|
|
15
|
+
return;
|
|
16
|
+
let usage;
|
|
17
|
+
try {
|
|
18
|
+
usage = globalThis.process?.memoryUsage?.();
|
|
19
|
+
}
|
|
20
|
+
catch {
|
|
21
|
+
return;
|
|
22
|
+
}
|
|
23
|
+
if (!usage)
|
|
24
|
+
return;
|
|
25
|
+
heapUsedCounter.add(usage.heapUsed, defaultHeapSpaceAttrs);
|
|
26
|
+
heapSpacePhysicalSizeCounter.add(usage.rss, defaultHeapSpaceAttrs);
|
|
27
|
+
};
|
package/dist/telemetry/span.d.ts
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
|
-
import type { Attributes,
|
|
2
|
-
|
|
1
|
+
import type { Attributes, SpanOptions, Tracer } from "@opentelemetry/api";
|
|
2
|
+
import type { TelemetrySignalLevel } from "../types";
|
|
3
|
+
export declare const setSpanTracer: (tracer?: Tracer) => void;
|
|
4
|
+
export declare const setSpanEventsEnabled: (level?: TelemetrySignalLevel) => void;
|
|
5
|
+
export declare const startSpan: (name: string, options?: SpanOptions) => import("@opentelemetry/api").Span & {
|
|
3
6
|
runWithContext: <T>(fn: () => Promise<T> | T) => T | Promise<T>;
|
|
4
7
|
recordError: (_error: unknown) => void;
|
|
5
8
|
finish: () => void;
|
|
@@ -7,4 +10,4 @@ export declare const startSpan: (name: string, options?: SpanOptions, customTrac
|
|
|
7
10
|
};
|
|
8
11
|
export declare const withSpan: <T>(name: string, run: () => Promise<T> | T, options?: SpanOptions) => Promise<T>;
|
|
9
12
|
export declare const addSpanEvent: (name: string, attributes?: Attributes) => void;
|
|
10
|
-
export declare const
|
|
13
|
+
export declare const setSpanAttributes: (attributes?: Attributes) => void;
|
package/dist/telemetry/span.js
CHANGED
|
@@ -1,51 +1,41 @@
|
|
|
1
1
|
import { INVALID_SPAN_CONTEXT, SpanKind, SpanStatusCode, context, trace } from "@opentelemetry/api";
|
|
2
|
-
const DEFAULT_TRACER_NAME = "@hebo
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
const maybeSetDynamicAttributes = (span, getAttributes) => {
|
|
6
|
-
const attrs = getAttributes();
|
|
7
|
-
if (Object.keys(attrs).length === 0)
|
|
8
|
-
return;
|
|
9
|
-
span.setAttributes(attrs);
|
|
10
|
-
};
|
|
11
|
-
const getMemoryAttributes = () => {
|
|
12
|
-
const memory = mem();
|
|
13
|
-
if (!memory)
|
|
14
|
-
return {};
|
|
15
|
-
return {
|
|
16
|
-
"process.memory.usage": memory.rss,
|
|
17
|
-
"process.memory.heap.used": memory.heapUsed,
|
|
18
|
-
"process.memory.heap.total": memory.heapTotal,
|
|
19
|
-
};
|
|
20
|
-
};
|
|
2
|
+
const DEFAULT_TRACER_NAME = "@hebo/gateway";
|
|
3
|
+
let spanTracer;
|
|
4
|
+
let spanEventsEnabled = false;
|
|
21
5
|
const NOOP_SPAN = {
|
|
22
6
|
runWithContext: (fn) => fn(),
|
|
23
7
|
recordError: (_error) => { },
|
|
24
8
|
finish: () => { },
|
|
25
9
|
isExisting: true,
|
|
26
10
|
};
|
|
27
|
-
export const
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
11
|
+
export const setSpanTracer = (tracer) => {
|
|
12
|
+
spanTracer = tracer ?? trace.getTracer(DEFAULT_TRACER_NAME);
|
|
13
|
+
};
|
|
14
|
+
export const setSpanEventsEnabled = (level) => {
|
|
15
|
+
spanEventsEnabled = level === "recommended" || level === "full";
|
|
16
|
+
};
|
|
17
|
+
export const startSpan = (name, options) => {
|
|
18
|
+
if (!spanTracer) {
|
|
33
19
|
return Object.assign(trace.wrapSpanContext(INVALID_SPAN_CONTEXT), NOOP_SPAN);
|
|
34
20
|
}
|
|
35
|
-
|
|
21
|
+
const parentContext = context.active();
|
|
22
|
+
const activeSpan = trace.getActiveSpan();
|
|
23
|
+
const span = spanTracer.startSpan(name, { kind: activeSpan ? SpanKind.INTERNAL : SpanKind.SERVER, ...options }, parentContext);
|
|
36
24
|
const runWithContext = (fn) => context.with(trace.setSpan(parentContext, span), fn);
|
|
37
25
|
const recordError = (error) => {
|
|
38
|
-
const err =
|
|
26
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
39
27
|
span.recordException(err);
|
|
40
28
|
span.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
|
|
41
29
|
};
|
|
42
30
|
const finish = () => {
|
|
43
|
-
maybeSetDynamicAttributes(span, getMemoryAttributes);
|
|
44
31
|
span.end();
|
|
45
32
|
};
|
|
46
33
|
return Object.assign(span, { runWithContext, recordError, finish, isExisting: !!activeSpan });
|
|
47
34
|
};
|
|
48
35
|
export const withSpan = async (name, run, options) => {
|
|
36
|
+
if (!spanTracer) {
|
|
37
|
+
return await run();
|
|
38
|
+
}
|
|
49
39
|
const started = startSpan(name, options);
|
|
50
40
|
try {
|
|
51
41
|
return await started.runWithContext(run);
|
|
@@ -59,14 +49,12 @@ export const withSpan = async (name, run, options) => {
|
|
|
59
49
|
}
|
|
60
50
|
};
|
|
61
51
|
export const addSpanEvent = (name, attributes) => {
|
|
62
|
-
|
|
63
|
-
|
|
52
|
+
if (!spanEventsEnabled)
|
|
53
|
+
return;
|
|
54
|
+
trace.getActiveSpan()?.addEvent(name, attributes);
|
|
64
55
|
};
|
|
65
|
-
export const
|
|
66
|
-
|
|
67
|
-
if (!span)
|
|
56
|
+
export const setSpanAttributes = (attributes) => {
|
|
57
|
+
if (!attributes)
|
|
68
58
|
return;
|
|
69
|
-
|
|
70
|
-
span.recordException(err);
|
|
71
|
-
span.setStatus({ code: SpanStatusCode.ERROR, message: err.message });
|
|
59
|
+
trace.getActiveSpan()?.setAttributes(attributes);
|
|
72
60
|
};
|
|
@@ -1,7 +1,3 @@
|
|
|
1
|
-
export
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
}) => void;
|
|
5
|
-
onError?: (error: unknown, status: number) => void;
|
|
6
|
-
};
|
|
7
|
-
export declare const instrumentStream: (src: ReadableStream<Uint8Array>, hooks: InstrumentStreamHooks, signal?: AbortSignal) => ReadableStream<Uint8Array>;
|
|
1
|
+
export declare const wrapStream: (src: ReadableStream, hooks: {
|
|
2
|
+
onDone?: (status: number, reason: unknown) => void;
|
|
3
|
+
}) => ReadableStream;
|
package/dist/telemetry/stream.js
CHANGED
|
@@ -1,43 +1,37 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
reason = signal?.reason;
|
|
10
|
-
if (status >= 400) {
|
|
11
|
-
hooks.onError?.(reason, status);
|
|
1
|
+
import { toOpenAIError } from "#/errors/openai";
|
|
2
|
+
const isErrorChunk = (v) => v instanceof Error || !!v?.error;
|
|
3
|
+
export const wrapStream = (src, hooks) => {
|
|
4
|
+
let finished = false;
|
|
5
|
+
const done = (reader, controller, status, reason) => {
|
|
6
|
+
if (!finished) {
|
|
7
|
+
finished = true;
|
|
8
|
+
hooks.onDone?.(status, reason);
|
|
12
9
|
}
|
|
13
|
-
|
|
10
|
+
reader.cancel(reason).catch(() => { });
|
|
11
|
+
controller.close();
|
|
14
12
|
};
|
|
15
13
|
return new ReadableStream({
|
|
16
14
|
async start(controller) {
|
|
17
15
|
const reader = src.getReader();
|
|
18
16
|
try {
|
|
19
17
|
for (;;) {
|
|
20
|
-
if (signal?.aborted) {
|
|
21
|
-
finish(499, signal.reason);
|
|
22
|
-
reader.cancel(signal.reason).catch(() => { });
|
|
23
|
-
controller.close();
|
|
24
|
-
return;
|
|
25
|
-
}
|
|
26
18
|
// eslint-disable-next-line no-await-in-loop
|
|
27
|
-
const { value, done } = await reader.read();
|
|
28
|
-
if (
|
|
19
|
+
const { value, done: eof } = await reader.read();
|
|
20
|
+
if (eof)
|
|
29
21
|
break;
|
|
30
|
-
|
|
31
|
-
controller.enqueue(
|
|
22
|
+
const out = isErrorChunk(value) ? toOpenAIError(value) : value;
|
|
23
|
+
controller.enqueue(out);
|
|
24
|
+
if (out !== value) {
|
|
25
|
+
const status = out.error?.type === "invalid_request_error" ? 422 : 502;
|
|
26
|
+
done(reader, controller, status, value);
|
|
27
|
+
return;
|
|
28
|
+
}
|
|
32
29
|
}
|
|
33
|
-
|
|
34
|
-
controller.close();
|
|
30
|
+
done(reader, controller, 200);
|
|
35
31
|
}
|
|
36
32
|
catch (err) {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
reader.cancel(err).catch(() => { });
|
|
40
|
-
controller.close();
|
|
33
|
+
controller.enqueue(toOpenAIError(err));
|
|
34
|
+
done(reader, controller, 502, err);
|
|
41
35
|
}
|
|
42
36
|
finally {
|
|
43
37
|
try {
|
|
@@ -47,7 +41,10 @@ export const instrumentStream = (src, hooks, signal) => {
|
|
|
47
41
|
}
|
|
48
42
|
},
|
|
49
43
|
cancel(reason) {
|
|
50
|
-
|
|
44
|
+
if (!finished) {
|
|
45
|
+
finished = true;
|
|
46
|
+
hooks.onDone?.(499, reason);
|
|
47
|
+
}
|
|
51
48
|
src.cancel(reason).catch(() => { });
|
|
52
49
|
},
|
|
53
50
|
});
|
package/dist/types.d.ts
CHANGED
|
@@ -3,7 +3,6 @@ import type { Tracer } from "@opentelemetry/api";
|
|
|
3
3
|
import type { ChatCompletions, ChatCompletionsBody, ChatCompletionsChunk } from "./endpoints/chat-completions/schema";
|
|
4
4
|
import type { Embeddings, EmbeddingsBody } from "./endpoints/embeddings/schema";
|
|
5
5
|
import type { Model, ModelList } from "./endpoints/models";
|
|
6
|
-
import type { OpenAIError } from "./errors/openai";
|
|
7
6
|
import type { Logger, LoggerConfig } from "./logger";
|
|
8
7
|
import type { ModelCatalog, ModelId } from "./models/types";
|
|
9
8
|
import type { ProviderId, ProviderRegistry } from "./providers/types";
|
|
@@ -67,15 +66,11 @@ export type GatewayContext = {
|
|
|
67
66
|
/**
|
|
68
67
|
* Result returned by the handler (pre-response).
|
|
69
68
|
*/
|
|
70
|
-
result?: ChatCompletions | ReadableStream<ChatCompletionsChunk |
|
|
69
|
+
result?: ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | Model | ModelList;
|
|
71
70
|
/**
|
|
72
71
|
* Response object returned by the handler.
|
|
73
72
|
*/
|
|
74
73
|
response?: Response;
|
|
75
|
-
/**
|
|
76
|
-
* Structured object result for streaming requests. Only available at the end of the stream.
|
|
77
|
-
*/
|
|
78
|
-
streamResult?: ChatCompletions;
|
|
79
74
|
};
|
|
80
75
|
/**
|
|
81
76
|
* Hook context: all fields readonly except `state`.
|
|
@@ -119,13 +114,14 @@ export type GatewayHooks = {
|
|
|
119
114
|
* Runs after the endpoint handler.
|
|
120
115
|
* @returns Result to replace, or undefined to keep original.
|
|
121
116
|
*/
|
|
122
|
-
after?: (ctx: AfterHookContext) => void | ChatCompletions | ReadableStream<ChatCompletionsChunk |
|
|
117
|
+
after?: (ctx: AfterHookContext) => void | ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings | Promise<void | ChatCompletions | ReadableStream<ChatCompletionsChunk | Error> | Embeddings>;
|
|
123
118
|
/**
|
|
124
119
|
* Runs after the lifecycle has produced the final Response.
|
|
125
120
|
* @returns Replacement Response, or undefined to keep original.
|
|
126
121
|
*/
|
|
127
122
|
onResponse?: (ctx: OnResponseHookContext) => void | Response | Promise<void | Response>;
|
|
128
123
|
};
|
|
124
|
+
export type TelemetrySignalLevel = "off" | "required" | "recommended" | "full";
|
|
129
125
|
/**
|
|
130
126
|
* Main configuration object for the gateway.
|
|
131
127
|
*/
|
|
@@ -146,6 +142,10 @@ export type GatewayConfig = {
|
|
|
146
142
|
* Optional lifecycle hooks for routing, auth, and response shaping.
|
|
147
143
|
*/
|
|
148
144
|
hooks?: GatewayHooks;
|
|
145
|
+
/**
|
|
146
|
+
* Preferred logger configuration: custom logger or default logger settings.
|
|
147
|
+
*/
|
|
148
|
+
logger?: Logger | LoggerConfig | null;
|
|
149
149
|
/**
|
|
150
150
|
* Optional AI SDK telemetry configuration.
|
|
151
151
|
*/
|
|
@@ -160,17 +160,18 @@ export type GatewayConfig = {
|
|
|
160
160
|
*/
|
|
161
161
|
tracer?: Tracer;
|
|
162
162
|
/**
|
|
163
|
-
*
|
|
164
|
-
* -
|
|
163
|
+
* Telemetry signal levels by namespace.
|
|
164
|
+
* - off: disable the namespace
|
|
165
|
+
* - required: minimal baseline
|
|
165
166
|
* - recommended: practical defaults
|
|
166
|
-
* - full: include all available
|
|
167
|
+
* - full: include all available details
|
|
167
168
|
*/
|
|
168
|
-
|
|
169
|
+
signals?: {
|
|
170
|
+
gen_ai?: TelemetrySignalLevel;
|
|
171
|
+
http?: TelemetrySignalLevel;
|
|
172
|
+
hebo?: TelemetrySignalLevel;
|
|
173
|
+
};
|
|
169
174
|
};
|
|
170
|
-
/**
|
|
171
|
-
* Preferred logger configuration: custom logger or default logger settings.
|
|
172
|
-
*/
|
|
173
|
-
logger?: Logger | LoggerConfig | null;
|
|
174
175
|
};
|
|
175
176
|
export declare const kParsed: unique symbol;
|
|
176
177
|
export type GatewayConfigParsed = GatewayConfig & {
|
package/dist/utils/headers.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
export declare const REQUEST_ID_HEADER = "x-request-id";
|
|
2
|
-
type HeaderSource =
|
|
2
|
+
type HeaderSource = Request | ResponseInit | undefined;
|
|
3
3
|
export declare const resolveRequestId: (source: HeaderSource) => string | undefined;
|
|
4
4
|
export {};
|
package/dist/utils/headers.js
CHANGED
|
@@ -1,18 +1,16 @@
|
|
|
1
1
|
export const REQUEST_ID_HEADER = "x-request-id";
|
|
2
2
|
export const resolveRequestId = (source) => {
|
|
3
|
-
if (!source
|
|
3
|
+
if (!source)
|
|
4
4
|
return undefined;
|
|
5
|
-
if (source instanceof Request
|
|
5
|
+
if (source instanceof Request) {
|
|
6
6
|
return source.headers.get(REQUEST_ID_HEADER) ?? undefined;
|
|
7
7
|
}
|
|
8
|
-
const headers =
|
|
9
|
-
if (!headers
|
|
8
|
+
const headers = source.headers;
|
|
9
|
+
if (!headers)
|
|
10
10
|
return undefined;
|
|
11
|
-
if (
|
|
12
|
-
return headers[REQUEST_ID_HEADER] ?? undefined;
|
|
13
|
-
}
|
|
14
|
-
if (headers instanceof Headers)
|
|
11
|
+
if (headers instanceof Headers) {
|
|
15
12
|
return headers.get(REQUEST_ID_HEADER) ?? undefined;
|
|
13
|
+
}
|
|
16
14
|
if (Array.isArray(headers)) {
|
|
17
15
|
for (const [key, value] of headers) {
|
|
18
16
|
if (key.toLowerCase() === REQUEST_ID_HEADER)
|
|
@@ -20,5 +18,5 @@ export const resolveRequestId = (source) => {
|
|
|
20
18
|
}
|
|
21
19
|
return undefined;
|
|
22
20
|
}
|
|
23
|
-
return
|
|
21
|
+
return headers[REQUEST_ID_HEADER];
|
|
24
22
|
};
|
package/dist/utils/request.d.ts
CHANGED
|
@@ -1,8 +1,4 @@
|
|
|
1
1
|
import type { RequestPatch } from "../types";
|
|
2
2
|
export declare const prepareRequestHeaders: (request: Request) => Headers | undefined;
|
|
3
|
-
export declare const prepareRequestBody: (request: Request) => Promise<{
|
|
4
|
-
body: ArrayBuffer | undefined;
|
|
5
|
-
requestBytes: number;
|
|
6
|
-
}>;
|
|
7
3
|
export declare const prepareForwardHeaders: (request: Request) => Record<string, string>;
|
|
8
4
|
export declare const maybeApplyRequestPatch: (request: Request, patch: RequestPatch) => Request;
|
package/dist/utils/request.js
CHANGED
|
@@ -10,15 +10,6 @@ export const prepareRequestHeaders = (request) => {
|
|
|
10
10
|
headers.set(REQUEST_ID_HEADER, requestId);
|
|
11
11
|
return headers;
|
|
12
12
|
};
|
|
13
|
-
export const prepareRequestBody = async (request) => {
|
|
14
|
-
let requestBytes = 0;
|
|
15
|
-
let body;
|
|
16
|
-
if (request.body) {
|
|
17
|
-
body = await request.arrayBuffer();
|
|
18
|
-
requestBytes = body.byteLength;
|
|
19
|
-
}
|
|
20
|
-
return { body, requestBytes };
|
|
21
|
-
};
|
|
22
13
|
export const prepareForwardHeaders = (request) => {
|
|
23
14
|
const userAgent = request.headers.get("user-agent");
|
|
24
15
|
const appendedUserAgent = userAgent
|
package/dist/utils/response.js
CHANGED
|
@@ -13,7 +13,7 @@ class JsonToSseTransformStream extends TransformStream {
|
|
|
13
13
|
}
|
|
14
14
|
}
|
|
15
15
|
export const prepareResponseInit = (request) => ({
|
|
16
|
-
headers: { [REQUEST_ID_HEADER]: resolveRequestId(request
|
|
16
|
+
headers: { [REQUEST_ID_HEADER]: resolveRequestId(request) },
|
|
17
17
|
});
|
|
18
18
|
export const mergeResponseInit = (defaultHeaders, responseInit) => {
|
|
19
19
|
const headers = new Headers(defaultHeaders);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hebo-ai/gateway",
|
|
3
|
-
"version": "0.4.0
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
@@ -156,7 +156,6 @@
|
|
|
156
156
|
"dependencies": {
|
|
157
157
|
"@ai-sdk/provider": "^3.0.7",
|
|
158
158
|
"@ai-sdk/provider-utils": "^4.0.13",
|
|
159
|
-
"@opentelemetry/api": "^1.9.0",
|
|
160
159
|
"ai": "^6.0.67",
|
|
161
160
|
"serialize-error": "^13.0.1",
|
|
162
161
|
"zod": "^4.3.6"
|
|
@@ -169,7 +168,10 @@
|
|
|
169
168
|
"@ai-sdk/groq": "^3.0.19",
|
|
170
169
|
"@ai-sdk/openai": "^3.0.23",
|
|
171
170
|
"@aws-sdk/credential-providers": "^3.981.0",
|
|
171
|
+
"@langfuse/otel": "^4.6.1",
|
|
172
172
|
"@mjackson/node-fetch-server": "^0.7.0",
|
|
173
|
+
"@opentelemetry/api": "^1.9.0",
|
|
174
|
+
"@opentelemetry/context-async-hooks": "^2.5.1",
|
|
173
175
|
"@opentelemetry/sdk-trace-base": "^2.5.1",
|
|
174
176
|
"@tanstack/react-router": "^1.157.16",
|
|
175
177
|
"@tanstack/react-start": "^1.157.16",
|
|
@@ -195,6 +197,7 @@
|
|
|
195
197
|
"@ai-sdk/google-vertex": "^4.0.37",
|
|
196
198
|
"@ai-sdk/groq": "^3.0.19",
|
|
197
199
|
"@ai-sdk/openai": "^3.0.23",
|
|
200
|
+
"@opentelemetry/api": "^1.9.0",
|
|
198
201
|
"typescript": "^5.9.3",
|
|
199
202
|
"voyage-ai-provider": "^3.0.0"
|
|
200
203
|
},
|