@hebo-ai/gateway 0.4.1 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -3
- package/dist/endpoints/chat-completions/handler.js +2 -4
- package/dist/endpoints/embeddings/handler.js +2 -4
- package/dist/lifecycle.js +7 -12
- package/dist/telemetry/http.js +0 -3
- package/dist/types.d.ts +10 -20
- package/dist/utils/request.d.ts +1 -3
- package/dist/utils/request.js +3 -26
- package/dist/utils/response.d.ts +1 -1
- package/dist/utils/response.js +3 -3
- package/package.json +1 -1
- package/src/endpoints/chat-completions/handler.ts +2 -5
- package/src/endpoints/embeddings/handler.ts +5 -5
- package/src/lifecycle.ts +7 -11
- package/src/telemetry/http.ts +0 -3
- package/src/types.ts +19 -23
- package/src/utils/request.ts +5 -33
- package/src/utils/response.ts +3 -3
package/README.md
CHANGED
|
@@ -286,10 +286,9 @@ const gw = gateway({
|
|
|
286
286
|
/**
|
|
287
287
|
* Runs before any endpoint handler logic.
|
|
288
288
|
* @param ctx.request Incoming request.
|
|
289
|
-
* @returns Optional
|
|
290
|
-
* Returning a Response stops execution of the endpoint.
|
|
289
|
+
* @returns Optional Response to short-circuit the request.
|
|
291
290
|
*/
|
|
292
|
-
onRequest: async (ctx: { request: Request }): Promise<
|
|
291
|
+
onRequest: async (ctx: { request: Request }): Promise<Response | void> => {
|
|
293
292
|
// Example Use Cases:
|
|
294
293
|
// - Verify authentication
|
|
295
294
|
// - Enforce rate limits
|
|
package/dist/endpoints/chat-completions/handler.js
CHANGED
|
@@ -7,7 +7,6 @@ import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
8
|
import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
9
9
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
10
|
-
import { resolveRequestId } from "../../utils/headers";
|
|
11
10
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
12
11
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
13
12
|
import { getChatGeneralAttributes, getChatRequestAttributes, getChatResponseAttributes, } from "./otel";
|
|
@@ -22,7 +21,6 @@ export const chatCompletions = (config) => {
|
|
|
22
21
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
23
22
|
throw new GatewayError("Method Not Allowed", 405);
|
|
24
23
|
}
|
|
25
|
-
const requestId = resolveRequestId(ctx.request);
|
|
26
24
|
// Parse + validate input.
|
|
27
25
|
try {
|
|
28
26
|
ctx.body = await ctx.request.json();
|
|
@@ -68,7 +66,7 @@ export const chatCompletions = (config) => {
|
|
|
68
66
|
// Convert inputs to AI SDK call options.
|
|
69
67
|
const textOptions = convertToTextCallOptions(inputs);
|
|
70
68
|
logger.trace({
|
|
71
|
-
requestId,
|
|
69
|
+
requestId: ctx.requestId,
|
|
72
70
|
options: textOptions,
|
|
73
71
|
}, "[chat] AI SDK options");
|
|
74
72
|
addSpanEvent("hebo.options.prepared");
|
|
@@ -127,7 +125,7 @@ export const chatCompletions = (config) => {
|
|
|
127
125
|
},
|
|
128
126
|
...textOptions,
|
|
129
127
|
});
|
|
130
|
-
logger.trace({ requestId, result }, "[chat] AI SDK result");
|
|
128
|
+
logger.trace({ requestId: ctx.requestId, result }, "[chat] AI SDK result");
|
|
131
129
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
132
130
|
// Transform result.
|
|
133
131
|
ctx.result = toChatCompletions(result, ctx.resolvedModelId);
|
|
package/dist/endpoints/embeddings/handler.js
CHANGED
|
@@ -7,7 +7,6 @@ import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
8
|
import { recordRequestDuration, recordTimePerOutputToken, recordTokenUsage, } from "../../telemetry/gen-ai";
|
|
9
9
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
10
|
-
import { resolveRequestId } from "../../utils/headers";
|
|
11
10
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
12
11
|
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
13
12
|
import { getEmbeddingsGeneralAttributes, getEmbeddingsRequestAttributes, getEmbeddingsResponseAttributes, } from "./otel";
|
|
@@ -22,7 +21,6 @@ export const embeddings = (config) => {
|
|
|
22
21
|
if (!ctx.request || ctx.request.method !== "POST") {
|
|
23
22
|
throw new GatewayError("Method Not Allowed", 405);
|
|
24
23
|
}
|
|
25
|
-
const requestId = resolveRequestId(ctx.request);
|
|
26
24
|
// Parse + validate input.
|
|
27
25
|
try {
|
|
28
26
|
ctx.body = await ctx.request.json();
|
|
@@ -67,7 +65,7 @@ export const embeddings = (config) => {
|
|
|
67
65
|
setSpanAttributes(genAiGeneralAttrs);
|
|
68
66
|
// Convert inputs to AI SDK call options.
|
|
69
67
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
70
|
-
logger.trace({ requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
68
|
+
logger.trace({ requestId: ctx.requestId, options: embedOptions }, "[embeddings] AI SDK options");
|
|
71
69
|
addSpanEvent("hebo.options.prepared");
|
|
72
70
|
setSpanAttributes(getEmbeddingsRequestAttributes(inputs, genAiSignalLevel));
|
|
73
71
|
// Build middleware chain (model -> forward params -> provider).
|
|
@@ -83,7 +81,7 @@ export const embeddings = (config) => {
|
|
|
83
81
|
abortSignal: ctx.request.signal,
|
|
84
82
|
...embedOptions,
|
|
85
83
|
});
|
|
86
|
-
logger.trace({ requestId, result }, "[embeddings] AI SDK result");
|
|
84
|
+
logger.trace({ requestId: ctx.requestId, result }, "[embeddings] AI SDK result");
|
|
87
85
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
88
86
|
// Transform result.
|
|
89
87
|
ctx.result = toEmbeddings(result, ctx.modelId);
|
package/dist/lifecycle.js
CHANGED
|
@@ -8,8 +8,7 @@ import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
|
|
|
8
8
|
import { recordV8jsMemory } from "./telemetry/memory";
|
|
9
9
|
import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
|
|
10
10
|
import { wrapStream } from "./telemetry/stream";
|
|
11
|
-
import {
|
|
12
|
-
import { maybeApplyRequestPatch, prepareRequestHeaders } from "./utils/request";
|
|
11
|
+
import { resolveOrCreateRequestId } from "./utils/request";
|
|
13
12
|
import { prepareResponseInit, toResponse } from "./utils/response";
|
|
14
13
|
export const winterCgHandler = (run, config) => {
|
|
15
14
|
const parsedConfig = parseConfig(config);
|
|
@@ -24,14 +23,13 @@ export const winterCgHandler = (run, config) => {
|
|
|
24
23
|
state: state ?? {},
|
|
25
24
|
providers: parsedConfig.providers,
|
|
26
25
|
models: parsedConfig.models,
|
|
26
|
+
requestId: resolveOrCreateRequestId(request),
|
|
27
27
|
};
|
|
28
|
-
const headers = prepareRequestHeaders(ctx.request);
|
|
29
|
-
if (headers)
|
|
30
|
-
ctx.request = new Request(ctx.request, { headers });
|
|
31
28
|
const span = startSpan(ctx.request.url);
|
|
32
29
|
span.setAttributes(getBaggageAttributes(ctx.request));
|
|
33
30
|
if (!span.isExisting) {
|
|
34
31
|
span.setAttributes(getRequestAttributes(ctx.request, parsedConfig.telemetry?.signals?.http));
|
|
32
|
+
span.setAttributes({ "http.request.id": ctx.requestId });
|
|
35
33
|
}
|
|
36
34
|
const finalize = (status, reason) => {
|
|
37
35
|
if (ctx.operation) {
|
|
@@ -47,8 +45,8 @@ export const winterCgHandler = (run, config) => {
|
|
|
47
45
|
else if (status === 200 && ctx.response?.status)
|
|
48
46
|
realStatus = ctx.response.status;
|
|
49
47
|
if (realStatus !== 200) {
|
|
50
|
-
|
|
51
|
-
requestId:
|
|
48
|
+
logger[realStatus >= 500 ? "error" : "warn"]({
|
|
49
|
+
requestId: ctx.requestId,
|
|
52
50
|
err: reason ?? ctx.request.signal.reason,
|
|
53
51
|
});
|
|
54
52
|
if (realStatus >= 500)
|
|
@@ -65,16 +63,13 @@ export const winterCgHandler = (run, config) => {
|
|
|
65
63
|
if (onRequest instanceof Response) {
|
|
66
64
|
ctx.response = onRequest;
|
|
67
65
|
}
|
|
68
|
-
else if (onRequest) {
|
|
69
|
-
ctx.request = maybeApplyRequestPatch(ctx.request, onRequest);
|
|
70
|
-
}
|
|
71
66
|
}
|
|
72
67
|
if (!ctx.response) {
|
|
73
68
|
ctx.result = (await span.runWithContext(() => run(ctx)));
|
|
74
69
|
if (ctx.result instanceof ReadableStream) {
|
|
75
70
|
ctx.result = wrapStream(ctx.result, { onDone: finalize });
|
|
76
71
|
}
|
|
77
|
-
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.
|
|
72
|
+
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.requestId));
|
|
78
73
|
}
|
|
79
74
|
if (parsedConfig.hooks?.onResponse) {
|
|
80
75
|
const onResponse = await parsedConfig.hooks.onResponse(ctx);
|
|
@@ -91,7 +86,7 @@ export const winterCgHandler = (run, config) => {
|
|
|
91
86
|
catch (error) {
|
|
92
87
|
ctx.response = toOpenAIErrorResponse(ctx.request.signal.aborted
|
|
93
88
|
? new GatewayError(error ?? ctx.request.signal.reason, 499)
|
|
94
|
-
: error, prepareResponseInit(ctx.
|
|
89
|
+
: error, prepareResponseInit(ctx.requestId));
|
|
95
90
|
finalize(ctx.response.status, error);
|
|
96
91
|
}
|
|
97
92
|
return ctx.response ?? new Response("Internal Server Error", { status: 500 });
|
package/dist/telemetry/http.js
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import {} from "../types";
|
|
2
|
-
import { resolveRequestId } from "../utils/headers";
|
|
3
2
|
const headerArr = (h, k) => (h.has(k) ? [h.get(k)] : undefined);
|
|
4
3
|
export const getRequestAttributes = (request, signalLevel) => {
|
|
5
4
|
if (!signalLevel || signalLevel === "off")
|
|
@@ -26,8 +25,6 @@ export const getRequestAttributes = (request, signalLevel) => {
|
|
|
26
25
|
};
|
|
27
26
|
if (signalLevel !== "required") {
|
|
28
27
|
Object.assign(attrs, {
|
|
29
|
-
// FUTURE: does ElysiaJS and other frameworks attach request id?
|
|
30
|
-
"http.request.id": resolveRequestId(request),
|
|
31
28
|
"user_agent.original": request.headers.get("user-agent") ?? undefined,
|
|
32
29
|
});
|
|
33
30
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -6,19 +6,6 @@ import type { Model, ModelList } from "./endpoints/models";
|
|
|
6
6
|
import type { Logger, LoggerConfig } from "./logger";
|
|
7
7
|
import type { ModelCatalog, ModelId } from "./models/types";
|
|
8
8
|
import type { ProviderId, ProviderRegistry } from "./providers/types";
|
|
9
|
-
/**
|
|
10
|
-
* Request overrides returned from the `onRequest` hook.
|
|
11
|
-
*/
|
|
12
|
-
export type RequestPatch = {
|
|
13
|
-
/**
|
|
14
|
-
* Headers to merge into the incoming request.
|
|
15
|
-
*/
|
|
16
|
-
headers?: HeadersInit;
|
|
17
|
-
/**
|
|
18
|
-
* Body to replace on the incoming request.
|
|
19
|
-
*/
|
|
20
|
-
body?: BodyInit;
|
|
21
|
-
};
|
|
22
9
|
/**
|
|
23
10
|
* Per-request context shared across handlers and hooks.
|
|
24
11
|
*/
|
|
@@ -39,6 +26,10 @@ export type GatewayContext = {
|
|
|
39
26
|
* Incoming request for the handler.
|
|
40
27
|
*/
|
|
41
28
|
request: Request;
|
|
29
|
+
/**
|
|
30
|
+
* Resolved request ID for logging and telemetry.
|
|
31
|
+
*/
|
|
32
|
+
requestId: string;
|
|
42
33
|
/**
|
|
43
34
|
* Parsed body from the request.
|
|
44
35
|
*/
|
|
@@ -80,10 +71,10 @@ export type HookContext = Omit<Readonly<GatewayContext>, "state"> & {
|
|
|
80
71
|
};
|
|
81
72
|
type RequiredHookContext<K extends keyof GatewayContext> = Omit<HookContext, K> & Required<Pick<HookContext, K>>;
|
|
82
73
|
export type OnRequestHookContext = RequiredHookContext<"request">;
|
|
83
|
-
export type BeforeHookContext = RequiredHookContext<"request" | "
|
|
84
|
-
export type ResolveModelHookContext = RequiredHookContext<"request" | "body" | "modelId">;
|
|
85
|
-
export type ResolveProviderHookContext = RequiredHookContext<"request" | "
|
|
86
|
-
export type AfterHookContext = RequiredHookContext<"request" | "
|
|
74
|
+
export type BeforeHookContext = RequiredHookContext<"request" | "operation" | "body">;
|
|
75
|
+
export type ResolveModelHookContext = RequiredHookContext<"request" | "operation" | "body" | "modelId">;
|
|
76
|
+
export type ResolveProviderHookContext = RequiredHookContext<"request" | "operation" | "body" | "modelId" | "resolvedModelId">;
|
|
77
|
+
export type AfterHookContext = RequiredHookContext<"request" | "operation" | "body" | "modelId" | "resolvedModelId" | "provider" | "resolvedProviderId" | "result">;
|
|
87
78
|
export type OnResponseHookContext = RequiredHookContext<"request" | "response">;
|
|
88
79
|
/**
|
|
89
80
|
* Hooks to plugin to the gateway lifecycle.
|
|
@@ -91,10 +82,9 @@ export type OnResponseHookContext = RequiredHookContext<"request" | "response">;
|
|
|
91
82
|
export type GatewayHooks = {
|
|
92
83
|
/**
|
|
93
84
|
* Runs before any endpoint handler logic.
|
|
94
|
-
* @returns Optional
|
|
95
|
-
* or Response to short-circuit the request.
|
|
85
|
+
* @returns Optional Response to short-circuit the request.
|
|
96
86
|
*/
|
|
97
|
-
onRequest?: (ctx: OnRequestHookContext) => void | RequestPatch | Response | Promise<void | RequestPatch | Response>;
|
|
87
|
+
onRequest?: (ctx: OnRequestHookContext) => void | Response | Promise<void | Response>;
|
|
98
88
|
/**
|
|
99
89
|
* Runs after request JSON is parsed and validated for chat completions / embeddings.
|
|
100
90
|
* @returns Replacement parsed body, or undefined to keep original.
|
package/dist/utils/request.d.ts
CHANGED
|
@@ -1,4 +1,2 @@
|
|
|
1
|
-
|
|
2
|
-
export declare const prepareRequestHeaders: (request: Request) => Headers | undefined;
|
|
1
|
+
export declare const resolveOrCreateRequestId: (request: Request) => string;
|
|
3
2
|
export declare const prepareForwardHeaders: (request: Request) => Record<string, string>;
|
|
4
|
-
export declare const maybeApplyRequestPatch: (request: Request, patch: RequestPatch) => Request;
|
package/dist/utils/request.js
CHANGED
|
@@ -1,15 +1,8 @@
|
|
|
1
1
|
import pkg from "../../package.json" with { type: "json" };
|
|
2
|
-
import {
|
|
2
|
+
import { resolveRequestId } from "./headers";
|
|
3
3
|
const GATEWAY_VERSION = pkg.version;
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
if (existingRequestId)
|
|
7
|
-
return;
|
|
8
|
-
const requestId = "req_" + crypto.getRandomValues(new Uint32Array(2)).reduce((s, n) => s + n.toString(36), "");
|
|
9
|
-
const headers = new Headers(request.headers);
|
|
10
|
-
headers.set(REQUEST_ID_HEADER, requestId);
|
|
11
|
-
return headers;
|
|
12
|
-
};
|
|
4
|
+
const createRequestId = () => "req_" + crypto.getRandomValues(new Uint32Array(2)).reduce((s, n) => s + n.toString(36), "");
|
|
5
|
+
export const resolveOrCreateRequestId = (request) => resolveRequestId(request) ?? createRequestId();
|
|
13
6
|
export const prepareForwardHeaders = (request) => {
|
|
14
7
|
const userAgent = request.headers.get("user-agent");
|
|
15
8
|
const appendedUserAgent = userAgent
|
|
@@ -19,19 +12,3 @@ export const prepareForwardHeaders = (request) => {
|
|
|
19
12
|
"user-agent": appendedUserAgent,
|
|
20
13
|
};
|
|
21
14
|
};
|
|
22
|
-
export const maybeApplyRequestPatch = (request, patch) => {
|
|
23
|
-
if (!patch.headers && patch.body === undefined)
|
|
24
|
-
return request;
|
|
25
|
-
if (!patch.headers) {
|
|
26
|
-
// eslint-disable-next-line no-invalid-fetch-options
|
|
27
|
-
return new Request(request, { body: patch.body });
|
|
28
|
-
}
|
|
29
|
-
const headers = new Headers(request.headers);
|
|
30
|
-
for (const [key, value] of new Headers(patch.headers)) {
|
|
31
|
-
headers.set(key, value);
|
|
32
|
-
}
|
|
33
|
-
const init = { headers };
|
|
34
|
-
if (patch.body !== undefined)
|
|
35
|
-
init.body = patch.body;
|
|
36
|
-
return new Request(request, init);
|
|
37
|
-
};
|
package/dist/utils/response.d.ts
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
export declare const prepareResponseInit: (
|
|
1
|
+
export declare const prepareResponseInit: (requestId: string) => ResponseInit;
|
|
2
2
|
export declare const mergeResponseInit: (defaultHeaders: HeadersInit, responseInit?: ResponseInit) => ResponseInit;
|
|
3
3
|
export declare const toResponse: (result: ReadableStream | Uint8Array<ArrayBuffer> | object | string, responseInit?: ResponseInit) => Response;
|
package/dist/utils/response.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { REQUEST_ID_HEADER
|
|
1
|
+
import { REQUEST_ID_HEADER } from "./headers";
|
|
2
2
|
const TEXT_ENCODER = new TextEncoder();
|
|
3
3
|
class JsonToSseTransformStream extends TransformStream {
|
|
4
4
|
constructor() {
|
|
@@ -12,8 +12,8 @@ class JsonToSseTransformStream extends TransformStream {
|
|
|
12
12
|
});
|
|
13
13
|
}
|
|
14
14
|
}
|
|
15
|
-
export const prepareResponseInit = (
|
|
16
|
-
headers: { [REQUEST_ID_HEADER]:
|
|
15
|
+
export const prepareResponseInit = (requestId) => ({
|
|
16
|
+
headers: { [REQUEST_ID_HEADER]: requestId },
|
|
17
17
|
});
|
|
18
18
|
export const mergeResponseInit = (defaultHeaders, responseInit) => {
|
|
19
19
|
const headers = new Headers(defaultHeaders);
|
package/package.json
CHANGED
|
package/src/endpoints/chat-completions/handler.ts
CHANGED
|
@@ -29,7 +29,6 @@ import {
|
|
|
29
29
|
recordTokenUsage,
|
|
30
30
|
} from "../../telemetry/gen-ai";
|
|
31
31
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
32
|
-
import { resolveRequestId } from "../../utils/headers";
|
|
33
32
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
34
33
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
35
34
|
import {
|
|
@@ -52,8 +51,6 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
52
51
|
throw new GatewayError("Method Not Allowed", 405);
|
|
53
52
|
}
|
|
54
53
|
|
|
55
|
-
const requestId = resolveRequestId(ctx.request);
|
|
56
|
-
|
|
57
54
|
// Parse + validate input.
|
|
58
55
|
try {
|
|
59
56
|
ctx.body = await ctx.request.json();
|
|
@@ -107,7 +104,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
107
104
|
const textOptions = convertToTextCallOptions(inputs);
|
|
108
105
|
logger.trace(
|
|
109
106
|
{
|
|
110
|
-
requestId,
|
|
107
|
+
requestId: ctx.requestId,
|
|
111
108
|
options: textOptions,
|
|
112
109
|
},
|
|
113
110
|
"[chat] AI SDK options",
|
|
@@ -178,7 +175,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
178
175
|
},
|
|
179
176
|
...textOptions,
|
|
180
177
|
});
|
|
181
|
-
logger.trace({ requestId, result }, "[chat] AI SDK result");
|
|
178
|
+
logger.trace({ requestId: ctx.requestId, result }, "[chat] AI SDK result");
|
|
182
179
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
183
180
|
|
|
184
181
|
// Transform result.
|
|
package/src/endpoints/embeddings/handler.ts
CHANGED
|
@@ -22,7 +22,6 @@ import {
|
|
|
22
22
|
recordTokenUsage,
|
|
23
23
|
} from "../../telemetry/gen-ai";
|
|
24
24
|
import { addSpanEvent, setSpanAttributes } from "../../telemetry/span";
|
|
25
|
-
import { resolveRequestId } from "../../utils/headers";
|
|
26
25
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
27
26
|
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
28
27
|
import {
|
|
@@ -45,8 +44,6 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
45
44
|
throw new GatewayError("Method Not Allowed", 405);
|
|
46
45
|
}
|
|
47
46
|
|
|
48
|
-
const requestId = resolveRequestId(ctx.request);
|
|
49
|
-
|
|
50
47
|
// Parse + validate input.
|
|
51
48
|
try {
|
|
52
49
|
ctx.body = await ctx.request.json();
|
|
@@ -98,7 +95,10 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
98
95
|
|
|
99
96
|
// Convert inputs to AI SDK call options.
|
|
100
97
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
101
|
-
logger.trace(
|
|
98
|
+
logger.trace(
|
|
99
|
+
{ requestId: ctx.requestId, options: embedOptions },
|
|
100
|
+
"[embeddings] AI SDK options",
|
|
101
|
+
);
|
|
102
102
|
addSpanEvent("hebo.options.prepared");
|
|
103
103
|
setSpanAttributes(getEmbeddingsRequestAttributes(inputs, genAiSignalLevel));
|
|
104
104
|
|
|
@@ -116,7 +116,7 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
116
116
|
abortSignal: ctx.request.signal,
|
|
117
117
|
...embedOptions,
|
|
118
118
|
});
|
|
119
|
-
logger.trace({ requestId, result }, "[embeddings] AI SDK result");
|
|
119
|
+
logger.trace({ requestId: ctx.requestId, result }, "[embeddings] AI SDK result");
|
|
120
120
|
addSpanEvent("hebo.ai-sdk.completed");
|
|
121
121
|
|
|
122
122
|
// Transform result.
|
package/src/lifecycle.ts
CHANGED
|
@@ -15,8 +15,7 @@ import { getRequestAttributes, getResponseAttributes } from "./telemetry/http";
|
|
|
15
15
|
import { recordV8jsMemory } from "./telemetry/memory";
|
|
16
16
|
import { addSpanEvent, setSpanEventsEnabled, setSpanTracer, startSpan } from "./telemetry/span";
|
|
17
17
|
import { wrapStream } from "./telemetry/stream";
|
|
18
|
-
import {
|
|
19
|
-
import { maybeApplyRequestPatch, prepareRequestHeaders } from "./utils/request";
|
|
18
|
+
import { resolveOrCreateRequestId } from "./utils/request";
|
|
20
19
|
import { prepareResponseInit, toResponse } from "./utils/response";
|
|
21
20
|
|
|
22
21
|
export const winterCgHandler = (
|
|
@@ -37,15 +36,14 @@ export const winterCgHandler = (
|
|
|
37
36
|
state: state ?? {},
|
|
38
37
|
providers: parsedConfig.providers,
|
|
39
38
|
models: parsedConfig.models,
|
|
39
|
+
requestId: resolveOrCreateRequestId(request),
|
|
40
40
|
};
|
|
41
41
|
|
|
42
|
-
const headers = prepareRequestHeaders(ctx.request);
|
|
43
|
-
if (headers) ctx.request = new Request(ctx.request, { headers });
|
|
44
|
-
|
|
45
42
|
const span = startSpan(ctx.request.url);
|
|
46
43
|
span.setAttributes(getBaggageAttributes(ctx.request));
|
|
47
44
|
if (!span.isExisting) {
|
|
48
45
|
span.setAttributes(getRequestAttributes(ctx.request, parsedConfig.telemetry?.signals?.http));
|
|
46
|
+
span.setAttributes({ "http.request.id": ctx.requestId });
|
|
49
47
|
}
|
|
50
48
|
|
|
51
49
|
const finalize = (status: number, reason?: unknown) => {
|
|
@@ -65,8 +63,8 @@ export const winterCgHandler = (
|
|
|
65
63
|
else if (status === 200 && ctx.response?.status) realStatus = ctx.response.status;
|
|
66
64
|
|
|
67
65
|
if (realStatus !== 200) {
|
|
68
|
-
|
|
69
|
-
requestId:
|
|
66
|
+
logger[realStatus >= 500 ? "error" : "warn"]({
|
|
67
|
+
requestId: ctx.requestId,
|
|
70
68
|
err: reason ?? ctx.request.signal.reason,
|
|
71
69
|
});
|
|
72
70
|
|
|
@@ -86,8 +84,6 @@ export const winterCgHandler = (
|
|
|
86
84
|
|
|
87
85
|
if (onRequest instanceof Response) {
|
|
88
86
|
ctx.response = onRequest;
|
|
89
|
-
} else if (onRequest) {
|
|
90
|
-
ctx.request = maybeApplyRequestPatch(ctx.request, onRequest);
|
|
91
87
|
}
|
|
92
88
|
}
|
|
93
89
|
|
|
@@ -98,7 +94,7 @@ export const winterCgHandler = (
|
|
|
98
94
|
ctx.result = wrapStream(ctx.result, { onDone: finalize });
|
|
99
95
|
}
|
|
100
96
|
|
|
101
|
-
ctx.response = toResponse(ctx.result!, prepareResponseInit(ctx.
|
|
97
|
+
ctx.response = toResponse(ctx.result!, prepareResponseInit(ctx.requestId));
|
|
102
98
|
}
|
|
103
99
|
|
|
104
100
|
if (parsedConfig.hooks?.onResponse) {
|
|
@@ -118,7 +114,7 @@ export const winterCgHandler = (
|
|
|
118
114
|
ctx.request.signal.aborted
|
|
119
115
|
? new GatewayError(error ?? ctx.request.signal.reason, 499)
|
|
120
116
|
: error,
|
|
121
|
-
prepareResponseInit(ctx.
|
|
117
|
+
prepareResponseInit(ctx.requestId),
|
|
122
118
|
);
|
|
123
119
|
finalize(ctx.response.status, error);
|
|
124
120
|
}
|
package/src/telemetry/http.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { type TelemetrySignalLevel } from "../types";
|
|
2
|
-
import { resolveRequestId } from "../utils/headers";
|
|
3
2
|
|
|
4
3
|
const headerArr = (h: Headers, k: string) => (h.has(k) ? [h.get(k)!] : undefined);
|
|
5
4
|
|
|
@@ -29,8 +28,6 @@ export const getRequestAttributes = (request: Request, signalLevel?: TelemetrySi
|
|
|
29
28
|
|
|
30
29
|
if (signalLevel !== "required") {
|
|
31
30
|
Object.assign(attrs, {
|
|
32
|
-
// FUTURE: does ElysiaJS and other frameworks attach request id?
|
|
33
|
-
"http.request.id": resolveRequestId(request),
|
|
34
31
|
"user_agent.original": request.headers.get("user-agent") ?? undefined,
|
|
35
32
|
});
|
|
36
33
|
}
|
package/src/types.ts
CHANGED
|
@@ -12,20 +12,6 @@ import type { Logger, LoggerConfig } from "./logger";
|
|
|
12
12
|
import type { ModelCatalog, ModelId } from "./models/types";
|
|
13
13
|
import type { ProviderId, ProviderRegistry } from "./providers/types";
|
|
14
14
|
|
|
15
|
-
/**
|
|
16
|
-
* Request overrides returned from the `onRequest` hook.
|
|
17
|
-
*/
|
|
18
|
-
export type RequestPatch = {
|
|
19
|
-
/**
|
|
20
|
-
* Headers to merge into the incoming request.
|
|
21
|
-
*/
|
|
22
|
-
headers?: HeadersInit;
|
|
23
|
-
/**
|
|
24
|
-
* Body to replace on the incoming request.
|
|
25
|
-
*/
|
|
26
|
-
body?: BodyInit;
|
|
27
|
-
};
|
|
28
|
-
|
|
29
15
|
/**
|
|
30
16
|
* Per-request context shared across handlers and hooks.
|
|
31
17
|
*/
|
|
@@ -46,6 +32,10 @@ export type GatewayContext = {
|
|
|
46
32
|
* Incoming request for the handler.
|
|
47
33
|
*/
|
|
48
34
|
request: Request;
|
|
35
|
+
/**
|
|
36
|
+
* Resolved request ID for logging and telemetry.
|
|
37
|
+
*/
|
|
38
|
+
requestId: string;
|
|
49
39
|
/**
|
|
50
40
|
* Parsed body from the request.
|
|
51
41
|
*/
|
|
@@ -95,13 +85,22 @@ export type HookContext = Omit<Readonly<GatewayContext>, "state"> & {
|
|
|
95
85
|
type RequiredHookContext<K extends keyof GatewayContext> = Omit<HookContext, K> &
|
|
96
86
|
Required<Pick<HookContext, K>>;
|
|
97
87
|
export type OnRequestHookContext = RequiredHookContext<"request">;
|
|
98
|
-
export type BeforeHookContext = RequiredHookContext<"request" | "
|
|
99
|
-
export type ResolveModelHookContext = RequiredHookContext<
|
|
88
|
+
export type BeforeHookContext = RequiredHookContext<"request" | "operation" | "body">;
|
|
89
|
+
export type ResolveModelHookContext = RequiredHookContext<
|
|
90
|
+
"request" | "operation" | "body" | "modelId"
|
|
91
|
+
>;
|
|
100
92
|
export type ResolveProviderHookContext = RequiredHookContext<
|
|
101
|
-
"request" | "
|
|
93
|
+
"request" | "operation" | "body" | "modelId" | "resolvedModelId"
|
|
102
94
|
>;
|
|
103
95
|
export type AfterHookContext = RequiredHookContext<
|
|
104
|
-
|
|
96
|
+
| "request"
|
|
97
|
+
| "operation"
|
|
98
|
+
| "body"
|
|
99
|
+
| "modelId"
|
|
100
|
+
| "resolvedModelId"
|
|
101
|
+
| "provider"
|
|
102
|
+
| "resolvedProviderId"
|
|
103
|
+
| "result"
|
|
105
104
|
>;
|
|
106
105
|
export type OnResponseHookContext = RequiredHookContext<"request" | "response">;
|
|
107
106
|
|
|
@@ -111,12 +110,9 @@ export type OnResponseHookContext = RequiredHookContext<"request" | "response">;
|
|
|
111
110
|
export type GatewayHooks = {
|
|
112
111
|
/**
|
|
113
112
|
* Runs before any endpoint handler logic.
|
|
114
|
-
* @returns Optional
|
|
115
|
-
* or Response to short-circuit the request.
|
|
113
|
+
* @returns Optional Response to short-circuit the request.
|
|
116
114
|
*/
|
|
117
|
-
onRequest?: (
|
|
118
|
-
ctx: OnRequestHookContext,
|
|
119
|
-
) => void | RequestPatch | Response | Promise<void | RequestPatch | Response>;
|
|
115
|
+
onRequest?: (ctx: OnRequestHookContext) => void | Response | Promise<void | Response>;
|
|
120
116
|
/**
|
|
121
117
|
* Runs after request JSON is parsed and validated for chat completions / embeddings.
|
|
122
118
|
* @returns Replacement parsed body, or undefined to keep original.
|
package/src/utils/request.ts
CHANGED
|
@@ -1,22 +1,13 @@
|
|
|
1
|
-
import type { RequestPatch } from "../types";
|
|
2
|
-
|
|
3
1
|
import pkg from "../../package.json" with { type: "json" };
|
|
4
|
-
import {
|
|
2
|
+
import { resolveRequestId } from "./headers";
|
|
5
3
|
|
|
6
4
|
const GATEWAY_VERSION = pkg.version;
|
|
7
5
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
if (existingRequestId) return;
|
|
11
|
-
|
|
12
|
-
const requestId =
|
|
13
|
-
"req_" + crypto.getRandomValues(new Uint32Array(2)).reduce((s, n) => s + n.toString(36), "");
|
|
6
|
+
const createRequestId = () =>
|
|
7
|
+
"req_" + crypto.getRandomValues(new Uint32Array(2)).reduce((s, n) => s + n.toString(36), "");
|
|
14
8
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
return headers;
|
|
19
|
-
};
|
|
9
|
+
export const resolveOrCreateRequestId = (request: Request) =>
|
|
10
|
+
resolveRequestId(request) ?? createRequestId();
|
|
20
11
|
|
|
21
12
|
export const prepareForwardHeaders = (request: Request): Record<string, string> => {
|
|
22
13
|
const userAgent = request.headers.get("user-agent");
|
|
@@ -28,22 +19,3 @@ export const prepareForwardHeaders = (request: Request): Record<string, string>
|
|
|
28
19
|
"user-agent": appendedUserAgent,
|
|
29
20
|
};
|
|
30
21
|
};
|
|
31
|
-
|
|
32
|
-
export const maybeApplyRequestPatch = (request: Request, patch: RequestPatch) => {
|
|
33
|
-
if (!patch.headers && patch.body === undefined) return request;
|
|
34
|
-
|
|
35
|
-
if (!patch.headers) {
|
|
36
|
-
// eslint-disable-next-line no-invalid-fetch-options
|
|
37
|
-
return new Request(request, { body: patch.body });
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
const headers = new Headers(request.headers);
|
|
41
|
-
for (const [key, value] of new Headers(patch.headers)) {
|
|
42
|
-
headers.set(key, value);
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
const init: RequestInit = { headers };
|
|
46
|
-
if (patch.body !== undefined) init.body = patch.body;
|
|
47
|
-
|
|
48
|
-
return new Request(request, init);
|
|
49
|
-
};
|
package/src/utils/response.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { REQUEST_ID_HEADER
|
|
1
|
+
import { REQUEST_ID_HEADER } from "./headers";
|
|
2
2
|
|
|
3
3
|
const TEXT_ENCODER = new TextEncoder();
|
|
4
4
|
|
|
@@ -15,8 +15,8 @@ class JsonToSseTransformStream extends TransformStream<unknown, string> {
|
|
|
15
15
|
}
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
export const prepareResponseInit = (
|
|
19
|
-
headers: { [REQUEST_ID_HEADER]:
|
|
18
|
+
export const prepareResponseInit = (requestId: string): ResponseInit => ({
|
|
19
|
+
headers: { [REQUEST_ID_HEADER]: requestId },
|
|
20
20
|
});
|
|
21
21
|
|
|
22
22
|
export const mergeResponseInit = (
|