@hebo-ai/gateway 0.3.0-rc.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/endpoints/chat-completions/handler.js +15 -4
- package/dist/endpoints/embeddings/handler.js +6 -2
- package/dist/errors/openai.js +13 -2
- package/dist/errors/utils.d.ts +0 -1
- package/dist/errors/utils.js +2 -5
- package/dist/lifecycle.js +14 -8
- package/dist/logger/default.js +10 -15
- package/dist/logger/index.d.ts +0 -1
- package/dist/telemetry/access-log.js +11 -8
- package/dist/telemetry/fetch.d.ts +1 -0
- package/dist/telemetry/fetch.js +16 -0
- package/dist/telemetry/perf.d.ts +11 -0
- package/dist/telemetry/perf.js +60 -0
- package/dist/utils/headers.d.ts +4 -0
- package/dist/utils/headers.js +24 -0
- package/dist/utils/request.js +4 -3
- package/dist/utils/response.d.ts +1 -0
- package/dist/utils/response.js +18 -6
- package/package.json +1 -1
- package/src/endpoints/chat-completions/handler.ts +15 -7
- package/src/endpoints/embeddings/handler.ts +6 -2
- package/src/errors/openai.ts +15 -2
- package/src/errors/utils.ts +2 -5
- package/src/lifecycle.ts +14 -8
- package/src/logger/default.ts +13 -15
- package/src/logger/index.ts +0 -1
- package/src/telemetry/access-log.ts +14 -12
- package/src/telemetry/fetch.ts +24 -0
- package/src/telemetry/perf.ts +89 -0
- package/src/utils/headers.ts +38 -0
- package/src/utils/request.ts +4 -3
- package/src/utils/response.ts +21 -6
|
@@ -5,6 +5,8 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
|
+
import { markPerf } from "../../telemetry/perf";
|
|
9
|
+
import { resolveRequestId } from "../../utils/headers";
|
|
8
10
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
9
11
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
10
12
|
import { ChatCompletionsBodySchema } from "./schema";
|
|
@@ -50,7 +52,7 @@ export const chatCompletions = (config) => {
|
|
|
50
52
|
// Convert inputs to AI SDK call options.
|
|
51
53
|
const textOptions = convertToTextCallOptions(inputs);
|
|
52
54
|
logger.trace({
|
|
53
|
-
requestId: ctx.request
|
|
55
|
+
requestId: resolveRequestId(ctx.request),
|
|
54
56
|
options: textOptions,
|
|
55
57
|
}, "[chat] AI SDK options");
|
|
56
58
|
// Build middleware chain (model -> forward params -> provider).
|
|
@@ -59,6 +61,7 @@ export const chatCompletions = (config) => {
|
|
|
59
61
|
middleware: modelMiddlewareMatcher.for(ctx.resolvedModelId, languageModel.provider),
|
|
60
62
|
});
|
|
61
63
|
// Execute request (streaming vs. non-streaming).
|
|
64
|
+
markPerf(ctx.request, "aiSdkStart");
|
|
62
65
|
if (stream) {
|
|
63
66
|
const result = streamText({
|
|
64
67
|
model: languageModelWithMiddleware,
|
|
@@ -66,33 +69,41 @@ export const chatCompletions = (config) => {
|
|
|
66
69
|
// No abort signal here, otherwise we can't detect upstream from client cancellations
|
|
67
70
|
// abortSignal: ctx.request.signal,
|
|
68
71
|
onError: ({ error }) => {
|
|
69
|
-
logger.error(
|
|
70
|
-
requestId: ctx.request
|
|
72
|
+
logger.error({
|
|
73
|
+
requestId: resolveRequestId(ctx.request),
|
|
74
|
+
err: error instanceof Error ? error : new Error(String(error)),
|
|
71
75
|
});
|
|
72
76
|
throw error;
|
|
73
77
|
},
|
|
74
78
|
onAbort: () => {
|
|
75
79
|
throw new DOMException("Upstream failed", "AbortError");
|
|
76
80
|
},
|
|
81
|
+
timeout: {
|
|
82
|
+
chunkMs: 5 * 60 * 1000,
|
|
83
|
+
},
|
|
77
84
|
experimental_include: {
|
|
78
85
|
requestBody: false,
|
|
79
86
|
},
|
|
80
87
|
includeRawChunks: false,
|
|
81
88
|
...textOptions,
|
|
82
89
|
});
|
|
90
|
+
markPerf(ctx.request, "aiSdkEnd");
|
|
83
91
|
return toChatCompletionsStream(result, ctx.modelId);
|
|
84
92
|
}
|
|
85
93
|
const result = await generateText({
|
|
86
94
|
model: languageModelWithMiddleware,
|
|
87
95
|
headers: prepareForwardHeaders(ctx.request),
|
|
96
|
+
// FUTURE: currently can't tell whether upstream or downstream abort
|
|
88
97
|
abortSignal: ctx.request.signal,
|
|
89
98
|
experimental_include: {
|
|
90
99
|
requestBody: false,
|
|
91
100
|
responseBody: false,
|
|
92
101
|
},
|
|
102
|
+
timeout: 5 * 60 * 1000,
|
|
93
103
|
...textOptions,
|
|
94
104
|
});
|
|
95
|
-
|
|
105
|
+
markPerf(ctx.request, "aiSdkEnd");
|
|
106
|
+
logger.trace({ requestId: resolveRequestId(ctx.request), result }, "[chat] AI SDK result");
|
|
96
107
|
return toChatCompletions(result, ctx.modelId);
|
|
97
108
|
};
|
|
98
109
|
return { handler: winterCgHandler(handler, config) };
|
|
@@ -5,6 +5,8 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
5
5
|
import { logger } from "../../logger";
|
|
6
6
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
7
7
|
import { resolveProvider } from "../../providers/registry";
|
|
8
|
+
import { markPerf } from "../../telemetry/perf";
|
|
9
|
+
import { resolveRequestId } from "../../utils/headers";
|
|
8
10
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
9
11
|
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
10
12
|
import { EmbeddingsBodySchema } from "./schema";
|
|
@@ -49,20 +51,22 @@ export const embeddings = (config) => {
|
|
|
49
51
|
logger.debug(`[embeddings] using ${embeddingModel.provider} for ${ctx.resolvedModelId}`);
|
|
50
52
|
// Convert inputs to AI SDK call options.
|
|
51
53
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
52
|
-
logger.trace({ requestId: ctx.request
|
|
54
|
+
logger.trace({ requestId: resolveRequestId(ctx.request), options: embedOptions }, "[embeddings] AI SDK options");
|
|
53
55
|
// Build middleware chain (model -> forward params -> provider).
|
|
54
56
|
const embeddingModelWithMiddleware = wrapEmbeddingModel({
|
|
55
57
|
model: embeddingModel,
|
|
56
58
|
middleware: modelMiddlewareMatcher.forEmbedding(ctx.resolvedModelId, embeddingModel.provider),
|
|
57
59
|
});
|
|
58
60
|
// Execute request.
|
|
61
|
+
markPerf(ctx.request, "aiSdkStart");
|
|
59
62
|
const result = await embedMany({
|
|
60
63
|
model: embeddingModelWithMiddleware,
|
|
61
64
|
headers: prepareForwardHeaders(ctx.request),
|
|
62
65
|
abortSignal: ctx.request.signal,
|
|
63
66
|
...embedOptions,
|
|
64
67
|
});
|
|
65
|
-
|
|
68
|
+
markPerf(ctx.request, "aiSdkEnd");
|
|
69
|
+
logger.trace({ requestId: resolveRequestId(ctx.request), result }, "[embeddings] AI SDK result");
|
|
66
70
|
return toEmbeddings(result, ctx.modelId);
|
|
67
71
|
};
|
|
68
72
|
return { handler: winterCgHandler(handler, config) };
|
package/dist/errors/openai.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
|
+
import { isProduction } from "../utils/env";
|
|
3
|
+
import { resolveRequestId } from "../utils/headers";
|
|
2
4
|
import { toResponse } from "../utils/response";
|
|
3
|
-
import { getErrorMeta } from "./utils";
|
|
5
|
+
import { getErrorMeta, STATUS_CODE } from "./utils";
|
|
4
6
|
export const OpenAIErrorSchema = z.object({
|
|
5
7
|
error: z.object({
|
|
6
8
|
message: z.string(),
|
|
@@ -21,7 +23,16 @@ export function toOpenAIError(error) {
|
|
|
21
23
|
}
|
|
22
24
|
export function toOpenAIErrorResponse(error, responseInit) {
|
|
23
25
|
const meta = getErrorMeta(error);
|
|
24
|
-
|
|
26
|
+
const shouldMask = isProduction() && (meta.status >= 500 || meta.code.includes("UPSTREAM"));
|
|
27
|
+
let message;
|
|
28
|
+
if (shouldMask) {
|
|
29
|
+
const requestId = resolveRequestId(responseInit);
|
|
30
|
+
message = `${STATUS_CODE(meta.status)} (${requestId})`;
|
|
31
|
+
}
|
|
32
|
+
else {
|
|
33
|
+
message = meta.message;
|
|
34
|
+
}
|
|
35
|
+
return toResponse(new OpenAIError(message, meta.type, meta.code), {
|
|
25
36
|
...responseInit,
|
|
26
37
|
status: meta.status,
|
|
27
38
|
statusText: meta.code,
|
package/dist/errors/utils.d.ts
CHANGED
package/dist/errors/utils.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { isProduction } from "../utils/env";
|
|
2
1
|
import { normalizeAiSdkError } from "./ai-sdk";
|
|
3
2
|
import { GatewayError } from "./gateway";
|
|
4
3
|
export const STATUS_CODES = {
|
|
@@ -23,7 +22,7 @@ export const STATUS_CODE = (status) => {
|
|
|
23
22
|
return status >= 400 && status < 500 ? STATUS_CODES[400] : STATUS_CODES[500];
|
|
24
23
|
};
|
|
25
24
|
export function getErrorMeta(error) {
|
|
26
|
-
const
|
|
25
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
27
26
|
let code;
|
|
28
27
|
let status;
|
|
29
28
|
let param = "";
|
|
@@ -41,7 +40,5 @@ export function getErrorMeta(error) {
|
|
|
41
40
|
}
|
|
42
41
|
}
|
|
43
42
|
const type = status < 500 ? "invalid_request_error" : "server_error";
|
|
44
|
-
|
|
45
|
-
const message = shouldMask ? STATUS_CODE(status) : rawMessage;
|
|
46
|
-
return { code, status, param, type, message, rawMessage };
|
|
43
|
+
return { code, status, param, type, message };
|
|
47
44
|
}
|
package/dist/lifecycle.js
CHANGED
|
@@ -2,15 +2,13 @@ import { parseConfig } from "./config";
|
|
|
2
2
|
import { toOpenAIErrorResponse } from "./errors/openai";
|
|
3
3
|
import { isLoggerDisabled, logger } from "./logger";
|
|
4
4
|
import { withAccessLog } from "./telemetry/access-log";
|
|
5
|
+
import { resolveRequestId } from "./utils/headers";
|
|
5
6
|
import { maybeApplyRequestPatch, prepareRequestHeaders } from "./utils/request";
|
|
6
|
-
import { toResponse } from "./utils/response";
|
|
7
|
+
import { prepareResponseInit, toResponse } from "./utils/response";
|
|
7
8
|
export const winterCgHandler = (run, config) => {
|
|
8
9
|
const parsedConfig = parseConfig(config);
|
|
9
10
|
const core = async (ctx) => {
|
|
10
11
|
try {
|
|
11
|
-
const headers = prepareRequestHeaders(ctx.request);
|
|
12
|
-
if (headers)
|
|
13
|
-
ctx.request = new Request(ctx.request, { headers });
|
|
14
12
|
const before = await parsedConfig.hooks?.before?.(ctx);
|
|
15
13
|
if (before) {
|
|
16
14
|
if (before instanceof Response) {
|
|
@@ -23,13 +21,18 @@ export const winterCgHandler = (run, config) => {
|
|
|
23
21
|
const after = await parsedConfig.hooks?.after?.(ctx);
|
|
24
22
|
if (after)
|
|
25
23
|
ctx.result = after;
|
|
26
|
-
|
|
24
|
+
if (ctx.result instanceof Response) {
|
|
25
|
+
ctx.response = ctx.result;
|
|
26
|
+
return;
|
|
27
|
+
}
|
|
28
|
+
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.request));
|
|
27
29
|
}
|
|
28
30
|
catch (error) {
|
|
29
|
-
logger.error(
|
|
30
|
-
requestId: ctx.request
|
|
31
|
+
logger.error({
|
|
32
|
+
requestId: resolveRequestId(ctx.request),
|
|
33
|
+
err: error instanceof Error ? error : new Error(String(error)),
|
|
31
34
|
});
|
|
32
|
-
ctx.response = toOpenAIErrorResponse(error);
|
|
35
|
+
ctx.response = toOpenAIErrorResponse(error, prepareResponseInit(ctx.request));
|
|
33
36
|
}
|
|
34
37
|
};
|
|
35
38
|
const handler = isLoggerDisabled(parsedConfig.logger) ? core : withAccessLog(core);
|
|
@@ -40,6 +43,9 @@ export const winterCgHandler = (run, config) => {
|
|
|
40
43
|
providers: parsedConfig.providers,
|
|
41
44
|
models: parsedConfig.models,
|
|
42
45
|
};
|
|
46
|
+
const headers = prepareRequestHeaders(ctx.request);
|
|
47
|
+
if (headers)
|
|
48
|
+
ctx.request = new Request(ctx.request, { headers });
|
|
43
49
|
await handler(ctx);
|
|
44
50
|
return ctx.response ?? new Response("Internal Server Error", { status: 500 });
|
|
45
51
|
};
|
package/dist/logger/default.js
CHANGED
|
@@ -15,39 +15,34 @@ const isRecord = (value) => typeof value === "object" && value !== null && !(val
|
|
|
15
15
|
const buildLogObject = (level, args) => {
|
|
16
16
|
if (args.length === 0)
|
|
17
17
|
return {};
|
|
18
|
-
const [first, second
|
|
18
|
+
const [first, second] = args;
|
|
19
19
|
let obj;
|
|
20
20
|
let err;
|
|
21
21
|
let msg;
|
|
22
22
|
if (first instanceof Error) {
|
|
23
23
|
err = first;
|
|
24
|
-
if (isRecord(second)) {
|
|
25
|
-
obj = second;
|
|
26
|
-
if (third !== undefined) {
|
|
27
|
-
msg = String(third);
|
|
28
|
-
}
|
|
29
|
-
}
|
|
30
|
-
else if (second !== undefined) {
|
|
31
|
-
msg = String(second);
|
|
32
|
-
}
|
|
33
24
|
}
|
|
34
25
|
else if (isRecord(first)) {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
26
|
+
if (first["err"] !== undefined) {
|
|
27
|
+
err = first["err"];
|
|
28
|
+
delete first["err"];
|
|
38
29
|
}
|
|
30
|
+
obj = first;
|
|
39
31
|
}
|
|
40
32
|
else {
|
|
41
33
|
msg = String(first);
|
|
42
34
|
}
|
|
35
|
+
if (second !== undefined) {
|
|
36
|
+
msg = String(second);
|
|
37
|
+
}
|
|
43
38
|
if (err && msg === undefined) {
|
|
44
|
-
msg = err.message;
|
|
39
|
+
msg = err instanceof Error ? err.message : String(err);
|
|
45
40
|
}
|
|
46
41
|
return {
|
|
47
42
|
level,
|
|
48
43
|
time: Date.now(),
|
|
49
44
|
...(msg ? { msg } : {}),
|
|
50
|
-
...(err ? { err: serializeError(err) } : {}),
|
|
45
|
+
...(err ? { err: err instanceof Error ? serializeError(err) : err } : {}),
|
|
51
46
|
...obj,
|
|
52
47
|
};
|
|
53
48
|
};
|
package/dist/logger/index.d.ts
CHANGED
|
@@ -2,7 +2,6 @@ export type LogFn = {
|
|
|
2
2
|
(msg: string): void;
|
|
3
3
|
(obj: Record<string, unknown>, msg?: string): void;
|
|
4
4
|
(err: Error, msg?: string): void;
|
|
5
|
-
(err: Error, obj?: Record<string, unknown>, msg?: string): void;
|
|
6
5
|
};
|
|
7
6
|
export type Logger = Record<"trace" | "debug" | "info" | "warn" | "error", LogFn>;
|
|
8
7
|
export type LogLevel = "trace" | "debug" | "info" | "warn" | "error" | "silent";
|
|
@@ -1,26 +1,27 @@
|
|
|
1
1
|
import { logger } from "../logger";
|
|
2
|
+
import { resolveRequestId } from "../utils/headers";
|
|
3
|
+
import { clearPerf, getMemoryMeta, getPerfMeta, initPerf, markPerf } from "./perf";
|
|
2
4
|
import { instrumentStream } from "./stream";
|
|
3
5
|
import { getAIMeta, getRequestMeta, getResponseMeta } from "./utils";
|
|
4
6
|
export const withAccessLog = (run) => async (ctx) => {
|
|
5
|
-
|
|
7
|
+
initPerf(ctx.request);
|
|
6
8
|
const requestBytes = (() => {
|
|
7
9
|
const n = Number(ctx.request.headers.get("content-length"));
|
|
8
10
|
return Number.isFinite(n) ? n : undefined;
|
|
9
11
|
})();
|
|
10
12
|
const logAccess = (status, stats) => {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
+
if (!stats)
|
|
14
|
+
markPerf(ctx.request, "responseTime");
|
|
15
|
+
markPerf(ctx.request, "totalDuration");
|
|
13
16
|
const requestMeta = getRequestMeta(ctx.request);
|
|
14
17
|
const responseMeta = getResponseMeta(ctx.response);
|
|
15
18
|
const meta = {
|
|
16
|
-
requestId: ctx.request
|
|
19
|
+
requestId: resolveRequestId(ctx.request),
|
|
17
20
|
ai: getAIMeta(ctx),
|
|
18
21
|
request: requestMeta,
|
|
19
22
|
response: responseMeta,
|
|
20
|
-
timings:
|
|
21
|
-
|
|
22
|
-
responseTime: responseTime ?? totalDuration,
|
|
23
|
-
},
|
|
23
|
+
timings: getPerfMeta(ctx.request),
|
|
24
|
+
memory: getMemoryMeta(ctx.request),
|
|
24
25
|
bytes: {
|
|
25
26
|
in: requestBytes,
|
|
26
27
|
out: stats?.bytes ?? responseMeta["contentLength"],
|
|
@@ -29,6 +30,7 @@ export const withAccessLog = (run) => async (ctx) => {
|
|
|
29
30
|
const realStatus = status === 200 ? (ctx.response?.status ?? status) : status;
|
|
30
31
|
const msg = `${ctx.request.method} ${requestMeta["path"]} ${realStatus}`;
|
|
31
32
|
logger.info(meta, msg);
|
|
33
|
+
clearPerf(ctx.request);
|
|
32
34
|
};
|
|
33
35
|
await run(ctx);
|
|
34
36
|
if (ctx.response.body instanceof ReadableStream) {
|
|
@@ -40,6 +42,7 @@ export const withAccessLog = (run) => async (ctx) => {
|
|
|
40
42
|
statusText: ctx.response.statusText,
|
|
41
43
|
headers: ctx.response.headers,
|
|
42
44
|
});
|
|
45
|
+
markPerf(ctx.request, "responseTime");
|
|
43
46
|
return;
|
|
44
47
|
}
|
|
45
48
|
logAccess(ctx.response.status);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const initFetch: () => void;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { markPerf, markPerfOnce } from "./perf";
|
|
2
|
+
const ORIGINAL_FETCH_KEY = Symbol.for("@hebo/fetch/original-fetch");
|
|
3
|
+
const g = globalThis;
|
|
4
|
+
const perfFetch = async (input, init) => {
|
|
5
|
+
const original = g[ORIGINAL_FETCH_KEY];
|
|
6
|
+
markPerfOnce(init ?? input, "fetchStart");
|
|
7
|
+
const response = await original(input, init);
|
|
8
|
+
markPerf(init ?? input, "fetchEnd");
|
|
9
|
+
return response;
|
|
10
|
+
};
|
|
11
|
+
export const initFetch = () => {
|
|
12
|
+
if (g[ORIGINAL_FETCH_KEY])
|
|
13
|
+
return;
|
|
14
|
+
g[ORIGINAL_FETCH_KEY] = globalThis.fetch.bind(globalThis);
|
|
15
|
+
globalThis.fetch = perfFetch;
|
|
16
|
+
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
type RequestIdSource = string | URL | Request | RequestInit;
|
|
2
|
+
export declare const initPerf: (source: RequestIdSource) => void;
|
|
3
|
+
export declare const markPerf: (source: RequestIdSource, name: string) => number | undefined;
|
|
4
|
+
export declare const markPerfOnce: (source: RequestIdSource, name: string) => number | undefined;
|
|
5
|
+
export declare const clearPerf: (source: RequestIdSource) => void;
|
|
6
|
+
export declare const getPerfMeta: (source: RequestIdSource) => Record<string, number>;
|
|
7
|
+
export declare const getMemoryMeta: (source: RequestIdSource) => {
|
|
8
|
+
total: number | undefined;
|
|
9
|
+
request: number;
|
|
10
|
+
} | undefined;
|
|
11
|
+
export {};
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { resolveRequestId } from "../utils/headers";
|
|
2
|
+
import { initFetch } from "./fetch";
|
|
3
|
+
const REQ_PERF_KEY = Symbol.for("@hebo/perf/by-request");
|
|
4
|
+
const g = globalThis;
|
|
5
|
+
const perfByRequestId = (g[REQ_PERF_KEY] ??= new Map());
|
|
6
|
+
const toMb = (bytes) => +(bytes / (1024 * 1024)).toFixed(2);
|
|
7
|
+
const mem = () => process?.memoryUsage?.();
|
|
8
|
+
const samplePeakMemory = (perf) => {
|
|
9
|
+
const heapUsed = mem()?.heapUsed;
|
|
10
|
+
if (perf.memory && heapUsed && heapUsed > perf.memory.peakHeapUsed)
|
|
11
|
+
perf.memory.peakHeapUsed = heapUsed;
|
|
12
|
+
};
|
|
13
|
+
const getPerfStore = (source) => {
|
|
14
|
+
const id = resolveRequestId(source);
|
|
15
|
+
return id ? perfByRequestId.get(id) : undefined;
|
|
16
|
+
};
|
|
17
|
+
export const initPerf = (source) => {
|
|
18
|
+
initFetch();
|
|
19
|
+
const id = resolveRequestId(source);
|
|
20
|
+
if (!id || perfByRequestId.has(id))
|
|
21
|
+
return;
|
|
22
|
+
const heapUsed = mem()?.heapUsed;
|
|
23
|
+
perfByRequestId.set(id, {
|
|
24
|
+
timers: {},
|
|
25
|
+
origin: performance.now(),
|
|
26
|
+
// eslint-disable-next-line eqeqeq
|
|
27
|
+
memory: heapUsed == null ? undefined : { steadyHeapUsed: heapUsed, peakHeapUsed: heapUsed },
|
|
28
|
+
});
|
|
29
|
+
};
|
|
30
|
+
const mark = (source, name, once) => {
|
|
31
|
+
const perf = getPerfStore(source);
|
|
32
|
+
if (!perf)
|
|
33
|
+
return;
|
|
34
|
+
const existing = perf.timers[name];
|
|
35
|
+
if (once && existing !== undefined)
|
|
36
|
+
return existing;
|
|
37
|
+
const value = +(performance.now() - perf.origin).toFixed(2);
|
|
38
|
+
perf.timers[name] = value;
|
|
39
|
+
samplePeakMemory(perf);
|
|
40
|
+
return value;
|
|
41
|
+
};
|
|
42
|
+
export const markPerf = (source, name) => mark(source, name, false);
|
|
43
|
+
export const markPerfOnce = (source, name) => mark(source, name, true);
|
|
44
|
+
export const clearPerf = (source) => {
|
|
45
|
+
const id = resolveRequestId(source);
|
|
46
|
+
if (id)
|
|
47
|
+
perfByRequestId.delete(id);
|
|
48
|
+
};
|
|
49
|
+
export const getPerfMeta = (source) => getPerfStore(source)?.timers ?? {};
|
|
50
|
+
export const getMemoryMeta = (source) => {
|
|
51
|
+
const perf = getPerfStore(source);
|
|
52
|
+
if (!perf?.memory)
|
|
53
|
+
return;
|
|
54
|
+
samplePeakMemory(perf);
|
|
55
|
+
const memory = mem();
|
|
56
|
+
return {
|
|
57
|
+
total: memory ? toMb(memory.rss) : undefined,
|
|
58
|
+
request: toMb(perf.memory.peakHeapUsed - perf.memory.steadyHeapUsed),
|
|
59
|
+
};
|
|
60
|
+
};
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export declare const REQUEST_ID_HEADER = "x-request-id";
|
|
2
|
+
type HeaderSource = string | URL | Headers | Request | Response | RequestInit | ResponseInit | HeadersInit | undefined;
|
|
3
|
+
export declare const resolveRequestId: (source: HeaderSource) => string | undefined;
|
|
4
|
+
export {};
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
export const REQUEST_ID_HEADER = "x-request-id";
|
|
2
|
+
export const resolveRequestId = (source) => {
|
|
3
|
+
if (!source || typeof source === "string" || source instanceof URL)
|
|
4
|
+
return undefined;
|
|
5
|
+
if (source instanceof Request || source instanceof Response) {
|
|
6
|
+
return source.headers.get(REQUEST_ID_HEADER) ?? undefined;
|
|
7
|
+
}
|
|
8
|
+
const headers = "headers" in source ? source.headers : source;
|
|
9
|
+
if (!headers || typeof headers === "string")
|
|
10
|
+
return undefined;
|
|
11
|
+
if (Object.getPrototypeOf(headers) === Object.prototype) {
|
|
12
|
+
return headers[REQUEST_ID_HEADER] ?? undefined;
|
|
13
|
+
}
|
|
14
|
+
if (headers instanceof Headers)
|
|
15
|
+
return headers.get(REQUEST_ID_HEADER) ?? undefined;
|
|
16
|
+
if (Array.isArray(headers)) {
|
|
17
|
+
for (const [key, value] of headers) {
|
|
18
|
+
if (key.toLowerCase() === REQUEST_ID_HEADER)
|
|
19
|
+
return value;
|
|
20
|
+
}
|
|
21
|
+
return undefined;
|
|
22
|
+
}
|
|
23
|
+
return undefined;
|
|
24
|
+
};
|
package/dist/utils/request.js
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import pkg from "../../package.json" with { type: "json" };
|
|
2
|
+
import { REQUEST_ID_HEADER } from "./headers";
|
|
2
3
|
const GATEWAY_VERSION = pkg.version;
|
|
3
4
|
export const prepareRequestHeaders = (request) => {
|
|
4
|
-
const existingRequestId = request.headers.get(
|
|
5
|
+
const existingRequestId = request.headers.get(REQUEST_ID_HEADER);
|
|
5
6
|
if (existingRequestId)
|
|
6
7
|
return;
|
|
7
8
|
const requestId = request.headers.get("x-correlation-id") ??
|
|
8
9
|
request.headers.get("x-trace-id") ??
|
|
9
10
|
crypto.randomUUID();
|
|
10
11
|
const headers = new Headers(request.headers);
|
|
11
|
-
headers.set(
|
|
12
|
+
headers.set(REQUEST_ID_HEADER, requestId);
|
|
12
13
|
return headers;
|
|
13
14
|
};
|
|
14
15
|
export const prepareRequestBody = async (request) => {
|
|
@@ -26,7 +27,7 @@ export const prepareForwardHeaders = (request) => {
|
|
|
26
27
|
? `${userAgent} @hebo-ai/gateway/${GATEWAY_VERSION}`
|
|
27
28
|
: `@hebo-ai/gateway/${GATEWAY_VERSION}`;
|
|
28
29
|
return {
|
|
29
|
-
|
|
30
|
+
[REQUEST_ID_HEADER]: request.headers.get(REQUEST_ID_HEADER),
|
|
30
31
|
"user-agent": appendedUserAgent,
|
|
31
32
|
};
|
|
32
33
|
};
|
package/dist/utils/response.d.ts
CHANGED
|
@@ -1,2 +1,3 @@
|
|
|
1
|
+
export declare const prepareResponseInit: (request: Request) => ResponseInit;
|
|
1
2
|
export declare const mergeResponseInit: (defaultHeaders: HeadersInit, responseInit?: ResponseInit) => ResponseInit;
|
|
2
3
|
export declare const toResponse: (result: ReadableStream<Uint8Array> | Uint8Array<ArrayBuffer> | object | string, responseInit?: ResponseInit) => Response;
|
package/dist/utils/response.js
CHANGED
|
@@ -1,11 +1,21 @@
|
|
|
1
|
+
import { REQUEST_ID_HEADER, resolveRequestId } from "./headers";
|
|
1
2
|
const TEXT_ENCODER = new TextEncoder();
|
|
3
|
+
export const prepareResponseInit = (request) => ({
|
|
4
|
+
headers: { [REQUEST_ID_HEADER]: resolveRequestId(request.headers) },
|
|
5
|
+
});
|
|
2
6
|
export const mergeResponseInit = (defaultHeaders, responseInit) => {
|
|
3
7
|
const headers = new Headers(defaultHeaders);
|
|
4
8
|
const override = responseInit?.headers;
|
|
5
9
|
if (override) {
|
|
6
10
|
new Headers(override).forEach((value, key) => headers.set(key, value));
|
|
7
11
|
}
|
|
8
|
-
|
|
12
|
+
if (!responseInit)
|
|
13
|
+
return { headers };
|
|
14
|
+
return {
|
|
15
|
+
status: responseInit.status,
|
|
16
|
+
statusText: responseInit.statusText,
|
|
17
|
+
headers,
|
|
18
|
+
};
|
|
9
19
|
};
|
|
10
20
|
export const toResponse = (result, responseInit) => {
|
|
11
21
|
let body;
|
|
@@ -23,11 +33,13 @@ export const toResponse = (result, responseInit) => {
|
|
|
23
33
|
body = TEXT_ENCODER.encode(JSON.stringify(result));
|
|
24
34
|
}
|
|
25
35
|
const contentLength = body instanceof Uint8Array ? String(body.byteLength) : "";
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
36
|
+
const isError = result instanceof Error;
|
|
37
|
+
if (!responseInit?.statusText) {
|
|
38
|
+
const status = responseInit?.status ?? (isError ? 500 : 200);
|
|
39
|
+
const statusText = isError ? "REQUEST_FAILED" : "OK";
|
|
40
|
+
const headers = responseInit?.headers;
|
|
41
|
+
responseInit = headers ? { status, statusText, headers } : { status, statusText };
|
|
42
|
+
}
|
|
31
43
|
const init = mergeResponseInit(isStream
|
|
32
44
|
? {
|
|
33
45
|
"content-type": "text/event-stream",
|
package/package.json
CHANGED
|
@@ -14,6 +14,8 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
14
14
|
import { logger } from "../../logger";
|
|
15
15
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
16
16
|
import { resolveProvider } from "../../providers/registry";
|
|
17
|
+
import { markPerf } from "../../telemetry/perf";
|
|
18
|
+
import { resolveRequestId } from "../../utils/headers";
|
|
17
19
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
18
20
|
import { convertToTextCallOptions, toChatCompletions, toChatCompletionsStream } from "./converters";
|
|
19
21
|
import { ChatCompletionsBodySchema } from "./schema";
|
|
@@ -68,7 +70,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
68
70
|
const textOptions = convertToTextCallOptions(inputs);
|
|
69
71
|
logger.trace(
|
|
70
72
|
{
|
|
71
|
-
requestId: ctx.request
|
|
73
|
+
requestId: resolveRequestId(ctx.request),
|
|
72
74
|
options: textOptions,
|
|
73
75
|
},
|
|
74
76
|
"[chat] AI SDK options",
|
|
@@ -81,6 +83,7 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
81
83
|
});
|
|
82
84
|
|
|
83
85
|
// Execute request (streaming vs. non-streaming).
|
|
86
|
+
markPerf(ctx.request, "aiSdkStart");
|
|
84
87
|
if (stream) {
|
|
85
88
|
const result = streamText({
|
|
86
89
|
model: languageModelWithMiddleware,
|
|
@@ -88,20 +91,25 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
88
91
|
// No abort signal here, otherwise we can't detect upstream from client cancellations
|
|
89
92
|
// abortSignal: ctx.request.signal,
|
|
90
93
|
onError: ({ error }) => {
|
|
91
|
-
logger.error(
|
|
92
|
-
requestId: ctx.request
|
|
94
|
+
logger.error({
|
|
95
|
+
requestId: resolveRequestId(ctx.request),
|
|
96
|
+
err: error instanceof Error ? error : new Error(String(error)),
|
|
93
97
|
});
|
|
94
98
|
throw error;
|
|
95
99
|
},
|
|
96
100
|
onAbort: () => {
|
|
97
101
|
throw new DOMException("Upstream failed", "AbortError");
|
|
98
102
|
},
|
|
103
|
+
timeout: {
|
|
104
|
+
chunkMs: 5 * 60 * 1000,
|
|
105
|
+
},
|
|
99
106
|
experimental_include: {
|
|
100
107
|
requestBody: false,
|
|
101
108
|
},
|
|
102
109
|
includeRawChunks: false,
|
|
103
110
|
...textOptions,
|
|
104
111
|
});
|
|
112
|
+
markPerf(ctx.request, "aiSdkEnd");
|
|
105
113
|
|
|
106
114
|
return toChatCompletionsStream(result, ctx.modelId);
|
|
107
115
|
}
|
|
@@ -109,18 +117,18 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
|
|
|
109
117
|
const result = await generateText({
|
|
110
118
|
model: languageModelWithMiddleware,
|
|
111
119
|
headers: prepareForwardHeaders(ctx.request),
|
|
120
|
+
// FUTURE: currently can't tell whether upstream or downstream abort
|
|
112
121
|
abortSignal: ctx.request.signal,
|
|
113
122
|
experimental_include: {
|
|
114
123
|
requestBody: false,
|
|
115
124
|
responseBody: false,
|
|
116
125
|
},
|
|
126
|
+
timeout: 5 * 60 * 1000,
|
|
117
127
|
...textOptions,
|
|
118
128
|
});
|
|
129
|
+
markPerf(ctx.request, "aiSdkEnd");
|
|
119
130
|
|
|
120
|
-
logger.trace(
|
|
121
|
-
{ requestId: ctx.request.headers.get("x-request-id"), result },
|
|
122
|
-
"[chat] AI SDK result",
|
|
123
|
-
);
|
|
131
|
+
logger.trace({ requestId: resolveRequestId(ctx.request), result }, "[chat] AI SDK result");
|
|
124
132
|
|
|
125
133
|
return toChatCompletions(result, ctx.modelId);
|
|
126
134
|
};
|
|
@@ -14,6 +14,8 @@ import { winterCgHandler } from "../../lifecycle";
|
|
|
14
14
|
import { logger } from "../../logger";
|
|
15
15
|
import { modelMiddlewareMatcher } from "../../middleware/matcher";
|
|
16
16
|
import { resolveProvider } from "../../providers/registry";
|
|
17
|
+
import { markPerf } from "../../telemetry/perf";
|
|
18
|
+
import { resolveRequestId } from "../../utils/headers";
|
|
17
19
|
import { prepareForwardHeaders } from "../../utils/request";
|
|
18
20
|
import { convertToEmbedCallOptions, toEmbeddings } from "./converters";
|
|
19
21
|
import { EmbeddingsBodySchema } from "./schema";
|
|
@@ -67,7 +69,7 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
67
69
|
// Convert inputs to AI SDK call options.
|
|
68
70
|
const embedOptions = convertToEmbedCallOptions(inputs);
|
|
69
71
|
logger.trace(
|
|
70
|
-
{ requestId: ctx.request
|
|
72
|
+
{ requestId: resolveRequestId(ctx.request), options: embedOptions },
|
|
71
73
|
"[embeddings] AI SDK options",
|
|
72
74
|
);
|
|
73
75
|
|
|
@@ -78,15 +80,17 @@ export const embeddings = (config: GatewayConfig): Endpoint => {
|
|
|
78
80
|
});
|
|
79
81
|
|
|
80
82
|
// Execute request.
|
|
83
|
+
markPerf(ctx.request, "aiSdkStart");
|
|
81
84
|
const result = await embedMany({
|
|
82
85
|
model: embeddingModelWithMiddleware,
|
|
83
86
|
headers: prepareForwardHeaders(ctx.request),
|
|
84
87
|
abortSignal: ctx.request.signal,
|
|
85
88
|
...embedOptions,
|
|
86
89
|
});
|
|
90
|
+
markPerf(ctx.request, "aiSdkEnd");
|
|
87
91
|
|
|
88
92
|
logger.trace(
|
|
89
|
-
{ requestId: ctx.request
|
|
93
|
+
{ requestId: resolveRequestId(ctx.request), result },
|
|
90
94
|
"[embeddings] AI SDK result",
|
|
91
95
|
);
|
|
92
96
|
|
package/src/errors/openai.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import * as z from "zod";
|
|
2
2
|
|
|
3
|
+
import { isProduction } from "../utils/env";
|
|
4
|
+
import { resolveRequestId } from "../utils/headers";
|
|
3
5
|
import { toResponse } from "../utils/response";
|
|
4
|
-
import { getErrorMeta } from "./utils";
|
|
6
|
+
import { getErrorMeta, STATUS_CODE } from "./utils";
|
|
5
7
|
|
|
6
8
|
export const OpenAIErrorSchema = z.object({
|
|
7
9
|
error: z.object({
|
|
@@ -27,7 +29,18 @@ export function toOpenAIError(error: unknown): OpenAIError {
|
|
|
27
29
|
|
|
28
30
|
export function toOpenAIErrorResponse(error: unknown, responseInit?: ResponseInit) {
|
|
29
31
|
const meta = getErrorMeta(error);
|
|
30
|
-
|
|
32
|
+
|
|
33
|
+
const shouldMask = isProduction() && (meta.status >= 500 || meta.code.includes("UPSTREAM"));
|
|
34
|
+
|
|
35
|
+
let message;
|
|
36
|
+
if (shouldMask) {
|
|
37
|
+
const requestId = resolveRequestId(responseInit);
|
|
38
|
+
message = `${STATUS_CODE(meta.status)} (${requestId})`;
|
|
39
|
+
} else {
|
|
40
|
+
message = meta.message;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
return toResponse(new OpenAIError(message, meta.type, meta.code), {
|
|
31
44
|
...responseInit,
|
|
32
45
|
status: meta.status,
|
|
33
46
|
statusText: meta.code,
|
package/src/errors/utils.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { isProduction } from "../utils/env";
|
|
2
1
|
import { normalizeAiSdkError } from "./ai-sdk";
|
|
3
2
|
import { GatewayError } from "./gateway";
|
|
4
3
|
|
|
@@ -25,7 +24,7 @@ export const STATUS_CODE = (status: number) => {
|
|
|
25
24
|
};
|
|
26
25
|
|
|
27
26
|
export function getErrorMeta(error: unknown) {
|
|
28
|
-
const
|
|
27
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
29
28
|
|
|
30
29
|
let code: string;
|
|
31
30
|
let status: number;
|
|
@@ -44,8 +43,6 @@ export function getErrorMeta(error: unknown) {
|
|
|
44
43
|
}
|
|
45
44
|
|
|
46
45
|
const type = status < 500 ? "invalid_request_error" : "server_error";
|
|
47
|
-
const shouldMask = !code.includes("UPSTREAM") && status >= 500 && isProduction();
|
|
48
|
-
const message = shouldMask ? STATUS_CODE(status) : rawMessage;
|
|
49
46
|
|
|
50
|
-
return { code, status, param, type, message
|
|
47
|
+
return { code, status, param, type, message };
|
|
51
48
|
}
|
package/src/lifecycle.ts
CHANGED
|
@@ -4,8 +4,9 @@ import { parseConfig } from "./config";
|
|
|
4
4
|
import { toOpenAIErrorResponse } from "./errors/openai";
|
|
5
5
|
import { isLoggerDisabled, logger } from "./logger";
|
|
6
6
|
import { withAccessLog } from "./telemetry/access-log";
|
|
7
|
+
import { resolveRequestId } from "./utils/headers";
|
|
7
8
|
import { maybeApplyRequestPatch, prepareRequestHeaders } from "./utils/request";
|
|
8
|
-
import { toResponse } from "./utils/response";
|
|
9
|
+
import { prepareResponseInit, toResponse } from "./utils/response";
|
|
9
10
|
|
|
10
11
|
export const winterCgHandler = (
|
|
11
12
|
run: (ctx: GatewayContext) => Promise<object | ReadableStream<Uint8Array>>,
|
|
@@ -15,9 +16,6 @@ export const winterCgHandler = (
|
|
|
15
16
|
|
|
16
17
|
const core = async (ctx: GatewayContext): Promise<void> => {
|
|
17
18
|
try {
|
|
18
|
-
const headers = prepareRequestHeaders(ctx.request);
|
|
19
|
-
if (headers) ctx.request = new Request(ctx.request, { headers });
|
|
20
|
-
|
|
21
19
|
const before = await parsedConfig.hooks?.before?.(ctx as BeforeHookContext);
|
|
22
20
|
if (before) {
|
|
23
21
|
if (before instanceof Response) {
|
|
@@ -32,12 +30,17 @@ export const winterCgHandler = (
|
|
|
32
30
|
const after = await parsedConfig.hooks?.after?.(ctx as AfterHookContext);
|
|
33
31
|
if (after) ctx.result = after;
|
|
34
32
|
|
|
35
|
-
|
|
33
|
+
if (ctx.result instanceof Response) {
|
|
34
|
+
ctx.response = ctx.result;
|
|
35
|
+
return;
|
|
36
|
+
}
|
|
37
|
+
ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.request));
|
|
36
38
|
} catch (error) {
|
|
37
|
-
logger.error(
|
|
38
|
-
requestId: ctx.request
|
|
39
|
+
logger.error({
|
|
40
|
+
requestId: resolveRequestId(ctx.request)!,
|
|
41
|
+
err: error instanceof Error ? error : new Error(String(error)),
|
|
39
42
|
});
|
|
40
|
-
ctx.response = toOpenAIErrorResponse(error);
|
|
43
|
+
ctx.response = toOpenAIErrorResponse(error, prepareResponseInit(ctx.request));
|
|
41
44
|
}
|
|
42
45
|
};
|
|
43
46
|
|
|
@@ -51,6 +54,9 @@ export const winterCgHandler = (
|
|
|
51
54
|
models: parsedConfig.models,
|
|
52
55
|
};
|
|
53
56
|
|
|
57
|
+
const headers = prepareRequestHeaders(ctx.request);
|
|
58
|
+
if (headers) ctx.request = new Request(ctx.request, { headers });
|
|
59
|
+
|
|
54
60
|
await handler(ctx);
|
|
55
61
|
|
|
56
62
|
return ctx.response ?? new Response("Internal Server Error", { status: 500 });
|
package/src/logger/default.ts
CHANGED
|
@@ -25,39 +25,37 @@ const isRecord = (value: unknown): value is Record<string, unknown> =>
|
|
|
25
25
|
/**
 * Normalises the arguments of a log call into one flat record.
 *
 * Handled argument shapes:
 * - `(err: Error, msg?)` — the error is reported through the `err` field;
 * - `(obj: Record, msg?)` — the record's keys are spread into the result; an
 *   `err` entry inside the record is lifted out and reported like an error;
 * - `(msg)` — anything else is stringified and used as the message.
 *
 * When an error is present but no message was supplied, the error's message
 * (or its string form, for non-`Error` values) becomes the message.
 *
 * @param level - Severity stored in the `level` field.
 * @param args - Raw arguments handed to the log function.
 * @returns `{}` when `args` is empty; otherwise a record with `level`, `time`,
 *   the optional `msg` / `err` fields and the caller's remaining keys.
 */
const buildLogObject = (level: LogLevel, args: unknown[]): Record<string, unknown> => {
  if (args.length === 0) return {};

  const [first, second] = args;

  let obj: Record<string, unknown> | undefined;
  let err: unknown;
  let msg: string | undefined;

  if (first instanceof Error) {
    err = first;
  } else if (isRecord(first)) {
    if (first["err"] === undefined) {
      obj = first;
    } else {
      // Split `err` off into a copy instead of deleting it from the caller's
      // object: the argument must not be mutated (and may be frozen).
      const { err: embedded, ...rest } = first;
      err = embedded;
      obj = rest;
    }
  } else {
    msg = String(first);
  }

  // An explicit second argument always wins as the message.
  if (second !== undefined) {
    msg = String(second);
  }

  if (err && msg === undefined) {
    msg = err instanceof Error ? err.message : String(err);
  }

  return {
    level,
    time: Date.now(),
    ...(msg ? { msg } : {}),
    // Only real Error instances go through the serializer; other values are logged as-is.
    ...(err ? { err: err instanceof Error ? serializeError(err) : err } : {}),
    ...obj,
  };
};
|
package/src/logger/index.ts
CHANGED
|
@@ -4,7 +4,6 @@ export type LogFn = {
|
|
|
4
4
|
(msg: string): void;
|
|
5
5
|
(obj: Record<string, unknown>, msg?: string): void;
|
|
6
6
|
(err: Error, msg?: string): void;
|
|
7
|
-
(err: Error, obj?: Record<string, unknown>, msg?: string): void;
|
|
8
7
|
};
|
|
9
8
|
|
|
10
9
|
export type Logger = Record<"trace" | "debug" | "info" | "warn" | "error", LogFn>;
|
|
@@ -1,36 +1,34 @@
|
|
|
1
1
|
import type { GatewayContext } from "../types";
|
|
2
2
|
|
|
3
3
|
import { logger } from "../logger";
|
|
4
|
+
import { resolveRequestId } from "../utils/headers";
|
|
5
|
+
import { clearPerf, getMemoryMeta, getPerfMeta, initPerf, markPerf } from "./perf";
|
|
4
6
|
import { instrumentStream } from "./stream";
|
|
5
7
|
import { getAIMeta, getRequestMeta, getResponseMeta } from "./utils";
|
|
6
8
|
|
|
7
9
|
export const withAccessLog =
|
|
8
10
|
(run: (ctx: GatewayContext) => Promise<void>) => async (ctx: GatewayContext) => {
|
|
9
|
-
|
|
11
|
+
initPerf(ctx.request);
|
|
10
12
|
|
|
11
13
|
const requestBytes = (() => {
|
|
12
14
|
const n = Number(ctx.request.headers.get("content-length"));
|
|
13
15
|
return Number.isFinite(n) ? n : undefined;
|
|
14
16
|
})();
|
|
15
17
|
|
|
16
|
-
const logAccess = (
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
const totalDuration = +((stats?.streamEnd ?? performance.now()) - start).toFixed(2);
|
|
21
|
-
const responseTime = stats?.streamStart && +(stats.streamStart - start).toFixed(2);
|
|
18
|
+
const logAccess = (status: number, stats?: { bytes?: number }) => {
|
|
19
|
+
if (!stats) markPerf(ctx.request, "responseTime");
|
|
20
|
+
markPerf(ctx.request, "totalDuration");
|
|
21
|
+
|
|
22
22
|
const requestMeta = getRequestMeta(ctx.request);
|
|
23
23
|
const responseMeta = getResponseMeta(ctx.response);
|
|
24
24
|
|
|
25
25
|
const meta: Record<string, unknown> = {
|
|
26
|
-
requestId: ctx.request
|
|
26
|
+
requestId: resolveRequestId(ctx.request),
|
|
27
27
|
ai: getAIMeta(ctx),
|
|
28
28
|
request: requestMeta,
|
|
29
29
|
response: responseMeta,
|
|
30
|
-
timings:
|
|
31
|
-
|
|
32
|
-
responseTime: responseTime ?? totalDuration,
|
|
33
|
-
},
|
|
30
|
+
timings: getPerfMeta(ctx.request),
|
|
31
|
+
memory: getMemoryMeta(ctx.request),
|
|
34
32
|
bytes: {
|
|
35
33
|
in: requestBytes,
|
|
36
34
|
out: stats?.bytes ?? responseMeta["contentLength"],
|
|
@@ -42,6 +40,8 @@ export const withAccessLog =
|
|
|
42
40
|
const msg = `${ctx.request.method} ${requestMeta["path"]} ${realStatus}`;
|
|
43
41
|
|
|
44
42
|
logger.info(meta, msg);
|
|
43
|
+
|
|
44
|
+
clearPerf(ctx.request);
|
|
45
45
|
};
|
|
46
46
|
|
|
47
47
|
await run(ctx);
|
|
@@ -61,6 +61,8 @@ export const withAccessLog =
|
|
|
61
61
|
headers: ctx.response!.headers,
|
|
62
62
|
});
|
|
63
63
|
|
|
64
|
+
markPerf(ctx.request, "responseTime");
|
|
65
|
+
|
|
64
66
|
return;
|
|
65
67
|
}
|
|
66
68
|
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { markPerf, markPerfOnce } from "./perf";
|
|
2
|
+
|
|
3
|
+
const ORIGINAL_FETCH_KEY = Symbol.for("@hebo/fetch/original-fetch");
|
|
4
|
+
|
|
5
|
+
type GlobalFetchState = typeof globalThis & {
|
|
6
|
+
[ORIGINAL_FETCH_KEY]?: typeof fetch;
|
|
7
|
+
};
|
|
8
|
+
|
|
9
|
+
const g = globalThis as GlobalFetchState;
|
|
10
|
+
|
|
11
|
+
/**
 * Timing wrapper installed in place of the global `fetch`.
 *
 * Delegates to the saved original fetch and records two perf marks around the
 * call: `fetchStart` (via `markPerfOnce`) and `fetchEnd` (via `markPerf`).
 * The marks are keyed on `init` when it is provided, otherwise on `input`.
 * `fetchEnd` is only recorded when the underlying fetch resolves.
 */
const perfFetch = async (input: RequestInfo | URL, init?: RequestInit) => {
  const nativeFetch = g[ORIGINAL_FETCH_KEY]!;
  const perfSource = init ?? input;

  markPerfOnce(perfSource, "fetchStart");
  const upstreamResponse = await nativeFetch(input, init);
  markPerf(perfSource, "fetchEnd");

  return upstreamResponse;
};
|
|
18
|
+
|
|
19
|
+
/**
 * Installs `perfFetch` as the global `fetch`, at most once.
 *
 * The original fetch (bound to `globalThis`) is stashed under
 * `ORIGINAL_FETCH_KEY`; a value already stored there means the patch has been
 * applied before, in which case nothing happens.
 */
export const initFetch = () => {
  const alreadyPatched = Boolean(g[ORIGINAL_FETCH_KEY]);

  if (!alreadyPatched) {
    g[ORIGINAL_FETCH_KEY] = globalThis.fetch.bind(globalThis);
    globalThis.fetch = perfFetch as typeof fetch;
  }
};
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { resolveRequestId } from "../utils/headers";
|
|
2
|
+
import { initFetch } from "./fetch";
|
|
3
|
+
|
|
4
|
+
type PerfStore = {
|
|
5
|
+
timers: Record<string, number>;
|
|
6
|
+
origin: number;
|
|
7
|
+
memory?: {
|
|
8
|
+
steadyHeapUsed: number;
|
|
9
|
+
peakHeapUsed: number;
|
|
10
|
+
};
|
|
11
|
+
};
|
|
12
|
+
type RequestIdSource = string | URL | Request | RequestInit;
|
|
13
|
+
|
|
14
|
+
const REQ_PERF_KEY = Symbol.for("@hebo/perf/by-request");
|
|
15
|
+
|
|
16
|
+
type GlobalPerfState = typeof globalThis & {
|
|
17
|
+
[REQ_PERF_KEY]?: Map<string, PerfStore>;
|
|
18
|
+
};
|
|
19
|
+
const g = globalThis as GlobalPerfState;
|
|
20
|
+
const perfByRequestId = (g[REQ_PERF_KEY] ??= new Map<string, PerfStore>());
|
|
21
|
+
|
|
22
|
+
/** Converts a byte count to mebibytes, rounded to two decimal places. */
const toMb = (bytes: number) => +(bytes / (1024 * 1024)).toFixed(2);

/**
 * Returns `process.memoryUsage()` when the runtime provides it, else `undefined`.
 *
 * The `typeof` guard is required: optional chaining (`process?.…`) still throws
 * a ReferenceError when the identifier `process` is not declared at all, which
 * is the case on runtimes without a Node-style `process` global.
 */
const mem = () => (typeof process === "undefined" ? undefined : process?.memoryUsage?.());
|
|
24
|
+
|
|
25
|
+
/**
 * Raises the store's recorded peak heap usage if the current heap usage is higher.
 *
 * Does nothing when the store carries no memory data or when no (non-zero)
 * heap reading is available.
 */
const samplePeakMemory = (perf: PerfStore) => {
  const currentHeap = mem()?.heapUsed;
  const tracked = perf.memory;

  if (!tracked || !currentHeap) return;

  if (currentHeap > tracked.peakHeapUsed) {
    tracked.peakHeapUsed = currentHeap;
  }
};
|
|
30
|
+
|
|
31
|
+
/**
 * Looks up the perf store registered for the request id carried by `source`.
 *
 * @returns The store, or `undefined` when `source` yields no request id or no
 *   store is registered under that id.
 */
const getPerfStore = (source: RequestIdSource) => {
  const requestId = resolveRequestId(source);
  if (!requestId) return undefined;

  return perfByRequestId.get(requestId);
};
|
|
35
|
+
|
|
36
|
+
/**
 * Starts perf tracking for the request identified by `source`.
 *
 * Always makes sure the fetch wrapper is installed first. A new store is then
 * registered under the request id, unless `source` has no id or a store for
 * that id already exists. The store's origin is the current
 * `performance.now()`; when a heap reading is available it is used as both the
 * steady and the initial peak value.
 */
export const initPerf = (source: RequestIdSource) => {
  initFetch();

  const requestId = resolveRequestId(source);
  if (!requestId) return;
  if (perfByRequestId.has(requestId)) return;

  const baselineHeap = mem()?.heapUsed;

  const store: PerfStore = {
    timers: {},
    origin: performance.now(),
    memory:
      // eslint-disable-next-line eqeqeq
      baselineHeap == null
        ? undefined
        : { steadyHeapUsed: baselineHeap, peakHeapUsed: baselineHeap },
  };

  perfByRequestId.set(requestId, store);
};
|
|
51
|
+
|
|
52
|
+
/**
 * Records the time elapsed since the store's origin under `name`.
 *
 * @param source - Value from which the request id is resolved.
 * @param name - Timer name to write.
 * @param once - When true, an already recorded timer is left untouched and its
 *   existing value is returned.
 * @returns The recorded (or pre-existing) value in milliseconds, rounded to two
 *   decimals, or `undefined` when no store exists for `source`.
 */
const mark = (source: RequestIdSource, name: string, once: boolean) => {
  const store = getPerfStore(source);
  if (!store) return;

  const previous = store.timers[name];
  const keepPrevious = once && previous !== undefined;
  if (keepPrevious) return previous;

  const elapsedMs = performance.now() - store.origin;
  const rounded = +elapsedMs.toFixed(2);
  store.timers[name] = rounded;

  // Every new mark doubles as a sampling point for peak heap usage.
  samplePeakMemory(store);

  return rounded;
};
|
|
66
|
+
|
|
67
|
+
/** Records (or overwrites) the timer `name` for the request behind `source`. */
export const markPerf = (source: RequestIdSource, name: string) => {
  return mark(source, name, false);
};

/** Records the timer `name` only if it has not been recorded yet. */
export const markPerfOnce = (source: RequestIdSource, name: string) => {
  return mark(source, name, true);
};
|
|
70
|
+
|
|
71
|
+
/** Drops the perf store registered for the request id carried by `source`, if any. */
export const clearPerf = (source: RequestIdSource) => {
  const requestId = resolveRequestId(source);
  if (!requestId) return;

  perfByRequestId.delete(requestId);
};
|
|
75
|
+
|
|
76
|
+
/** Returns the timers recorded for `source`, or an empty object when there is no store. */
export const getPerfMeta = (source: RequestIdSource) => {
  const store = getPerfStore(source);
  return store?.timers ?? {};
};
|
|
77
|
+
|
|
78
|
+
/**
 * Summarises memory figures for the request behind `source`, in MB.
 *
 * @returns `undefined` when there is no store or the store has no memory data;
 *   otherwise `total` (current RSS, `undefined` if no reading is available)
 *   and `request` (peak heap usage minus the heap usage captured at init).
 */
export const getMemoryMeta = (source: RequestIdSource) => {
  const store = getPerfStore(source);
  const tracked = store?.memory;
  if (!store || !tracked) return;

  // Take one last sample so the peak includes the state at reporting time.
  samplePeakMemory(store);
  const usage = mem();

  const total = usage ? toMb(usage.rss) : undefined;
  const request = toMb(tracked.peakHeapUsed - tracked.steadyHeapUsed);

  return { total, request };
};
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/** Lower-case name of the header that carries the request id. */
export const REQUEST_ID_HEADER = "x-request-id";

/** Every value `resolveRequestId` accepts as a place to look for the header. */
type HeaderSource =
  | string
  | URL
  | Headers
  | Request
  | Response
  | RequestInit
  | ResponseInit
  | HeadersInit
  | undefined;

/**
 * Extracts the request id header from any of the supported header carriers.
 *
 * Header names are matched case-insensitively for every carrier: `Headers`
 * instances, `[name, value]` tuple arrays and plain header records. For
 * records, an exact lower-case key takes precedence over differently-cased
 * variants of the same name.
 *
 * @param source - A `Request`, `Response`, `Headers`, an init object with a
 *   `headers` member, or a bare `HeadersInit`. Strings, URLs and `undefined`
 *   carry no headers.
 * @returns The header value, or `undefined` when it cannot be found.
 */
export const resolveRequestId = (source: HeaderSource): string | undefined => {
  if (!source || typeof source === "string" || source instanceof URL) return undefined;

  if (source instanceof Request || source instanceof Response) {
    return source.headers.get(REQUEST_ID_HEADER) ?? undefined;
  }

  // Init objects carry their headers under `headers`; anything else is treated
  // as the header collection itself.
  const headers = "headers" in source ? source.headers : source;
  if (!headers || typeof headers === "string") return undefined;

  if (headers instanceof Headers) return headers.get(REQUEST_ID_HEADER) ?? undefined;

  if (Array.isArray(headers)) {
    for (const [key, value] of headers) {
      if (key.toLowerCase() === REQUEST_ID_HEADER) return value;
    }
    return undefined;
  }

  // Plain record: try the canonical key first, then fall back to a
  // case-insensitive scan so that e.g. "X-Request-Id" is found as well.
  const record = headers as Record<string, string>;
  const exact = record[REQUEST_ID_HEADER];
  if (exact !== undefined && exact !== null) return exact;

  for (const [key, value] of Object.entries(record)) {
    if (key.toLowerCase() === REQUEST_ID_HEADER) return value ?? undefined;
  }

  return undefined;
};
|
package/src/utils/request.ts
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import type { RequestPatch } from "../types";
|
|
2
2
|
|
|
3
3
|
import pkg from "../../package.json" with { type: "json" };
|
|
4
|
+
import { REQUEST_ID_HEADER } from "./headers";
|
|
4
5
|
|
|
5
6
|
const GATEWAY_VERSION = pkg.version;
|
|
6
7
|
|
|
7
8
|
export const prepareRequestHeaders = (request: Request) => {
|
|
8
|
-
const existingRequestId = request.headers.get(
|
|
9
|
+
const existingRequestId = request.headers.get(REQUEST_ID_HEADER);
|
|
9
10
|
if (existingRequestId) return;
|
|
10
11
|
|
|
11
12
|
const requestId =
|
|
@@ -14,7 +15,7 @@ export const prepareRequestHeaders = (request: Request) => {
|
|
|
14
15
|
crypto.randomUUID();
|
|
15
16
|
|
|
16
17
|
const headers = new Headers(request.headers);
|
|
17
|
-
headers.set(
|
|
18
|
+
headers.set(REQUEST_ID_HEADER, requestId);
|
|
18
19
|
|
|
19
20
|
return headers;
|
|
20
21
|
};
|
|
@@ -37,7 +38,7 @@ export const prepareForwardHeaders = (request: Request): Record<string, string>
|
|
|
37
38
|
: `@hebo-ai/gateway/${GATEWAY_VERSION}`;
|
|
38
39
|
|
|
39
40
|
return {
|
|
40
|
-
|
|
41
|
+
[REQUEST_ID_HEADER]: request.headers.get(REQUEST_ID_HEADER)!,
|
|
41
42
|
"user-agent": appendedUserAgent,
|
|
42
43
|
};
|
|
43
44
|
};
|
package/src/utils/response.ts
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
|
+
import { REQUEST_ID_HEADER, resolveRequestId } from "./headers";
|
|
2
|
+
|
|
1
3
|
const TEXT_ENCODER = new TextEncoder();
|
|
2
4
|
|
|
5
|
+
/**
 * Builds the base `ResponseInit` for a reply to `request`, echoing the
 * request id header of the request.
 *
 * @param request - The incoming request whose id should be echoed.
 * @returns An init object whose `headers` record holds the request id header,
 *   or an empty `headers` record when the request carries no id.
 */
export const prepareResponseInit = (request: Request): ResponseInit => {
  const requestId = resolveRequestId(request.headers);

  // Never emit the header without a value: a record entry of `undefined` is
  // stringified to the literal "undefined" once it is turned into `Headers`.
  return {
    headers: requestId === undefined ? {} : { [REQUEST_ID_HEADER]: requestId },
  };
};
|
|
8
|
+
|
|
3
9
|
export const mergeResponseInit = (
|
|
4
10
|
defaultHeaders: HeadersInit,
|
|
5
11
|
responseInit?: ResponseInit,
|
|
@@ -9,7 +15,13 @@ export const mergeResponseInit = (
|
|
|
9
15
|
if (override) {
|
|
10
16
|
new Headers(override).forEach((value, key) => headers.set(key, value));
|
|
11
17
|
}
|
|
12
|
-
|
|
18
|
+
if (!responseInit) return { headers };
|
|
19
|
+
|
|
20
|
+
return {
|
|
21
|
+
status: responseInit.status,
|
|
22
|
+
statusText: responseInit.statusText,
|
|
23
|
+
headers,
|
|
24
|
+
};
|
|
13
25
|
};
|
|
14
26
|
|
|
15
27
|
export const toResponse = (
|
|
@@ -30,12 +42,15 @@ export const toResponse = (
|
|
|
30
42
|
}
|
|
31
43
|
|
|
32
44
|
const contentLength = body instanceof Uint8Array ? String(body.byteLength) : "";
|
|
45
|
+
const isError = result instanceof Error;
|
|
33
46
|
|
|
34
|
-
if (!responseInit)
|
|
35
|
-
responseInit
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
47
|
+
if (!responseInit?.statusText) {
|
|
48
|
+
const status = responseInit?.status ?? (isError ? 500 : 200);
|
|
49
|
+
const statusText = isError ? "REQUEST_FAILED" : "OK";
|
|
50
|
+
const headers = responseInit?.headers;
|
|
51
|
+
|
|
52
|
+
responseInit = headers ? { status, statusText, headers } : { status, statusText };
|
|
53
|
+
}
|
|
39
54
|
|
|
40
55
|
const init = mergeResponseInit(
|
|
41
56
|
isStream
|