@hebo-ai/gateway 0.3.0-rc.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -69,14 +69,18 @@ export const chatCompletions = (config) => {
  // No abort signal here, otherwise we can't detect upstream from client cancellations
  // abortSignal: ctx.request.signal,
  onError: ({ error }) => {
- logger.error(error instanceof Error ? error : new Error(String(error)), {
+ logger.error({
  requestId: resolveRequestId(ctx.request),
+ err: error instanceof Error ? error : new Error(String(error)),
  });
  throw error;
  },
  onAbort: () => {
  throw new DOMException("Upstream failed", "AbortError");
  },
+ timeout: {
+ chunkMs: 5 * 60 * 1000,
+ },
  experimental_include: {
  requestBody: false,
  },
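
Note: the error handler above now logs pino-style, passing the Error as an `err` field on a single log object instead of as the first positional argument (the old three-argument overload is removed from the LogFn type further down). A minimal sketch of the call shapes the 0.3.0 logger accepts; the requestId value is illustrative:

// object-first, with the error carried in `err`
logger.error({ requestId: "req_123", err: new Error("upstream 502") });
// plain message, or Error-first with an optional message, still work
logger.error("shutting down");
logger.error(new Error("boom"), "stream failed");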
@@ -89,11 +93,13 @@ export const chatCompletions = (config) => {
  const result = await generateText({
  model: languageModelWithMiddleware,
  headers: prepareForwardHeaders(ctx.request),
+ // FUTURE: currently can't tell whether upstream or downstream abort
  abortSignal: ctx.request.signal,
  experimental_include: {
  requestBody: false,
  responseBody: false,
  },
+ timeout: 5 * 60 * 1000,
  ...textOptions,
  });
  markPerf(ctx.request, "aiSdkEnd");
package/dist/lifecycle.js CHANGED
@@ -28,8 +28,9 @@ export const winterCgHandler = (run, config) => {
  ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.request));
  }
  catch (error) {
- logger.error(error instanceof Error ? error : new Error(String(error)), {
+ logger.error({
  requestId: resolveRequestId(ctx.request),
+ err: error instanceof Error ? error : new Error(String(error)),
  });
  ctx.response = toOpenAIErrorResponse(error, prepareResponseInit(ctx.request));
  }
@@ -15,39 +15,34 @@ const isRecord = (value) => typeof value === "object" && value !== null && !(val
  const buildLogObject = (level, args) => {
  if (args.length === 0)
  return {};
- const [first, second, third] = args;
+ const [first, second] = args;
  let obj;
  let err;
  let msg;
  if (first instanceof Error) {
  err = first;
- if (isRecord(second)) {
- obj = second;
- if (third !== undefined) {
- msg = String(third);
- }
- }
- else if (second !== undefined) {
- msg = String(second);
- }
  }
  else if (isRecord(first)) {
- obj = first;
- if (second !== undefined) {
- msg = String(second);
+ if (first["err"] !== undefined) {
+ err = first["err"];
+ delete first["err"];
  }
+ obj = first;
  }
  else {
  msg = String(first);
  }
+ if (second !== undefined) {
+ msg = String(second);
+ }
  if (err && msg === undefined) {
- msg = err.message;
+ msg = err instanceof Error ? err.message : String(err);
  }
  return {
  level,
  time: Date.now(),
  ...(msg ? { msg } : {}),
- ...(err ? { err: serializeError(err) } : {}),
+ ...(err ? { err: err instanceof Error ? serializeError(err) : err } : {}),
  ...obj,
  };
  };
@@ -2,7 +2,6 @@ export type LogFn = {
  (msg: string): void;
  (obj: Record<string, unknown>, msg?: string): void;
  (err: Error, msg?: string): void;
- (err: Error, obj?: Record<string, unknown>, msg?: string): void;
  };
  export type Logger = Record<"trace" | "debug" | "info" | "warn" | "error", LogFn>;
  export type LogLevel = "trace" | "debug" | "info" | "warn" | "error" | "silent";
@@ -1,6 +1,6 @@
  import { logger } from "../logger";
  import { resolveRequestId } from "../utils/headers";
- import { clearPerf, getPerfMeta, initPerf, markPerf } from "./perf";
+ import { clearPerf, getMemoryMeta, getPerfMeta, initPerf, markPerf } from "./perf";
  import { instrumentStream } from "./stream";
  import { getAIMeta, getRequestMeta, getResponseMeta } from "./utils";
  export const withAccessLog = (run) => async (ctx) => {
@@ -21,6 +21,7 @@ export const withAccessLog = (run) => async (ctx) => {
  request: requestMeta,
  response: responseMeta,
  timings: getPerfMeta(ctx.request),
+ memory: getMemoryMeta(ctx.request),
  bytes: {
  in: requestBytes,
  out: stats?.bytes ?? responseMeta["contentLength"],
@@ -4,4 +4,8 @@ export declare const markPerf: (source: RequestIdSource, name: string) => number
  export declare const markPerfOnce: (source: RequestIdSource, name: string) => number | undefined;
  export declare const clearPerf: (source: RequestIdSource) => void;
  export declare const getPerfMeta: (source: RequestIdSource) => Record<string, number>;
+ export declare const getMemoryMeta: (source: RequestIdSource) => {
+ total: number | undefined;
+ request: number;
+ } | undefined;
  export {};
@@ -3,22 +3,28 @@ import { initFetch } from "./fetch";
  const REQ_PERF_KEY = Symbol.for("@hebo/perf/by-request");
  const g = globalThis;
  const perfByRequestId = (g[REQ_PERF_KEY] ??= new Map());
+ const toMb = (bytes) => +(bytes / (1024 * 1024)).toFixed(2);
+ const mem = () => process?.memoryUsage?.();
+ const samplePeakMemory = (perf) => {
+ const heapUsed = mem()?.heapUsed;
+ if (perf.memory && heapUsed && heapUsed > perf.memory.peakHeapUsed)
+ perf.memory.peakHeapUsed = heapUsed;
+ };
  const getPerfStore = (source) => {
- const requestId = resolveRequestId(source);
- if (!requestId)
- return undefined;
- return perfByRequestId.get(requestId);
+ const id = resolveRequestId(source);
+ return id ? perfByRequestId.get(id) : undefined;
  };
  export const initPerf = (source) => {
  initFetch();
- const requestId = resolveRequestId(source);
- if (!requestId)
- return;
- if (perfByRequestId.has(requestId))
+ const id = resolveRequestId(source);
+ if (!id || perfByRequestId.has(id))
  return;
- perfByRequestId.set(requestId, {
+ const heapUsed = mem()?.heapUsed;
+ perfByRequestId.set(id, {
  timers: {},
  origin: performance.now(),
+ // eslint-disable-next-line eqeqeq
+ memory: heapUsed == null ? undefined : { steadyHeapUsed: heapUsed, peakHeapUsed: heapUsed },
  });
  };
  const mark = (source, name, once) => {
@@ -30,21 +36,25 @@ const mark = (source, name, once) => {
  return existing;
  const value = +(performance.now() - perf.origin).toFixed(2);
  perf.timers[name] = value;
+ samplePeakMemory(perf);
  return value;
  };
- export const markPerf = (source, name) => {
- return mark(source, name, false);
- };
- export const markPerfOnce = (source, name) => {
- return mark(source, name, true);
- };
+ export const markPerf = (source, name) => mark(source, name, false);
+ export const markPerfOnce = (source, name) => mark(source, name, true);
  export const clearPerf = (source) => {
- const requestId = resolveRequestId(source);
- if (!requestId)
- return;
- perfByRequestId.delete(requestId);
+ const id = resolveRequestId(source);
+ if (id)
+ perfByRequestId.delete(id);
  };
- export const getPerfMeta = (source) => {
+ export const getPerfMeta = (source) => getPerfStore(source)?.timers ?? {};
+ export const getMemoryMeta = (source) => {
  const perf = getPerfStore(source);
- return perf?.timers ?? {};
+ if (!perf?.memory)
+ return;
+ samplePeakMemory(perf);
+ const memory = mem();
+ return {
+ total: memory ? toMb(memory.rss) : undefined,
+ request: toMb(perf.memory.peakHeapUsed - perf.memory.steadyHeapUsed),
+ };
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@hebo-ai/gateway",
- "version": "0.3.0-rc.3",
+ "version": "0.3.0",
  "description": "AI gateway as a framework. For full control over models, routing & lifecycle. OpenAI-compatible /chat/completions, /embeddings & /models.",
  "keywords": [
  "ai",
@@ -91,14 +91,18 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
  // No abort signal here, otherwise we can't detect upstream from client cancellations
  // abortSignal: ctx.request.signal,
  onError: ({ error }) => {
- logger.error(error instanceof Error ? error : new Error(String(error)), {
+ logger.error({
  requestId: resolveRequestId(ctx.request),
+ err: error instanceof Error ? error : new Error(String(error)),
  });
  throw error;
  },
  onAbort: () => {
  throw new DOMException("Upstream failed", "AbortError");
  },
+ timeout: {
+ chunkMs: 5 * 60 * 1000,
+ },
  experimental_include: {
  requestBody: false,
  },
@@ -113,11 +117,13 @@ export const chatCompletions = (config: GatewayConfig): Endpoint => {
  const result = await generateText({
  model: languageModelWithMiddleware,
  headers: prepareForwardHeaders(ctx.request),
+ // FUTURE: currently can't tell whether upstream or downstream abort
  abortSignal: ctx.request.signal,
  experimental_include: {
  requestBody: false,
  responseBody: false,
  },
+ timeout: 5 * 60 * 1000,
  ...textOptions,
  });
  markPerf(ctx.request, "aiSdkEnd");
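
Note: both AI SDK calls now carry a five-minute timeout, streamText as a per-chunk idle timeout (timeout.chunkMs) and generateText as a single overall timeout value. A rough sketch of the equivalent configuration with the constant named; this is purely illustrative, the package keeps the inline 5 * 60 * 1000 shown above:

// five minutes, expressed in milliseconds (300000)
const UPSTREAM_TIMEOUT_MS = 5 * 60 * 1000;
// streaming: abort if no chunk arrives within the window
streamText({ ...options, timeout: { chunkMs: UPSTREAM_TIMEOUT_MS } });
// non-streaming: abort the whole generation after the window
generateText({ ...options, timeout: UPSTREAM_TIMEOUT_MS });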
package/src/lifecycle.ts CHANGED
@@ -36,8 +36,9 @@ export const winterCgHandler = (
  }
  ctx.response = toResponse(ctx.result, prepareResponseInit(ctx.request));
  } catch (error) {
- logger.error(error instanceof Error ? error : new Error(String(error)), {
+ logger.error({
  requestId: resolveRequestId(ctx.request)!,
+ err: error instanceof Error ? error : new Error(String(error)),
  });
  ctx.response = toOpenAIErrorResponse(error, prepareResponseInit(ctx.request));
  }
@@ -25,39 +25,37 @@ const isRecord = (value: unknown): value is Record<string, unknown> =>
  const buildLogObject = (level: LogLevel, args: unknown[]): Record<string, unknown> => {
  if (args.length === 0) return {};

- const [first, second, third] = args;
+ const [first, second] = args;
+
  let obj: Record<string, unknown> | undefined;
- let err: Error | undefined;
+ let err: unknown;
  let msg: string | undefined;

  if (first instanceof Error) {
  err = first;
- if (isRecord(second)) {
- obj = second;
- if (third !== undefined) {
- msg = String(third);
- }
- } else if (second !== undefined) {
- msg = String(second);
- }
  } else if (isRecord(first)) {
- obj = first;
- if (second !== undefined) {
- msg = String(second);
+ if (first["err"] !== undefined) {
+ err = first["err"];
+ delete first["err"];
  }
+ obj = first;
  } else {
  msg = String(first);
  }

+ if (second !== undefined) {
+ msg = String(second);
+ }
+
  if (err && msg === undefined) {
- msg = err.message;
+ msg = err instanceof Error ? err.message : String(err);
  }

  return {
  level,
  time: Date.now(),
  ...(msg ? { msg } : {}),
- ...(err ? { err: serializeError(err) } : {}),
+ ...(err ? { err: err instanceof Error ? serializeError(err) : err } : {}),
  ...obj,
  };
  };
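
Note: with the rewritten buildLogObject, an object-first call that carries an `err` field is flattened into one record: `err` is lifted out of the object, serialized when it is a real Error, and its message becomes `msg` if no explicit message was passed. A sketch of the expected output shape; the timestamp and the exact serializeError fields are illustrative:

const entry = buildLogObject("error", [
  { requestId: "req_123", err: new Error("upstream 502") },
]);
// roughly:
// {
//   level: "error",
//   time: 1700000000000,                // Date.now()
//   msg: "upstream 502",                // falls back to err.message
//   err: { name, message, stack, ... }, // serializeError(err)
//   requestId: "req_123",
// }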
@@ -4,7 +4,6 @@ export type LogFn = {
  (msg: string): void;
  (obj: Record<string, unknown>, msg?: string): void;
  (err: Error, msg?: string): void;
- (err: Error, obj?: Record<string, unknown>, msg?: string): void;
  };

  export type Logger = Record<"trace" | "debug" | "info" | "warn" | "error", LogFn>;
@@ -2,7 +2,7 @@ import type { GatewayContext } from "../types";

  import { logger } from "../logger";
  import { resolveRequestId } from "../utils/headers";
- import { clearPerf, getPerfMeta, initPerf, markPerf } from "./perf";
+ import { clearPerf, getMemoryMeta, getPerfMeta, initPerf, markPerf } from "./perf";
  import { instrumentStream } from "./stream";
  import { getAIMeta, getRequestMeta, getResponseMeta } from "./utils";
@@ -28,6 +28,7 @@ export const withAccessLog =
  request: requestMeta,
  response: responseMeta,
  timings: getPerfMeta(ctx.request),
+ memory: getMemoryMeta(ctx.request),
  bytes: {
  in: requestBytes,
  out: stats?.bytes ?? responseMeta["contentLength"],
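
Note: the access-log record gains a memory block next to timings and bytes. A hypothetical entry after this change; the keys follow the code above and getMemoryMeta below, the values are invented:

// illustrative access-log entry
{
  request: { /* method, path, ... */ },
  response: { /* status, contentLength, ... */ },
  timings: { aiSdkEnd: 1834.21 },          // ms offsets from the request's perf origin
  memory: { total: 182.4, request: 3.12 }, // MB: process RSS and per-request heap growth
  bytes: { in: 512, out: 20480 },
}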
@@ -4,6 +4,10 @@ import { initFetch } from "./fetch";
  type PerfStore = {
  timers: Record<string, number>;
  origin: number;
+ memory?: {
+ steadyHeapUsed: number;
+ peakHeapUsed: number;
+ };
  };
  type RequestIdSource = string | URL | Request | RequestInit;
@@ -15,49 +19,71 @@ type GlobalPerfState = typeof globalThis & {
  const g = globalThis as GlobalPerfState;
  const perfByRequestId = (g[REQ_PERF_KEY] ??= new Map<string, PerfStore>());

- const getPerfStore = (source: RequestIdSource): PerfStore | undefined => {
- const requestId = resolveRequestId(source);
- if (!requestId) return undefined;
- return perfByRequestId.get(requestId);
+ const toMb = (bytes: number) => +(bytes / (1024 * 1024)).toFixed(2);
+ const mem = () => process?.memoryUsage?.();
+
+ const samplePeakMemory = (perf: PerfStore) => {
+ const heapUsed = mem()?.heapUsed;
+ if (perf.memory && heapUsed && heapUsed > perf.memory.peakHeapUsed)
+ perf.memory.peakHeapUsed = heapUsed;
+ };
+
+ const getPerfStore = (source: RequestIdSource) => {
+ const id = resolveRequestId(source);
+ return id ? perfByRequestId.get(id) : undefined;
  };

  export const initPerf = (source: RequestIdSource) => {
  initFetch();
- const requestId = resolveRequestId(source);
- if (!requestId) return;
- if (perfByRequestId.has(requestId)) return;
- perfByRequestId.set(requestId, {
+
+ const id = resolveRequestId(source);
+ if (!id || perfByRequestId.has(id)) return;
+
+ const heapUsed = mem()?.heapUsed;
+
+ perfByRequestId.set(id, {
  timers: {},
  origin: performance.now(),
+ // eslint-disable-next-line eqeqeq
+ memory: heapUsed == null ? undefined : { steadyHeapUsed: heapUsed, peakHeapUsed: heapUsed },
  });
  };

  const mark = (source: RequestIdSource, name: string, once: boolean) => {
  const perf = getPerfStore(source);
  if (!perf) return;
+
  const existing = perf.timers[name];
  if (once && existing !== undefined) return existing;

  const value = +(performance.now() - perf.origin).toFixed(2);
  perf.timers[name] = value;
+
+ samplePeakMemory(perf);
+
  return value;
  };

- export const markPerf = (source: RequestIdSource, name: string) => {
- return mark(source, name, false);
- };
+ export const markPerf = (source: RequestIdSource, name: string) => mark(source, name, false);

- export const markPerfOnce = (source: RequestIdSource, name: string) => {
- return mark(source, name, true);
- };
+ export const markPerfOnce = (source: RequestIdSource, name: string) => mark(source, name, true);

  export const clearPerf = (source: RequestIdSource) => {
- const requestId = resolveRequestId(source);
- if (!requestId) return;
- perfByRequestId.delete(requestId);
+ const id = resolveRequestId(source);
+ if (id) perfByRequestId.delete(id);
  };

- export const getPerfMeta = (source: RequestIdSource) => {
+ export const getPerfMeta = (source: RequestIdSource) => getPerfStore(source)?.timers ?? {};
+
+ export const getMemoryMeta = (source: RequestIdSource) => {
  const perf = getPerfStore(source);
- return perf?.timers ?? {};
+ if (!perf?.memory) return;
+
+ samplePeakMemory(perf);
+ const memory = mem();
+
+ return {
+ total: memory ? toMb(memory.rss) : undefined,
+ request: toMb(perf.memory.peakHeapUsed - perf.memory.steadyHeapUsed),
+ };
  };
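
Note: getMemoryMeta reports both numbers in MB: total is the current process RSS, and request is the heap growth attributed to the request, i.e. the peak heapUsed sampled on each markPerf call minus the heapUsed captured when initPerf ran. A quick worked example with invented byte counts, using the same toMb rounding as above:

const toMb = (bytes: number) => +(bytes / (1024 * 1024)).toFixed(2);
// steadyHeapUsed at initPerf:            41_943_040 bytes (~40 MB)
// peakHeapUsed after markPerf sampling:  45_219_840 bytes
toMb(45_219_840 - 41_943_040); // 3.13 -> reported as memory.request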