@superblocksteam/sdk 2.0.130-next.0 → 2.0.130-next.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/dist/dev-utils/dev-server.d.mts.map +1 -1
- package/dist/dev-utils/dev-server.mjs +47 -7
- package/dist/dev-utils/dev-server.mjs.map +1 -1
- package/dist/dev-utils/fatal-exit.d.mts +87 -0
- package/dist/dev-utils/fatal-exit.d.mts.map +1 -0
- package/dist/dev-utils/fatal-exit.mjs +78 -0
- package/dist/dev-utils/fatal-exit.mjs.map +1 -0
- package/dist/dev-utils/fatal-exit.test.d.mts +2 -0
- package/dist/dev-utils/fatal-exit.test.d.mts.map +1 -0
- package/dist/dev-utils/fatal-exit.test.mjs +68 -0
- package/dist/dev-utils/fatal-exit.test.mjs.map +1 -0
- package/package.json +11 -11
- package/src/dev-utils/dev-server.mts +59 -7
- package/src/dev-utils/fatal-exit.mts +113 -0
- package/src/dev-utils/fatal-exit.test.mts +81 -0
- package/tsconfig.tsbuildinfo +1 -1
- package/turbo.json +1 -0
|
@@ -65,6 +65,7 @@ import {
|
|
|
65
65
|
type DevServerFailureType,
|
|
66
66
|
devServerMetrics,
|
|
67
67
|
} from "./dev-server-metrics.mjs";
|
|
68
|
+
import { buildFatalExitLog, parseIsWarm } from "./fatal-exit.mjs";
|
|
68
69
|
import {
|
|
69
70
|
formatViteDevServerStartedLog,
|
|
70
71
|
logViteBuildError,
|
|
@@ -1101,6 +1102,18 @@ export async function createDevServer({
|
|
|
1101
1102
|
}
|
|
1102
1103
|
});
|
|
1103
1104
|
|
|
1105
|
+
// Read once at startup. The pod's warm-or-cold launch mode is set when the pod
|
|
1106
|
+
// starts and never changes, so this flag stays correct in the fatal-exit log
|
|
1107
|
+
// even if the pod is activated later.
|
|
1108
|
+
const isWarm = parseIsWarm(process.env.SUPERBLOCKS_WARM_STANDBY);
|
|
1109
|
+
|
|
1110
|
+
// Only the first fatal event writes a fatal-exit line and starts shutdown.
|
|
1111
|
+
// Without this, one crash could write several lines: an unhandled rejection
|
|
1112
|
+
// fires once per rejected promise, so a burst of failures (say a dead DB pool
|
|
1113
|
+
// rejecting every in-flight query) would each write a line and each start
|
|
1114
|
+
// shutdown, making one crash look like many in the logs and the crash count.
|
|
1115
|
+
let fatalExitHandled = false;
|
|
1116
|
+
|
|
1104
1117
|
// Signal handlers attach `.catch` so a synchronous throw in
|
|
1105
1118
|
// `runGracefulShutdown` (e.g. logger init or lockService shutdown throwing
|
|
1106
1119
|
// before the first `await`) is logged rather than surfacing as an
|
|
@@ -1139,13 +1152,25 @@ export async function createDevServer({
|
|
|
1139
1152
|
);
|
|
1140
1153
|
});
|
|
1141
1154
|
|
|
1142
|
-
// `
|
|
1143
|
-
//
|
|
1144
|
-
//
|
|
1145
|
-
// through `.then` without firing `.catch`; without the `.finally` the
|
|
1146
|
-
// process would linger in event-loop limbo until the OS killed it.
|
|
1155
|
+
// The `.finally` always ends the process once shutdown settles. If an earlier
|
|
1156
|
+
// signal already ran shutdown, this handler still needs to exit; without the
|
|
1157
|
+
// final `process.exit` the process could hang instead of stopping.
|
|
1147
1158
|
process.on("uncaughtException", (error) => {
|
|
1148
|
-
|
|
1159
|
+
if (fatalExitHandled) {
|
|
1160
|
+
return;
|
|
1161
|
+
}
|
|
1162
|
+
fatalExitHandled = true;
|
|
1163
|
+
// Write the fatal-exit line first, and synchronously, so the reason reaches
|
|
1164
|
+
// the logs before anything else runs. Shutdown below may finish first and
|
|
1165
|
+
// call `process.exit(0)`, so the container's real exit code can be 0 even
|
|
1166
|
+
// though this was a crash. That is why `exit_code=1` here is the code this
|
|
1167
|
+
// handler means to use, not a promise of what the container reports: for the
|
|
1168
|
+
// JS-handler lines, trust that the line exists (and the `signal` field) over
|
|
1169
|
+
// its `exit_code` when comparing against the pod's exit status.
|
|
1170
|
+
logger.error(
|
|
1171
|
+
buildFatalExitLog({ handler: "uncaughtException", exitCode: 1, isWarm }),
|
|
1172
|
+
getErrorMeta(error),
|
|
1173
|
+
);
|
|
1149
1174
|
gracefulShutdown({
|
|
1150
1175
|
logger,
|
|
1151
1176
|
serverInitiated: false,
|
|
@@ -1154,7 +1179,34 @@ export async function createDevServer({
|
|
|
1154
1179
|
.catch((shutdownError) => {
|
|
1155
1180
|
logger.error(
|
|
1156
1181
|
"Error during shutdown after uncaught exception:",
|
|
1157
|
-
shutdownError,
|
|
1182
|
+
getErrorMeta(shutdownError),
|
|
1183
|
+
);
|
|
1184
|
+
})
|
|
1185
|
+
.finally(() => process.exit(1));
|
|
1186
|
+
});
|
|
1187
|
+
|
|
1188
|
+
// Without its own listener, Node turns an unhandled promise rejection into an
|
|
1189
|
+
// uncaught exception, which would log it under the wrong cause. Handle it here
|
|
1190
|
+
// so a rejection is labelled as a rejection, then exit through the same
|
|
1191
|
+
// shutdown path as an uncaught exception.
|
|
1192
|
+
process.on("unhandledRejection", (reason) => {
|
|
1193
|
+
if (fatalExitHandled) {
|
|
1194
|
+
return;
|
|
1195
|
+
}
|
|
1196
|
+
fatalExitHandled = true;
|
|
1197
|
+
logger.error(
|
|
1198
|
+
buildFatalExitLog({ handler: "unhandledRejection", exitCode: 1, isWarm }),
|
|
1199
|
+
getErrorMeta(reason),
|
|
1200
|
+
);
|
|
1201
|
+
gracefulShutdown({
|
|
1202
|
+
logger,
|
|
1203
|
+
serverInitiated: false,
|
|
1204
|
+
source: "unhandledRejection",
|
|
1205
|
+
})
|
|
1206
|
+
.catch((shutdownError) => {
|
|
1207
|
+
logger.error(
|
|
1208
|
+
"Error during shutdown after unhandled rejection:",
|
|
1209
|
+
getErrorMeta(shutdownError),
|
|
1158
1210
|
);
|
|
1159
1211
|
})
|
|
1160
1212
|
.finally(() => process.exit(1));
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured fatal-exit logging for the dev server.
|
|
3
|
+
*
|
|
4
|
+
* The most common cause of live-edit pod deaths is a bare Node exit code 1 with
|
|
5
|
+
* no reason attached: the supervisor only sees the container exit code from
|
|
6
|
+
* Kubernetes, and the gVisor runtime reports no CPU/memory/event-loop metrics,
|
|
7
|
+
* so there is no way to tell why the process stopped. These helpers build one
|
|
8
|
+
* log line, easy to search for, that says why the process is exiting. It is
|
|
9
|
+
* written right before the process exits so the reason still lands in the pod
|
|
10
|
+
* logs even when the batched telemetry export never gets flushed.
|
|
11
|
+
*
|
|
12
|
+
* The fields go in the message body as `key=value` markers instead of as
|
|
13
|
+
* telemetry attributes, on purpose: at exit time the synchronous stdout write is
|
|
14
|
+
* the only sink we can rely on, while the batched OTel log exporter is usually
|
|
15
|
+
* dropped by `process.exit`. The existing warm-standby failure path in
|
|
16
|
+
* dev-server.mts already puts its markers in the body for the same reason. And
|
|
17
|
+
* for the same reason there is deliberately no OTel counter for fatal exits (a
|
|
18
|
+
* counter increment would be dropped at exit); `FATAL_EXIT_EVENT` below is the
|
|
19
|
+
* log token to query and alert on.
|
|
20
|
+
*
|
|
21
|
+
* Counting note for alerting: one crash can produce up to two of these lines.
|
|
22
|
+
* When a JS handler runs and its own `process.exit(1)` runs before shutdown can
|
|
23
|
+
* exit, the child emits a `handler=uncaughtException`/`unhandledRejection` line
|
|
24
|
+
* AND the supervisor (CLI parent, or the warm entrypoint shell) emits a
|
|
25
|
+
* `handler=containerExit` line for the same exit. A signal kill / OOM emits only
|
|
26
|
+
* the `containerExit` line (no JS handler ran); a JS crash where shutdown exits
|
|
27
|
+
* first with code 0 emits only the JS-handler line (the supervisor sees a clean
|
|
28
|
+
* exit). So a raw `count(event=dev_server_fatal_exit)` over-counts the double
|
|
29
|
+
* case: deduplicate by pod within a short time window for a true unique-crash
|
|
30
|
+
* count.
|
|
31
|
+
*
|
|
32
|
+
* Signal-kill field note: for a signal kill the cold parent emits
|
|
33
|
+
* `exit_code=null signal=SIGKILL` (Node's `child.on("exit")` gives a null code +
|
|
34
|
+
* the signal), while the warm entrypoint shell emits `exit_code=128+N
|
|
35
|
+
* signal=SIGKILL` (its `wait` returns the raw 128+N status). Both are correct
|
|
36
|
+
* for their context; query on the `signal` field, not `exit_code`, for
|
|
37
|
+
* signal-kill alerts.
|
|
38
|
+
*/
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Where the fatal exit came from, so each kind can be told apart in the logs.
|
|
42
|
+
*
|
|
43
|
+
* uncaughtException / unhandledRejection A JS handler ran and knows the error
|
|
44
|
+
* name/message/stack.
|
|
45
|
+
* containerExit No JS handler ran (signal kill / OOM,
|
|
46
|
+
* or the graceful-shutdown exit race);
|
|
47
|
+
* the supervisor only knows the code +
|
|
48
|
+
* signal. The CLI parent (dev-parent.mts)
|
|
49
|
+
* and the warm entrypoint shell emit this
|
|
50
|
+
* value. They cannot import this module,
|
|
51
|
+
* so they repeat the same string.
|
|
52
|
+
*/
|
|
53
|
+
export type FatalExitHandler =
|
|
54
|
+
| "uncaughtException"
|
|
55
|
+
| "unhandledRejection"
|
|
56
|
+
| "containerExit";
|
|
57
|
+
|
|
58
|
+
/** Fixed token so a log-based metric or a log search can pick out these lines. */
|
|
59
|
+
export const FATAL_EXIT_EVENT = "dev_server_fatal_exit";
|
|
60
|
+
|
|
61
|
+
export interface FatalExitFields {
|
|
62
|
+
/** Where the fatal exit came from. */
|
|
63
|
+
handler: FatalExitHandler;
|
|
64
|
+
/**
|
|
65
|
+
* Exit code the process is about to use (JS handler path) or the code the
|
|
66
|
+
* supervisor saw (container-exit path). `null` when the child was killed by a
|
|
67
|
+
* signal and has no exit code.
|
|
68
|
+
*/
|
|
69
|
+
exitCode: number | null;
|
|
70
|
+
/** Signal that killed the process; absent for a JS-level exit. */
|
|
71
|
+
signal?: string | null;
|
|
72
|
+
/** Whether this pod was started as a warm-standby pod (entrypoint `--warm`). */
|
|
73
|
+
isWarm: boolean;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
/**
 * Decide whether the pod was launched as a warm-standby pod, given the raw
 * value of the entrypoint's warm-standby environment variable.
 *
 * The accepted spellings are meant to match how
 * `scripts/dev-server-entrypoint.sh` reads the same variable, so that the
 * `is_warm` marker in the fatal-exit log reflects the pod's launch mode whether
 * or not the pod was activated later.
 *
 * @param value Raw environment variable value; `undefined` when it is unset.
 * @returns `true` when the value, after trimming whitespace and lower-casing,
 *   is one of `true`, `1`, `yes`, or `on`; `false` for everything else.
 */
export function parseIsWarm(value: string | undefined): boolean {
  // An unset variable is treated like an empty string, which is not warm.
  const normalized = (value ?? "").trim().toLowerCase();
  return ["true", "1", "yes", "on"].includes(normalized);
}
|
|
92
|
+
|
|
93
|
+
/**
 * Render the message body of the fatal-exit log line.
 *
 * The result is a fixed human-readable prefix followed by space-separated
 * `key=value` markers, always in this order: `event`, `handler`, `exit_code`,
 * `signal`, `is_warm`. A missing exit code is rendered as `null` and a missing
 * signal as `none`.
 *
 * SYNC: per the module's own notes, this format is repeated by two emitters
 * that cannot import this module: `buildFatalExitLog` in
 * `packages/cli/packages/cli/src/commands/dev-parent.mts` (the parent stays on
 * `node:` builtins) and the `echo` line in `scripts/dev-server-entrypoint.sh`
 * (bash). A field added here must be added in both, or
 * `event=dev_server_fatal_exit` queries will miss one emitter.
 *
 * @param fields Handler name, exit code, optional signal, and warm flag.
 * @returns The complete single-line message body.
 */
export function buildFatalExitLog(fields: FatalExitFields): string {
  const { handler, exitCode, signal, isWarm } = fields;

  // Each marker is built separately so the `key=` prefixes stay easy to grep.
  const eventMarker = `event=${FATAL_EXIT_EVENT}`;
  const handlerMarker = `handler=${handler}`;
  const exitCodeMarker = `exit_code=${exitCode ?? "null"}`;
  const signalMarker = `signal=${signal ?? "none"}`;
  const warmMarker = `is_warm=${isWarm}`;

  return `Dev server fatal exit ${eventMarker} ${handlerMarker} ${exitCodeMarker} ${signalMarker} ${warmMarker}`;
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
buildFatalExitLog,
|
|
5
|
+
FATAL_EXIT_EVENT,
|
|
6
|
+
parseIsWarm,
|
|
7
|
+
} from "./fatal-exit.mjs";
|
|
8
|
+
|
|
9
|
+
// Covers the warm-standby flag parser: accepted spellings versus everything else.
describe("parseIsWarm", () => {
  it("treats the same truthy spellings as the entrypoint shell", () => {
    // Mixed-case and padded entries confirm the parser trims and lower-cases.
    const warmSpellings = ["true", "1", "yes", "on", "TRUE", "Yes", " on "];
    warmSpellings.forEach((spelling) => {
      expect(parseIsWarm(spelling)).toBe(true);
    });
  });

  it("treats anything else (including undefined) as not warm", () => {
    // `undefined` stands in for an unset environment variable.
    const coldSpellings = ["false", "0", "no", "off", "", "cold", undefined];
    coldSpellings.forEach((spelling) => {
      expect(parseIsWarm(spelling)).toBe(false);
    });
  });
});
|
|
22
|
+
|
|
23
|
+
// Covers the fatal-exit message builder: individual markers, the defaults for
// missing values, and the exact full-line format.
describe("buildFatalExitLog", () => {
  it("carries the stable event marker so a log-based metric can isolate the line", () => {
    const message = buildFatalExitLog({
      handler: "uncaughtException",
      exitCode: 1,
      isWarm: true,
    });

    expect(message).toContain(`event=${FATAL_EXIT_EVENT}`);
  });

  it("encodes the handler, exit code, signal, and warm flag as snake_case markers", () => {
    const message = buildFatalExitLog({
      handler: "uncaughtException",
      exitCode: 1,
      signal: "SIGABRT",
      isWarm: true,
    });

    const expectedMarkers = [
      "handler=uncaughtException",
      "exit_code=1",
      "signal=SIGABRT",
      "is_warm=true",
    ];
    for (const marker of expectedMarkers) {
      expect(message).toContain(marker);
    }
  });

  it("defaults the signal to none for a JS-level exit and renders a null code", () => {
    // No `signal` key at all: the builder has to supply the `none` placeholder.
    const message = buildFatalExitLog({
      handler: "containerExit",
      exitCode: null,
      isWarm: false,
    });

    const expectedMarkers = ["signal=none", "exit_code=null", "is_warm=false"];
    for (const marker of expectedMarkers) {
      expect(message).toContain(marker);
    }
  });

  it("renders an explicit null signal as none (interface allows null and undefined)", () => {
    const message = buildFatalExitLog({
      handler: "uncaughtException",
      exitCode: 1,
      signal: null,
      isWarm: false,
    });

    expect(message).toContain("signal=none");
  });

  it("produces the exact format-contract string (field names, order, separators)", () => {
    // The whole line is the log-query/alerting contract shared by the three
    // emitters. The per-field `toContain` checks would not notice a reordered
    // field, a different separator, or an added prefix; this exact match does.
    const message = buildFatalExitLog({
      handler: "uncaughtException",
      exitCode: 1,
      signal: null,
      isWarm: false,
    });

    expect(message).toBe(
      "Dev server fatal exit event=dev_server_fatal_exit handler=uncaughtException exit_code=1 signal=none is_warm=false",
    );
  });
});
|