@nathapp/nax 0.37.0 → 0.38.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/nax.js +3258 -2894
- package/package.json +4 -1
- package/src/agents/claude-complete.ts +72 -0
- package/src/agents/claude-execution.ts +189 -0
- package/src/agents/claude-interactive.ts +77 -0
- package/src/agents/claude-plan.ts +23 -8
- package/src/agents/claude.ts +64 -349
- package/src/analyze/classifier.ts +2 -1
- package/src/cli/config-descriptions.ts +206 -0
- package/src/cli/config-diff.ts +103 -0
- package/src/cli/config-display.ts +285 -0
- package/src/cli/config-get.ts +55 -0
- package/src/cli/config.ts +7 -618
- package/src/cli/prompts-export.ts +58 -0
- package/src/cli/prompts-init.ts +200 -0
- package/src/cli/prompts-main.ts +237 -0
- package/src/cli/prompts-tdd.ts +78 -0
- package/src/cli/prompts.ts +10 -541
- package/src/commands/logs-formatter.ts +201 -0
- package/src/commands/logs-reader.ts +171 -0
- package/src/commands/logs.ts +11 -362
- package/src/config/loader.ts +4 -15
- package/src/config/runtime-types.ts +448 -0
- package/src/config/schema-types.ts +53 -0
- package/src/config/types.ts +49 -486
- package/src/context/auto-detect.ts +2 -1
- package/src/context/builder.ts +3 -2
- package/src/execution/crash-heartbeat.ts +77 -0
- package/src/execution/crash-recovery.ts +23 -365
- package/src/execution/crash-signals.ts +149 -0
- package/src/execution/crash-writer.ts +154 -0
- package/src/execution/parallel-coordinator.ts +278 -0
- package/src/execution/parallel-executor-rectification-pass.ts +117 -0
- package/src/execution/parallel-executor-rectify.ts +135 -0
- package/src/execution/parallel-executor.ts +19 -211
- package/src/execution/parallel-worker.ts +148 -0
- package/src/execution/parallel.ts +5 -404
- package/src/execution/pid-registry.ts +3 -8
- package/src/execution/runner-completion.ts +160 -0
- package/src/execution/runner-execution.ts +221 -0
- package/src/execution/runner-setup.ts +82 -0
- package/src/execution/runner.ts +53 -202
- package/src/execution/timeout-handler.ts +100 -0
- package/src/hooks/runner.ts +11 -21
- package/src/metrics/tracker.ts +7 -30
- package/src/pipeline/runner.ts +2 -1
- package/src/pipeline/stages/completion.ts +0 -1
- package/src/pipeline/stages/context.ts +2 -1
- package/src/plugins/extensions.ts +225 -0
- package/src/plugins/loader.ts +2 -1
- package/src/plugins/types.ts +16 -221
- package/src/prd/index.ts +2 -1
- package/src/prd/validate.ts +41 -0
- package/src/precheck/checks-blockers.ts +15 -419
- package/src/precheck/checks-cli.ts +68 -0
- package/src/precheck/checks-config.ts +102 -0
- package/src/precheck/checks-git.ts +87 -0
- package/src/precheck/checks-system.ts +163 -0
- package/src/review/orchestrator.ts +19 -6
- package/src/review/runner.ts +17 -5
- package/src/routing/chain.ts +2 -1
- package/src/routing/loader.ts +2 -5
- package/src/tdd/orchestrator.ts +2 -1
- package/src/tdd/verdict-reader.ts +266 -0
- package/src/tdd/verdict.ts +6 -271
- package/src/utils/errors.ts +12 -0
- package/src/utils/git.ts +12 -5
- package/src/utils/json-file.ts +72 -0
- package/src/verification/executor.ts +2 -1
- package/src/verification/smart-runner.ts +23 -3
- package/src/worktree/manager.ts +9 -3
- package/src/worktree/merge.ts +3 -2
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import { appendFileSync } from "node:fs";
|
|
2
1
|
/**
|
|
3
|
-
* Crash Recovery
|
|
2
|
+
* Crash Recovery Orchestrator
|
|
4
3
|
*
|
|
5
4
|
* Implements US-007:
|
|
6
5
|
* - SIGTERM/SIGINT/SIGHUP handlers
|
|
@@ -8,15 +7,30 @@ import { appendFileSync } from "node:fs";
|
|
|
8
7
|
* - Fatal log + status.json update to "crashed"
|
|
9
8
|
* - Heartbeat every 60s during agent execution
|
|
10
9
|
* - Exit summary entry on normal exit
|
|
10
|
+
*
|
|
11
|
+
* Re-exports crash detection and writer modules.
|
|
11
12
|
*/
|
|
12
13
|
|
|
13
|
-
import {
|
|
14
|
+
import { stopHeartbeat } from "./crash-heartbeat";
|
|
15
|
+
import { installSignalHandlers } from "./crash-signals";
|
|
14
16
|
import type { PidRegistry } from "./pid-registry";
|
|
15
17
|
import type { StatusWriter } from "./status-writer";
|
|
16
18
|
|
|
19
|
+
// Re-export for backward compatibility
|
|
20
|
+
export {
|
|
21
|
+
type RunCompleteContext,
|
|
22
|
+
updateStatusToCrashed,
|
|
23
|
+
writeFatalLog,
|
|
24
|
+
writeRunComplete,
|
|
25
|
+
writeExitSummary,
|
|
26
|
+
} from "./crash-writer";
|
|
27
|
+
|
|
28
|
+
export { type SignalHandlerContext, installSignalHandlers } from "./crash-signals";
|
|
29
|
+
|
|
30
|
+
export { startHeartbeat, stopHeartbeat, _isHeartbeatActive } from "./crash-heartbeat";
|
|
31
|
+
|
|
17
32
|
/**
|
|
18
33
|
* Crash recovery context — dependencies injected at setup
|
|
19
|
-
* (BUG-1 fix: use getters to avoid capturing stale closure values)
|
|
20
34
|
*/
|
|
21
35
|
export interface CrashRecoveryContext {
|
|
22
36
|
statusWriter: StatusWriter;
|
|
@@ -24,381 +38,33 @@ export interface CrashRecoveryContext {
|
|
|
24
38
|
getIterations: () => number;
|
|
25
39
|
jsonlFilePath?: string;
|
|
26
40
|
pidRegistry?: PidRegistry;
|
|
27
|
-
// BUG-017: Additional context for run.complete event on SIGTERM
|
|
28
41
|
runId?: string;
|
|
29
42
|
feature?: string;
|
|
30
|
-
// SFC-002: Feature directory for writing feature-level status on crash
|
|
31
43
|
featureDir?: string;
|
|
32
44
|
getStartTime?: () => number;
|
|
33
45
|
getTotalStories?: () => number;
|
|
34
46
|
getStoriesCompleted?: () => number;
|
|
35
|
-
/** Optional callback to emit run:errored event (fire-and-forget) */
|
|
36
47
|
emitError?: (reason: string) => void;
|
|
37
48
|
}
|
|
38
49
|
|
|
39
|
-
/**
|
|
40
|
-
* Heartbeat timer handle (for cleanup)
|
|
41
|
-
*/
|
|
42
|
-
let heartbeatTimer: Timer | null = null;
|
|
43
|
-
|
|
44
|
-
/**
|
|
45
|
-
* Track whether crash handlers have been installed
|
|
46
|
-
*/
|
|
47
50
|
let handlersInstalled = false;
|
|
48
51
|
|
|
49
52
|
/**
|
|
50
|
-
*
|
|
51
|
-
*/
|
|
52
|
-
async function writeFatalLog(jsonlFilePath: string | undefined, signal: string, error?: Error): Promise<void> {
|
|
53
|
-
if (!jsonlFilePath) return;
|
|
54
|
-
|
|
55
|
-
try {
|
|
56
|
-
const fatalEntry = {
|
|
57
|
-
timestamp: new Date().toISOString(),
|
|
58
|
-
level: "error",
|
|
59
|
-
stage: "crash-recovery",
|
|
60
|
-
message: error ? `Uncaught exception: ${error.message}` : `Process terminated by ${signal}`,
|
|
61
|
-
data: {
|
|
62
|
-
signal,
|
|
63
|
-
...(error && {
|
|
64
|
-
stack: error.stack,
|
|
65
|
-
name: error.name,
|
|
66
|
-
}),
|
|
67
|
-
},
|
|
68
|
-
};
|
|
69
|
-
|
|
70
|
-
const line = `${JSON.stringify(fatalEntry)}\n`;
|
|
71
|
-
// Use Bun.write with append: true
|
|
72
|
-
appendFileSync(jsonlFilePath, line);
|
|
73
|
-
} catch (err) {
|
|
74
|
-
console.error("[crash-recovery] Failed to write fatal log:", err);
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
/**
|
|
79
|
-
* Write run.complete event to JSONL file (BUG-017)
|
|
80
|
-
* Called on SIGTERM to emit final run summary before exit
|
|
81
|
-
*/
|
|
82
|
-
async function writeRunComplete(ctx: CrashRecoveryContext, exitReason: string): Promise<void> {
|
|
83
|
-
if (!ctx.jsonlFilePath || !ctx.runId || !ctx.feature) return;
|
|
84
|
-
|
|
85
|
-
const logger = getSafeLogger();
|
|
86
|
-
|
|
87
|
-
try {
|
|
88
|
-
const totalCost = ctx.getTotalCost();
|
|
89
|
-
const iterations = ctx.getIterations();
|
|
90
|
-
const startTime = ctx.getStartTime?.() ?? Date.now();
|
|
91
|
-
const durationMs = Date.now() - startTime;
|
|
92
|
-
const totalStories = ctx.getTotalStories?.() ?? 0;
|
|
93
|
-
const storiesCompleted = ctx.getStoriesCompleted?.() ?? 0;
|
|
94
|
-
|
|
95
|
-
const runCompleteEntry = {
|
|
96
|
-
timestamp: new Date().toISOString(),
|
|
97
|
-
level: "info",
|
|
98
|
-
stage: "run.complete",
|
|
99
|
-
message: "Feature execution terminated",
|
|
100
|
-
data: {
|
|
101
|
-
runId: ctx.runId,
|
|
102
|
-
feature: ctx.feature,
|
|
103
|
-
success: false,
|
|
104
|
-
exitReason,
|
|
105
|
-
totalCost,
|
|
106
|
-
iterations,
|
|
107
|
-
totalStories,
|
|
108
|
-
storiesCompleted,
|
|
109
|
-
durationMs,
|
|
110
|
-
},
|
|
111
|
-
};
|
|
112
|
-
|
|
113
|
-
const line = `${JSON.stringify(runCompleteEntry)}\n`;
|
|
114
|
-
appendFileSync(ctx.jsonlFilePath, line);
|
|
115
|
-
logger?.debug("crash-recovery", "run.complete event written", { exitReason });
|
|
116
|
-
} catch (err) {
|
|
117
|
-
console.error("[crash-recovery] Failed to write run.complete event:", err);
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
/**
|
|
122
|
-
* Update status.json to "crashed" state (both project-level and feature-level)
|
|
123
|
-
*/
|
|
124
|
-
async function updateStatusToCrashed(
|
|
125
|
-
statusWriter: StatusWriter,
|
|
126
|
-
totalCost: number,
|
|
127
|
-
iterations: number,
|
|
128
|
-
signal: string,
|
|
129
|
-
featureDir?: string,
|
|
130
|
-
): Promise<void> {
|
|
131
|
-
try {
|
|
132
|
-
statusWriter.setRunStatus("crashed");
|
|
133
|
-
await statusWriter.update(totalCost, iterations, {
|
|
134
|
-
crashedAt: new Date().toISOString(),
|
|
135
|
-
crashSignal: signal,
|
|
136
|
-
});
|
|
137
|
-
|
|
138
|
-
// Write feature-level status (SFC-002)
|
|
139
|
-
if (featureDir) {
|
|
140
|
-
await statusWriter.writeFeatureStatus(featureDir, totalCost, iterations, {
|
|
141
|
-
crashedAt: new Date().toISOString(),
|
|
142
|
-
crashSignal: signal,
|
|
143
|
-
});
|
|
144
|
-
}
|
|
145
|
-
} catch (err) {
|
|
146
|
-
console.error("[crash-recovery] Failed to update status.json:", err);
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
/**
|
|
151
|
-
* Install signal handlers for crash recovery
|
|
152
|
-
* (MEM-1 fix: return cleanup function to unregister handlers)
|
|
53
|
+
* Install crash handlers for recovery
|
|
153
54
|
*/
|
|
154
55
|
export function installCrashHandlers(ctx: CrashRecoveryContext): () => void {
|
|
155
56
|
if (handlersInstalled) {
|
|
156
|
-
return () => {};
|
|
57
|
+
return () => {};
|
|
157
58
|
}
|
|
158
59
|
|
|
159
|
-
const
|
|
160
|
-
|
|
161
|
-
// Signal handler
|
|
162
|
-
const handleSignal = async (signal: NodeJS.Signals) => {
|
|
163
|
-
// Hard deadline: force exit if any async operation hangs (FIX-H5)
|
|
164
|
-
const hardDeadline = setTimeout(() => {
|
|
165
|
-
process.exit(128 + getSignalNumber(signal));
|
|
166
|
-
}, 10_000);
|
|
167
|
-
if (hardDeadline.unref) hardDeadline.unref();
|
|
168
|
-
|
|
169
|
-
logger?.error("crash-recovery", `Received ${signal}, shutting down...`, { signal });
|
|
170
|
-
|
|
171
|
-
// Kill all spawned agent processes
|
|
172
|
-
if (ctx.pidRegistry) {
|
|
173
|
-
await ctx.pidRegistry.killAll();
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
// Emit run:errored event (fire-and-forget)
|
|
177
|
-
ctx.emitError?.(signal.toLowerCase());
|
|
178
|
-
|
|
179
|
-
// Write fatal log
|
|
180
|
-
await writeFatalLog(ctx.jsonlFilePath, signal);
|
|
181
|
-
|
|
182
|
-
// Write run.complete event (BUG-017)
|
|
183
|
-
await writeRunComplete(ctx, signal.toLowerCase());
|
|
184
|
-
|
|
185
|
-
// Update status.json to crashed (SFC-002: include feature-level status)
|
|
186
|
-
await updateStatusToCrashed(ctx.statusWriter, ctx.getTotalCost(), ctx.getIterations(), signal, ctx.featureDir);
|
|
187
|
-
|
|
188
|
-
// Stop heartbeat
|
|
189
|
-
stopHeartbeat();
|
|
190
|
-
|
|
191
|
-
clearTimeout(hardDeadline);
|
|
192
|
-
// Exit cleanly
|
|
193
|
-
process.exit(128 + getSignalNumber(signal));
|
|
194
|
-
};
|
|
195
|
-
|
|
196
|
-
const sigtermHandler = () => handleSignal("SIGTERM");
|
|
197
|
-
const sigintHandler = () => handleSignal("SIGINT");
|
|
198
|
-
const sighupHandler = () => handleSignal("SIGHUP");
|
|
199
|
-
|
|
200
|
-
// Install signal handlers
|
|
201
|
-
process.on("SIGTERM", sigtermHandler);
|
|
202
|
-
process.on("SIGINT", sigintHandler);
|
|
203
|
-
process.on("SIGHUP", sighupHandler);
|
|
204
|
-
|
|
205
|
-
// Uncaught exception handler
|
|
206
|
-
const uncaughtExceptionHandler = async (error: Error) => {
|
|
207
|
-
logger?.error("crash-recovery", "Uncaught exception", {
|
|
208
|
-
error: error.message,
|
|
209
|
-
stack: error.stack,
|
|
210
|
-
});
|
|
211
|
-
|
|
212
|
-
// Kill all spawned agent processes
|
|
213
|
-
if (ctx.pidRegistry) {
|
|
214
|
-
await ctx.pidRegistry.killAll();
|
|
215
|
-
}
|
|
216
|
-
|
|
217
|
-
// Emit run:errored event (fire-and-forget)
|
|
218
|
-
ctx.emitError?.("uncaughtException");
|
|
219
|
-
|
|
220
|
-
// Write fatal log with stack trace
|
|
221
|
-
await writeFatalLog(ctx.jsonlFilePath, "uncaughtException", error);
|
|
222
|
-
|
|
223
|
-
// Update status.json to crashed (SFC-002: include feature-level status)
|
|
224
|
-
await updateStatusToCrashed(
|
|
225
|
-
ctx.statusWriter,
|
|
226
|
-
ctx.getTotalCost(),
|
|
227
|
-
ctx.getIterations(),
|
|
228
|
-
"uncaughtException",
|
|
229
|
-
ctx.featureDir,
|
|
230
|
-
);
|
|
231
|
-
|
|
232
|
-
// Stop heartbeat
|
|
233
|
-
stopHeartbeat();
|
|
234
|
-
|
|
235
|
-
// Exit with error code
|
|
236
|
-
process.exit(1);
|
|
237
|
-
};
|
|
238
|
-
process.on("uncaughtException", uncaughtExceptionHandler);
|
|
239
|
-
|
|
240
|
-
// Unhandled promise rejection handler
|
|
241
|
-
const unhandledRejectionHandler = async (reason: unknown, promise: Promise<unknown>) => {
|
|
242
|
-
const error = reason instanceof Error ? reason : new Error(String(reason));
|
|
243
|
-
logger?.error("crash-recovery", "Unhandled promise rejection", {
|
|
244
|
-
error: error.message,
|
|
245
|
-
stack: error.stack,
|
|
246
|
-
});
|
|
247
|
-
|
|
248
|
-
// Kill all spawned agent processes
|
|
249
|
-
if (ctx.pidRegistry) {
|
|
250
|
-
await ctx.pidRegistry.killAll();
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
// Emit run:errored event (fire-and-forget)
|
|
254
|
-
ctx.emitError?.("unhandledRejection");
|
|
255
|
-
|
|
256
|
-
// Write fatal log
|
|
257
|
-
await writeFatalLog(ctx.jsonlFilePath, "unhandledRejection", error);
|
|
258
|
-
|
|
259
|
-
// Update status.json to crashed (SFC-002: include feature-level status)
|
|
260
|
-
await updateStatusToCrashed(
|
|
261
|
-
ctx.statusWriter,
|
|
262
|
-
ctx.getTotalCost(),
|
|
263
|
-
ctx.getIterations(),
|
|
264
|
-
"unhandledRejection",
|
|
265
|
-
ctx.featureDir,
|
|
266
|
-
);
|
|
267
|
-
|
|
268
|
-
// Stop heartbeat
|
|
269
|
-
stopHeartbeat();
|
|
270
|
-
|
|
271
|
-
// Exit with error code
|
|
272
|
-
process.exit(1);
|
|
273
|
-
};
|
|
274
|
-
process.on("unhandledRejection", unhandledRejectionHandler);
|
|
275
|
-
|
|
60
|
+
const cleanup = installSignalHandlers(ctx);
|
|
276
61
|
handlersInstalled = true;
|
|
277
|
-
logger?.debug("crash-recovery", "Crash handlers installed");
|
|
278
62
|
|
|
279
|
-
// Return cleanup function
|
|
280
63
|
return () => {
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
process.removeListener("SIGHUP", sighupHandler);
|
|
284
|
-
process.removeListener("uncaughtException", uncaughtExceptionHandler);
|
|
285
|
-
process.removeListener("unhandledRejection", unhandledRejectionHandler);
|
|
64
|
+
cleanup();
|
|
65
|
+
stopHeartbeat();
|
|
286
66
|
handlersInstalled = false;
|
|
287
|
-
logger?.debug("crash-recovery", "Crash handlers unregistered");
|
|
288
|
-
};
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
/**
|
|
292
|
-
* Start heartbeat timer (60s interval)
|
|
293
|
-
*/
|
|
294
|
-
export function startHeartbeat(
|
|
295
|
-
statusWriter: StatusWriter,
|
|
296
|
-
getTotalCost: () => number,
|
|
297
|
-
getIterations: () => number,
|
|
298
|
-
jsonlFilePath?: string,
|
|
299
|
-
): void {
|
|
300
|
-
const logger = getSafeLogger();
|
|
301
|
-
|
|
302
|
-
// Stop any existing heartbeat first
|
|
303
|
-
stopHeartbeat();
|
|
304
|
-
|
|
305
|
-
heartbeatTimer = setInterval(async () => {
|
|
306
|
-
logger?.debug("crash-recovery", "Heartbeat");
|
|
307
|
-
|
|
308
|
-
// Write heartbeat to JSONL
|
|
309
|
-
if (jsonlFilePath) {
|
|
310
|
-
try {
|
|
311
|
-
const heartbeatEntry = {
|
|
312
|
-
timestamp: new Date().toISOString(),
|
|
313
|
-
level: "debug",
|
|
314
|
-
stage: "heartbeat",
|
|
315
|
-
message: "Process alive",
|
|
316
|
-
data: {
|
|
317
|
-
pid: process.pid,
|
|
318
|
-
memoryUsageMB: Math.round(process.memoryUsage().heapUsed / 1024 / 1024),
|
|
319
|
-
},
|
|
320
|
-
};
|
|
321
|
-
const line = `${JSON.stringify(heartbeatEntry)}\n`;
|
|
322
|
-
appendFileSync(jsonlFilePath, line);
|
|
323
|
-
} catch (err) {
|
|
324
|
-
logger?.warn("crash-recovery", "Failed to write heartbeat", { error: (err as Error).message });
|
|
325
|
-
}
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
// Update status.json (no-op if nothing changed, but keeps lastHeartbeat fresh)
|
|
329
|
-
try {
|
|
330
|
-
await statusWriter.update(getTotalCost(), getIterations(), {
|
|
331
|
-
lastHeartbeat: new Date().toISOString(),
|
|
332
|
-
});
|
|
333
|
-
} catch (err) {
|
|
334
|
-
logger?.warn("crash-recovery", "Failed to update status during heartbeat", {
|
|
335
|
-
error: (err as Error).message,
|
|
336
|
-
});
|
|
337
|
-
}
|
|
338
|
-
}, 60_000); // 60 seconds
|
|
339
|
-
|
|
340
|
-
logger?.debug("crash-recovery", "Heartbeat started (60s interval)");
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
/**
|
|
344
|
-
* Stop heartbeat timer
|
|
345
|
-
*/
|
|
346
|
-
export function stopHeartbeat(): void {
|
|
347
|
-
if (heartbeatTimer) {
|
|
348
|
-
clearInterval(heartbeatTimer);
|
|
349
|
-
heartbeatTimer = null;
|
|
350
|
-
getSafeLogger()?.debug("crash-recovery", "Heartbeat stopped");
|
|
351
|
-
}
|
|
352
|
-
}
|
|
353
|
-
|
|
354
|
-
/**
|
|
355
|
-
* Write exit summary entry to JSONL
|
|
356
|
-
*/
|
|
357
|
-
export async function writeExitSummary(
|
|
358
|
-
jsonlFilePath: string | undefined,
|
|
359
|
-
totalCost: number,
|
|
360
|
-
iterations: number,
|
|
361
|
-
storiesCompleted: number,
|
|
362
|
-
durationMs: number,
|
|
363
|
-
): Promise<void> {
|
|
364
|
-
if (!jsonlFilePath) return;
|
|
365
|
-
|
|
366
|
-
const logger = getSafeLogger();
|
|
367
|
-
|
|
368
|
-
try {
|
|
369
|
-
const summaryEntry = {
|
|
370
|
-
timestamp: new Date().toISOString(),
|
|
371
|
-
level: "info",
|
|
372
|
-
stage: "exit-summary",
|
|
373
|
-
message: "Run completed",
|
|
374
|
-
data: {
|
|
375
|
-
totalCost,
|
|
376
|
-
iterations,
|
|
377
|
-
storiesCompleted,
|
|
378
|
-
durationMs,
|
|
379
|
-
exitedCleanly: true,
|
|
380
|
-
},
|
|
381
|
-
};
|
|
382
|
-
|
|
383
|
-
const line = `${JSON.stringify(summaryEntry)}\n`;
|
|
384
|
-
// Use Bun.write with append: true
|
|
385
|
-
appendFileSync(jsonlFilePath, line);
|
|
386
|
-
logger?.debug("crash-recovery", "Exit summary written");
|
|
387
|
-
} catch (err) {
|
|
388
|
-
logger?.warn("crash-recovery", "Failed to write exit summary", { error: (err as Error).message });
|
|
389
|
-
}
|
|
390
|
-
}
|
|
391
|
-
|
|
392
|
-
/**
|
|
393
|
-
* Get numeric signal number for exit code
|
|
394
|
-
*/
|
|
395
|
-
function getSignalNumber(signal: NodeJS.Signals): number {
|
|
396
|
-
const signalMap: Record<string, number> = {
|
|
397
|
-
SIGTERM: 15,
|
|
398
|
-
SIGINT: 2,
|
|
399
|
-
SIGHUP: 1,
|
|
400
67
|
};
|
|
401
|
-
return signalMap[signal] ?? 15;
|
|
402
68
|
}
|
|
403
69
|
|
|
404
70
|
/**
|
|
@@ -409,11 +75,3 @@ export function resetCrashHandlers(): void {
|
|
|
409
75
|
handlersInstalled = false;
|
|
410
76
|
stopHeartbeat();
|
|
411
77
|
}
|
|
412
|
-
|
|
413
|
-
/**
|
|
414
|
-
* Returns true if heartbeat timer is currently active.
|
|
415
|
-
* @internal - test use only.
|
|
416
|
-
*/
|
|
417
|
-
export function _isHeartbeatActive(): boolean {
|
|
418
|
-
return heartbeatTimer !== null;
|
|
419
|
-
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Crash detection — Signal and exception handlers
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { getSafeLogger } from "../logger";
|
|
6
|
+
import { type RunCompleteContext, updateStatusToCrashed, writeFatalLog, writeRunComplete } from "./crash-writer";
|
|
7
|
+
import type { PidRegistry } from "./pid-registry";
|
|
8
|
+
import type { StatusWriter } from "./status-writer";
|
|
9
|
+
|
|
10
|
+
/**
 * Dependencies consumed by the signal and exception handlers in this module.
 *
 * Builds on RunCompleteContext (declared in ./crash-writer); only the members
 * added here are described below.
 */
export interface SignalHandlerContext extends RunCompleteContext {
  /** Handed to updateStatusToCrashed whenever a handler fires. */
  statusWriter: StatusWriter;
  /** When provided, every handler awaits its killAll() before writing any crash record. */
  pidRegistry?: PidRegistry;
  /** Forwarded to updateStatusToCrashed as its final argument. */
  featureDir?: string;
  /**
   * Invoked by the handlers with the lower-cased signal name,
   * "uncaughtException", or "unhandledRejection". Its result is not awaited.
   */
  emitError?: (reason: string) => void;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Translate a termination signal name into its numeric value.
 *
 * @param signal - Name of the signal that was received.
 * @returns 15 for SIGTERM, 2 for SIGINT, 1 for SIGHUP, and 15 for any other signal.
 */
function getSignalNumber(signal: NodeJS.Signals): number {
  switch (signal) {
    case "SIGINT":
      return 2;
    case "SIGHUP":
      return 1;
    case "SIGTERM":
    default:
      // Signals without an explicit entry are reported as SIGTERM.
      return 15;
  }
}
|
|
31
|
+
|
|
32
|
+
/**
 * Build the async handler used for termination signals.
 *
 * When invoked, the returned function performs these steps in order:
 *  1. arms a 10 second timer that force-exits the process,
 *  2. logs the received signal,
 *  3. awaits ctx.pidRegistry.killAll() if a registry was supplied,
 *  4. calls ctx.emitError with the lower-cased signal name,
 *  5. awaits writeFatalLog, writeRunComplete and updateStatusToCrashed,
 *  6. clears the timer and exits with code 128 + signal number.
 *
 * NOTE(review): none of the awaited steps is guarded here; if one of them
 * rejects, the final process.exit in this function is not reached — confirm
 * that the crash-writer helpers and killAll() never reject.
 *
 * @param ctx - Handler dependencies.
 * @returns Handler taking the signal name; it terminates the process.
 */
function createSignalHandler(ctx: SignalHandlerContext): (signal: NodeJS.Signals) => Promise<void> {
  const onSignal = async (signal: NodeJS.Signals): Promise<void> => {
    const exitCode = 128 + getSignalNumber(signal);
    const reason = signal.toLowerCase();

    // Safety net: exit anyway if the cleanup below takes longer than 10 seconds.
    const forceExitTimer = setTimeout(() => {
      process.exit(exitCode);
    }, 10_000);
    // unref so this timer by itself does not keep the event loop alive.
    forceExitTimer.unref?.();

    getSafeLogger()?.error("crash-recovery", `Received ${signal}, shutting down...`, { signal });

    const registry = ctx.pidRegistry;
    if (registry) {
      await registry.killAll();
    }

    ctx.emitError?.(reason);

    await writeFatalLog(ctx.jsonlFilePath, signal);
    await writeRunComplete(ctx, reason);
    await updateStatusToCrashed(ctx.statusWriter, ctx.getTotalCost(), ctx.getIterations(), signal, ctx.featureDir);

    clearTimeout(forceExitTimer);
    process.exit(exitCode);
  };

  return onSignal;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Build the async handler for the process "uncaughtException" event.
 *
 * When invoked, the returned function logs the error message and stack,
 * awaits ctx.pidRegistry.killAll() if a registry was supplied, calls
 * ctx.emitError("uncaughtException"), awaits writeFatalLog and
 * updateStatusToCrashed, and finally exits the process with code 1.
 *
 * @param ctx - Handler dependencies.
 * @returns Handler taking the uncaught error; it terminates the process.
 */
function createUncaughtExceptionHandler(ctx: SignalHandlerContext): (error: Error) => Promise<void> {
  const cause = "uncaughtException";

  const onUncaughtException = async (error: Error): Promise<void> => {
    const { message, stack } = error;
    getSafeLogger()?.error("crash-recovery", "Uncaught exception", { error: message, stack });

    const registry = ctx.pidRegistry;
    if (registry) {
      await registry.killAll();
    }

    ctx.emitError?.(cause);
    await writeFatalLog(ctx.jsonlFilePath, cause, error);
    await updateStatusToCrashed(ctx.statusWriter, ctx.getTotalCost(), ctx.getIterations(), cause, ctx.featureDir);

    process.exit(1);
  };

  return onUncaughtException;
}
|
|
88
|
+
|
|
89
|
+
/**
 * Build the async handler for the process "unhandledRejection" event.
 *
 * A rejection reason that is not already an Error is wrapped in
 * `new Error(String(reason))`. The returned function then logs the message
 * and stack, awaits ctx.pidRegistry.killAll() if a registry was supplied,
 * calls ctx.emitError("unhandledRejection"), awaits writeFatalLog and
 * updateStatusToCrashed, and finally exits the process with code 1.
 *
 * NOTE(review): if one of the awaited steps rejects, this handler's own
 * promise rejects before process.exit(1) runs, which may fire this same
 * event again — confirm that killAll() and the crash-writer helpers never reject.
 *
 * @param ctx - Handler dependencies.
 * @returns Handler taking the rejection reason; it terminates the process.
 */
function createUnhandledRejectionHandler(ctx: SignalHandlerContext): (reason: unknown) => Promise<void> {
  const cause = "unhandledRejection";

  const onUnhandledRejection = async (reason: unknown): Promise<void> => {
    let error: Error;
    if (reason instanceof Error) {
      error = reason;
    } else {
      error = new Error(String(reason));
    }

    getSafeLogger()?.error("crash-recovery", "Unhandled promise rejection", {
      error: error.message,
      stack: error.stack,
    });

    const registry = ctx.pidRegistry;
    if (registry) {
      await registry.killAll();
    }

    ctx.emitError?.(cause);
    await writeFatalLog(ctx.jsonlFilePath, cause, error);
    await updateStatusToCrashed(ctx.statusWriter, ctx.getTotalCost(), ctx.getIterations(), cause, ctx.featureDir);

    process.exit(1);
  };

  return onUnhandledRejection;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Register the crash handlers on `process` and return a function that removes them.
 *
 * Listeners are attached for SIGTERM, SIGINT, SIGHUP, "uncaughtException" and
 * "unhandledRejection". Every listener is kept in a local constant so the
 * cleanup function can pass the exact same function reference to
 * `process.removeListener`; a listener can only be removed by identity.
 *
 * @param ctx - Dependencies forwarded to each handler factory.
 * @returns Cleanup function that unregisters all five listeners.
 */
export function installSignalHandlers(ctx: SignalHandlerContext): () => void {
  const logger = getSafeLogger();

  const signalHandler = createSignalHandler(ctx);
  const uncaughtExceptionHandler = createUncaughtExceptionHandler(ctx);
  const unhandledRejectionHandler = createUnhandledRejectionHandler(ctx);

  // One wrapper per signal, because the shared handler needs to know which signal fired.
  const sigtermHandler = () => signalHandler("SIGTERM");
  const sigintHandler = () => signalHandler("SIGINT");
  const sighupHandler = () => signalHandler("SIGHUP");

  process.on("SIGTERM", sigtermHandler);
  process.on("SIGINT", sigintHandler);
  process.on("SIGHUP", sighupHandler);
  process.on("uncaughtException", uncaughtExceptionHandler);
  // Registered by reference (not through an inline arrow) so cleanup can remove it.
  process.on("unhandledRejection", unhandledRejectionHandler);

  logger?.debug("crash-recovery", "Signal handlers installed");

  return () => {
    process.removeListener("SIGTERM", sigtermHandler);
    process.removeListener("SIGINT", sigintHandler);
    process.removeListener("SIGHUP", sighupHandler);
    process.removeListener("uncaughtException", uncaughtExceptionHandler);
    process.removeListener("unhandledRejection", unhandledRejectionHandler);
    logger?.debug("crash-recovery", "Signal handlers unregistered");
  };
}
|