@link-assistant/hive-mind 1.72.7 → 1.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/package.json +1 -1
- package/src/exit-handler.lib.mjs +97 -2
- package/src/hive.config.lib.mjs +11 -0
- package/src/hive.mjs +58 -57
- package/src/hive.shutdown.lib.mjs +161 -0
- package/src/interruptible-sleep.lib.mjs +16 -6
- package/src/solve.config.lib.mjs +5 -0
- package/src/solve.mjs +30 -0
- package/src/working-session.lib.mjs +166 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# @link-assistant/hive-mind
|
|
2
2
|
|
|
3
|
+
## 1.73.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- 1cd647d: Fix all errors on graceful shutdown and add an experimental working-session guard.
|
|
8
|
+
|
|
9
|
+
`hive` now fully waits for every in-flight `/solve` to finish before exiting on CTRL+C / `--stop`: signal handling is delegated to a single owner (resolving a double SIGINT-handler race that called `process.exit(130)` and cut the wait short), each solve worker is spawned in its own detached process group so the terminal's SIGINT no longer aborts solve/codex mid-task, and the wait has no time cap. Worker stderr is no longer mislabeled as `ERROR` — the child exit code remains the authoritative failure signal.
|
|
10
|
+
|
|
11
|
+
Building on that, a new experimental `--do-not-shutdown-in-the-middle-of-working-session` option is added to `solve` and enabled by default for `hive`. With it, an interrupt (CTRL+C / SIGTERM) no longer aborts the AI tool mid-run: if an AI working session is in progress, solve finishes it, auto-commits any uncommitted changes, then shuts down gracefully (exit 130/143); if solve is only idle-waiting (e.g. for CI/CD) it stops immediately, and a second interrupt force-stops. `hive` now forwards a controlled SIGTERM to each in-flight `/solve` worker on the first CTRL+C (instead of only waiting) and passes the flag to every worker (opt out with `--no-do-not-shutdown-in-the-middle-of-working-session`). Graceful shutdown is treated as a normal stop, so it no longer posts a spurious "solution draft failed" comment. Standalone `solve` keeps the flag off by default, so its behavior is unchanged except that an interrupt now always auto-commits uncommitted changes before exiting.
|
|
12
|
+
|
|
3
13
|
## 1.72.7
|
|
4
14
|
|
|
5
15
|
### Patch Changes
|
package/package.json
CHANGED
package/src/exit-handler.lib.mjs
CHANGED
|
@@ -6,6 +6,10 @@
|
|
|
6
6
|
* the process exits, whether due to normal completion, errors, or signals.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
+
// Issue #1823: working-session guard for --do-not-shutdown-in-the-middle-of-working-session.
|
|
10
|
+
// Static import is safe: working-session.lib.mjs has no heavy deps and does NOT import this module.
|
|
11
|
+
import { isFlagEnabled as isWorkingSessionFlagEnabled, isWorkingSessionActive, requestShutdown as requestWorkingSessionShutdown, forceKillActiveChildren as forceKillWorkingSessionChildren } from './working-session.lib.mjs';
|
|
12
|
+
|
|
9
13
|
// Lazy-load Sentry to avoid keeping the event loop alive when not needed
|
|
10
14
|
let Sentry = null;
|
|
11
15
|
const getSentry = async () => {
|
|
@@ -30,6 +34,14 @@ let interruptHandlerRan = false;
|
|
|
30
34
|
let preExitFunction = null;
|
|
31
35
|
let preExitHandlerRan = false;
|
|
32
36
|
|
|
37
|
+
// Issue #1823: When an external owner (e.g. hive's gracefulShutdown) takes over signal
|
|
38
|
+
// handling, the global SIGINT/SIGTERM handlers must stand down and NOT call process.exit().
|
|
39
|
+
// Otherwise the global handler's process.exit() races with the external graceful handler
|
|
40
|
+
// and cuts its wait short — the root cause of premature shutdown that aborts an in-flight
|
|
41
|
+
// /solve (and its codex child) mid-turn. Defaults to false to preserve existing behavior
|
|
42
|
+
// for solve.mjs, telegram-bot, and other entry points that rely on the global handlers.
|
|
43
|
+
let signalHandlingDelegated = false;
|
|
44
|
+
|
|
33
45
|
/**
|
|
34
46
|
* Initialize the exit handler with required dependencies
|
|
35
47
|
* @param {Function} getLogPath - Function that returns the current log path
|
|
@@ -50,6 +62,20 @@ export const setPreExitHandler = preExit => {
|
|
|
50
62
|
preExitFunction = preExit;
|
|
51
63
|
};
|
|
52
64
|
|
|
65
|
+
/**
|
|
66
|
+
* Issue #1823: Delegate SIGINT/SIGTERM handling to an external graceful shutdown owner.
|
|
67
|
+
*
|
|
68
|
+
* When enabled, the global SIGINT/SIGTERM handlers installed by installGlobalExitHandlers()
|
|
69
|
+
* stand down (return early) instead of calling process.exit(). This lets a caller such as
|
|
70
|
+
* hive's gracefulShutdown() fully wait for in-progress work (e.g. an executing /solve) to
|
|
71
|
+
* finish and then exit via safeExit(), without the global handler racing it to process.exit().
|
|
72
|
+
*
|
|
73
|
+
* @param {boolean} enabled - true to delegate (caller owns exit), false to restore default.
|
|
74
|
+
*/
|
|
75
|
+
export const delegateSignalHandling = (enabled = true) => {
|
|
76
|
+
signalHandlingDelegated = enabled;
|
|
77
|
+
};
|
|
78
|
+
|
|
53
79
|
/**
|
|
54
80
|
* Display the exit message with log path
|
|
55
81
|
*/
|
|
@@ -203,11 +229,17 @@ export const logActiveHandles = async (log = null) => {
|
|
|
203
229
|
|
|
204
230
|
/**
|
|
205
231
|
* Safe exit function that ensures log path is shown
|
|
232
|
+
*
|
|
233
|
+
* @param {number} code - Process exit code
|
|
234
|
+
* @param {string} reason - Human-readable exit reason
|
|
235
|
+
* @param {object} [options]
|
|
236
|
+
* @param {boolean} [options.skipPreExit=false] - Issue #1823: skip the pre-exit failure notifier
|
|
237
|
+
* (e.g. on graceful shutdown, which is NOT a failure and must not post a "solver failed" comment).
|
|
206
238
|
*/
|
|
207
|
-
export const safeExit = async (code = 0, reason = 'Process completed') => {
|
|
239
|
+
export const safeExit = async (code = 0, reason = 'Process completed', { skipPreExit = false } = {}) => {
|
|
208
240
|
await showExitMessage(reason, code);
|
|
209
241
|
|
|
210
|
-
if (code !== 0 && preExitFunction && !preExitHandlerRan) {
|
|
242
|
+
if (!skipPreExit && code !== 0 && preExitFunction && !preExitHandlerRan) {
|
|
211
243
|
preExitHandlerRan = true;
|
|
212
244
|
try {
|
|
213
245
|
await preExitFunction({ code, reason });
|
|
@@ -273,6 +305,34 @@ export const installGlobalExitHandlers = () => {
|
|
|
273
305
|
|
|
274
306
|
// Handle SIGINT (CTRL+C)
|
|
275
307
|
process.on('SIGINT', async () => {
|
|
308
|
+
// Issue #1823: If an external graceful-shutdown owner is registered, stand down.
|
|
309
|
+
// That owner (e.g. hive's gracefulShutdown) is responsible for waiting for in-progress
|
|
310
|
+
// work and exiting via safeExit(). Calling process.exit(130) here would race with it
|
|
311
|
+
// and cut the wait short — the root cause of the premature shutdown.
|
|
312
|
+
if (signalHandlingDelegated) {
|
|
313
|
+
return;
|
|
314
|
+
}
|
|
315
|
+
// Issue #1823: With --do-not-shutdown-in-the-middle-of-working-session, defer shutdown while
|
|
316
|
+
// an AI working session is in progress so the AI tool is never aborted mid-run.
|
|
317
|
+
if (isWorkingSessionFlagEnabled() && isWorkingSessionActive()) {
|
|
318
|
+
const { first } = requestWorkingSessionShutdown('SIGINT');
|
|
319
|
+
if (first) {
|
|
320
|
+
if (logFunction) {
|
|
321
|
+
await logFunction('\n⚠️ Shutdown requested (CTRL+C). Finishing the current AI working session, then auto-committing and stopping. Press CTRL+C again to force-stop now.', { level: 'warning' });
|
|
322
|
+
}
|
|
323
|
+
return; // defer — solve will auto-commit + exit once the session ends
|
|
324
|
+
}
|
|
325
|
+
// Second interrupt → operator insists. Force-kill the AI child group, then fall through to
|
|
326
|
+
// auto-commit + exit below.
|
|
327
|
+
if (logFunction) {
|
|
328
|
+
await logFunction('\n⚠️ Second interrupt — force-stopping the AI working session now.', { level: 'warning' });
|
|
329
|
+
}
|
|
330
|
+
try {
|
|
331
|
+
forceKillWorkingSessionChildren();
|
|
332
|
+
} catch {
|
|
333
|
+
// ignore — child may already be gone
|
|
334
|
+
}
|
|
335
|
+
}
|
|
276
336
|
// Run interrupt handler first (auto-commit, log upload, etc.) — guard against double invocation
|
|
277
337
|
if (interruptFunction && !interruptHandlerRan) {
|
|
278
338
|
interruptHandlerRan = true;
|
|
@@ -303,6 +363,40 @@ export const installGlobalExitHandlers = () => {
|
|
|
303
363
|
|
|
304
364
|
// Handle SIGTERM
|
|
305
365
|
process.on('SIGTERM', async () => {
|
|
366
|
+
// Issue #1823: Stand down when an external graceful-shutdown owner is registered.
|
|
367
|
+
if (signalHandlingDelegated) {
|
|
368
|
+
return;
|
|
369
|
+
}
|
|
370
|
+
// Issue #1823: hive forwards the operator's CTRL+C to each /solve worker as SIGTERM (which
|
|
371
|
+
// command-stream ignores). With --do-not-shutdown-in-the-middle-of-working-session, defer
|
|
372
|
+
// shutdown while an AI working session is in progress so the AI tool finishes its turn.
|
|
373
|
+
if (isWorkingSessionFlagEnabled() && isWorkingSessionActive()) {
|
|
374
|
+
const { first } = requestWorkingSessionShutdown('SIGTERM');
|
|
375
|
+
if (first) {
|
|
376
|
+
if (logFunction) {
|
|
377
|
+
await logFunction('\n⚠️ Shutdown requested. Finishing the current AI working session, then auto-committing and stopping. Send the signal again to force-stop now.', { level: 'warning' });
|
|
378
|
+
}
|
|
379
|
+
return; // defer — solve will auto-commit + exit once the session ends
|
|
380
|
+
}
|
|
381
|
+
if (logFunction) {
|
|
382
|
+
await logFunction('\n⚠️ Second signal — force-stopping the AI working session now.', { level: 'warning' });
|
|
383
|
+
}
|
|
384
|
+
try {
|
|
385
|
+
forceKillWorkingSessionChildren();
|
|
386
|
+
} catch {
|
|
387
|
+
// ignore — child may already be gone
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
// Issue #1823: Auto-commit uncommitted changes on SIGTERM too (previously only SIGINT did).
|
|
391
|
+
// This ensures graceful shutdown preserves work in ALL signal paths.
|
|
392
|
+
if (interruptFunction && !interruptHandlerRan) {
|
|
393
|
+
interruptHandlerRan = true;
|
|
394
|
+
try {
|
|
395
|
+
await interruptFunction();
|
|
396
|
+
} catch {
|
|
397
|
+
// Ignore interrupt handler errors
|
|
398
|
+
}
|
|
399
|
+
}
|
|
306
400
|
if (cleanupFunction) {
|
|
307
401
|
try {
|
|
308
402
|
await cleanupFunction();
|
|
@@ -377,4 +471,5 @@ export const installGlobalExitHandlers = () => {
|
|
|
377
471
|
export const resetExitHandler = () => {
|
|
378
472
|
exitMessageShown = false;
|
|
379
473
|
interruptHandlerRan = false;
|
|
474
|
+
signalHandlingDelegated = false;
|
|
380
475
|
};
|
package/src/hive.config.lib.mjs
CHANGED
|
@@ -50,6 +50,17 @@ const HIVE_CUSTOM_SOLVE_OPTIONS = {
|
|
|
50
50
|
choices: ['claude', 'opencode', 'codex', 'agent', 'qwen', 'gemini'],
|
|
51
51
|
default: 'claude',
|
|
52
52
|
},
|
|
53
|
+
// Issue #1823: hive enables the experimental working-session guard for every /solve worker by
|
|
54
|
+
// default. This is the ONLY change to how CTRL+C behaves in the hive workflow: instead of
|
|
55
|
+
// aborting the AI tool mid-run, a forwarded interrupt lets the worker finish its current AI
|
|
56
|
+
// working session, auto-commit, then shut down gracefully. solve keeps default:false (standalone
|
|
57
|
+
// behavior unchanged); hive overrides the default to true so the loop below forwards the flag.
|
|
58
|
+
// Operators can opt out with --no-do-not-shutdown-in-the-middle-of-working-session.
|
|
59
|
+
'do-not-shutdown-in-the-middle-of-working-session': {
|
|
60
|
+
type: 'boolean',
|
|
61
|
+
description: '[EXPERIMENTAL] On CTRL+C, let each /solve worker finish its current AI working session and auto-commit before shutting down, instead of aborting it mid-run. If a worker is only idle-waiting (e.g. for CI/CD), it stops immediately. Press CTRL+C again to force-stop. Enabled by default for the hive workflow.',
|
|
62
|
+
default: true,
|
|
63
|
+
},
|
|
53
64
|
};
|
|
54
65
|
|
|
55
66
|
// Compute the set of solve options that hive auto-registers from SOLVE_OPTION_DEFINITIONS.
|
package/src/hive.mjs
CHANGED
|
@@ -35,6 +35,7 @@ if (earlyArgs.includes('--help') || earlyArgs.includes('-h')) {
|
|
|
35
35
|
}
|
|
36
36
|
export { createYargsConfig } from './hive.config.lib.mjs';
|
|
37
37
|
import { isDirectExecution, withTimeout } from './hive.bootstrap.lib.mjs';
|
|
38
|
+
import { createShutdownManager } from './hive.shutdown.lib.mjs';
|
|
38
39
|
const isRunningDirectly = isDirectExecution(process.argv[1], import.meta.url);
|
|
39
40
|
if (isRunningDirectly) {
|
|
40
41
|
console.log('🐝 Hive Mind - AI-powered issue solver');
|
|
@@ -88,7 +89,7 @@ if (isRunningDirectly) {
|
|
|
88
89
|
const memCheck = await import('./memory-check.mjs');
|
|
89
90
|
const { checkSystem } = memCheck;
|
|
90
91
|
const exitHandler = await import('./exit-handler.lib.mjs');
|
|
91
|
-
const { initializeExitHandler, installGlobalExitHandlers, safeExit } = exitHandler;
|
|
92
|
+
const { initializeExitHandler, installGlobalExitHandlers, safeExit, delegateSignalHandling } = exitHandler;
|
|
92
93
|
const sentryLib = await import('./sentry.lib.mjs');
|
|
93
94
|
const { initializeSentry, withSentry, addBreadcrumb, reportError } = sentryLib;
|
|
94
95
|
const graphqlLib = await import('./github.graphql.lib.mjs');
|
|
@@ -709,8 +710,10 @@ if (isRunningDirectly) {
|
|
|
709
710
|
// Create global queue instance
|
|
710
711
|
const issueQueue = new IssueQueue();
|
|
711
712
|
|
|
712
|
-
//
|
|
713
|
-
|
|
713
|
+
// Issue #1823: Track in-flight solve child processes. A *first* interrupt forwards a
|
|
714
|
+
// controlled SIGTERM to each (they run in their own detached process group, so the
|
|
715
|
+
// terminal's SIGINT never reaches them); a *second* interrupt force-kills the groups.
|
|
716
|
+
const activeSolveChildren = new Set();
|
|
714
717
|
|
|
715
718
|
// Worker function to process issues from queue
|
|
716
719
|
async function worker(workerId) {
|
|
@@ -739,6 +742,8 @@ if (isRunningDirectly) {
|
|
|
739
742
|
|
|
740
743
|
// Track if this issue failed
|
|
741
744
|
let issueFailed = false;
|
|
745
|
+
// Issue #1823: Track a graceful shutdown stop so it is neither failed nor completed.
|
|
746
|
+
let gracefulStop = false;
|
|
742
747
|
|
|
743
748
|
// Process the issue multiple times if needed
|
|
744
749
|
for (let prNum = 1; prNum <= argv.pullRequestsPerIssue; prNum++) {
|
|
@@ -811,8 +816,17 @@ if (isRunningDirectly) {
|
|
|
811
816
|
const child = spawn(solveCommand, args, {
|
|
812
817
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
813
818
|
env: process.env,
|
|
819
|
+
// Issue #1823: run solve in its own process group so a terminal SIGINT (or the
|
|
820
|
+
// \003 `$ --stop`/screen injects) hits only hive, not solve+codex. hive instead
|
|
821
|
+
// forwards a controlled SIGTERM (see gracefulShutdown). stdio stays piped and we
|
|
822
|
+
// must NOT unref() — hive keeps waiting. See docs/case-studies/issue-1823.
|
|
823
|
+
detached: true,
|
|
814
824
|
});
|
|
815
825
|
|
|
826
|
+
// Issue #1823: register the in-flight child so the first interrupt can forward SIGTERM to it and a second interrupt can force-kill its process group
|
|
827
|
+
activeSolveChildren.add(child);
|
|
828
|
+
log(` 🧒 Spawned ${solveCommand} worker-${workerId} (pid ${child.pid}, detached process group)`, { verbose: true }).catch(() => {});
|
|
829
|
+
|
|
816
830
|
// Handle stdout data - stream output in real-time
|
|
817
831
|
child.stdout.on('data', data => {
|
|
818
832
|
const lines = data.toString().split('\n');
|
|
@@ -829,16 +843,20 @@ if (isRunningDirectly) {
|
|
|
829
843
|
}
|
|
830
844
|
});
|
|
831
845
|
|
|
832
|
-
// Handle stderr data - stream
|
|
846
|
+
// Handle stderr data - stream output in real-time.
|
|
847
|
+
// Issue #1823: Do NOT blanket-tag stderr as ERROR — solve relays non-error
|
|
848
|
+
// diagnostics there (codex DEBUG/INFO traces, git branch messages, etc.), which
|
|
849
|
+
// produced hundreds of false errors. The authoritative failure signal is the
|
|
850
|
+
// child's non-zero exit code (below), so log stderr at default level.
|
|
833
851
|
child.stderr.on('data', data => {
|
|
834
852
|
const lines = data.toString().split('\n');
|
|
835
853
|
for (const line of lines) {
|
|
836
854
|
if (line.trim()) {
|
|
837
|
-
log(` [${solveCommand} worker-${workerId}
|
|
855
|
+
log(` [${solveCommand} worker-${workerId} stderr] ${line}`).catch(logError => {
|
|
838
856
|
reportError(logError, {
|
|
839
857
|
context: 'worker_stderr_log',
|
|
840
858
|
workerId,
|
|
841
|
-
operation: '
|
|
859
|
+
operation: 'log_stderr',
|
|
842
860
|
});
|
|
843
861
|
});
|
|
844
862
|
}
|
|
@@ -847,12 +865,14 @@ if (isRunningDirectly) {
|
|
|
847
865
|
|
|
848
866
|
// Handle process completion
|
|
849
867
|
child.on('close', code => {
|
|
868
|
+
activeSolveChildren.delete(child); // Issue #1823: no longer in-flight
|
|
850
869
|
exitCode = code || 0;
|
|
851
870
|
resolve();
|
|
852
871
|
});
|
|
853
872
|
|
|
854
873
|
// Handle process errors
|
|
855
874
|
child.on('error', error => {
|
|
875
|
+
activeSolveChildren.delete(child); // Issue #1823: no longer in-flight
|
|
856
876
|
exitCode = 1;
|
|
857
877
|
log(` [${solveCommand} worker-${workerId} ERROR] Process error: ${error.message}`, {
|
|
858
878
|
level: 'error',
|
|
@@ -871,6 +891,13 @@ if (isRunningDirectly) {
|
|
|
871
891
|
|
|
872
892
|
if (exitCode === 0) {
|
|
873
893
|
await log(` ✅ Worker ${workerId} completed ${issueUrl} (${duration}s)`);
|
|
894
|
+
} else if (!issueQueue.isRunning && (exitCode === 130 || exitCode === 143)) {
|
|
895
|
+
// Issue #1823: during shutdown, solve auto-commits and exits 130/143 — a graceful
|
|
896
|
+
// stop, NOT a failure. Don't throw/post an error; leave the issue in "processing"
|
|
897
|
+
// (neither completed nor failed) since work was cut short. See case-study issue-1823.
|
|
898
|
+
await log(` 🛑 Worker ${workerId} stopped gracefully during shutdown on ${issueUrl} (exit ${exitCode}, ${duration}s)`);
|
|
899
|
+
gracefulStop = true;
|
|
900
|
+
break; // stop processing more PRs for this issue
|
|
874
901
|
} else {
|
|
875
902
|
throw new Error(`${solveCommand} exited with code ${exitCode}`);
|
|
876
903
|
}
|
|
@@ -895,8 +922,10 @@ if (isRunningDirectly) {
|
|
|
895
922
|
}
|
|
896
923
|
}
|
|
897
924
|
|
|
898
|
-
// Only mark as completed if it didn't fail
|
|
899
|
-
|
|
925
|
+
// Only mark as completed if it didn't fail and wasn't gracefully stopped mid-shutdown.
|
|
926
|
+
// Issue #1823: a graceful stop is neither a success nor a failure — leave it in
|
|
927
|
+
// "processing" so it is not miscounted as completed (which would also trigger cleanup).
|
|
928
|
+
if (!issueFailed && !gracefulStop) {
|
|
900
929
|
issueQueue.markCompleted(issueUrl);
|
|
901
930
|
}
|
|
902
931
|
|
|
@@ -1384,55 +1413,27 @@ if (isRunningDirectly) {
|
|
|
1384
1413
|
await log(` 📁 Full log file: ${absoluteLogPath}`);
|
|
1385
1414
|
}
|
|
1386
1415
|
|
|
1387
|
-
// Graceful shutdown
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
while (issueQueue.getStats().processing > 0 && Date.now() - startTime < maxWaitTime) {
|
|
1409
|
-
await new Promise(resolve => setTimeout(resolve, 500));
|
|
1410
|
-
}
|
|
1411
|
-
}
|
|
1412
|
-
|
|
1413
|
-
await Promise.all(issueQueue.workers);
|
|
1414
|
-
|
|
1415
|
-
// Perform cleanup if enabled and there were successful completions
|
|
1416
|
-
const finalStats = issueQueue.getStats();
|
|
1417
|
-
if (finalStats.completed > 0) {
|
|
1418
|
-
await cleanupTempDirectories(argv);
|
|
1419
|
-
}
|
|
1420
|
-
|
|
1421
|
-
await log(' ✅ Shutdown complete');
|
|
1422
|
-
await log(` 📁 Full log file: ${absoluteLogPath}`);
|
|
1423
|
-
} catch (error) {
|
|
1424
|
-
reportError(error, {
|
|
1425
|
-
context: 'monitor_issues_shutdown',
|
|
1426
|
-
operation: 'cleanup_and_exit',
|
|
1427
|
-
});
|
|
1428
|
-
await log(` ⚠️ Error during shutdown: ${cleanErrorMessage(error)}`, { level: 'error' });
|
|
1429
|
-
await log(` 📁 Full log file: ${absoluteLogPath}`);
|
|
1430
|
-
}
|
|
1431
|
-
|
|
1432
|
-
await safeExit(0, 'Process completed');
|
|
1433
|
-
}
|
|
1434
|
-
|
|
1435
|
-
// Handle graceful shutdown
|
|
1416
|
+
// Issue #1823: Graceful-shutdown + force-kill logic lives in hive.shutdown.lib.mjs.
|
|
1417
|
+
// gracefulShutdown waits (uncapped) for in-flight solve workers to finish on the first
|
|
1418
|
+
// interrupt; on a second interrupt it force-kills their detached process groups.
|
|
1419
|
+
const { gracefulShutdown } = createShutdownManager({
|
|
1420
|
+
log,
|
|
1421
|
+
safeExit,
|
|
1422
|
+
reportError,
|
|
1423
|
+
cleanErrorMessage,
|
|
1424
|
+
cleanupTempDirectories,
|
|
1425
|
+
issueQueue,
|
|
1426
|
+
argv,
|
|
1427
|
+
absoluteLogPath,
|
|
1428
|
+
activeSolveChildren,
|
|
1429
|
+
});
|
|
1430
|
+
|
|
1431
|
+
// Handle graceful shutdown.
|
|
1432
|
+
// Issue #1823: Tell the global exit handler (installed earlier via installGlobalExitHandlers)
|
|
1433
|
+
// to stand down on SIGINT/SIGTERM so it does not call process.exit() and race us. From here
|
|
1434
|
+
// on, gracefulShutdown is the SOLE owner of these signals: it waits for in-progress solve
|
|
1435
|
+
// worker(s) to finish and then exits via safeExit().
|
|
1436
|
+
delegateSignalHandling(true);
|
|
1436
1437
|
process.on('SIGINT', () => gracefulShutdown('interrupt'));
|
|
1437
1438
|
process.on('SIGTERM', () => gracefulShutdown('termination'));
|
|
1438
1439
|
|
package/src/hive.shutdown.lib.mjs
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Issue #1823: Graceful-shutdown manager for the hive command.
|
|
3
|
+
*
|
|
4
|
+
* Extracted from hive.mjs so the shutdown logic stays focused and independently testable
|
|
5
|
+
* (and to keep hive.mjs within the repo's max-lines lint budget).
|
|
6
|
+
*
|
|
7
|
+
* Behavior contract (see issue #1823):
|
|
8
|
+
* - On the FIRST interrupt (SIGINT/SIGTERM, or the \003 that `$ --stop`/screen injects),
|
|
9
|
+
* hive stops accepting new work and waits — without any time cap — for every in-flight
|
|
10
|
+
* `/solve` worker to finish NATURALLY, then exits 0. Because each solve runs in its own
|
|
11
|
+
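* detached process group, the terminal's signal never reaches it directly; hive forwards a controlled SIGTERM instead (see forwardShutdownToActiveSolveChildren), so solve can finish its current AI working session before stopping.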
|
|
12
|
+
* - On a SECOND interrupt (operator insists on stopping now), hive force-kills the in-flight
|
|
13
|
+
* solve process group(s) — negative PID, so codex and any grandchildren die too — and
|
|
14
|
+
* exits 130 immediately.
|
|
15
|
+
*
|
|
16
|
+
* @param {object} deps - Injected hive-scope dependencies.
|
|
17
|
+
* @param {Function} deps.log - Async logger (matches hive's log()).
|
|
18
|
+
* @param {Function} deps.safeExit - Async exit helper from exit-handler.lib.mjs.
|
|
19
|
+
* @param {Function} deps.reportError - Sentry error reporter.
|
|
20
|
+
* @param {Function} deps.cleanErrorMessage - Formats an error for logging.
|
|
21
|
+
* @param {Function} deps.cleanupTempDirectories - Cleans temp dirs after successful runs.
|
|
22
|
+
* @param {object} deps.issueQueue - The producer/consumer queue (stop/getStats/workers).
|
|
23
|
+
* @param {object} deps.argv - Parsed CLI args (passed through to cleanup).
|
|
24
|
+
* @param {string} deps.absoluteLogPath - Resolved log file path (for the final log line).
|
|
25
|
+
* @param {Set} deps.activeSolveChildren - Live set of in-flight solve child processes.
|
|
26
|
+
* @returns {{ gracefulShutdown: Function, forceKillActiveSolveChildren: Function, forwardShutdownToActiveSolveChildren: Function }}
|
|
27
|
+
*/
|
|
28
|
+
export const createShutdownManager = ({ log, safeExit, reportError, cleanErrorMessage, cleanupTempDirectories, issueQueue, argv, absoluteLogPath, activeSolveChildren }) => {
|
|
29
|
+
// Global shutdown state to prevent duplicate shutdown messages / re-entrancy.
|
|
30
|
+
let isShuttingDown = false;
|
|
31
|
+
|
|
32
|
+
// Issue #1823: Forward the operator's interrupt to each in-flight solve worker as SIGTERM,
|
|
33
|
+
// signalling the solve PROCESS itself (positive PID), NOT its process group (negative PID).
|
|
34
|
+
// Rationale (validated — see experiments/command-stream-signals.mjs): command-stream installs
|
|
35
|
+
// only a SIGINT handler and ignores SIGTERM, so signalling solve with SIGTERM never collaterally
|
|
36
|
+
// kills the AI child mid-turn. solve's own session-aware handler then decides what to do:
|
|
37
|
+
// - if an AI working session is in progress, it finishes it, auto-commits, and exits 143;
|
|
38
|
+
// - if it is only idle-waiting (e.g. for CI/CD), it stops immediately.
|
|
39
|
+
// This implements "send CTRL+C to solve command also" while still letting the AI session finish.
|
|
40
|
+
async function forwardShutdownToActiveSolveChildren() {
|
|
41
|
+
for (const child of activeSolveChildren) {
|
|
42
|
+
if (!child || child.pid == null) {
|
|
43
|
+
continue;
|
|
44
|
+
}
|
|
45
|
+
try {
|
|
46
|
+
process.kill(child.pid, 'SIGTERM'); // positive pid → just the solve process, not its group
|
|
47
|
+
} catch (signalError) {
|
|
48
|
+
await log(` ⚠️ Could not forward SIGTERM to solve (pid ${child.pid}): ${signalError.message}`, {
|
|
49
|
+
verbose: true,
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
// Issue #1823: Force-kill all in-flight detached solve children (and their codex
|
|
56
|
+
// descendants) by signalling their process groups. Used only when the operator insists on
|
|
57
|
+
// an immediate exit (a SECOND interrupt). A negative PID targets the whole process group,
|
|
58
|
+
// so this also terminates codex and any grandchildren spawned by solve.
|
|
59
|
+
async function forceKillActiveSolveChildren(signalName = 'SIGTERM') {
|
|
60
|
+
for (const child of activeSolveChildren) {
|
|
61
|
+
if (!child || child.pid == null) {
|
|
62
|
+
continue;
|
|
63
|
+
}
|
|
64
|
+
try {
|
|
65
|
+
process.kill(-child.pid, signalName); // negative pid → whole process group
|
|
66
|
+
} catch (killError) {
|
|
67
|
+
// The group may already be gone; fall back to signalling just the child.
|
|
68
|
+
try {
|
|
69
|
+
child.kill(signalName);
|
|
70
|
+
} catch {
|
|
71
|
+
// Child already exited — nothing to do.
|
|
72
|
+
}
|
|
73
|
+
await log(` ⚠️ Could not signal solve process group (pid ${child.pid}): ${killError.message}`, {
|
|
74
|
+
verbose: true,
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Graceful shutdown handler.
|
|
81
|
+
async function gracefulShutdown(signal) {
|
|
82
|
+
if (isShuttingDown) {
|
|
83
|
+
// Issue #1823: A second interrupt while already shutting down means the operator wants
|
|
84
|
+
// to stop NOW. Force-kill the in-flight solve process group(s) and exit immediately,
|
|
85
|
+
// overriding the default "wait for solve to finish" behavior.
|
|
86
|
+
await log(`\n\n⚠️ Received second ${signal} signal — force-stopping ${activeSolveChildren.size} in-flight solve worker(s) and exiting now.`, {
|
|
87
|
+
level: 'warning',
|
|
88
|
+
});
|
|
89
|
+
await forceKillActiveSolveChildren('SIGTERM');
|
|
90
|
+
await safeExit(130, 'Force interrupted by repeated signal');
|
|
91
|
+
return;
|
|
92
|
+
}
|
|
93
|
+
isShuttingDown = true;
|
|
94
|
+
|
|
95
|
+
try {
|
|
96
|
+
await log(`\n\n🛑 Received ${signal} signal, shutting down gracefully...`);
|
|
97
|
+
await log(' ℹ️ Forwarding the interrupt to in-progress solve worker(s); each finishes its current AI working session, auto-commits, then stops. Press CTRL+C again to force-stop.');
|
|
98
|
+
|
|
99
|
+
// Stop the queue so each worker exits its loop after its current solve completes.
|
|
100
|
+
issueQueue.stop();
|
|
101
|
+
|
|
102
|
+
// Issue #1823: Forward the operator's CTRL+C to each in-flight solve worker (as SIGTERM).
|
|
103
|
+
// Previously hive only waited; now it actively tells solve to wind down so a worker that is
|
|
104
|
+
// merely idle-waiting (e.g. for CI/CD) stops promptly instead of sleeping out its interval,
|
|
105
|
+
// while a worker mid-AI-session still finishes that session before exiting (see solve's
|
|
106
|
+
// --do-not-shutdown-in-the-middle-of-working-session guard, which hive enables by default).
|
|
107
|
+
if (activeSolveChildren.size > 0) {
|
|
108
|
+
await log(` 📨 Forwarding shutdown to ${activeSolveChildren.size} in-flight solve worker(s)...`);
|
|
109
|
+
await forwardShutdownToActiveSolveChildren();
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Issue #1823: Wait for in-flight solve commands to FINISH NATURALLY. We intentionally
|
|
113
|
+
// do NOT cap this wait — the issue requires that CTRL+C / `$ --stop` fully waits for each
|
|
114
|
+
// running /solve to complete before shutting down. Because solve runs in its own detached
|
|
115
|
+
// process group, the terminal's interrupt did not reach it; it only received the SIGTERM forwarded above and keeps running until it winds down.
|
|
116
|
+
// Promise.all(issueQueue.workers) is the authoritative wait; a periodic progress line
|
|
117
|
+
// makes it clear hive is still waiting (and is unref'd so it never blocks exit itself).
|
|
118
|
+
const stats = issueQueue.getStats();
|
|
119
|
+
let progressTimer = null;
|
|
120
|
+
if (stats.processing > 0) {
|
|
121
|
+
const waitStart = Date.now();
|
|
122
|
+
await log(` ⏳ Waiting for ${stats.processing} worker(s) to finish current tasks...`);
|
|
123
|
+
progressTimer = setInterval(() => {
|
|
124
|
+
const current = issueQueue.getStats();
|
|
125
|
+
if (current.processing > 0) {
|
|
126
|
+
const elapsed = Math.round((Date.now() - waitStart) / 1000);
|
|
127
|
+
log(` ⏳ Still waiting for ${current.processing} solve worker(s) to finish (${elapsed}s elapsed)...`).catch(() => {});
|
|
128
|
+
}
|
|
129
|
+
}, 15000);
|
|
130
|
+
if (typeof progressTimer.unref === 'function') {
|
|
131
|
+
progressTimer.unref();
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
await Promise.all(issueQueue.workers);
|
|
136
|
+
if (progressTimer) {
|
|
137
|
+
clearInterval(progressTimer);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// Perform cleanup if enabled and there were successful completions
|
|
141
|
+
const finalStats = issueQueue.getStats();
|
|
142
|
+
if (finalStats.completed > 0) {
|
|
143
|
+
await cleanupTempDirectories(argv);
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
await log(' ✅ Shutdown complete');
|
|
147
|
+
await log(` 📁 Full log file: ${absoluteLogPath}`);
|
|
148
|
+
} catch (error) {
|
|
149
|
+
reportError(error, {
|
|
150
|
+
context: 'monitor_issues_shutdown',
|
|
151
|
+
operation: 'cleanup_and_exit',
|
|
152
|
+
});
|
|
153
|
+
await log(` ⚠️ Error during shutdown: ${cleanErrorMessage(error)}`, { level: 'error' });
|
|
154
|
+
await log(` 📁 Full log file: ${absoluteLogPath}`);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
await safeExit(0, 'Process completed');
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
return { gracefulShutdown, forceKillActiveSolveChildren, forwardShutdownToActiveSolveChildren };
|
|
161
|
+
};
|
package/src/interruptible-sleep.lib.mjs
CHANGED
|
@@ -9,14 +9,18 @@
|
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
11
|
/**
|
|
12
|
-
* Sleep for `ms` milliseconds, but resolve early if SIGINT is received.
|
|
12
|
+
* Sleep for `ms` milliseconds, but resolve early if SIGINT or SIGTERM is received.
|
|
13
13
|
*
|
|
14
|
-
* When
|
|
15
|
-
* resolves with `{ interrupted: true }`. The existing
|
|
16
|
-
* exit-handler.lib.mjs)
|
|
14
|
+
* When either signal fires during the sleep, the timer is cleared and the promise
|
|
15
|
+
* resolves with `{ interrupted: true }`. The existing signal handlers (from
|
|
16
|
+
* exit-handler.lib.mjs) continue to run normally — this function does NOT
|
|
17
17
|
* consume or re-emit the signal, it only ensures its own timer doesn't
|
|
18
18
|
* block the event loop.
|
|
19
19
|
*
|
|
20
|
+
* Issue #1823: SIGTERM is also honoured because hive forwards the operator's CTRL+C to each
|
|
21
|
+
* /solve worker as SIGTERM. When solve is only idle-waiting here (e.g. for CI/CD), it must stop
|
|
22
|
+
* immediately rather than sleep out the remaining delay.
|
|
23
|
+
*
|
|
20
24
|
* @param {number} ms - Duration in milliseconds
|
|
21
25
|
* @returns {Promise<{interrupted: boolean}>}
|
|
22
26
|
*/
|
|
@@ -24,18 +28,24 @@ export function interruptibleSleep(ms) {
|
|
|
24
28
|
return new Promise(resolve => {
|
|
25
29
|
let timer;
|
|
26
30
|
|
|
31
|
+
const cleanupListeners = () => {
|
|
32
|
+
process.removeListener('SIGINT', onInterrupt);
|
|
33
|
+
process.removeListener('SIGTERM', onInterrupt);
|
|
34
|
+
};
|
|
35
|
+
|
|
27
36
|
const onInterrupt = () => {
|
|
28
37
|
clearTimeout(timer);
|
|
29
|
-
|
|
38
|
+
cleanupListeners();
|
|
30
39
|
resolve({ interrupted: true });
|
|
31
40
|
};
|
|
32
41
|
|
|
33
42
|
timer = setTimeout(() => {
|
|
34
|
-
|
|
43
|
+
cleanupListeners();
|
|
35
44
|
resolve({ interrupted: false });
|
|
36
45
|
}, ms);
|
|
37
46
|
|
|
38
47
|
process.on('SIGINT', onInterrupt);
|
|
48
|
+
process.on('SIGTERM', onInterrupt);
|
|
39
49
|
});
|
|
40
50
|
}
|
|
41
51
|
|
package/src/solve.config.lib.mjs
CHANGED
|
@@ -115,6 +115,11 @@ export const SOLVE_OPTION_DEFINITIONS = {
|
|
|
115
115
|
description: '[EXPERIMENTAL] Temporarily copy AGENTS.md/agents.md to CLAUDE.md while Claude runs, then remove the temporary copy',
|
|
116
116
|
default: false,
|
|
117
117
|
},
|
|
118
|
+
'do-not-shutdown-in-the-middle-of-working-session': {
|
|
119
|
+
type: 'boolean',
|
|
120
|
+
description: '[EXPERIMENTAL] On interrupt (CTRL+C / SIGTERM), do not abort the AI tool mid-run. If an AI working session is in progress, wait for it to finish, auto-commit any uncommitted changes, then shut down gracefully. If solve is only idle-waiting (e.g. for CI/CD), stop immediately. A second interrupt force-stops. hive passes this automatically to every /solve worker.',
|
|
121
|
+
default: false,
|
|
122
|
+
},
|
|
118
123
|
'attach-logs': {
|
|
119
124
|
type: 'boolean',
|
|
120
125
|
description: 'Upload the solution draft log file to the Pull Request on completion (⚠️ WARNING: May expose sensitive data)',
|
package/src/solve.mjs
CHANGED
|
@@ -48,6 +48,8 @@ const { runAutoEnsureRequirements } = await import('./solve.auto-ensure.lib.mjs'
|
|
|
48
48
|
const exitHandler = await import('./exit-handler.lib.mjs');
|
|
49
49
|
const { initializeExitHandler, installGlobalExitHandlers, safeExit, logActiveHandles } = exitHandler;
|
|
50
50
|
const { createInterruptWrapper } = await import('./solve.interrupt.lib.mjs');
|
|
51
|
+
// Issue #1823: working-session guard for --do-not-shutdown-in-the-middle-of-working-session.
|
|
52
|
+
const { configureWorkingSession, beginWorkingSession, endWorkingSession } = await import('./working-session.lib.mjs');
|
|
51
53
|
const getResourceSnapshot = memoryCheck.getResourceSnapshot;
|
|
52
54
|
const { handleAutoPrCreation } = await import('./solve.auto-pr.lib.mjs');
|
|
53
55
|
const { setupRepositoryAndClone, verifyDefaultBranchAndStatus } = await import('./solve.repo-setup.lib.mjs');
|
|
@@ -148,6 +150,11 @@ const cleanupWrapper = async () => {
|
|
|
148
150
|
const interruptWrapper = createInterruptWrapper({ cleanupContext, checkForUncommittedChanges, shouldAttachLogs, attachLogToGitHub, getLogFile, sanitizeLogContent, $, log });
|
|
149
151
|
initializeExitHandler(getAbsoluteLogPath, log, cleanupWrapper, interruptWrapper, ({ code, reason }) => notifyIssueAboutPrePullRequestFailure({ code, reason, argv, globalState: global, $, log, getLogFile, shouldAttachLogs, attachLogToGitHub, sanitizeLogContent, rawCommand }));
|
|
150
152
|
installGlobalExitHandlers();
|
|
153
|
+
// Issue #1823: Configure the working-session guard. When the experimental
|
|
154
|
+
// --do-not-shutdown-in-the-middle-of-working-session flag is set (hive passes it to every
|
|
155
|
+
// worker), an interrupt received during an AI working session is deferred: solve lets the AI
|
|
156
|
+
// finish, auto-commits, then shuts down gracefully instead of aborting the AI tool mid-run.
|
|
157
|
+
configureWorkingSession({ enabled: argv['do-not-shutdown-in-the-middle-of-working-session'] === true, log });
|
|
151
158
|
const markFailureNotificationPosted = targetType => {
|
|
152
159
|
global.preExitFailureNotificationPosted = true;
|
|
153
160
|
if (targetType === 'pr') {
|
|
@@ -705,6 +712,11 @@ try {
|
|
|
705
712
|
// Execute tool command with all prompts and settings
|
|
706
713
|
let toolResult;
|
|
707
714
|
|
|
715
|
+
// Issue #1823: Mark the start of the AI working session. While this is active and the
|
|
716
|
+
// --do-not-shutdown-in-the-middle-of-working-session flag is set, an interrupt (CTRL+C/SIGTERM)
|
|
717
|
+
// is deferred until the AI tool finishes its turn (see exit-handler.lib.mjs + working-session.lib.mjs).
|
|
718
|
+
beginWorkingSession();
|
|
719
|
+
|
|
708
720
|
// If --use-agent-commander is enabled, use agent-commander for all tools
|
|
709
721
|
if (argv.useAgentCommander) {
|
|
710
722
|
// Ensure agent-commander is available
|
|
@@ -813,6 +825,24 @@ try {
|
|
|
813
825
|
toolResult = claudeResult;
|
|
814
826
|
}
|
|
815
827
|
|
|
828
|
+
// Issue #1823: Mark the end of the AI working session. If a graceful-shutdown interrupt arrived
|
|
829
|
+
// during the session (deferred by the working-session guard), honor it now: auto-commit any
|
|
830
|
+
// uncommitted changes and exit gracefully — only AFTER the AI tool has fully finished its turn.
|
|
831
|
+
const workingSessionState = endWorkingSession();
|
|
832
|
+
if (workingSessionState.shutdownRequested) {
|
|
833
|
+
const shutdownExitCode = workingSessionState.shutdownSignal === 'SIGINT' ? 130 : 143;
|
|
834
|
+
await log('\n🛑 Graceful shutdown requested during the AI working session — the session has finished.', { level: 'warning' });
|
|
835
|
+
await log(' Auto-committing any uncommitted changes, then shutting down...', { level: 'warning' });
|
|
836
|
+
try {
|
|
837
|
+
await interruptWrapper();
|
|
838
|
+
} catch (interruptError) {
|
|
839
|
+
await log(`⚠️ Auto-commit on graceful shutdown failed: ${cleanErrorMessage(interruptError)}`, { level: 'warning' });
|
|
840
|
+
}
|
|
841
|
+
// Graceful shutdown is NOT a failure: skip the pre-exit failure notifier so no spurious
|
|
842
|
+
// "solver failed" comment is posted (issue #1823: no errors on graceful shutdown).
|
|
843
|
+
await safeExit(shutdownExitCode, 'Graceful shutdown after AI working session', { skipPreExit: true });
|
|
844
|
+
}
|
|
845
|
+
|
|
816
846
|
const { success } = toolResult;
|
|
817
847
|
let sessionId = toolResult.sessionId;
|
|
818
848
|
let anthropicTotalCostUSD = toolResult.anthropicTotalCostUSD;
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
/**
 * Issue #1823: "AI working session" guard for solve's graceful shutdown.
 *
 * An *AI working session* is the window during which the AI tool child
 * (claude/codex/gemini/opencode/qwen/agent) is actively running and streaming. When the
 * experimental flag `--do-not-shutdown-in-the-middle-of-working-session` is enabled:
 *
 *  - An interrupt (CTRL+C / SIGINT, or SIGTERM) received DURING a protected session is
 *    *deferred*: solve lets the AI session finish, auto-commits any uncommitted changes, then
 *    shuts down gracefully. It does NOT abort the AI tool mid-run.
 *  - An interrupt received OUTSIDE a protected session (e.g. solve is only idle-waiting for
 *    CI/CD) stops solve immediately.
 *  - A SECOND interrupt force-stops now: the active AI child is killed and solve exits.
 *
 * Background (validated empirically — see experiments/command-stream-signals.mjs):
 * command-stream installs only a SIGINT handler that forwards SIGINT to the active AI child's
 * process group (killing it); it has NO SIGTERM handler. hive therefore forwards the operator's
 * CTRL+C to each /solve worker as SIGTERM, which command-stream ignores — so the AI child is
 * never collaterally killed by the library and this module + exit-handler decide what to do.
 * For the force path (a second interrupt) we *reuse* command-stream's own SIGINT handler to
 * kill the active child's process group, guarding against its embedded process.exit(130) so we
 * can still auto-commit before exiting.
 *
 * This module holds module-level state on purpose: it is a per-process singleton, mirroring how
 * exit-handler.lib.mjs and command-stream manage global signal state.
 */

// Per-process singleton state; reset via resetWorkingSession().
let flagEnabled = false;
let logFn = null;
let protectedSessionActive = false;
let shutdownRequested = false;
let shutdownSignal = null;
let forceRequested = false;

/**
 * Heuristic to recognise command-stream's SIGINT listener among process SIGINT listeners.
 * Matches the same internal helper names command-stream itself uses for self-detection
 * (see node_modules/command-stream .../$.state.mjs isOurHandlerInstalled()).
 * @param {Function} listener
 * @returns {boolean} false for anything that is not a function.
 */
const isCommandStreamSigintListener = listener => {
  if (typeof listener !== 'function') {
    return false;
  }
  const s = listener.toString();
  return s.includes('findActiveRunners') || s.includes('forwardSigintToRunners') || s.includes('handleSigintExit') || s.includes('activeProcessRunners');
};

/**
 * Internal verbose tracer for issue #1823 shutdown diagnostics. No-op unless a logger was
 * provided via configureWorkingSession(). Fire-and-forget: logging must never break shutdown.
 * @param {string} message
 */
const trace = message => {
  if (typeof logFn !== 'function') {
    return;
  }
  try {
    const result = logFn(message, { verbose: true });
    // An async logger returns a promise; attach a handler so a rejection is never unhandled.
    if (result && typeof result.catch === 'function') {
      result.catch(() => {});
    }
  } catch {
    // Diagnostics must never interfere with the shutdown path.
  }
};

/**
 * Configure the working-session guard. Call once at solve startup.
 * Passing `null` or nothing disables the flag and clears the logger.
 * @param {object} [opts]
 * @param {boolean} [opts.enabled=false] - Whether --do-not-shutdown-in-the-middle-of-working-session is set.
 * @param {Function} [opts.log] - Optional async logger, called as log(message, { verbose: true }).
 */
export const configureWorkingSession = (opts = {}) => {
  // `opts ?? {}` also tolerates an explicit null, which a destructuring default would not.
  const { enabled = false, log = null } = opts ?? {};
  flagEnabled = !!enabled;
  logFn = log;
};

/** @returns {boolean} Whether the flag was enabled via configureWorkingSession(). */
export const isFlagEnabled = () => flagEnabled;
/** @returns {boolean} Whether execution is between beginWorkingSession() and endWorkingSession(). */
export const isWorkingSessionActive = () => protectedSessionActive;
/** @returns {boolean} Whether requestShutdown() has been called at least once. */
export const isShutdownRequested = () => shutdownRequested;
/** @returns {string|null} The signal recorded by the first requestShutdown() call, if any. */
export const getShutdownSignal = () => shutdownSignal;
/** @returns {boolean} Whether requestShutdown() has been called more than once. */
export const isForceRequested = () => forceRequested;

/** Mark the start of a protected AI working session. */
export const beginWorkingSession = () => {
  protectedSessionActive = true;
};

/**
 * Mark the end of a protected AI working session.
 * Only the "active" marker is cleared; the recorded shutdown request is returned as-is so the
 * caller can act on it.
 * @returns {{shutdownRequested: boolean, shutdownSignal: string|null, forceRequested: boolean}}
 */
export const endWorkingSession = () => {
  protectedSessionActive = false;
  return { shutdownRequested, shutdownSignal, forceRequested };
};

/**
 * Record a graceful-shutdown request received during a protected session.
 * This function does not itself check whether a session is active.
 * @param {string} signal - 'SIGINT' | 'SIGTERM'
 * @returns {{first: boolean}} first=true the first time; false on a repeat (operator insists → force).
 */
export const requestShutdown = signal => {
  if (shutdownRequested) {
    forceRequested = true;
    // Placeholder keeps the trace readable when the caller passed no signal name.
    trace(`[working-session] repeat ${signal || 'interrupt'} during protected session → force requested`);
    return { first: false };
  }
  shutdownRequested = true;
  shutdownSignal = signal || shutdownSignal;
  trace(`[working-session] ${shutdownSignal || 'interrupt'} deferred until the AI working session finishes`);
  return { first: true };
};

/**
 * Force-kill the active AI child process group(s) by reusing command-stream's own SIGINT handler,
 * which forwards SIGINT to every active runner's process group. We temporarily install a no-op
 * SIGINT listener first so command-stream sees "other handlers present" and does NOT call
 * process.exit(130) itself — leaving us in control to auto-commit and exit afterward.
 * A listener that throws is traced and skipped; the remaining listeners still run.
 * @returns {number} Count of command-stream listeners invoked (0 if none / no active child).
 */
export const forceKillActiveChildren = () => {
  const live = process.listeners('SIGINT').filter(isCommandStreamSigintListener);
  if (live.length === 0) {
    trace('[working-session] force-kill requested but no active command-stream child found');
    return 0;
  }
  trace(`[working-session] force-killing ${live.length} active AI child process group(s)`);
  const noop = () => {};
  process.on('SIGINT', noop); // guarantee listeners.length > 1 → command-stream won't process.exit
  try {
    for (const listener of live) {
      try {
        listener();
      } catch (error) {
        // Best-effort: the child group may already be gone. Record it instead of hiding it.
        trace(`[working-session] command-stream SIGINT listener threw during force-kill (ignored): ${error?.message ?? error}`);
      }
    }
  } finally {
    process.removeListener('SIGINT', noop);
  }
  return live.length;
};

/** Reset all module state (used by tests). */
export const resetWorkingSession = () => {
  flagEnabled = false;
  logFn = null;
  protectedSessionActive = false;
  shutdownRequested = false;
  shutdownSignal = null;
  forceRequested = false;
};

export default {
  configureWorkingSession,
  isFlagEnabled,
  isWorkingSessionActive,
  isShutdownRequested,
  getShutdownSignal,
  isForceRequested,
  beginWorkingSession,
  endWorkingSession,
  requestShutdown,
  forceKillActiveChildren,
  resetWorkingSession,
};
|