@link-assistant/hive-mind 1.72.7 → 1.73.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.73.1
4
+
5
+ ### Patch Changes
6
+
7
+ - df8b776: Stop the auto-restart-until-mergeable and watch loops from treating the AI agent's own session comments (e.g. free-form "CI now green" status updates posted through the authenticated account) as new human feedback, which caused an endless restart loop until the iteration limit (issue #1827). The check window is now advanced monotonically, every comment the authenticated account posts during a session is tracked by ID, and watch-mode feedback counting excludes tool-generated comments by marker and tracked ID.
8
+
9
+ ## 1.73.0
10
+
11
+ ### Minor Changes
12
+
13
+ - 1cd647d: Fix all errors on graceful shutdown and add an experimental working-session guard.
14
+
15
+ `hive` now fully waits for every in-flight `/solve` to finish before exiting on CTRL+C / `--stop`: signal handling is delegated to a single owner (resolving a double SIGINT-handler race that called `process.exit(130)` and cut the wait short), each solve worker is spawned in its own detached process group so the terminal's SIGINT no longer aborts solve/codex mid-task, and the wait has no time cap. Worker stderr is no longer mislabeled as `ERROR` — the child exit code remains the authoritative failure signal.
16
+
17
+ Building on that, a new experimental `--do-not-shutdown-in-the-middle-of-working-session` option is added to `solve` and enabled by default for `hive`. With it, an interrupt (CTRL+C / SIGTERM) no longer aborts the AI tool mid-run: if an AI working session is in progress, solve finishes it, auto-commits any uncommitted changes, then shuts down gracefully (exit 130/143); if solve is only idle-waiting (e.g. for CI/CD) it stops immediately, and a second interrupt force-stops. `hive` now forwards a controlled SIGTERM to each in-flight `/solve` worker on the first CTRL+C (instead of only waiting) and passes the flag to every worker (opt out with `--no-do-not-shutdown-in-the-middle-of-working-session`). Graceful shutdown is treated as a normal stop, so it no longer posts a spurious "solution draft failed" comment. Standalone `solve` keeps the flag off by default, so its behavior is unchanged except that an interrupt now always auto-commits uncommitted changes before exiting.
18
+
3
19
  ## 1.72.7
4
20
 
5
21
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.72.7",
3
+ "version": "1.73.1",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -6,6 +6,10 @@
6
6
  * the process exits, whether due to normal completion, errors, or signals.
7
7
  */
8
8
 
9
+ // Issue #1823: working-session guard for --do-not-shutdown-in-the-middle-of-working-session.
10
+ // Static import is safe: working-session.lib.mjs has no heavy deps and does NOT import this module.
11
+ import { isFlagEnabled as isWorkingSessionFlagEnabled, isWorkingSessionActive, requestShutdown as requestWorkingSessionShutdown, forceKillActiveChildren as forceKillWorkingSessionChildren } from './working-session.lib.mjs';
12
+
9
13
  // Lazy-load Sentry to avoid keeping the event loop alive when not needed
10
14
  let Sentry = null;
11
15
  const getSentry = async () => {
@@ -30,6 +34,14 @@ let interruptHandlerRan = false;
30
34
  let preExitFunction = null;
31
35
  let preExitHandlerRan = false;
32
36
 
37
+ // Issue #1823: When an external owner (e.g. hive's gracefulShutdown) takes over signal
38
+ // handling, the global SIGINT/SIGTERM handlers must stand down and NOT call process.exit().
39
+ // Otherwise the global handler's process.exit() races with the external graceful handler
40
+ // and cuts its wait short — the root cause of premature shutdown that aborts an in-flight
41
+ // /solve (and its codex child) mid-turn. Defaults to false to preserve existing behavior
42
+ // for solve.mjs, telegram-bot, and other entry points that rely on the global handlers.
43
+ let signalHandlingDelegated = false;
44
+
33
45
  /**
34
46
  * Initialize the exit handler with required dependencies
35
47
  * @param {Function} getLogPath - Function that returns the current log path
@@ -50,6 +62,20 @@ export const setPreExitHandler = preExit => {
50
62
  preExitFunction = preExit;
51
63
  };
52
64
 
65
+ /**
66
+ * Issue #1823: Delegate SIGINT/SIGTERM handling to an external graceful shutdown owner.
67
+ *
68
+ * When enabled, the global SIGINT/SIGTERM handlers installed by installGlobalExitHandlers()
69
+ * stand down (return early) instead of calling process.exit(). This lets a caller such as
70
+ * hive's gracefulShutdown() fully wait for in-progress work (e.g. an executing /solve) to
71
+ * finish and then exit via safeExit(), without the global handler racing it to process.exit().
72
+ *
73
+ * @param {boolean} enabled - true to delegate (caller owns exit), false to restore default.
74
+ */
75
+ export const delegateSignalHandling = (enabled = true) => {
76
+ signalHandlingDelegated = enabled;
77
+ };
78
+
53
79
  /**
54
80
  * Display the exit message with log path
55
81
  */
@@ -203,11 +229,17 @@ export const logActiveHandles = async (log = null) => {
203
229
 
204
230
  /**
205
231
  * Safe exit function that ensures log path is shown
232
+ *
233
+ * @param {number} code - Process exit code
234
+ * @param {string} reason - Human-readable exit reason
235
+ * @param {object} [options]
236
+ * @param {boolean} [options.skipPreExit=false] - Issue #1823: skip the pre-exit failure notifier
237
+ * (e.g. on graceful shutdown, which is NOT a failure and must not post a "solver failed" comment).
206
238
  */
207
- export const safeExit = async (code = 0, reason = 'Process completed') => {
239
+ export const safeExit = async (code = 0, reason = 'Process completed', { skipPreExit = false } = {}) => {
208
240
  await showExitMessage(reason, code);
209
241
 
210
- if (code !== 0 && preExitFunction && !preExitHandlerRan) {
242
+ if (!skipPreExit && code !== 0 && preExitFunction && !preExitHandlerRan) {
211
243
  preExitHandlerRan = true;
212
244
  try {
213
245
  await preExitFunction({ code, reason });
@@ -273,6 +305,34 @@ export const installGlobalExitHandlers = () => {
273
305
 
274
306
  // Handle SIGINT (CTRL+C)
275
307
  process.on('SIGINT', async () => {
308
+ // Issue #1823: If an external graceful-shutdown owner is registered, stand down.
309
+ // That owner (e.g. hive's gracefulShutdown) is responsible for waiting for in-progress
310
+ // work and exiting via safeExit(). Calling process.exit(130) here would race with it
311
+ // and cut the wait short — the root cause of the premature shutdown.
312
+ if (signalHandlingDelegated) {
313
+ return;
314
+ }
315
+ // Issue #1823: With --do-not-shutdown-in-the-middle-of-working-session, defer shutdown while
316
+ // an AI working session is in progress so the AI tool is never aborted mid-run.
317
+ if (isWorkingSessionFlagEnabled() && isWorkingSessionActive()) {
318
+ const { first } = requestWorkingSessionShutdown('SIGINT');
319
+ if (first) {
320
+ if (logFunction) {
321
+ await logFunction('\n⚠️ Shutdown requested (CTRL+C). Finishing the current AI working session, then auto-committing and stopping. Press CTRL+C again to force-stop now.', { level: 'warning' });
322
+ }
323
+ return; // defer — solve will auto-commit + exit once the session ends
324
+ }
325
+ // Second interrupt → operator insists. Force-kill the AI child group, then fall through to
326
+ // auto-commit + exit below.
327
+ if (logFunction) {
328
+ await logFunction('\n⚠️ Second interrupt — force-stopping the AI working session now.', { level: 'warning' });
329
+ }
330
+ try {
331
+ forceKillWorkingSessionChildren();
332
+ } catch {
333
+ // ignore — child may already be gone
334
+ }
335
+ }
276
336
  // Run interrupt handler first (auto-commit, log upload, etc.) — guard against double invocation
277
337
  if (interruptFunction && !interruptHandlerRan) {
278
338
  interruptHandlerRan = true;
@@ -303,6 +363,40 @@ export const installGlobalExitHandlers = () => {
303
363
 
304
364
  // Handle SIGTERM
305
365
  process.on('SIGTERM', async () => {
366
+ // Issue #1823: Stand down when an external graceful-shutdown owner is registered.
367
+ if (signalHandlingDelegated) {
368
+ return;
369
+ }
370
+ // Issue #1823: hive forwards the operator's CTRL+C to each /solve worker as SIGTERM (which
371
+ // command-stream ignores). With --do-not-shutdown-in-the-middle-of-working-session, defer
372
+ // shutdown while an AI working session is in progress so the AI tool finishes its turn.
373
+ if (isWorkingSessionFlagEnabled() && isWorkingSessionActive()) {
374
+ const { first } = requestWorkingSessionShutdown('SIGTERM');
375
+ if (first) {
376
+ if (logFunction) {
377
+ await logFunction('\n⚠️ Shutdown requested. Finishing the current AI working session, then auto-committing and stopping. Send the signal again to force-stop now.', { level: 'warning' });
378
+ }
379
+ return; // defer — solve will auto-commit + exit once the session ends
380
+ }
381
+ if (logFunction) {
382
+ await logFunction('\n⚠️ Second signal — force-stopping the AI working session now.', { level: 'warning' });
383
+ }
384
+ try {
385
+ forceKillWorkingSessionChildren();
386
+ } catch {
387
+ // ignore — child may already be gone
388
+ }
389
+ }
390
+ // Issue #1823: Auto-commit uncommitted changes on SIGTERM too (previously only SIGINT did).
391
+ // This ensures graceful shutdown preserves work in ALL signal paths.
392
+ if (interruptFunction && !interruptHandlerRan) {
393
+ interruptHandlerRan = true;
394
+ try {
395
+ await interruptFunction();
396
+ } catch {
397
+ // Ignore interrupt handler errors
398
+ }
399
+ }
306
400
  if (cleanupFunction) {
307
401
  try {
308
402
  await cleanupFunction();
@@ -377,4 +471,5 @@ export const installGlobalExitHandlers = () => {
377
471
  export const resetExitHandler = () => {
378
472
  exitMessageShown = false;
379
473
  interruptHandlerRan = false;
474
+ signalHandlingDelegated = false;
380
475
  };
@@ -50,6 +50,17 @@ const HIVE_CUSTOM_SOLVE_OPTIONS = {
50
50
  choices: ['claude', 'opencode', 'codex', 'agent', 'qwen', 'gemini'],
51
51
  default: 'claude',
52
52
  },
53
+ // Issue #1823: hive enables the experimental working-session guard for every /solve worker by
54
+ // default. This is the ONLY change to how CTRL+C behaves in the hive workflow: instead of
55
+ // aborting the AI tool mid-run, a forwarded interrupt lets the worker finish its current AI
56
+ // working session, auto-commit, then shut down gracefully. solve keeps default:false (standalone
57
+ // behavior unchanged); hive overrides the default to true so the loop below forwards the flag.
58
+ // Operators can opt out with --no-do-not-shutdown-in-the-middle-of-working-session.
59
+ 'do-not-shutdown-in-the-middle-of-working-session': {
60
+ type: 'boolean',
61
+ description: '[EXPERIMENTAL] On CTRL+C, let each /solve worker finish its current AI working session and auto-commit before shutting down, instead of aborting it mid-run. If a worker is only idle-waiting (e.g. for CI/CD), it stops immediately. Press CTRL+C again to force-stop. Enabled by default for the hive workflow.',
62
+ default: true,
63
+ },
53
64
  };
54
65
 
55
66
  // Compute the set of solve options that hive auto-registers from SOLVE_OPTION_DEFINITIONS.
package/src/hive.mjs CHANGED
@@ -35,6 +35,7 @@ if (earlyArgs.includes('--help') || earlyArgs.includes('-h')) {
35
35
  }
36
36
  export { createYargsConfig } from './hive.config.lib.mjs';
37
37
  import { isDirectExecution, withTimeout } from './hive.bootstrap.lib.mjs';
38
+ import { createShutdownManager } from './hive.shutdown.lib.mjs';
38
39
  const isRunningDirectly = isDirectExecution(process.argv[1], import.meta.url);
39
40
  if (isRunningDirectly) {
40
41
  console.log('🐝 Hive Mind - AI-powered issue solver');
@@ -88,7 +89,7 @@ if (isRunningDirectly) {
88
89
  const memCheck = await import('./memory-check.mjs');
89
90
  const { checkSystem } = memCheck;
90
91
  const exitHandler = await import('./exit-handler.lib.mjs');
91
- const { initializeExitHandler, installGlobalExitHandlers, safeExit } = exitHandler;
92
+ const { initializeExitHandler, installGlobalExitHandlers, safeExit, delegateSignalHandling } = exitHandler;
92
93
  const sentryLib = await import('./sentry.lib.mjs');
93
94
  const { initializeSentry, withSentry, addBreadcrumb, reportError } = sentryLib;
94
95
  const graphqlLib = await import('./github.graphql.lib.mjs');
@@ -709,8 +710,10 @@ if (isRunningDirectly) {
709
710
  // Create global queue instance
710
711
  const issueQueue = new IssueQueue();
711
712
 
712
- // Global shutdown state to prevent duplicate shutdown messages
713
- let isShuttingDown = false;
713
+ // Issue #1823: Track in-flight solve child processes. A *first* interrupt forwards a
714
+ // controlled SIGTERM to each (they run in their own detached process group, so the
715
+ // terminal's SIGINT never reaches them); a *second* interrupt force-kills the groups.
716
+ const activeSolveChildren = new Set();
714
717
 
715
718
  // Worker function to process issues from queue
716
719
  async function worker(workerId) {
@@ -739,6 +742,8 @@ if (isRunningDirectly) {
739
742
 
740
743
  // Track if this issue failed
741
744
  let issueFailed = false;
745
+ // Issue #1823: Track a graceful shutdown stop so it is neither failed nor completed.
746
+ let gracefulStop = false;
742
747
 
743
748
  // Process the issue multiple times if needed
744
749
  for (let prNum = 1; prNum <= argv.pullRequestsPerIssue; prNum++) {
@@ -811,8 +816,17 @@ if (isRunningDirectly) {
811
816
  const child = spawn(solveCommand, args, {
812
817
  stdio: ['pipe', 'pipe', 'pipe'],
813
818
  env: process.env,
819
+ // Issue #1823: run solve in its own process group so a terminal SIGINT (or the
820
+ // \003 that `$ --stop`/screen injects) hits only hive, not solve+codex. hive instead
821
+ // forwards a controlled SIGTERM (see gracefulShutdown). stdio stays piped and we
822
+ // must NOT unref() — hive keeps waiting. See docs/case-studies/issue-1823.
823
+ detached: true,
814
824
  });
815
825
 
826
+ // Issue #1823: register the in-flight child so shutdown can forward SIGTERM to it (1st signal) or force-kill it (2nd signal)
827
+ activeSolveChildren.add(child);
828
+ log(` 🧒 Spawned ${solveCommand} worker-${workerId} (pid ${child.pid}, detached process group)`, { verbose: true }).catch(() => {});
829
+
816
830
  // Handle stdout data - stream output in real-time
817
831
  child.stdout.on('data', data => {
818
832
  const lines = data.toString().split('\n');
@@ -829,16 +843,20 @@ if (isRunningDirectly) {
829
843
  }
830
844
  });
831
845
 
832
- // Handle stderr data - stream errors in real-time
846
+ // Handle stderr data - stream output in real-time.
847
+ // Issue #1823: Do NOT blanket-tag stderr as ERROR — solve relays non-error
848
+ // diagnostics there (codex DEBUG/INFO traces, git branch messages, etc.), which
849
+ // produced hundreds of false errors. The authoritative failure signal is the
850
+ // child's non-zero exit code (below), so log stderr at default level.
833
851
  child.stderr.on('data', data => {
834
852
  const lines = data.toString().split('\n');
835
853
  for (const line of lines) {
836
854
  if (line.trim()) {
837
- log(` [${solveCommand} worker-${workerId} ERROR] ${line}`, { level: 'error' }).catch(logError => {
855
+ log(` [${solveCommand} worker-${workerId} stderr] ${line}`).catch(logError => {
838
856
  reportError(logError, {
839
857
  context: 'worker_stderr_log',
840
858
  workerId,
841
- operation: 'log_error',
859
+ operation: 'log_stderr',
842
860
  });
843
861
  });
844
862
  }
@@ -847,12 +865,14 @@ if (isRunningDirectly) {
847
865
 
848
866
  // Handle process completion
849
867
  child.on('close', code => {
868
+ activeSolveChildren.delete(child); // Issue #1823: no longer in-flight
850
869
  exitCode = code || 0;
851
870
  resolve();
852
871
  });
853
872
 
854
873
  // Handle process errors
855
874
  child.on('error', error => {
875
+ activeSolveChildren.delete(child); // Issue #1823: no longer in-flight
856
876
  exitCode = 1;
857
877
  log(` [${solveCommand} worker-${workerId} ERROR] Process error: ${error.message}`, {
858
878
  level: 'error',
@@ -871,6 +891,13 @@ if (isRunningDirectly) {
871
891
 
872
892
  if (exitCode === 0) {
873
893
  await log(` ✅ Worker ${workerId} completed ${issueUrl} (${duration}s)`);
894
+ } else if (!issueQueue.isRunning && (exitCode === 130 || exitCode === 143)) {
895
+ // Issue #1823: during shutdown, solve auto-commits and exits 130/143 — a graceful
896
+ // stop, NOT a failure. Don't throw/post an error; leave the issue in "processing"
897
+ // (neither completed nor failed) since work was cut short. See case-study issue-1823.
898
+ await log(` 🛑 Worker ${workerId} stopped gracefully during shutdown on ${issueUrl} (exit ${exitCode}, ${duration}s)`);
899
+ gracefulStop = true;
900
+ break; // stop processing more PRs for this issue
874
901
  } else {
875
902
  throw new Error(`${solveCommand} exited with code ${exitCode}`);
876
903
  }
@@ -895,8 +922,10 @@ if (isRunningDirectly) {
895
922
  }
896
923
  }
897
924
 
898
- // Only mark as completed if it didn't fail
899
- if (!issueFailed) {
925
+ // Only mark as completed if it didn't fail and wasn't gracefully stopped mid-shutdown.
926
+ // Issue #1823: a graceful stop is neither a success nor a failure — leave it in
927
+ // "processing" so it is not miscounted as completed (which would also trigger cleanup).
928
+ if (!issueFailed && !gracefulStop) {
900
929
  issueQueue.markCompleted(issueUrl);
901
930
  }
902
931
 
@@ -1384,55 +1413,27 @@ if (isRunningDirectly) {
1384
1413
  await log(` 📁 Full log file: ${absoluteLogPath}`);
1385
1414
  }
1386
1415
 
1387
- // Graceful shutdown handler
1388
- async function gracefulShutdown(signal) {
1389
- if (isShuttingDown) {
1390
- return; // Prevent duplicate shutdown messages
1391
- }
1392
- isShuttingDown = true;
1393
-
1394
- try {
1395
- await log(`\n\n🛑 Received ${signal} signal, shutting down gracefully...`);
1396
-
1397
- // Stop the queue and wait for workers to finish
1398
- issueQueue.stop();
1399
-
1400
- // Give workers a moment to finish their current tasks
1401
- const stats = issueQueue.getStats();
1402
- if (stats.processing > 0) {
1403
- await log(` ⏳ Waiting for ${stats.processing} worker(s) to finish current tasks...`);
1404
-
1405
- // Wait up to 10 seconds for workers to finish
1406
- const maxWaitTime = 10000;
1407
- const startTime = Date.now();
1408
- while (issueQueue.getStats().processing > 0 && Date.now() - startTime < maxWaitTime) {
1409
- await new Promise(resolve => setTimeout(resolve, 500));
1410
- }
1411
- }
1412
-
1413
- await Promise.all(issueQueue.workers);
1414
-
1415
- // Perform cleanup if enabled and there were successful completions
1416
- const finalStats = issueQueue.getStats();
1417
- if (finalStats.completed > 0) {
1418
- await cleanupTempDirectories(argv);
1419
- }
1420
-
1421
- await log(' ✅ Shutdown complete');
1422
- await log(` 📁 Full log file: ${absoluteLogPath}`);
1423
- } catch (error) {
1424
- reportError(error, {
1425
- context: 'monitor_issues_shutdown',
1426
- operation: 'cleanup_and_exit',
1427
- });
1428
- await log(` ⚠️ Error during shutdown: ${cleanErrorMessage(error)}`, { level: 'error' });
1429
- await log(` 📁 Full log file: ${absoluteLogPath}`);
1430
- }
1431
-
1432
- await safeExit(0, 'Process completed');
1433
- }
1434
-
1435
- // Handle graceful shutdown
1416
+ // Issue #1823: Graceful-shutdown + force-kill logic lives in hive.shutdown.lib.mjs.
1417
+ // gracefulShutdown waits (uncapped) for in-flight solve workers to finish on the first
1418
+ // interrupt; on a second interrupt it force-kills their detached process groups.
1419
+ const { gracefulShutdown } = createShutdownManager({
1420
+ log,
1421
+ safeExit,
1422
+ reportError,
1423
+ cleanErrorMessage,
1424
+ cleanupTempDirectories,
1425
+ issueQueue,
1426
+ argv,
1427
+ absoluteLogPath,
1428
+ activeSolveChildren,
1429
+ });
1430
+
1431
+ // Handle graceful shutdown.
1432
+ // Issue #1823: Tell the global exit handler (installed earlier via installGlobalExitHandlers)
1433
+ // to stand down on SIGINT/SIGTERM so it does not call process.exit() and race us. From here
1434
+ // on, gracefulShutdown is the SOLE owner of these signals: it waits for in-progress solve
1435
+ // worker(s) to finish and then exits via safeExit().
1436
+ delegateSignalHandling(true);
1436
1437
  process.on('SIGINT', () => gracefulShutdown('interrupt'));
1437
1438
  process.on('SIGTERM', () => gracefulShutdown('termination'));
1438
1439
 
@@ -0,0 +1,161 @@
1
+ /**
2
+ * Issue #1823: Graceful-shutdown manager for the hive command.
3
+ *
4
+ * Extracted from hive.mjs so the shutdown logic stays focused and independently testable
5
+ * (and to keep hive.mjs within the repo's max-lines lint budget).
6
+ *
7
+ * Behavior contract (see issue #1823):
8
+ * - On the FIRST interrupt (SIGINT/SIGTERM, or the \003 that `$ --stop`/screen injects),
9
+ * hive stops accepting new work, forwards a SIGTERM to every in-flight `/solve` worker, and
10
+ * waits — without any time cap — for each of them to wind down, then exits 0. Each solve runs
11
+ * in its own detached process group, so it is told to stop by that forwarded SIGTERM rather than by the terminal's signal.
12
+ * - On a SECOND interrupt (operator insists on stopping now), hive force-kills the in-flight
13
+ * solve process group(s) — negative PID, so codex and any grandchildren die too — and
14
+ * exits 130 immediately.
15
+ *
16
+ * @param {object} deps - Injected hive-scope dependencies.
17
+ * @param {Function} deps.log - Async logger (matches hive's log()).
18
+ * @param {Function} deps.safeExit - Async exit helper from exit-handler.lib.mjs.
19
+ * @param {Function} deps.reportError - Sentry error reporter.
20
+ * @param {Function} deps.cleanErrorMessage - Formats an error for logging.
21
+ * @param {Function} deps.cleanupTempDirectories - Cleans temp dirs after successful runs.
22
+ * @param {object} deps.issueQueue - The producer/consumer queue (stop/getStats/workers).
23
+ * @param {object} deps.argv - Parsed CLI args (passed through to cleanup).
24
+ * @param {string} deps.absoluteLogPath - Resolved log file path (for the final log line).
25
+ * @param {Set} deps.activeSolveChildren - Live set of in-flight solve child processes.
26
+ * @returns {{ gracefulShutdown: Function, forceKillActiveSolveChildren: Function, forwardShutdownToActiveSolveChildren: Function }}
27
+ */
28
+ export const createShutdownManager = ({ log, safeExit, reportError, cleanErrorMessage, cleanupTempDirectories, issueQueue, argv, absoluteLogPath, activeSolveChildren }) => {
29
+ // Global shutdown state to prevent duplicate shutdown messages / re-entrancy.
30
+ let isShuttingDown = false;
31
+
32
+ // Issue #1823: Forward the operator's interrupt to each in-flight solve worker as SIGTERM,
33
+ // signalling the solve PROCESS itself (positive PID), NOT its process group (negative PID).
34
+ // Rationale (validated — see experiments/command-stream-signals.mjs): command-stream installs
35
+ // only a SIGINT handler and ignores SIGTERM, so signalling solve with SIGTERM never collaterally
36
+ // kills the AI child mid-turn. solve's own session-aware handler then decides what to do:
37
+ // - if an AI working session is in progress, it finishes it, auto-commits, and exits 143;
38
+ // - if it is only idle-waiting (e.g. for CI/CD), it stops immediately.
39
+ // This implements "send CTRL+C to solve command also" while still letting the AI session finish.
40
+ async function forwardShutdownToActiveSolveChildren() {
41
+ for (const child of activeSolveChildren) {
42
+ if (!child || child.pid == null) {
43
+ continue;
44
+ }
45
+ try {
46
+ process.kill(child.pid, 'SIGTERM'); // positive pid → just the solve process, not its group
47
+ } catch (signalError) {
48
+ await log(` ⚠️ Could not forward SIGTERM to solve (pid ${child.pid}): ${signalError.message}`, {
49
+ verbose: true,
50
+ });
51
+ }
52
+ }
53
+ }
54
+
55
+ // Issue #1823: Force-kill all in-flight detached solve children (and their codex
56
+ // descendants) by signalling their process groups. Used only when the operator insists on
57
+ // an immediate exit (a SECOND interrupt). A negative PID targets the whole process group,
58
+ // so this also terminates codex and any grandchildren spawned by solve.
59
+ async function forceKillActiveSolveChildren(signalName = 'SIGTERM') {
60
+ for (const child of activeSolveChildren) {
61
+ if (!child || child.pid == null) {
62
+ continue;
63
+ }
64
+ try {
65
+ process.kill(-child.pid, signalName); // negative pid → whole process group
66
+ } catch (killError) {
67
+ // The group may already be gone; fall back to signalling just the child.
68
+ try {
69
+ child.kill(signalName);
70
+ } catch {
71
+ // Child already exited — nothing to do.
72
+ }
73
+ await log(` ⚠️ Could not signal solve process group (pid ${child.pid}): ${killError.message}`, {
74
+ verbose: true,
75
+ });
76
+ }
77
+ }
78
+ }
79
+
80
+ // Graceful shutdown handler.
81
+ async function gracefulShutdown(signal) {
82
+ if (isShuttingDown) {
83
+ // Issue #1823: A second interrupt while already shutting down means the operator wants
84
+ // to stop NOW. Force-kill the in-flight solve process group(s) and exit immediately,
85
+ // overriding the default "wait for solve to finish" behavior.
86
+ await log(`\n\n⚠️ Received second ${signal} signal — force-stopping ${activeSolveChildren.size} in-flight solve worker(s) and exiting now.`, {
87
+ level: 'warning',
88
+ });
89
+ await forceKillActiveSolveChildren('SIGTERM');
90
+ await safeExit(130, 'Force interrupted by repeated signal');
91
+ return;
92
+ }
93
+ isShuttingDown = true;
94
+
95
+ try {
96
+ await log(`\n\n🛑 Received ${signal} signal, shutting down gracefully...`);
97
+ await log(' ℹ️ Forwarding the interrupt to in-progress solve worker(s); each finishes its current AI working session, auto-commits, then stops. Press CTRL+C again to force-stop.');
98
+
99
+ // Stop the queue so each worker exits its loop after its current solve completes.
100
+ issueQueue.stop();
101
+
102
+ // Issue #1823: Forward the operator's CTRL+C to each in-flight solve worker (as SIGTERM).
103
+ // Previously hive only waited; now it actively tells solve to wind down so a worker that is
104
+ // merely idle-waiting (e.g. for CI/CD) stops promptly instead of sleeping out its interval,
105
+ // while a worker mid-AI-session still finishes that session before exiting (see solve's
106
+ // --do-not-shutdown-in-the-middle-of-working-session guard, which hive enables by default).
107
+ if (activeSolveChildren.size > 0) {
108
+ await log(` 📨 Forwarding shutdown to ${activeSolveChildren.size} in-flight solve worker(s)...`);
109
+ await forwardShutdownToActiveSolveChildren();
110
+ }
111
+
112
+ // Issue #1823: Wait for in-flight solve commands to FINISH NATURALLY. We intentionally
113
+ // do NOT cap this wait — the issue requires that CTRL+C / `$ --stop` fully waits for each
114
+ // running /solve to complete before shutting down. solve runs in its own detached process
115
+ // group and decides for itself how to react to the SIGTERM forwarded above, so this can take a while.
116
+ // Promise.all(issueQueue.workers) is the authoritative wait; a periodic progress line
117
+ // makes it clear hive is still waiting (and is unref'd so it never blocks exit itself).
118
+ const stats = issueQueue.getStats();
119
+ let progressTimer = null;
120
+ if (stats.processing > 0) {
121
+ const waitStart = Date.now();
122
+ await log(` ⏳ Waiting for ${stats.processing} worker(s) to finish current tasks...`);
123
+ progressTimer = setInterval(() => {
124
+ const current = issueQueue.getStats();
125
+ if (current.processing > 0) {
126
+ const elapsed = Math.round((Date.now() - waitStart) / 1000);
127
+ log(` ⏳ Still waiting for ${current.processing} solve worker(s) to finish (${elapsed}s elapsed)...`).catch(() => {});
128
+ }
129
+ }, 15000);
130
+ if (typeof progressTimer.unref === 'function') {
131
+ progressTimer.unref();
132
+ }
133
+ }
134
+
135
+ await Promise.all(issueQueue.workers);
136
+ if (progressTimer) {
137
+ clearInterval(progressTimer);
138
+ }
139
+
140
+ // Perform cleanup if enabled and there were successful completions
141
+ const finalStats = issueQueue.getStats();
142
+ if (finalStats.completed > 0) {
143
+ await cleanupTempDirectories(argv);
144
+ }
145
+
146
+ await log(' ✅ Shutdown complete');
147
+ await log(` 📁 Full log file: ${absoluteLogPath}`);
148
+ } catch (error) {
149
+ reportError(error, {
150
+ context: 'monitor_issues_shutdown',
151
+ operation: 'cleanup_and_exit',
152
+ });
153
+ await log(` ⚠️ Error during shutdown: ${cleanErrorMessage(error)}`, { level: 'error' });
154
+ await log(` 📁 Full log file: ${absoluteLogPath}`);
155
+ }
156
+
157
+ await safeExit(0, 'Process completed');
158
+ }
159
+
160
+ return { gracefulShutdown, forceKillActiveSolveChildren, forwardShutdownToActiveSolveChildren };
161
+ };
@@ -9,14 +9,18 @@
9
9
  */
10
10
 
11
11
  /**
12
- * Sleep for `ms` milliseconds, but resolve early if SIGINT is received.
12
+ * Sleep for `ms` milliseconds, but resolve early if SIGINT or SIGTERM is received.
13
13
  *
14
- * When SIGINT fires during the sleep, the timer is cleared and the promise
15
- * resolves with `{ interrupted: true }`. The existing SIGINT handler (from
16
- * exit-handler.lib.mjs) continues to run normally — this function does NOT
14
+ * When the signal fires during the sleep, the timer is cleared and the promise
15
+ * resolves with `{ interrupted: true }`. The existing signal handlers (from
16
+ * exit-handler.lib.mjs) continue to run normally — this function does NOT
17
17
  * consume or re-emit the signal, it only ensures its own timer doesn't
18
18
  * block the event loop.
19
19
  *
20
+ * Issue #1823: SIGTERM is also honoured because hive forwards the operator's CTRL+C to each
21
+ * /solve worker as SIGTERM. When solve is only idle-waiting here (e.g. for CI/CD), it must stop
22
+ * immediately rather than sleep out the remaining delay.
23
+ *
20
24
  * @param {number} ms - Duration in milliseconds
21
25
  * @returns {Promise<{interrupted: boolean}>}
22
26
  */
@@ -24,18 +28,24 @@ export function interruptibleSleep(ms) {
24
28
  return new Promise(resolve => {
25
29
  let timer;
26
30
 
31
+ const cleanupListeners = () => {
32
+ process.removeListener('SIGINT', onInterrupt);
33
+ process.removeListener('SIGTERM', onInterrupt);
34
+ };
35
+
27
36
  const onInterrupt = () => {
28
37
  clearTimeout(timer);
29
- process.removeListener('SIGINT', onInterrupt);
38
+ cleanupListeners();
30
39
  resolve({ interrupted: true });
31
40
  };
32
41
 
33
42
  timer = setTimeout(() => {
34
- process.removeListener('SIGINT', onInterrupt);
43
+ cleanupListeners();
35
44
  resolve({ interrupted: false });
36
45
  }, ms);
37
46
 
38
47
  process.on('SIGINT', onInterrupt);
48
+ process.on('SIGTERM', onInterrupt);
39
49
  });
40
50
  }
41
51
 
@@ -76,7 +76,7 @@ const formatRunLine = run => {
76
76
  // search scope for checkForExistingComment() stays in lock-step with the
77
77
  // markers actually embedded in tool-posted comments.
78
78
  const toolComments = await import('./tool-comments.lib.mjs');
79
- const { SESSION_ENDING_MARKERS, isToolGeneratedComment, isToolTrackedCommentId } = toolComments;
79
+ const { SESSION_ENDING_MARKERS, isToolGeneratedComment, isToolTrackedCommentId, trackToolCommentId } = toolComments;
80
80
 
81
81
  /**
82
82
  * Issue #1323: Check if a comment with specific content already exists on the PR
@@ -292,6 +292,121 @@ export const checkForNonBotComments = async (owner, repo, prNumber, issueNumber,
292
292
  }
293
293
  };
294
294
 
295
/**
 * Issue #1827: Compute the next monotonic check-window cutoff for the
 * auto-restart-until-mergeable loop. The cutoff must never move backwards:
 * after an AI session, lastCheckTime is set to a moment *after* the agent's own
 * comments, so rewinding it to the iteration's start time (captured before the
 * AI ran) would re-detect those comments as new feedback — the root cause of
 * the restart loop in #1827. Returns whichever timestamp is later.
 *
 * A value that is not a Date, or is an Invalid Date (its getTime() is NaN),
 * is treated as "no cutoff available". Every comparison against NaN is false,
 * so without this an invalid lastCheckTime would be returned forever and the
 * window could never advance again.
 *
 * @param {Date} lastCheckTime - current cutoff
 * @param {Date} candidate - proposed new cutoff (usually the iteration start time)
 * @returns {Date} the later of the two timestamps; `candidate` as-is when
 *   lastCheckTime is unusable, `lastCheckTime` when only candidate is unusable
 */
export const nextMonotonicCheckTime = (lastCheckTime, candidate) => {
  const isUsableDate = value => value instanceof Date && !Number.isNaN(value.getTime());

  if (!isUsableDate(lastCheckTime)) return candidate;
  if (!isUsableDate(candidate)) return lastCheckTime;
  // Strictly greater: on a tie the existing cutoff object is kept.
  return candidate.getTime() > lastCheckTime.getTime() ? candidate : lastCheckTime;
};
312
+
313
/**
 * Issue #1827: Register every comment authored by the authenticated GitHub
 * account during an AI working session as a tool-generated comment.
 *
 * During a session, the AI agent can post free-form status comments through the
 * authenticated account (e.g. "✅ CI now green", "✅ Verification pass"). These
 * are NOT routed through postTrackedComment(), so their IDs were never captured,
 * and they match none of the tool markers. Once issue #1821 made the watch loop
 * trust same-account comments as human feedback, the very next iteration
 * re-detected these comments as fresh feedback and triggered an endless
 * auto-restart loop until the limit was hit.
 *
 * Because the authenticated account is busy running the AI for the whole
 * session window, any comment it authored within that window is the tool's own,
 * not human feedback. Tracking those IDs makes checkForNonBotComments filter
 * them by ID regardless of timestamps — a defense that also survives clock skew
 * between the local clock and GitHub's `created_at` (which a purely
 * time-based cutoff cannot).
 *
 * Best-effort by design: this function never throws. Lookup, fetch and parse
 * failures result in fewer (or zero) tracked IDs, and unexpected errors are
 * passed to reportError().
 *
 * Only comments that can be positively placed inside the window are tracked:
 * - an unparseable `sinceTime` tracks nothing, since without a window start
 *   every historical comment of the account would otherwise qualify;
 * - comments without an `id` or without a parseable `created_at` are skipped.
 *
 * @param {string} owner - Repository owner
 * @param {string} repo - Repository name
 * @param {number} prNumber - Pull request number
 * @param {number} issueNumber - Issue number (may equal prNumber)
 * @param {Date|string|number} sinceTime - Start of the session window
 * @param {Function} commandRunner - Tagged-template command runner, injectable for tests
 * @param {Object} options
 * @param {boolean} [options.verbose=false]
 * @param {string} [options.currentUser] - Pre-resolved authenticated login (skips the `gh api user` call)
 * @returns {Promise<string[]>} Newly tracked comment IDs (as strings)
 */
export const trackAuthenticatedUserCommentsSince = async (owner, repo, prNumber, issueNumber, sinceTime, commandRunner = $, options = {}) => {
  const { verbose = false, currentUser: providedUser } = options;
  const trackedIds = [];

  try {
    const since = sinceTime instanceof Date ? sinceTime : new Date(sinceTime);
    const sinceMs = since.getTime();
    if (Number.isNaN(sinceMs)) {
      // Comparisons against an Invalid Date are always false, which would let
      // every comment through the lower-bound check below. Refuse instead.
      if (verbose) {
        console.log('[VERBOSE] Skipping session comment tracking: session start time is not a valid date');
      }
      return trackedIds;
    }

    let currentUser = providedUser || null;
    if (!currentUser) {
      try {
        const userResult = await commandRunner`gh api user --jq .login`;
        if (userResult.code === 0) {
          currentUser = userResult.stdout.toString().trim();
        }
      } catch {
        // Without the authenticated login we cannot attribute comments; bail out.
      }
    }
    if (!currentUser) return trackedIds;

    // Fetch one comments endpoint; any failure yields an empty list so that a
    // single bad endpoint does not prevent tracking comments from the others.
    const fetchComments = async path => {
      try {
        const result = await commandRunner`gh api ${path} --paginate`;
        if (result.code === 0 && result.stdout) {
          const parsed = JSON.parse(result.stdout.toString() || '[]');
          // A non-array payload (e.g. an error object) is not a comment list.
          return Array.isArray(parsed) ? parsed : [];
        }
      } catch {
        // Ignore fetch/parse failures for an individual endpoint.
      }
      return [];
    };

    const prComments = await fetchComments(`repos/${owner}/${repo}/issues/${prNumber}/comments`);
    const prReviewComments = await fetchComments(`repos/${owner}/${repo}/pulls/${prNumber}/comments`);
    let issueComments = [];
    if (issueNumber && issueNumber !== prNumber) {
      issueComments = await fetchComments(`repos/${owner}/${repo}/issues/${issueNumber}/comments`);
    }

    const allComments = [...prComments, ...prReviewComments, ...issueComments];
    for (const comment of allComments) {
      const login = comment?.user?.login;
      if (!login || login !== currentUser) continue;
      // An entry without an ID cannot be filtered by ID later; skip it.
      if (comment.id === undefined || comment.id === null) continue;
      // Inclusive lower bound: a comment posted at the exact session start is
      // still the tool's own. created_at uses GitHub's clock, so allow equality.
      const createdMs = new Date(comment.created_at).getTime();
      if (Number.isNaN(createdMs) || createdMs < sinceMs) continue;
      if (isToolTrackedCommentId(comment.id)) continue;
      trackToolCommentId(comment.id);
      trackedIds.push(String(comment.id));
      if (verbose) {
        console.log(`[VERBOSE] Tracking authenticated-user session comment ${comment.id} from ${login} at ${comment.created_at}`);
      }
    }
  } catch (error) {
    reportError(error, {
      context: 'track_authenticated_user_comments',
      owner,
      repo,
      prNumber,
      operation: 'track_session_comments',
    });
  }

  return trackedIds;
};
409
+
295
410
  /**
296
411
  * Get the reasons why PR is not mergeable
297
412
  * Issue #1314: Comprehensive CI/CD status handling covering all possible states:
@@ -53,7 +53,7 @@ import { limitReset } from './config.lib.mjs';
53
53
 
54
54
  // Import helper functions extracted for file size management (Issue #1593)
55
55
  const autoMergeHelpers = await import('./solve.auto-merge-helpers.lib.mjs');
56
- const { checkForExistingComment, checkForNonBotComments, getMergeBlockers } = autoMergeHelpers;
56
+ const { checkForExistingComment, checkForNonBotComments, getMergeBlockers, trackAuthenticatedUserCommentsSince, nextMonotonicCheckTime } = autoMergeHelpers;
57
57
 
58
58
  // Issue #1769: cancelled/stale CI re-run failures need a human action stop, not polling forever.
59
59
  const cancelledCiRerunLib = await import('./cancelled-ci-rerun.lib.mjs');
@@ -1031,6 +1031,26 @@ No further AI sessions will be started automatically for this run. Please review
1031
1031
  await log(formatAligned('✅', `${argv.tool.toUpperCase()} execution completed:`, 'Checking if PR is now mergeable...'));
1032
1032
  }
1033
1033
 
1034
+ // Issue #1827: Register every comment the authenticated account posted
1035
+ // during this AI session (free-form status comments like "✅ CI now
1036
+ // green" the agent writes itself, which bypass postTrackedComment and
1037
+ // match no tool marker). Tracking their IDs stops the next iteration's
1038
+ // checkForNonBotComments from mistaking them for fresh human feedback.
1039
+ try {
1040
+ const tracked = await trackAuthenticatedUserCommentsSince(owner, repo, prNumber, issueNumber, iterationStartTime, $, { verbose: argv.verbose });
1041
+ if (argv.verbose && tracked.length > 0) {
1042
+ await log(formatAligned('🧷', 'Tracked own session comments:', `${tracked.length} (won't count as new feedback)`, 2));
1043
+ }
1044
+ } catch (trackError) {
1045
+ reportError(trackError, {
1046
+ context: 'track_authenticated_user_session_comments',
1047
+ prNumber,
1048
+ owner,
1049
+ repo,
1050
+ operation: 'track_session_comments',
1051
+ });
1052
+ }
1053
+
1034
1054
  // Update last check time after restart
1035
1055
  lastCheckTime = new Date();
1036
1056
  } else if (blockers.length > 0) {
@@ -1071,8 +1091,16 @@ No further AI sessions will be started automatically for this run. Please review
1071
1091
  await log(formatAligned('', 'No action needed', 'Continuing to monitor...', 2));
1072
1092
  }
1073
1093
 
1074
- // Update last check time
1075
- lastCheckTime = currentTime;
1094
+ // Issue #1827: Advance the check window monotonically — never move it
1095
+ // backwards. In the restart branch above, lastCheckTime was already set
1096
+ // to a moment *after* the AI session (and after any comments the agent
1097
+ // posted). currentTime was captured at the *start* of this iteration,
1098
+ // before the AI ran, so assigning it unconditionally here would rewind
1099
+ // the window and re-detect the agent's own comments as new feedback
1100
+ // (the root cause of the auto-restart loop in #1827). In the non-restart
1101
+ // branches lastCheckTime is still the previous iteration's value, which
1102
+ // is < currentTime, so this correctly advances it.
1103
+ lastCheckTime = nextMonotonicCheckTime(lastCheckTime, currentTime);
1076
1104
  } catch (error) {
1077
1105
  reportError(error, {
1078
1106
  context: 'watch_until_mergeable',
@@ -115,6 +115,11 @@ export const SOLVE_OPTION_DEFINITIONS = {
115
115
  description: '[EXPERIMENTAL] Temporarily copy AGENTS.md/agents.md to CLAUDE.md while Claude runs, then remove the temporary copy',
116
116
  default: false,
117
117
  },
118
+ 'do-not-shutdown-in-the-middle-of-working-session': {
119
+ type: 'boolean',
120
+ description: '[EXPERIMENTAL] On interrupt (CTRL+C / SIGTERM), do not abort the AI tool mid-run. If an AI working session is in progress, wait for it to finish, auto-commit any uncommitted changes, then shut down gracefully. If solve is only idle-waiting (e.g. for CI/CD), stop immediately. A second interrupt force-stops. hive passes this automatically to every /solve worker.',
121
+ default: false,
122
+ },
118
123
  'attach-logs': {
119
124
  type: 'boolean',
120
125
  description: 'Upload the solution draft log file to the Pull Request on completion (⚠️ WARNING: May expose sensitive data)',
@@ -7,6 +7,9 @@
7
7
  import { reportError } from './sentry.lib.mjs';
8
8
 
9
9
  import { wrapDollarWithGhRetry as _wrapDollarWithGhRetry } from './github-rate-limit.lib.mjs'; // rate-limit marker (#1726): gh API calls flow through $ wrapped by caller
10
+ // Issue #1827: tool-generated comments (markers + in-memory tracked IDs) must
11
+ // not count as feedback in watch/continue mode, mirroring checkForNonBotComments.
12
+ import { isToolGeneratedComment, isToolTrackedCommentId } from './tool-comments.lib.mjs';
10
13
  export const detectAndCountFeedback = async params => {
11
14
  const { prNumber, branchName, owner, repo, issueNumber, isContinueMode, argv, mergeStateStatus, prState, workStartTime, log, formatAligned, cleanErrorMessage, $, repositoryPath = null } = params;
12
15
 
@@ -93,6 +96,14 @@ export const detectAndCountFeedback = async params => {
93
96
  // Define log patterns to filter out comments containing logs from solve.mjs
94
97
  const logPatterns = [/📊.*Log file|solution\s+draft.*log/i, /🔗.*Link:|💻.*Session:/i, /Generated with.*solve\.mjs/i, /Session ID:|Log file available:/i];
95
98
 
99
+ // Issue #1827: A comment is tool-generated if its ID was tracked in
100
+ // memory during this run (system status comments AND the agent's own
101
+ // session comments) or if its body carries a known tool marker (catches
102
+ // comments from previous runs whose IDs are gone). These must never
103
+ // count as feedback — otherwise the agent's own "CI now green" / status
104
+ // comments trigger an endless restart loop (see PR link-foundation/rust-web-box#34).
105
+ const isToolComment = comment => isToolTrackedCommentId(comment.id) || isToolGeneratedComment(comment.body);
106
+
96
107
  // Count new PR comments after last commit (both code review comments and conversation comments)
97
108
  let prReviewComments = [];
98
109
  let prConversationComments = [];
@@ -112,6 +123,10 @@ export const detectAndCountFeedback = async params => {
112
123
 
113
124
  // Helper function to filter comments based on time and log patterns
114
125
  const filterComment = comment => {
126
+ // Issue #1827: never count tool-generated comments as feedback.
127
+ if (isToolComment(comment)) {
128
+ return false;
129
+ }
115
130
  const commentTime = new Date(comment.created_at);
116
131
  const isAfterCommit = commentTime > lastCommitTime;
117
132
  const isNotLogPattern = !logPatterns.some(pattern => pattern.test(comment.body || ''));
@@ -145,6 +160,10 @@ export const detectAndCountFeedback = async params => {
145
160
  if (issueCommentsResult.code === 0) {
146
161
  const issueComments = JSON.parse(issueCommentsResult.stdout.toString());
147
162
  const filteredIssueComments = issueComments.filter(comment => {
163
+ // Issue #1827: never count tool-generated comments as feedback.
164
+ if (isToolComment(comment)) {
165
+ return false;
166
+ }
148
167
  const commentTime = new Date(comment.created_at);
149
168
  const isAfterCommit = commentTime > lastCommitTime;
150
169
  const isNotLogPattern = !logPatterns.some(pattern => pattern.test(comment.body || ''));
package/src/solve.mjs CHANGED
@@ -48,6 +48,8 @@ const { runAutoEnsureRequirements } = await import('./solve.auto-ensure.lib.mjs'
48
48
  const exitHandler = await import('./exit-handler.lib.mjs');
49
49
  const { initializeExitHandler, installGlobalExitHandlers, safeExit, logActiveHandles } = exitHandler;
50
50
  const { createInterruptWrapper } = await import('./solve.interrupt.lib.mjs');
51
+ // Issue #1823: working-session guard for --do-not-shutdown-in-the-middle-of-working-session.
52
+ const { configureWorkingSession, beginWorkingSession, endWorkingSession } = await import('./working-session.lib.mjs');
51
53
  const getResourceSnapshot = memoryCheck.getResourceSnapshot;
52
54
  const { handleAutoPrCreation } = await import('./solve.auto-pr.lib.mjs');
53
55
  const { setupRepositoryAndClone, verifyDefaultBranchAndStatus } = await import('./solve.repo-setup.lib.mjs');
@@ -148,6 +150,11 @@ const cleanupWrapper = async () => {
148
150
  const interruptWrapper = createInterruptWrapper({ cleanupContext, checkForUncommittedChanges, shouldAttachLogs, attachLogToGitHub, getLogFile, sanitizeLogContent, $, log });
149
151
  initializeExitHandler(getAbsoluteLogPath, log, cleanupWrapper, interruptWrapper, ({ code, reason }) => notifyIssueAboutPrePullRequestFailure({ code, reason, argv, globalState: global, $, log, getLogFile, shouldAttachLogs, attachLogToGitHub, sanitizeLogContent, rawCommand }));
150
152
  installGlobalExitHandlers();
153
+ // Issue #1823: Configure the working-session guard. When the experimental
154
+ // --do-not-shutdown-in-the-middle-of-working-session flag is set (hive passes it to every
155
+ // worker), an interrupt received during an AI working session is deferred: solve lets the AI
156
+ // finish, auto-commits, then shuts down gracefully instead of aborting the AI tool mid-run.
157
+ configureWorkingSession({ enabled: argv['do-not-shutdown-in-the-middle-of-working-session'] === true, log });
151
158
  const markFailureNotificationPosted = targetType => {
152
159
  global.preExitFailureNotificationPosted = true;
153
160
  if (targetType === 'pr') {
@@ -705,6 +712,11 @@ try {
705
712
  // Execute tool command with all prompts and settings
706
713
  let toolResult;
707
714
 
715
+ // Issue #1823: Mark the start of the AI working session. While this is active and the
716
+ // --do-not-shutdown-in-the-middle-of-working-session flag is set, an interrupt (CTRL+C/SIGTERM)
717
+ // is deferred until the AI tool finishes its turn (see exit-handler.lib.mjs + working-session.lib.mjs).
718
+ beginWorkingSession();
719
+
708
720
  // If --use-agent-commander is enabled, use agent-commander for all tools
709
721
  if (argv.useAgentCommander) {
710
722
  // Ensure agent-commander is available
@@ -813,6 +825,24 @@ try {
813
825
  toolResult = claudeResult;
814
826
  }
815
827
 
828
+ // Issue #1823: Mark the end of the AI working session. If a graceful-shutdown interrupt arrived
829
+ // during the session (deferred by the working-session guard), honor it now: auto-commit any
830
+ // uncommitted changes and exit gracefully — only AFTER the AI tool has fully finished its turn.
831
+ const workingSessionState = endWorkingSession();
832
+ if (workingSessionState.shutdownRequested) {
833
+ const shutdownExitCode = workingSessionState.shutdownSignal === 'SIGINT' ? 130 : 143;
834
+ await log('\n🛑 Graceful shutdown requested during the AI working session — the session has finished.', { level: 'warning' });
835
+ await log(' Auto-committing any uncommitted changes, then shutting down...', { level: 'warning' });
836
+ try {
837
+ await interruptWrapper();
838
+ } catch (interruptError) {
839
+ await log(`⚠️ Auto-commit on graceful shutdown failed: ${cleanErrorMessage(interruptError)}`, { level: 'warning' });
840
+ }
841
+ // Graceful shutdown is NOT a failure: skip the pre-exit failure notifier so no spurious
842
+ // "solver failed" comment is posted (issue #1823: no errors on graceful shutdown).
843
+ await safeExit(shutdownExitCode, 'Graceful shutdown after AI working session', { skipPreExit: true });
844
+ }
845
+
816
846
  const { success } = toolResult;
817
847
  let sessionId = toolResult.sessionId;
818
848
  let anthropicTotalCostUSD = toolResult.anthropicTotalCostUSD;
@@ -46,6 +46,12 @@ const { formatAutoIterationLimit, hasReachedAutoIterationLimit, normalizeAutoIte
46
46
  const toolComments = await import('./tool-comments.lib.mjs');
47
47
  const { AUTO_RESTART_MARKER, postTrackedComment } = toolComments;
48
48
 
49
+ // Issue #1827: After each AI session, register the authenticated account's own
50
+ // comments (free-form status updates the agent posts itself) so the next
51
+ // detectAndCountFeedback() call doesn't mistake them for new human feedback.
52
+ const autoMergeHelpers = await import('./solve.auto-merge-helpers.lib.mjs');
53
+ const { trackAuthenticatedUserCommentsSince } = autoMergeHelpers;
54
+
49
55
  // Issue #1728: Per-iteration working session summary attachment helper
50
56
  // Issue #1763: Per-iteration PR ↔ issue link verification (in case the AI
51
57
  // agent overwrites the PR body without a closing keyword and the iteration
@@ -340,6 +346,24 @@ export const watchForFeedback = async params => {
340
346
  global.previousSessionId = toolResult.sessionId;
341
347
  }
342
348
 
349
+ // Issue #1827: Track the authenticated account's own comments posted
350
+ // during this session window so they are filtered (by ID) on the next
351
+ // feedback check instead of re-triggering a restart.
352
+ try {
353
+ const tracked = await trackAuthenticatedUserCommentsSince(owner, repo, prNumber, issueNumber, iterationStartTime, $, { verbose: argv.verbose });
354
+ if (argv.verbose && tracked.length > 0) {
355
+ await log(formatAligned('🧷', 'Tracked own session comments:', `${tracked.length} (won't count as feedback)`, 2));
356
+ }
357
+ } catch (trackError) {
358
+ reportError(trackError, {
359
+ context: 'track_authenticated_user_session_comments',
360
+ prNumber,
361
+ owner,
362
+ repo,
363
+ operation: 'track_session_comments',
364
+ });
365
+ }
366
+
343
367
  if (!toolResult.success) {
344
368
  // Check if this is an API error using shared utility
345
369
  if (isApiError(toolResult)) {
@@ -0,0 +1,166 @@
1
/**
 * Issue #1823: "AI working session" guard for solve's graceful shutdown.
 *
 * An *AI working session* is the window during which the AI tool child
 * (claude/codex/gemini/opencode/qwen/agent) is actively running and streaming. When the
 * experimental flag `--do-not-shutdown-in-the-middle-of-working-session` is enabled:
 *
 *   - An interrupt (CTRL+C / SIGINT, or SIGTERM) received DURING a protected session is
 *     *deferred*: solve lets the AI session finish, auto-commits any uncommitted changes, then
 *     shuts down gracefully. It does NOT abort the AI tool mid-run.
 *   - An interrupt received OUTSIDE a protected session (e.g. solve is only idle-waiting for
 *     CI/CD) stops solve immediately.
 *   - A SECOND interrupt force-stops now: the active AI child is killed and solve exits.
 *
 * Background (validated empirically — see experiments/command-stream-signals.mjs):
 * command-stream installs only a SIGINT handler that forwards SIGINT to the active AI child's
 * process group (killing it); it has NO SIGTERM handler. hive therefore forwards the operator's
 * CTRL+C to each /solve worker as SIGTERM, which command-stream ignores — so the AI child is
 * never collaterally killed by the library and this module + exit-handler decide what to do.
 * For the force path (a second interrupt) we *reuse* command-stream's own SIGINT handler to
 * kill the active child's process group, guarding against its embedded process.exit(130) so we
 * can still auto-commit before exiting.
 *
 * This module holds module-level state on purpose: it is a per-process singleton, mirroring how
 * exit-handler.lib.mjs and command-stream manage global signal state. The module itself only
 * records state and offers forceKillActiveChildren(); deciding when to defer, exit or force is
 * left to its callers.
 */

// Whether --do-not-shutdown-in-the-middle-of-working-session is in effect.
let flagEnabled = false;
// Optional logger supplied through configureWorkingSession(); used only by trace().
let logFn = null;
// True between beginWorkingSession() and endWorkingSession().
let protectedSessionActive = false;
// Set by the first requestShutdown() call.
let shutdownRequested = false;
// Signal name recorded by the first requestShutdown() call.
let shutdownSignal = null;
// Set when requestShutdown() is called again after a shutdown was already requested.
let forceRequested = false;

// Internal helper names that appear in the source text of command-stream's SIGINT handler.
const COMMAND_STREAM_SIGINT_MARKERS = ['findActiveRunners', 'forwardSigintToRunners', 'handleSigintExit', 'activeProcessRunners'];

/**
 * Heuristic to recognise command-stream's SIGINT listener among process SIGINT listeners.
 * Matches the same internal helper names command-stream itself uses for self-detection
 * (see node_modules/command-stream .../$.state.mjs isOurHandlerInstalled()).
 * @param {Function} listener
 * @returns {boolean} true when the listener's source text contains one of the marker names;
 *   false for anything that is not a function.
 */
const isCommandStreamSigintListener = listener => {
  if (typeof listener !== 'function') {
    return false;
  }
  const source = listener.toString();
  return COMMAND_STREAM_SIGINT_MARKERS.some(marker => source.includes(marker));
};

/**
 * Internal verbose tracer for issue #1823 shutdown diagnostics. No-op unless a logger was
 * provided via configureWorkingSession(). Fire-and-forget: logging must never break shutdown.
 * @param {string} message
 */
const trace = message => {
  if (typeof logFn !== 'function') {
    return;
  }
  try {
    const result = logFn(message, { verbose: true });
    // The logger may be async; swallow its rejection so it cannot surface as an
    // unhandled rejection in the middle of shutdown.
    if (result && typeof result.catch === 'function') {
      result.catch(() => {});
    }
  } catch {
    // Diagnostics must never interfere with the shutdown path.
  }
};

/**
 * Configure the working-session guard. Call once at solve startup.
 * @param {object} opts
 * @param {boolean} opts.enabled - Whether --do-not-shutdown-in-the-middle-of-working-session is set.
 * @param {Function} [opts.log] - Optional async logger.
 */
export const configureWorkingSession = ({ enabled = false, log = null } = {}) => {
  flagEnabled = !!enabled;
  logFn = log;
};

/** @returns {boolean} Whether the guard flag was enabled via configureWorkingSession(). */
export const isFlagEnabled = () => flagEnabled;
/** @returns {boolean} Whether a protected AI working session is currently marked active. */
export const isWorkingSessionActive = () => protectedSessionActive;
/** @returns {boolean} Whether requestShutdown() has been called at least once. */
export const isShutdownRequested = () => shutdownRequested;
/** @returns {string|null} Signal recorded by the first requestShutdown() call, if any. */
export const getShutdownSignal = () => shutdownSignal;
/** @returns {boolean} Whether requestShutdown() was called again after the first request. */
export const isForceRequested = () => forceRequested;

/** Mark the start of a protected AI working session. */
export const beginWorkingSession = () => {
  protectedSessionActive = true;
};

/**
 * Mark the end of a protected AI working session.
 * The shutdown flags are reported but not cleared, so the caller can act on them.
 * @returns {{shutdownRequested: boolean, shutdownSignal: string|null, forceRequested: boolean}}
 */
export const endWorkingSession = () => {
  protectedSessionActive = false;
  return { shutdownRequested, shutdownSignal, forceRequested };
};

/**
 * Record a graceful-shutdown request received during a protected session.
 * The signal of the first request is the one that is remembered; a repeat only raises the
 * force flag.
 * @param {string} signal - 'SIGINT' | 'SIGTERM'
 * @returns {{first: boolean}} first=true the first time; false on a repeat (operator insists → force).
 */
export const requestShutdown = signal => {
  if (shutdownRequested) {
    forceRequested = true;
    trace(`[working-session] repeat ${signal} during protected session → force requested`);
    return { first: false };
  }
  shutdownRequested = true;
  shutdownSignal = signal || shutdownSignal;
  trace(`[working-session] ${shutdownSignal} deferred until the AI working session finishes`);
  return { first: true };
};

/**
 * Force-kill the active AI child process group(s) by reusing command-stream's own SIGINT handler,
 * which forwards SIGINT to every active runner's process group. We temporarily install a no-op
 * SIGINT listener first so command-stream sees "other handlers present" and does NOT call
 * process.exit(130) itself — leaving us in control to auto-commit and exit afterward.
 *
 * Each matching listener is invoked the way Node delivers a real signal: with `process` as the
 * receiver and the signal name as the first argument, so a handler that reads either still
 * behaves as it would for a genuine SIGINT.
 * @returns {number} Count of command-stream listeners invoked (0 if none / no active child).
 */
export const forceKillActiveChildren = () => {
  // Snapshot first: a handler may add or remove listeners while it runs.
  const live = process.listeners('SIGINT').filter(isCommandStreamSigintListener);
  if (live.length === 0) {
    trace('[working-session] force-kill requested but no active command-stream child found');
    return 0;
  }
  trace(`[working-session] force-killing ${live.length} active AI child process group(s)`);
  const noop = () => {};
  process.on('SIGINT', noop); // guarantee listeners.length > 1 → command-stream won't process.exit
  try {
    for (const listener of live) {
      try {
        listener.call(process, 'SIGINT');
      } catch {
        // ignore — child group may already be gone
      }
    }
  } finally {
    // Always drop the placeholder so real SIGINT handling is unchanged afterwards.
    process.removeListener('SIGINT', noop);
  }
  return live.length;
};

/** Reset all module state (used by tests). */
export const resetWorkingSession = () => {
  flagEnabled = false;
  logFn = null;
  protectedSessionActive = false;
  shutdownRequested = false;
  shutdownSignal = null;
  forceRequested = false;
};

export default {
  configureWorkingSession,
  isFlagEnabled,
  isWorkingSessionActive,
  isShutdownRequested,
  getShutdownSignal,
  isForceRequested,
  beginWorkingSession,
  endWorkingSession,
  requestShutdown,
  forceKillActiveChildren,
  resetWorkingSession,
};