npm - @link-assistant/hive-mind - Versions diffs - 1.72.7 → 1.73.1 - Mend

@link-assistant/hive-mind 1.72.7 → 1.73.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/CHANGELOG.md +16 -0
package/package.json +1 -1
package/src/exit-handler.lib.mjs +97 -2
package/src/hive.config.lib.mjs +11 -0
package/src/hive.mjs +58 -57
package/src/hive.shutdown.lib.mjs +161 -0
package/src/interruptible-sleep.lib.mjs +16 -6
package/src/solve.auto-merge-helpers.lib.mjs +116 -1
package/src/solve.auto-merge.lib.mjs +31 -3
package/src/solve.config.lib.mjs +5 -0
package/src/solve.feedback.lib.mjs +19 -0
package/src/solve.mjs +30 -0
package/src/solve.watch.lib.mjs +24 -0
package/src/working-session.lib.mjs +166 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,21 @@
 # @link-assistant/hive-mind
+## 1.73.1
+### Patch Changes
+- df8b776: Stop the auto-restart-until-mergeable and watch loops from treating the AI agent's own session comments (e.g. free-form "CI now green" status updates posted through the authenticated account) as new human feedback, which caused an endless restart loop until the iteration limit (issue #1827). The check window is now advanced monotonically, every comment the authenticated account posts during a session is tracked by ID, and watch-mode feedback counting excludes tool-generated comments by marker and tracked ID.
+## 1.73.0
+### Minor Changes
+- 1cd647d: Fix all errors on graceful shutdown and add an experimental working-session guard.
+  `hive` now fully waits for every in-flight `/solve` to finish before exiting on CTRL+C / `--stop`: signal handling is delegated to a single owner (resolving a double SIGINT-handler race that called `process.exit(130)` and cut the wait short), each solve worker is spawned in its own detached process group so the terminal's SIGINT no longer aborts solve/codex mid-task, and the wait has no time cap. Worker stderr is no longer mislabeled as `ERROR` — the child exit code remains the authoritative failure signal.
+  Building on that, a new experimental `--do-not-shutdown-in-the-middle-of-working-session` option is added to `solve` and enabled by default for `hive`. With it, an interrupt (CTRL+C / SIGTERM) no longer aborts the AI tool mid-run: if an AI working session is in progress, solve finishes it, auto-commits any uncommitted changes, then shuts down gracefully (exit 130/143); if solve is only idle-waiting (e.g. for CI/CD) it stops immediately, and a second interrupt force-stops. `hive` now forwards a controlled SIGTERM to each in-flight `/solve` worker on the first CTRL+C (instead of only waiting) and passes the flag to every worker (opt out with `--no-do-not-shutdown-in-the-middle-of-working-session`). Graceful shutdown is treated as a normal stop, so it no longer posts a spurious "solution draft failed" comment. Standalone `solve` keeps the flag off by default, so its behavior is unchanged except that an interrupt now always auto-commits uncommitted changes before exiting.
 ## 1.72.7
 ### Patch Changes

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@link-assistant/hive-mind",
-  "version": "1.72.7",
+  "version": "1.73.1",
   "description": "AI-powered issue solver and hive mind for collaborative problem solving",
   "main": "src/hive.mjs",
   "type": "module",

package/src/exit-handler.lib.mjs CHANGED Viewed

@@ -6,6 +6,10 @@
  * the process exits, whether due to normal completion, errors, or signals.
  */
+// Issue #1823: working-session guard for --do-not-shutdown-in-the-middle-of-working-session.
+// Static import is safe: working-session.lib.mjs has no heavy deps and does NOT import this module.
+import { isFlagEnabled as isWorkingSessionFlagEnabled, isWorkingSessionActive, requestShutdown as requestWorkingSessionShutdown, forceKillActiveChildren as forceKillWorkingSessionChildren } from './working-session.lib.mjs';
 // Lazy-load Sentry to avoid keeping the event loop alive when not needed
 let Sentry = null;
 const getSentry = async () => {
@@ -30,6 +34,14 @@ let interruptHandlerRan = false;
 let preExitFunction = null;
 let preExitHandlerRan = false;
+// Issue #1823: When an external owner (e.g. hive's gracefulShutdown) takes over signal
+// handling, the global SIGINT/SIGTERM handlers must stand down and NOT call process.exit().
+// Otherwise the global handler's process.exit() races with the external graceful handler
+// and cuts its wait short — the root cause of premature shutdown that aborts an in-flight
+// /solve (and its codex child) mid-turn. Defaults to false to preserve existing behavior
+// for solve.mjs, telegram-bot, and other entry points that rely on the global handlers.
+let signalHandlingDelegated = false;
 /**
  * Initialize the exit handler with required dependencies
  * @param {Function} getLogPath - Function that returns the current log path
@@ -50,6 +62,20 @@ export const setPreExitHandler = preExit => {
   preExitFunction = preExit;
 };
+/**
+ * Issue #1823: Delegate SIGINT/SIGTERM handling to an external graceful shutdown owner.
+ *
+ * When enabled, the global SIGINT/SIGTERM handlers installed by installGlobalExitHandlers()
+ * stand down (return early) instead of calling process.exit(). This lets a caller such as
+ * hive's gracefulShutdown() fully wait for in-progress work (e.g. an executing /solve) to
+ * finish and then exit via safeExit(), without the global handler racing it to process.exit().
+ *
+ * @param {boolean} [enabled=true] - true to delegate (caller owns exit), false to restore default.
+ */
+export const delegateSignalHandling = (enabled = true) => {
+  signalHandlingDelegated = enabled;
+};
 /**
  * Display the exit message with log path
  */
@@ -203,11 +229,17 @@ export const logActiveHandles = async (log = null) => {
 /**
  * Safe exit function that ensures log path is shown
+ *
+ * @param {number} code - Process exit code
+ * @param {string} reason - Human-readable exit reason
+ * @param {object} [options]
+ * @param {boolean} [options.skipPreExit=false] - Issue #1823: skip the pre-exit failure notifier
+ *   (e.g. on graceful shutdown, which is NOT a failure and must not post a "solver failed" comment).
  */
-export const safeExit = async (code = 0, reason = 'Process completed') => {
+export const safeExit = async (code = 0, reason = 'Process completed', { skipPreExit = false } = {}) => {
   await showExitMessage(reason, code);
-  if (code !== 0 && preExitFunction && !preExitHandlerRan) {
+  if (!skipPreExit && code !== 0 && preExitFunction && !preExitHandlerRan) {
     preExitHandlerRan = true;
     try {
       await preExitFunction({ code, reason });
@@ -273,6 +305,34 @@ export const installGlobalExitHandlers = () => {
   // Handle SIGINT (CTRL+C)
   process.on('SIGINT', async () => {
+    // Issue #1823: If an external graceful-shutdown owner is registered, stand down.
+    // That owner (e.g. hive's gracefulShutdown) is responsible for waiting for in-progress
+    // work and exiting via safeExit(). Calling process.exit(130) here would race with it
+    // and cut the wait short — the root cause of the premature shutdown.
+    if (signalHandlingDelegated) {
+      return;
+    }
+    // Issue #1823: With --do-not-shutdown-in-the-middle-of-working-session, defer shutdown while
+    // an AI working session is in progress so the AI tool is never aborted mid-run.
+    if (isWorkingSessionFlagEnabled() && isWorkingSessionActive()) {
+      const { first } = requestWorkingSessionShutdown('SIGINT');
+      if (first) {
+        if (logFunction) {
+          await logFunction('\n⚠️  Shutdown requested (CTRL+C). Finishing the current AI working session, then auto-committing and stopping. Press CTRL+C again to force-stop now.', { level: 'warning' });
+        }
+        return; // defer — solve will auto-commit + exit once the session ends
+      }
+      // Second interrupt → operator insists. Force-kill the AI child group, then fall through to
+      // auto-commit + exit below.
+      if (logFunction) {
+        await logFunction('\n⚠️  Second interrupt — force-stopping the AI working session now.', { level: 'warning' });
+      }
+      try {
+        forceKillWorkingSessionChildren();
+      } catch {
+        // ignore — child may already be gone
+      }
+    }
     // Run interrupt handler first (auto-commit, log upload, etc.) — guard against double invocation
     if (interruptFunction && !interruptHandlerRan) {
       interruptHandlerRan = true;
@@ -303,6 +363,40 @@ export const installGlobalExitHandlers = () => {
   // Handle SIGTERM
   process.on('SIGTERM', async () => {
+    // Issue #1823: Stand down when an external graceful-shutdown owner is registered.
+    if (signalHandlingDelegated) {
+      return;
+    }
+    // Issue #1823: hive forwards the operator's CTRL+C to each /solve worker as SIGTERM (which
+    // command-stream ignores). With --do-not-shutdown-in-the-middle-of-working-session, defer
+    // shutdown while an AI working session is in progress so the AI tool finishes its turn.
+    if (isWorkingSessionFlagEnabled() && isWorkingSessionActive()) {
+      const { first } = requestWorkingSessionShutdown('SIGTERM');
+      if (first) {
+        if (logFunction) {
+          await logFunction('\n⚠️  Shutdown requested. Finishing the current AI working session, then auto-committing and stopping. Send the signal again to force-stop now.', { level: 'warning' });
+        }
+        return; // defer — solve will auto-commit + exit once the session ends
+      }
+      if (logFunction) {
+        await logFunction('\n⚠️  Second signal — force-stopping the AI working session now.', { level: 'warning' });
+      }
+      try {
+        forceKillWorkingSessionChildren();
+      } catch {
+        // ignore — child may already be gone
+      }
+    }
+    // Issue #1823: Auto-commit uncommitted changes on SIGTERM too (previously only SIGINT did).
+    // This ensures graceful shutdown preserves work in ALL signal paths.
+    if (interruptFunction && !interruptHandlerRan) {
+      interruptHandlerRan = true;
+      try {
+        await interruptFunction();
+      } catch {
+        // Ignore interrupt handler errors
+      }
+    }
     if (cleanupFunction) {
       try {
         await cleanupFunction();
@@ -377,4 +471,5 @@ export const installGlobalExitHandlers = () => {
 export const resetExitHandler = () => {
   exitMessageShown = false;
   interruptHandlerRan = false;
+  signalHandlingDelegated = false;
 };

package/src/hive.config.lib.mjs CHANGED Viewed

@@ -50,6 +50,17 @@ const HIVE_CUSTOM_SOLVE_OPTIONS = {
     choices: ['claude', 'opencode', 'codex', 'agent', 'qwen', 'gemini'],
     default: 'claude',
   },
+  // Issue #1823: hive enables the experimental working-session guard for every /solve worker by
+  // default. This is the ONLY change to how CTRL+C behaves in the hive workflow: instead of
+  // aborting the AI tool mid-run, a forwarded interrupt lets the worker finish its current AI
+  // working session, auto-commit, then shut down gracefully. solve keeps default:false (standalone
+  // behavior unchanged); hive overrides the default to true so the loop below forwards the flag.
+  // Operators can opt out with --no-do-not-shutdown-in-the-middle-of-working-session.
+  'do-not-shutdown-in-the-middle-of-working-session': {
+    type: 'boolean',
+    description: '[EXPERIMENTAL] On CTRL+C, let each /solve worker finish its current AI working session and auto-commit before shutting down, instead of aborting it mid-run. If a worker is only idle-waiting (e.g. for CI/CD), it stops immediately. Press CTRL+C again to force-stop. Enabled by default for the hive workflow.',
+    default: true,
+  },
 };
 // Compute the set of solve options that hive auto-registers from SOLVE_OPTION_DEFINITIONS.

package/src/hive.mjs CHANGED Viewed

@@ -35,6 +35,7 @@ if (earlyArgs.includes('--help') || earlyArgs.includes('-h')) {
 }
 export { createYargsConfig } from './hive.config.lib.mjs';
 import { isDirectExecution, withTimeout } from './hive.bootstrap.lib.mjs';
+import { createShutdownManager } from './hive.shutdown.lib.mjs';
 const isRunningDirectly = isDirectExecution(process.argv[1], import.meta.url);
 if (isRunningDirectly) {
   console.log('🐝 Hive Mind - AI-powered issue solver');
@@ -88,7 +89,7 @@ if (isRunningDirectly) {
     const memCheck = await import('./memory-check.mjs');
     const { checkSystem } = memCheck;
     const exitHandler = await import('./exit-handler.lib.mjs');
-    const { initializeExitHandler, installGlobalExitHandlers, safeExit } = exitHandler;
+    const { initializeExitHandler, installGlobalExitHandlers, safeExit, delegateSignalHandling } = exitHandler;
     const sentryLib = await import('./sentry.lib.mjs');
     const { initializeSentry, withSentry, addBreadcrumb, reportError } = sentryLib;
     const graphqlLib = await import('./github.graphql.lib.mjs');
@@ -709,8 +710,10 @@ if (isRunningDirectly) {
     // Create global queue instance
     const issueQueue = new IssueQueue();
-    // Global shutdown state to prevent duplicate shutdown messages
-    let isShuttingDown = false;
+    // Issue #1823: Track in-flight solve child processes. A *first* interrupt forwards a
+    // controlled SIGTERM to each (they run in their own detached process group, so the
+    // terminal's SIGINT never reaches them); a *second* interrupt force-kills the groups.
+    const activeSolveChildren = new Set();
     // Worker function to process issues from queue
     async function worker(workerId) {
@@ -739,6 +742,8 @@ if (isRunningDirectly) {
         // Track if this issue failed
         let issueFailed = false;
+        // Issue #1823: Track a graceful shutdown stop so it is neither failed nor completed.
+        let gracefulStop = false;
         // Process the issue multiple times if needed
         for (let prNum = 1; prNum <= argv.pullRequestsPerIssue; prNum++) {
@@ -811,8 +816,17 @@ if (isRunningDirectly) {
               const child = spawn(solveCommand, args, {
                 stdio: ['pipe', 'pipe', 'pipe'],
                 env: process.env,
+                // Issue #1823: run solve in its own process group so a terminal SIGINT (or the
+                // \003 `$ --stop`/screen injects) hits only hive, not solve+codex. hive instead
+                // forwards a controlled SIGTERM (see gracefulShutdown). stdio stays piped and we
+                // must NOT unref() — hive keeps waiting. See docs/case-studies/issue-1823.
+                detached: true,
               });
+              // Issue #1823: register the in-flight child for optional force-kill on a 2nd signal
+              activeSolveChildren.add(child);
+              log(`   🧒 Spawned ${solveCommand} worker-${workerId} (pid ${child.pid}, detached process group)`, { verbose: true }).catch(() => {});
               // Handle stdout data - stream output in real-time
               child.stdout.on('data', data => {
                 const lines = data.toString().split('\n');
@@ -829,16 +843,20 @@ if (isRunningDirectly) {
                 }
               });
-              // Handle stderr data - stream errors in real-time
+              // Handle stderr data - stream output in real-time.
+              // Issue #1823: Do NOT blanket-tag stderr as ERROR — solve relays non-error
+              // diagnostics there (codex DEBUG/INFO traces, git branch messages, etc.), which
+              // produced hundreds of false errors. The authoritative failure signal is the
+              // child's non-zero exit code (below), so log stderr at default level.
               child.stderr.on('data', data => {
                 const lines = data.toString().split('\n');
                 for (const line of lines) {
                   if (line.trim()) {
-                    log(`   [${solveCommand} worker-${workerId} ERROR] ${line}`, { level: 'error' }).catch(logError => {
+                    log(`   [${solveCommand} worker-${workerId} stderr] ${line}`).catch(logError => {
                       reportError(logError, {
                         context: 'worker_stderr_log',
                         workerId,
-                        operation: 'log_error',
+                        operation: 'log_stderr',
                       });
                     });
                   }
@@ -847,12 +865,14 @@ if (isRunningDirectly) {
               // Handle process completion
               child.on('close', code => {
+                activeSolveChildren.delete(child); // Issue #1823: no longer in-flight
                 exitCode = code || 0;
                 resolve();
               });
               // Handle process errors
               child.on('error', error => {
+                activeSolveChildren.delete(child); // Issue #1823: no longer in-flight
                 exitCode = 1;
                 log(`   [${solveCommand} worker-${workerId} ERROR] Process error: ${error.message}`, {
                   level: 'error',
@@ -871,6 +891,13 @@ if (isRunningDirectly) {
             if (exitCode === 0) {
               await log(`   ✅ Worker ${workerId} completed ${issueUrl} (${duration}s)`);
+            } else if (!issueQueue.isRunning && (exitCode === 130 || exitCode === 143)) {
+              // Issue #1823: during shutdown, solve auto-commits and exits 130/143 — a graceful
+              // stop, NOT a failure. Don't throw/post an error; leave the issue in "processing"
+              // (neither completed nor failed) since work was cut short. See case-study issue-1823.
+              await log(`   🛑 Worker ${workerId} stopped gracefully during shutdown on ${issueUrl} (exit ${exitCode}, ${duration}s)`);
+              gracefulStop = true;
+              break; // stop processing more PRs for this issue
             } else {
               throw new Error(`${solveCommand} exited with code ${exitCode}`);
             }
@@ -895,8 +922,10 @@ if (isRunningDirectly) {
           }
         }
-        // Only mark as completed if it didn't fail
-        if (!issueFailed) {
+        // Only mark as completed if it didn't fail and wasn't gracefully stopped mid-shutdown.
+        // Issue #1823: a graceful stop is neither a success nor a failure — leave it in
+        // "processing" so it is not miscounted as completed (which would also trigger cleanup).
+        if (!issueFailed && !gracefulStop) {
           issueQueue.markCompleted(issueUrl);
         }
@@ -1384,55 +1413,27 @@ if (isRunningDirectly) {
       await log(`   📁 Full log file: ${absoluteLogPath}`);
     }
-    // Graceful shutdown handler
-    async function gracefulShutdown(signal) {
-      if (isShuttingDown) {
-        return; // Prevent duplicate shutdown messages
-      }
-      isShuttingDown = true;
-      try {
-        await log(`\n\n🛑 Received ${signal} signal, shutting down gracefully...`);
-        // Stop the queue and wait for workers to finish
-        issueQueue.stop();
-        // Give workers a moment to finish their current tasks
-        const stats = issueQueue.getStats();
-        if (stats.processing > 0) {
-          await log(`   ⏳ Waiting for ${stats.processing} worker(s) to finish current tasks...`);
-          // Wait up to 10 seconds for workers to finish
-          const maxWaitTime = 10000;
-          const startTime = Date.now();
-          while (issueQueue.getStats().processing > 0 && Date.now() - startTime < maxWaitTime) {
-            await new Promise(resolve => setTimeout(resolve, 500));
-          }
-        }
-        await Promise.all(issueQueue.workers);
-        // Perform cleanup if enabled and there were successful completions
-        const finalStats = issueQueue.getStats();
-        if (finalStats.completed > 0) {
-          await cleanupTempDirectories(argv);
-        }
-        await log('   ✅ Shutdown complete');
-        await log(`   📁 Full log file: ${absoluteLogPath}`);
-      } catch (error) {
-        reportError(error, {
-          context: 'monitor_issues_shutdown',
-          operation: 'cleanup_and_exit',
-        });
-        await log(`   ⚠️  Error during shutdown: ${cleanErrorMessage(error)}`, { level: 'error' });
-        await log(`   📁 Full log file: ${absoluteLogPath}`);
-      }
-      await safeExit(0, 'Process completed');
-    }
-    // Handle graceful shutdown
+    // Issue #1823: Graceful-shutdown + force-kill logic lives in hive.shutdown.lib.mjs.
+    // gracefulShutdown waits (uncapped) for in-flight solve workers to finish on the first
+    // interrupt; on a second interrupt it force-kills their detached process groups.
+    const { gracefulShutdown } = createShutdownManager({
+      log,
+      safeExit,
+      reportError,
+      cleanErrorMessage,
+      cleanupTempDirectories,
+      issueQueue,
+      argv,
+      absoluteLogPath,
+      activeSolveChildren,
+    });
+    // Handle graceful shutdown.
+    // Issue #1823: Tell the global exit handler (installed earlier via installGlobalExitHandlers)
+    // to stand down on SIGINT/SIGTERM so it does not call process.exit() and race us. From here
+    // on, gracefulShutdown is the SOLE owner of these signals: it waits for in-progress solve
+    // worker(s) to finish and then exits via safeExit().
+    delegateSignalHandling(true);
     process.on('SIGINT', () => gracefulShutdown('interrupt'));
     process.on('SIGTERM', () => gracefulShutdown('termination'));

package/src/hive.shutdown.lib.mjs ADDED Viewed

@@ -0,0 +1,161 @@
+/**
+ * Issue #1823: Graceful-shutdown manager for the hive command.
+ *
+ * Extracted from hive.mjs so the shutdown logic stays focused and independently testable
+ * (and to keep hive.mjs within the repo's max-lines lint budget).
+ *
+ * Behavior contract (see issue #1823):
+ *   - On the FIRST interrupt (SIGINT/SIGTERM, or the \003 that `$ --stop`/screen injects),
+ *     hive stops the queue, forwards SIGTERM to each in-flight `/solve` worker process
+ *     (positive PID, not its group) and waits — without any time cap — for every worker to
+ *     exit, then exits 0. The worker's own signal handler decides how quickly it winds down.
+ *   - On a SECOND interrupt (operator insists on stopping now), hive sends SIGTERM to the
+ *     in-flight solve process group(s) — negative PID, so codex and any grandchildren are
+ *     signalled too — and exits 130 immediately.
+ *
+ * @param {object} deps - Injected hive-scope dependencies.
+ * @param {Function} deps.log - Async logger (matches hive's log()).
+ * @param {Function} deps.safeExit - Async exit helper from exit-handler.lib.mjs.
+ * @param {Function} deps.reportError - Sentry error reporter.
+ * @param {Function} deps.cleanErrorMessage - Formats an error for logging.
+ * @param {Function} deps.cleanupTempDirectories - Cleans temp dirs after successful runs.
+ * @param {object}   deps.issueQueue - The producer/consumer queue (stop/getStats/workers).
+ * @param {object}   deps.argv - Parsed CLI args (passed through to cleanup).
+ * @param {string}   deps.absoluteLogPath - Resolved log file path (for the final log line).
+ * @param {Set}      deps.activeSolveChildren - Live set of in-flight solve child processes.
+ * @returns {{ gracefulShutdown: Function, forceKillActiveSolveChildren: Function, forwardShutdownToActiveSolveChildren: Function }}
+ */
+export const createShutdownManager = ({ log, safeExit, reportError, cleanErrorMessage, cleanupTempDirectories, issueQueue, argv, absoluteLogPath, activeSolveChildren }) => {
+  // Global shutdown state to prevent duplicate shutdown messages / re-entrancy.
+  let isShuttingDown = false;
+  // Issue #1823: Forward the operator's interrupt to each in-flight solve worker as SIGTERM,
+  // signalling the solve PROCESS itself (positive PID), NOT its process group (negative PID).
+  // Rationale (validated — see experiments/command-stream-signals.mjs): command-stream installs
+  // only a SIGINT handler and ignores SIGTERM, so signalling solve with SIGTERM never collaterally
+  // kills the AI child mid-turn. solve's own session-aware handler then decides what to do:
+  //   - if an AI working session is in progress, it finishes it, auto-commits, and exits 143;
+  //   - if it is only idle-waiting (e.g. for CI/CD), it stops immediately.
+  // This implements "send CTRL+C to solve command also" while still letting the AI session finish.
+  async function forwardShutdownToActiveSolveChildren() {
+    for (const child of activeSolveChildren) {
+      if (!child || child.pid == null) {
+        continue;
+      }
+      try {
+        process.kill(child.pid, 'SIGTERM'); // positive pid → just the solve process, not its group
+      } catch (signalError) {
+        await log(`   ⚠️  Could not forward SIGTERM to solve (pid ${child.pid}): ${signalError.message}`, {
+          verbose: true,
+        });
+      }
+    }
+  }
+  // Issue #1823: Force-kill all in-flight detached solve children (and their codex
+  // descendants) by signalling their process groups. Used only when the operator insists on
+  // an immediate exit (a SECOND interrupt). A negative PID targets the whole process group,
+  // so this also terminates codex and any grandchildren spawned by solve.
+  async function forceKillActiveSolveChildren(signalName = 'SIGTERM') {
+    for (const child of activeSolveChildren) {
+      if (!child || child.pid == null) {
+        continue;
+      }
+      try {
+        process.kill(-child.pid, signalName); // negative pid → whole process group
+      } catch (killError) {
+        // The group may already be gone; fall back to signalling just the child.
+        try {
+          child.kill(signalName);
+        } catch {
+          // Child already exited — nothing to do.
+        }
+        await log(`   ⚠️  Could not signal solve process group (pid ${child.pid}): ${killError.message}`, {
+          verbose: true,
+        });
+      }
+    }
+  }
+  // Graceful shutdown handler.
+  async function gracefulShutdown(signal) {
+    if (isShuttingDown) {
+      // Issue #1823: A second interrupt while already shutting down means the operator wants
+      // to stop NOW. Force-kill the in-flight solve process group(s) and exit immediately,
+      // overriding the default "wait for solve to finish" behavior.
+      await log(`\n\n⚠️  Received second ${signal} signal — force-stopping ${activeSolveChildren.size} in-flight solve worker(s) and exiting now.`, {
+        level: 'warning',
+      });
+      await forceKillActiveSolveChildren('SIGTERM');
+      await safeExit(130, 'Force interrupted by repeated signal');
+      return;
+    }
+    isShuttingDown = true;
+    try {
+      await log(`\n\n🛑 Received ${signal} signal, shutting down gracefully...`);
+      await log('   ℹ️  Forwarding the interrupt to in-progress solve worker(s); each finishes its current AI working session, auto-commits, then stops. Press CTRL+C again to force-stop.');
+      // Stop the queue so each worker exits its loop after its current solve completes.
+      issueQueue.stop();
+      // Issue #1823: Forward the operator's CTRL+C to each in-flight solve worker (as SIGTERM).
+      // Previously hive only waited; now it actively tells solve to wind down so a worker that is
+      // merely idle-waiting (e.g. for CI/CD) stops promptly instead of sleeping out its interval,
+      // while a worker mid-AI-session still finishes that session before exiting (see solve's
+      // --do-not-shutdown-in-the-middle-of-working-session guard, which hive enables by default).
+      if (activeSolveChildren.size > 0) {
+        await log(`   📨 Forwarding shutdown to ${activeSolveChildren.size} in-flight solve worker(s)...`);
+        await forwardShutdownToActiveSolveChildren();
+      }
+      // Issue #1823: Wait for in-flight solve commands to exit on their own. We intentionally
+      // do NOT cap this wait — the issue requires that CTRL+C / `$ --stop` fully waits for each
+      // running /solve to complete before shutting down. solve runs in its own detached process
+      // group, so it only saw the SIGTERM forwarded above, not the terminal's interrupt.
+      // Promise.all(issueQueue.workers) is the authoritative wait; a periodic progress line
+      // makes it clear hive is still waiting (and is unref'd so it never blocks exit itself).
+      const stats = issueQueue.getStats();
+      let progressTimer = null;
+      if (stats.processing > 0) {
+        const waitStart = Date.now();
+        await log(`   ⏳ Waiting for ${stats.processing} worker(s) to finish current tasks...`);
+        progressTimer = setInterval(() => {
+          const current = issueQueue.getStats();
+          if (current.processing > 0) {
+            const elapsed = Math.round((Date.now() - waitStart) / 1000);
+            log(`   ⏳ Still waiting for ${current.processing} solve worker(s) to finish (${elapsed}s elapsed)...`).catch(() => {});
+          }
+        }, 15000);
+        if (typeof progressTimer.unref === 'function') {
+          progressTimer.unref();
+        }
+      }
+      await Promise.all(issueQueue.workers);
+      if (progressTimer) {
+        clearInterval(progressTimer);
+      }
+      // Perform cleanup if enabled and there were successful completions
+      const finalStats = issueQueue.getStats();
+      if (finalStats.completed > 0) {
+        await cleanupTempDirectories(argv);
+      }
+      await log('   ✅ Shutdown complete');
+      await log(`   📁 Full log file: ${absoluteLogPath}`);
+    } catch (error) {
+      reportError(error, {
+        context: 'monitor_issues_shutdown',
+        operation: 'cleanup_and_exit',
+      });
+      await log(`   ⚠️  Error during shutdown: ${cleanErrorMessage(error)}`, { level: 'error' });
+      await log(`   📁 Full log file: ${absoluteLogPath}`);
+    }
+    await safeExit(0, 'Process completed');
+  }
+  return { gracefulShutdown, forceKillActiveSolveChildren, forwardShutdownToActiveSolveChildren };
+};

package/src/interruptible-sleep.lib.mjs CHANGED Viewed

@@ -9,14 +9,18 @@
  */
 /**
- * Sleep for `ms` milliseconds, but resolve early if SIGINT is received.
+ * Sleep for `ms` milliseconds, but resolve early if SIGINT or SIGTERM is received.
  *
- * When SIGINT fires during the sleep, the timer is cleared and the promise
- * resolves with `{ interrupted: true }`. The existing SIGINT handler (from
- * exit-handler.lib.mjs) continues to run normally — this function does NOT
+ * When the signal fires during the sleep, the timer is cleared and the promise
+ * resolves with `{ interrupted: true }`. The existing signal handlers (from
+ * exit-handler.lib.mjs) continue to run normally — this function does NOT
  * consume or re-emit the signal, it only ensures its own timer doesn't
  * block the event loop.
  *
+ * Issue #1823: SIGTERM is also honoured because hive forwards the operator's CTRL+C to each
+ * /solve worker as SIGTERM. When solve is only idle-waiting here (e.g. for CI/CD), it must stop
+ * immediately rather than sleep out the remaining delay.
+ *
  * @param {number} ms - Duration in milliseconds
  * @returns {Promise<{interrupted: boolean}>}
  */
@@ -24,18 +28,24 @@ export function interruptibleSleep(ms) {
   return new Promise(resolve => {
     let timer;
+    const cleanupListeners = () => {
+      process.removeListener('SIGINT', onInterrupt);
+      process.removeListener('SIGTERM', onInterrupt);
+    };
     const onInterrupt = () => {
       clearTimeout(timer);
-      process.removeListener('SIGINT', onInterrupt);
+      cleanupListeners();
       resolve({ interrupted: true });
     };
     timer = setTimeout(() => {
-      process.removeListener('SIGINT', onInterrupt);
+      cleanupListeners();
       resolve({ interrupted: false });
     }, ms);
     process.on('SIGINT', onInterrupt);
+    process.on('SIGTERM', onInterrupt);
   });
 }

package/src/solve.auto-merge-helpers.lib.mjs CHANGED Viewed

@@ -76,7 +76,7 @@ const formatRunLine = run => {
 // search scope for checkForExistingComment() stays in lock-step with the
 // markers actually embedded in tool-posted comments.
 const toolComments = await import('./tool-comments.lib.mjs');
-const { SESSION_ENDING_MARKERS, isToolGeneratedComment, isToolTrackedCommentId } = toolComments;
+const { SESSION_ENDING_MARKERS, isToolGeneratedComment, isToolTrackedCommentId, trackToolCommentId } = toolComments;
 /**
  * Issue #1323: Check if a comment with specific content already exists on the PR
@@ -292,6 +292,121 @@ export const checkForNonBotComments = async (owner, repo, prNumber, issueNumber,
   }
 };
+/**
+ * Issue #1827: Compute the next monotonic check-window cutoff for the
+ * auto-restart-until-mergeable loop. The cutoff must never move backwards:
+ * after an AI session, lastCheckTime is set to a moment *after* the agent's own
+ * comments, so rewinding it to the iteration's start time (captured before the
+ * AI ran) would re-detect those comments as new feedback — the root cause of
+ * the restart loop in #1827. Returns whichever timestamp is later.
+ *
+ * A timestamp only counts when it is a Date holding a real time value. An
+ * Invalid Date (getTime() is NaN) is treated like a missing value: every
+ * comparison against NaN is false, so an invalid cutoff would otherwise be
+ * returned unchanged on this call and on every later one.
+ *
+ * @param {Date} lastCheckTime - current cutoff
+ * @param {Date} candidate - proposed new cutoff (usually the iteration start time)
+ * @returns {Date} the later of the two timestamps; `candidate` when the current
+ *   cutoff is unusable, `lastCheckTime` when only the candidate is unusable
+ */
+export const nextMonotonicCheckTime = (lastCheckTime, candidate) => {
+  const isUsable = value => value instanceof Date && !Number.isNaN(value.getTime());
+  if (!isUsable(lastCheckTime)) return candidate;
+  if (!isUsable(candidate)) return lastCheckTime;
+  return candidate.getTime() > lastCheckTime.getTime() ? candidate : lastCheckTime;
+};
+/**
+ * Issue #1827: Register every comment authored by the authenticated GitHub
+ * account during an AI working session as a tool-generated comment.
+ *
+ * During a session, the AI agent can post free-form status comments through the
+ * authenticated account (e.g. "✅ CI now green", "✅ Verification pass"). These
+ * are NOT routed through postTrackedComment(), so their IDs were never captured,
+ * and they match none of the tool markers. Once issue #1821 made the watch loop
+ * trust same-account comments as human feedback, the very next iteration
+ * re-detected these comments as fresh feedback and triggered an endless
+ * auto-restart loop until the limit was hit.
+ *
+ * Because the authenticated account is busy running the AI for the whole
+ * session window, any comment it authored within that window is the tool's own,
+ * not human feedback. Tracking those IDs makes checkForNonBotComments filter
+ * them by ID regardless of timestamps — a defense that also survives clock skew
+ * between the local clock and GitHub's `created_at` (which a purely
+ * time-based cutoff cannot).
+ *
+ * The window start is mandatory: when `sinceTime` is missing or cannot be parsed
+ * into a real date, nothing is tracked. Tracking without a lower bound would
+ * register every comment the account ever wrote — including genuine feedback
+ * posted before the session — as tool-generated. For the same reason a comment
+ * whose `created_at` cannot be parsed is left untracked.
+ *
+ * All failures are contained: lookup/fetch/parse errors yield fewer (or no)
+ * tracked IDs, and unexpected errors are passed to reportError; nothing is thrown
+ * for them.
+ *
+ * @param {string} owner - Repository owner
+ * @param {string} repo - Repository name
+ * @param {number} prNumber - Pull request number
+ * @param {number} issueNumber - Issue number (may equal prNumber)
+ * @param {Date|string|number} sinceTime - Start of the session window
+ * @param {Function} commandRunner - Tagged-template command runner, injectable for tests
+ * @param {Object} options
+ * @param {boolean} [options.verbose=false]
+ * @param {string} [options.currentUser] - Pre-resolved authenticated login (skips the `gh api user` call)
+ * @returns {Promise<string[]>} Newly tracked comment IDs (as strings)
+ */
+export const trackAuthenticatedUserCommentsSince = async (owner, repo, prNumber, issueNumber, sinceTime, commandRunner = $, options = {}) => {
+  const { verbose = false, currentUser: providedUser } = options;
+  const trackedIds = [];
+  try {
+    // No usable window start → track nothing. `new Date(null)` is the epoch and
+    // an Invalid Date never compares greater than anything, so either would let
+    // every comment by this account through the lower-bound check below.
+    if (sinceTime == null) return trackedIds;
+    const since = sinceTime instanceof Date ? sinceTime : new Date(sinceTime);
+    if (Number.isNaN(since.getTime())) return trackedIds;
+    let currentUser = providedUser || null;
+    if (!currentUser) {
+      try {
+        const userResult = await commandRunner`gh api user --jq .login`;
+        if (userResult.code === 0) {
+          currentUser = userResult.stdout.toString().trim();
+        }
+      } catch {
+        // Without the authenticated login we cannot attribute comments; bail out.
+      }
+    }
+    if (!currentUser) return trackedIds;
+    const fetchComments = async path => {
+      try {
+        const result = await commandRunner`gh api ${path} --paginate`;
+        if (result.code === 0 && result.stdout) {
+          const parsed = JSON.parse(result.stdout.toString() || '[]');
+          // Only a list can be merged with the other endpoints; treat any other
+          // JSON payload as "no comments" for this endpoint.
+          return Array.isArray(parsed) ? parsed : [];
+        }
+      } catch {
+        // Ignore fetch/parse failures for an individual endpoint.
+      }
+      return [];
+    };
+    const prComments = await fetchComments(`repos/${owner}/${repo}/issues/${prNumber}/comments`);
+    const prReviewComments = await fetchComments(`repos/${owner}/${repo}/pulls/${prNumber}/comments`);
+    let issueComments = [];
+    if (issueNumber && issueNumber !== prNumber) {
+      issueComments = await fetchComments(`repos/${owner}/${repo}/issues/${issueNumber}/comments`);
+    }
+    const allComments = [...prComments, ...prReviewComments, ...issueComments];
+    for (const comment of allComments) {
+      const login = comment?.user?.login;
+      if (!login || login !== currentUser) continue;
+      // Inclusive lower bound: a comment posted at the exact session start is
+      // still the tool's own. created_at uses GitHub's clock, so allow equality.
+      // A comment that cannot be dated cannot be placed inside the window.
+      const createdAt = new Date(comment.created_at);
+      if (Number.isNaN(createdAt.getTime()) || createdAt < since) continue;
+      if (isToolTrackedCommentId(comment.id)) continue;
+      trackToolCommentId(comment.id);
+      trackedIds.push(String(comment.id));
+      if (verbose) {
+        console.log(`[VERBOSE] Tracking authenticated-user session comment ${comment.id} from ${login} at ${comment.created_at}`);
+      }
+    }
+  } catch (error) {
+    reportError(error, {
+      context: 'track_authenticated_user_comments',
+      owner,
+      repo,
+      prNumber,
+      operation: 'track_session_comments',
+    });
+  }
+  return trackedIds;
+};
 /**
  * Get the reasons why PR is not mergeable
  * Issue #1314: Comprehensive CI/CD status handling covering all possible states:

package/src/solve.auto-merge.lib.mjs CHANGED Viewed

@@ -53,7 +53,7 @@ import { limitReset } from './config.lib.mjs';
 // Import helper functions extracted for file size management (Issue #1593)
 const autoMergeHelpers = await import('./solve.auto-merge-helpers.lib.mjs');
-const { checkForExistingComment, checkForNonBotComments, getMergeBlockers } = autoMergeHelpers;
+const { checkForExistingComment, checkForNonBotComments, getMergeBlockers, trackAuthenticatedUserCommentsSince, nextMonotonicCheckTime } = autoMergeHelpers;
 // Issue #1769: cancelled/stale CI re-run failures need a human action stop, not polling forever.
 const cancelledCiRerunLib = await import('./cancelled-ci-rerun.lib.mjs');
@@ -1031,6 +1031,26 @@ No further AI sessions will be started automatically for this run. Please review
           await log(formatAligned('✅', `${argv.tool.toUpperCase()} execution completed:`, 'Checking if PR is now mergeable...'));
         }
+        // Issue #1827: Register every comment the authenticated account posted
+        // during this AI session (free-form status comments like "✅ CI now
+        // green" the agent writes itself, which bypass postTrackedComment and
+        // match no tool marker). Tracking their IDs stops the next iteration's
+        // checkForNonBotComments from mistaking them for fresh human feedback.
+        try {
+          const tracked = await trackAuthenticatedUserCommentsSince(owner, repo, prNumber, issueNumber, iterationStartTime, $, { verbose: argv.verbose });
+          if (argv.verbose && tracked.length > 0) {
+            await log(formatAligned('🧷', 'Tracked own session comments:', `${tracked.length} (won't count as new feedback)`, 2));
+          }
+        } catch (trackError) {
+          reportError(trackError, {
+            context: 'track_authenticated_user_session_comments',
+            prNumber,
+            owner,
+            repo,
+            operation: 'track_session_comments',
+          });
+        }
         // Update last check time after restart
         lastCheckTime = new Date();
       } else if (blockers.length > 0) {
@@ -1071,8 +1091,16 @@ No further AI sessions will be started automatically for this run. Please review
         await log(formatAligned('', 'No action needed', 'Continuing to monitor...', 2));
       }
-      // Update last check time
-      lastCheckTime = currentTime;
+      // Issue #1827: Advance the check window monotonically — never move it
+      // backwards. In the restart branch above, lastCheckTime was already set
+      // to a moment *after* the AI session (and after any comments the agent
+      // posted). currentTime was captured at the *start* of this iteration,
+      // before the AI ran, so assigning it unconditionally here would rewind
+      // the window and re-detect the agent's own comments as new feedback
+      // (the root cause of the auto-restart loop in #1827). In the non-restart
+      // branches lastCheckTime is still the previous iteration's value, which
+      // is < currentTime, so this correctly advances it.
+      lastCheckTime = nextMonotonicCheckTime(lastCheckTime, currentTime);
     } catch (error) {
       reportError(error, {
         context: 'watch_until_mergeable',

package/src/solve.config.lib.mjs CHANGED Viewed

@@ -115,6 +115,11 @@ export const SOLVE_OPTION_DEFINITIONS = {
     description: '[EXPERIMENTAL] Temporarily copy AGENTS.md/agents.md to CLAUDE.md while Claude runs, then remove the temporary copy',
     default: false,
   },
+  'do-not-shutdown-in-the-middle-of-working-session': {
+    type: 'boolean',
+    description: '[EXPERIMENTAL] On interrupt (CTRL+C / SIGTERM), do not abort the AI tool mid-run. If an AI working session is in progress, wait for it to finish, auto-commit any uncommitted changes, then shut down gracefully. If solve is only idle-waiting (e.g. for CI/CD), stop immediately. A second interrupt force-stops. hive passes this automatically to every /solve worker.',
+    default: false,
+  },
   'attach-logs': {
     type: 'boolean',
     description: 'Upload the solution draft log file to the Pull Request on completion (⚠️ WARNING: May expose sensitive data)',

package/src/solve.feedback.lib.mjs CHANGED Viewed

@@ -7,6 +7,9 @@
 import { reportError } from './sentry.lib.mjs';
 import { wrapDollarWithGhRetry as _wrapDollarWithGhRetry } from './github-rate-limit.lib.mjs'; // rate-limit marker (#1726): gh API calls flow through $ wrapped by caller
+// Issue #1827: tool-generated comments (markers + in-memory tracked IDs) must
+// not count as feedback in watch/continue mode, mirroring checkForNonBotComments.
+import { isToolGeneratedComment, isToolTrackedCommentId } from './tool-comments.lib.mjs';
 export const detectAndCountFeedback = async params => {
   const { prNumber, branchName, owner, repo, issueNumber, isContinueMode, argv, mergeStateStatus, prState, workStartTime, log, formatAligned, cleanErrorMessage, $, repositoryPath = null } = params;
@@ -93,6 +96,14 @@ export const detectAndCountFeedback = async params => {
         // Define log patterns to filter out comments containing logs from solve.mjs
         const logPatterns = [/📊.*Log file|solution\s+draft.*log/i, /🔗.*Link:|💻.*Session:/i, /Generated with.*solve\.mjs/i, /Session ID:|Log file available:/i];
+        // Issue #1827: A comment is tool-generated if its ID was tracked in
+        // memory during this run (system status comments AND the agent's own
+        // session comments) or if its body carries a known tool marker (catches
+        // comments from previous runs whose IDs are gone). These must never
+        // count as feedback — otherwise the agent's own "CI now green" / status
+        // comments trigger an endless restart loop (see PR link-foundation/rust-web-box#34).
+        const isToolComment = comment => isToolTrackedCommentId(comment.id) || isToolGeneratedComment(comment.body);
         // Count new PR comments after last commit (both code review comments and conversation comments)
         let prReviewComments = [];
         let prConversationComments = [];
@@ -112,6 +123,10 @@ export const detectAndCountFeedback = async params => {
         // Helper function to filter comments based on time and log patterns
         const filterComment = comment => {
+          // Issue #1827: never count tool-generated comments as feedback.
+          if (isToolComment(comment)) {
+            return false;
+          }
           const commentTime = new Date(comment.created_at);
           const isAfterCommit = commentTime > lastCommitTime;
           const isNotLogPattern = !logPatterns.some(pattern => pattern.test(comment.body || ''));
@@ -145,6 +160,10 @@ export const detectAndCountFeedback = async params => {
         if (issueCommentsResult.code === 0) {
           const issueComments = JSON.parse(issueCommentsResult.stdout.toString());
           const filteredIssueComments = issueComments.filter(comment => {
+            // Issue #1827: never count tool-generated comments as feedback.
+            if (isToolComment(comment)) {
+              return false;
+            }
             const commentTime = new Date(comment.created_at);
             const isAfterCommit = commentTime > lastCommitTime;
             const isNotLogPattern = !logPatterns.some(pattern => pattern.test(comment.body || ''));

package/src/solve.mjs CHANGED Viewed

@@ -48,6 +48,8 @@ const { runAutoEnsureRequirements } = await import('./solve.auto-ensure.lib.mjs'
 const exitHandler = await import('./exit-handler.lib.mjs');
 const { initializeExitHandler, installGlobalExitHandlers, safeExit, logActiveHandles } = exitHandler;
 const { createInterruptWrapper } = await import('./solve.interrupt.lib.mjs');
+// Issue #1823: working-session guard for --do-not-shutdown-in-the-middle-of-working-session.
+const { configureWorkingSession, beginWorkingSession, endWorkingSession } = await import('./working-session.lib.mjs');
 const getResourceSnapshot = memoryCheck.getResourceSnapshot;
 const { handleAutoPrCreation } = await import('./solve.auto-pr.lib.mjs');
 const { setupRepositoryAndClone, verifyDefaultBranchAndStatus } = await import('./solve.repo-setup.lib.mjs');
@@ -148,6 +150,11 @@ const cleanupWrapper = async () => {
 const interruptWrapper = createInterruptWrapper({ cleanupContext, checkForUncommittedChanges, shouldAttachLogs, attachLogToGitHub, getLogFile, sanitizeLogContent, $, log });
 initializeExitHandler(getAbsoluteLogPath, log, cleanupWrapper, interruptWrapper, ({ code, reason }) => notifyIssueAboutPrePullRequestFailure({ code, reason, argv, globalState: global, $, log, getLogFile, shouldAttachLogs, attachLogToGitHub, sanitizeLogContent, rawCommand }));
 installGlobalExitHandlers();
+// Issue #1823: Configure the working-session guard. When the experimental
+// --do-not-shutdown-in-the-middle-of-working-session flag is set (hive passes it to every
+// worker), an interrupt received during an AI working session is deferred: solve lets the AI
+// finish, auto-commits, then shuts down gracefully instead of aborting the AI tool mid-run.
+configureWorkingSession({ enabled: argv['do-not-shutdown-in-the-middle-of-working-session'] === true, log });
 const markFailureNotificationPosted = targetType => {
   global.preExitFailureNotificationPosted = true;
   if (targetType === 'pr') {
@@ -705,6 +712,11 @@ try {
   // Execute tool command with all prompts and settings
   let toolResult;
+  // Issue #1823: Mark the start of the AI working session. While this is active and the
+  // --do-not-shutdown-in-the-middle-of-working-session flag is set, an interrupt (CTRL+C/SIGTERM)
+  // is deferred until the AI tool finishes its turn (see exit-handler.lib.mjs + working-session.lib.mjs).
+  beginWorkingSession();
   // If --use-agent-commander is enabled, use agent-commander for all tools
   if (argv.useAgentCommander) {
     // Ensure agent-commander is available
@@ -813,6 +825,24 @@ try {
     toolResult = claudeResult;
   }
+  // Issue #1823: Mark the end of the AI working session. If a graceful-shutdown interrupt arrived
+  // during the session (deferred by the working-session guard), honor it now: auto-commit any
+  // uncommitted changes and exit gracefully — only AFTER the AI tool has fully finished its turn.
+  const workingSessionState = endWorkingSession();
+  if (workingSessionState.shutdownRequested) {
+    const shutdownExitCode = workingSessionState.shutdownSignal === 'SIGINT' ? 130 : 143;
+    await log('\n🛑 Graceful shutdown requested during the AI working session — the session has finished.', { level: 'warning' });
+    await log('   Auto-committing any uncommitted changes, then shutting down...', { level: 'warning' });
+    try {
+      await interruptWrapper();
+    } catch (interruptError) {
+      await log(`⚠️  Auto-commit on graceful shutdown failed: ${cleanErrorMessage(interruptError)}`, { level: 'warning' });
+    }
+    // Graceful shutdown is NOT a failure: skip the pre-exit failure notifier so no spurious
+    // "solver failed" comment is posted (issue #1823: no errors on graceful shutdown).
+    await safeExit(shutdownExitCode, 'Graceful shutdown after AI working session', { skipPreExit: true });
+  }
   const { success } = toolResult;
   let sessionId = toolResult.sessionId;
   let anthropicTotalCostUSD = toolResult.anthropicTotalCostUSD;

package/src/solve.watch.lib.mjs CHANGED Viewed

@@ -46,6 +46,12 @@ const { formatAutoIterationLimit, hasReachedAutoIterationLimit, normalizeAutoIte
 const toolComments = await import('./tool-comments.lib.mjs');
 const { AUTO_RESTART_MARKER, postTrackedComment } = toolComments;
+// Issue #1827: After each AI session, register the authenticated account's own
+// comments (free-form status updates the agent posts itself) so the next
+// detectAndCountFeedback() call doesn't mistake them for new human feedback.
+const autoMergeHelpers = await import('./solve.auto-merge-helpers.lib.mjs');
+const { trackAuthenticatedUserCommentsSince } = autoMergeHelpers;
 // Issue #1728: Per-iteration working session summary attachment helper
 // Issue #1763: Per-iteration PR ↔ issue link verification (in case the AI
 // agent overwrites the PR body without a closing keyword and the iteration
@@ -340,6 +346,24 @@ export const watchForFeedback = async params => {
           global.previousSessionId = toolResult.sessionId;
         }
+        // Issue #1827: Track the authenticated account's own comments posted
+        // during this session window so they are filtered (by ID) on the next
+        // feedback check instead of re-triggering a restart.
+        try {
+          const tracked = await trackAuthenticatedUserCommentsSince(owner, repo, prNumber, issueNumber, iterationStartTime, $, { verbose: argv.verbose });
+          if (argv.verbose && tracked.length > 0) {
+            await log(formatAligned('🧷', 'Tracked own session comments:', `${tracked.length} (won't count as feedback)`, 2));
+          }
+        } catch (trackError) {
+          reportError(trackError, {
+            context: 'track_authenticated_user_session_comments',
+            prNumber,
+            owner,
+            repo,
+            operation: 'track_session_comments',
+          });
+        }
         if (!toolResult.success) {
           // Check if this is an API error using shared utility
           if (isApiError(toolResult)) {

package/src/working-session.lib.mjs ADDED Viewed

@@ -0,0 +1,166 @@
+/**
+ * Issue #1823: "AI working session" guard for solve's graceful shutdown.
+ *
+ * An *AI working session* is the window during which the AI tool child
+ * (claude/codex/gemini/opencode/qwen/agent) is actively running and streaming. When the
+ * experimental flag `--do-not-shutdown-in-the-middle-of-working-session` is enabled:
+ *
+ *   - An interrupt (CTRL+C / SIGINT, or SIGTERM) received DURING a protected session is
+ *     *deferred*: solve lets the AI session finish, auto-commits any uncommitted changes, then
+ *     shuts down gracefully. It does NOT abort the AI tool mid-run.
+ *   - An interrupt received OUTSIDE a protected session (e.g. solve is only idle-waiting for
+ *     CI/CD) stops solve immediately.
+ *   - A SECOND interrupt force-stops now: the active AI child is killed and solve exits.
+ *
+ * Background (validated empirically — see experiments/command-stream-signals.mjs):
+ *   command-stream installs only a SIGINT handler that forwards SIGINT to the active AI child's
+ *   process group (killing it); it has NO SIGTERM handler. hive therefore forwards the operator's
+ *   CTRL+C to each /solve worker as SIGTERM, which command-stream ignores — so the AI child is
+ *   never collaterally killed by the library and this module + exit-handler decide what to do.
+ *   For the force path (a second interrupt) we *reuse* command-stream's own SIGINT handler to
+ *   kill the active child's process group, guarding against its embedded process.exit(130) so we
+ *   can still auto-commit before exiting.
+ *
+ * This module holds module-level state on purpose: it is a per-process singleton, mirroring how
+ * exit-handler.lib.mjs and command-stream manage global signal state.
+ */
+let flagEnabled = false; // set by configureWorkingSession(): whether the experimental guard flag is on
+let logFn = null; // optional logger from configureWorkingSession(); used only by trace()
+let protectedSessionActive = false; // true between beginWorkingSession() and endWorkingSession()
+let shutdownRequested = false; // set by the first requestShutdown() call
+let shutdownSignal = null; // signal name recorded by the first requestShutdown() call
+let forceRequested = false; // set when requestShutdown() is called again after a shutdown was already requested
+/**
+ * Decide whether a process SIGINT listener looks like the one command-stream installs.
+ * The check is a source-text heuristic: the listener's stringified body is searched for the
+ * internal helper names command-stream uses for its own self-detection
+ * (see node_modules/command-stream .../$.state.mjs isOurHandlerInstalled()).
+ * @param {Function} listener - A listener taken from process.listeners('SIGINT').
+ * @returns {boolean} true when any of the known helper names appears in the listener source.
+ */
+const isCommandStreamSigintListener = listener => {
+  const source = listener.toString();
+  const helperNames = ['findActiveRunners', 'forwardSigintToRunners', 'handleSigintExit', 'activeProcessRunners'];
+  return helperNames.some(helperName => source.includes(helperName));
+};
+/**
+ * Verbose diagnostics hook for the issue #1823 shutdown path. Does nothing until a logger has
+ * been supplied through configureWorkingSession(). The call is fire-and-forget: a synchronous
+ * throw or a rejected promise from the logger is swallowed so logging can never break shutdown.
+ * @param {string} message - Text handed to the logger with `{ verbose: true }`.
+ */
+const trace = message => {
+  if (typeof logFn === 'function') {
+    try {
+      const pending = logFn(message, { verbose: true });
+      // Promise-like result: attach a no-op handler so a rejection stays handled.
+      if (typeof pending?.catch === 'function') {
+        pending.catch(() => {});
+      }
+    } catch {
+      // Diagnostics must never interfere with the shutdown path.
+    }
+  }
+};
+/**
+ * Set up the working-session guard. Intended to be called once when solve starts.
+ * @param {object} [opts]
+ * @param {boolean} [opts.enabled=false] - Whether --do-not-shutdown-in-the-middle-of-working-session is set.
+ * @param {Function} [opts.log=null] - Optional async logger used for verbose shutdown tracing.
+ */
+export const configureWorkingSession = (opts = {}) => {
+  const { enabled = false, log = null } = opts;
+  logFn = log;
+  flagEnabled = Boolean(enabled);
+};
+/** @returns {boolean} Whether the guard flag was enabled through configureWorkingSession(). */
+export function isFlagEnabled() {
+  return flagEnabled;
+}
+/** @returns {boolean} Whether a protected AI working session is currently marked active. */
+export function isWorkingSessionActive() {
+  return protectedSessionActive;
+}
+/** @returns {boolean} Whether requestShutdown() has been called at least once. */
+export function isShutdownRequested() {
+  return shutdownRequested;
+}
+/** @returns {string|null} The signal recorded by the first requestShutdown() call, if any. */
+export function getShutdownSignal() {
+  return shutdownSignal;
+}
+/** @returns {boolean} Whether a repeated requestShutdown() call asked for a forced stop. */
+export function isForceRequested() {
+  return forceRequested;
+}
+/**
+ * Flag that a protected AI working session has started.
+ * @returns {void}
+ */
+export function beginWorkingSession() {
+  protectedSessionActive = true;
+}
+/**
+ * Flag that the protected AI working session is over and report what was requested meanwhile.
+ * @returns {{shutdownRequested: boolean, shutdownSignal: string|null, forceRequested: boolean}}
+ *   Snapshot of the shutdown state recorded by requestShutdown().
+ */
+export const endWorkingSession = () => {
+  const snapshot = { shutdownRequested, shutdownSignal, forceRequested };
+  protectedSessionActive = false;
+  return snapshot;
+};
+/**
+ * Register a graceful-shutdown request that arrived during a protected session.
+ * The first call records the request and its signal; any later call escalates to a force
+ * request (the operator insists) and leaves the recorded signal untouched.
+ * @param {string} signal - 'SIGINT' | 'SIGTERM'
+ * @returns {{first: boolean}} first=true for the initial request, false for every repeat.
+ */
+export const requestShutdown = signal => {
+  const first = !shutdownRequested;
+  if (first) {
+    shutdownRequested = true;
+    shutdownSignal = signal || shutdownSignal;
+    trace(`[working-session] ${shutdownSignal} deferred until the AI working session finishes`);
+  } else {
+    forceRequested = true;
+    trace(`[working-session] repeat ${signal} during protected session → force requested`);
+  }
+  return { first };
+};
+/**
+ * Force-stop the active AI child process group(s) by invoking command-stream's own SIGINT
+ * listener(s), which forward SIGINT to every active runner's process group. A throwaway SIGINT
+ * listener is registered for the duration of the call so command-stream sees "other handlers
+ * present" and does NOT call process.exit(130) itself — the caller keeps control and can still
+ * auto-commit and exit afterward. The throwaway listener is always removed again.
+ * @returns {number} Count of command-stream listeners invoked (0 if none / no active child).
+ */
+export const forceKillActiveChildren = () => {
+  const commandStreamListeners = [];
+  for (const candidate of process.listeners('SIGINT')) {
+    if (isCommandStreamSigintListener(candidate)) {
+      commandStreamListeners.push(candidate);
+    }
+  }
+  const invokedCount = commandStreamListeners.length;
+  if (invokedCount === 0) {
+    trace('[working-session] force-kill requested but no active command-stream child found');
+    return 0;
+  }
+  trace(`[working-session] force-killing ${invokedCount} active AI child process group(s)`);
+  // Extra listener keeps the SIGINT listener count above one while the handlers run.
+  const placeholder = () => {};
+  process.on('SIGINT', placeholder);
+  try {
+    commandStreamListeners.forEach(invoke => {
+      try {
+        invoke();
+      } catch {
+        // ignore — child group may already be gone
+      }
+    });
+  } finally {
+    process.removeListener('SIGINT', placeholder);
+  }
+  return invokedCount;
+};
+/**
+ * Restore every piece of module state to its initial value (used by tests).
+ * @returns {void}
+ */
+export function resetWorkingSession() {
+  // Shutdown bookkeeping recorded by requestShutdown().
+  forceRequested = false;
+  shutdownSignal = null;
+  shutdownRequested = false;
+  // Session marker toggled by beginWorkingSession()/endWorkingSession().
+  protectedSessionActive = false;
+  // Configuration supplied through configureWorkingSession().
+  logFn = null;
+  flagEnabled = false;
+}
+export default { // convenience aggregate: the same functions that are also exported by name
+  configureWorkingSession,
+  isFlagEnabled,
+  isWorkingSessionActive,
+  isShutdownRequested,
+  getShutdownSignal,
+  isForceRequested,
+  beginWorkingSession,
+  endWorkingSession,
+  requestShutdown,
+  forceKillActiveChildren,
+  resetWorkingSession,
+};