@link-assistant/hive-mind 1.78.8 → 1.78.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,40 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.78.9
4
+
5
+ ### Patch Changes
6
+
7
+ - a3d4d41: fix(isolation): use native Docker isolation and seed the nested daemon for `--isolation docker` (#1914)
8
+
9
+ Two problems made `--isolation docker` behave incorrectly on the Docker-in-Docker bot
10
+ host:
11
+ 1. **It wasn't real Docker isolation.** Hive Mind launched isolated tasks as
12
+ `$ --isolated screen -- docker run …`, so `$ --status` reported
13
+ `options / isolated screen` — a screen wrapper around a raw `docker run`, not
14
+ the native Docker backend. Hive Mind now builds
15
+ `$ --isolated docker --image <img> [--privileged] --shell sh … --detached --session <uuid> -- '<cmd>'`,
16
+ so start-command owns the container lifecycle and `--status` reports real
17
+ Docker isolation.
18
+ 2. **The 30+ GB image was re-downloaded for every task.** The bot runs inside a
19
+ DinD container whose nested `dockerd` starts with an empty image store. box
20
+ can seed that daemon from the host (host-image passthrough), but only when the
21
+ host Docker socket is bind-mounted — and when it isn't, passthrough is a
22
+ _silent_ no-op, so the first isolated task pulled the whole image from the
23
+ registry. Hive Mind now runs a startup preflight (`preflightDockerIsolation`)
24
+ that probes the nested daemon and, when the image is absent, prints the exact
25
+ remediation (mount `/var/run/docker.sock` + set `DIND_HOST_PASSTHROUGH_IMAGES`,
26
+ or run `scripts/preload-dind-isolation-image.mjs`). The production deploy
27
+ script was the real root cause — its `docker run` never mounted the host
28
+ socket — and has been fixed to pass `-v /var/run/docker.sock:…:ro` plus the
29
+ allowlist.
30
+
31
+ Also filed the silent-passthrough footgun upstream as link-foundation/box#102
32
+ (warn when an allowlist is set but no socket is mounted) — **now fixed and shipped
33
+ in box v2.3.2** — and bumped this repo's base images from `konard/box:2.3.1` /
34
+ `konard/box-dind:2.3.1` to `2.3.2` so the upstream warning ships at the source.
35
+ Added a deep case study with the full reproduction, timeline, and root-cause
36
+ analysis under `docs/case-studies/issue-1914`.
37
+
3
38
  ## 1.78.8
4
39
 
5
40
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.78.8",
3
+ "version": "1.78.9",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -32,13 +32,21 @@ const TERMINAL_SESSION_STATUSES = new Set(['executed', 'completed', 'failed', 'c
32
32
  const HIVE_MIND_IMAGE_REPO = 'konard/hive-mind';
33
33
  const HIVE_MIND_DIND_IMAGE_REPO = 'konard/hive-mind-dind';
34
34
  const DEFAULT_HIVE_MIND_IMAGE_TAG = 'latest';
35
- // Docker's `--pull` accepts these policies. We only emit the flag when an
36
- // operator explicitly opts in; otherwise Docker's own default ("missing")
37
- // applies and `docker run` reuses any locally present image. See issue #1879.
38
- const VALID_DOCKER_PULL_POLICIES = new Set(['always', 'missing', 'never']);
39
- const DOCKER_ISOLATION_TRACKING_BACKEND = 'screen';
40
35
  const DOCKER_CONTAINER_HOME = '/home/box';
41
- const DOCKER_CONTAINER_PREFIX = 'hive-mind-isolation';
36
+ // Default path where the host Docker socket is bind-mounted inside a DinD
37
+ // container so box's host-image passthrough can copy host images into the
38
+ // nested daemon. Matches box's own DIND_HOST_DOCKER_SOCK default. The deploy
39
+ // must mount it (`-v /var/run/docker.sock:/var/run/host-docker.sock:ro`) or the
40
+ // nested daemon starts empty and the first isolated task pulls the full,
41
+ // multi-gigabyte image. See issue #1914.
42
+ const DEFAULT_HOST_DOCKER_SOCK = '/var/run/host-docker.sock';
43
+ // Force a POSIX shell for the inner command of Docker-isolated tasks. solve/
44
+ // hive/task live on the image's baked-in PATH, so `sh -c` resolves them without
45
+ // needing a login shell. Forcing the shell (instead of start's 'auto') also
46
+ // skips start's shell-detection probe, which would otherwise `docker run` a
47
+ // throwaway container — booting the dind image's dockerd entrypoint — purely to
48
+ // check whether bash exists. See issue #1914.
49
+ const DOCKER_ISOLATION_SHELL = 'sh';
42
50
 
43
51
  function normalizeProcessIds(value) {
44
52
  if (!value || typeof value !== 'object') return {};
@@ -62,19 +70,10 @@ function shellQuote(value) {
62
70
  return `'${stringValue.replaceAll("'", "'\\''")}'`;
63
71
  }
64
72
 
65
- function shellDoubleQuote(value) {
66
- return `"${String(value).replaceAll('\\', '\\\\').replaceAll('"', '\\"').replaceAll('$', '\\$').replaceAll('`', '\\`')}"`;
67
- }
68
-
69
73
  function buildShellCommand(command, args = []) {
70
74
  return [command, ...args].map(shellQuote).join(' ');
71
75
  }
72
76
 
73
- function makeDockerContainerName(sessionId) {
74
- const normalizedSession = String(sessionId || crypto.randomUUID()).replace(/[^a-zA-Z0-9_.-]/g, '-');
75
- return `${DOCKER_CONTAINER_PREFIX}-${normalizedSession}`;
76
- }
77
-
78
77
  function shouldRunPrivilegedDockerIsolation(image, env = process.env) {
79
78
  return String(env.HIVE_MIND_IMAGE_VARIANT || '').toLowerCase() === 'dind' || String(image || '').includes('hive-mind-dind');
80
79
  }
@@ -117,20 +116,15 @@ export function getDockerIsolationImage({ env = process.env } = {}) {
117
116
  }
118
117
 
119
118
  /**
120
- * Resolve the Docker `--pull` policy for isolated tasks.
121
- *
122
- * Returns one of `always` | `missing` | `never`, or `null` when unset (in which
123
- * case the `--pull` flag is omitted and Docker's default applies). Operators set
124
- * `HIVE_MIND_DOCKER_ISOLATION_PULL=never` to force reuse of an image already
125
- * present in the (possibly nested) daemon and fail fast instead of silently
126
- * re-downloading it. Invalid values are ignored. See issue #1879.
119
+ * Resolve the path where the host Docker socket is expected to be mounted inside
120
+ * a DinD container. box's entrypoint reads this socket to copy host images into
121
+ * the nested daemon (host-image passthrough). Defaults to
122
+ * `/var/run/host-docker.sock` and can be overridden with `DIND_HOST_DOCKER_SOCK`
123
+ * (the same variable box honors). See issue #1914.
127
124
  */
128
- export function getDockerIsolationPullPolicy({ env = process.env } = {}) {
129
- const raw = String(env.HIVE_MIND_DOCKER_ISOLATION_PULL || '')
130
- .trim()
131
- .toLowerCase();
132
- if (!raw) return null;
133
- return VALID_DOCKER_PULL_POLICIES.has(raw) ? raw : null;
125
+ export function resolveHostDockerSock({ env = process.env } = {}) {
126
+ const explicit = String(env.DIND_HOST_DOCKER_SOCK || '').trim();
127
+ return explicit || DEFAULT_HOST_DOCKER_SOCK;
134
128
  }
135
129
 
136
130
  /**
@@ -157,46 +151,63 @@ export function getDockerIsolationAuthMounts({ tool = 'claude', env = process.en
157
151
  }
158
152
 
159
153
  /**
160
- * Build the shell command executed inside a start-command wrapper session for
161
- * Docker isolation. The wrapper remains a start-command session so Telegram can
162
- * keep using the same status/log lifecycle while Hive Mind controls image and
163
- * auth mounts directly.
154
+ * Resolve the image-variant marker recorded inside the isolated container.
155
+ * A `hive-mind-dind` image is always the dind variant; otherwise fall back to
156
+ * the parent's `HIVE_MIND_IMAGE_VARIANT` (or `regular`).
164
157
  */
165
- export function buildDockerIsolationCommand(command, args = [], options = {}) {
158
+ function resolveImageVariant(image, env = process.env) {
159
+ return image.includes('hive-mind-dind') ? 'dind' : env.HIVE_MIND_IMAGE_VARIANT || 'regular';
160
+ }
161
+
162
+ /**
163
+ * Build the `$` (start-command) arguments that launch a Docker-isolated task
164
+ * using start-command's NATIVE Docker backend (`$ --isolated docker`).
165
+ *
166
+ * Issue #1914: earlier versions wrapped a hand-rolled `docker run` inside a
167
+ * `screen` session (`$ --isolated screen -- docker run …`). That was *screen*
168
+ * isolation merely shelling out to Docker — not Docker isolation. We now hand
169
+ * the container lifecycle to start-command itself and only contribute the
170
+ * pieces Hive Mind must control: which image to run, privileged mode for the
171
+ * dind variant, the environment markers, and the credential mounts scoped to
172
+ * the selected tool.
173
+ *
174
+ * start-command's Docker backend reuses a locally present image and only pulls
175
+ * when it is missing (`docker run` with Docker's default "missing" pull
176
+ * policy), so a host image seeded into the nested daemon via box passthrough is
177
+ * reused instead of re-downloaded — no `--pull` plumbing required (issue #1879).
178
+ */
179
+ export function buildDockerIsolationStartArgs(command, args = [], options = {}) {
166
180
  const { sessionId, tool = 'claude', env = process.env, homeDir = os.homedir(), existsSync = fs.existsSync } = options;
167
181
  const image = getDockerIsolationImage({ env });
168
- const innerCommand = buildShellCommand(command, args);
169
- const dockerArgs = ['docker', 'run', '--rm'];
170
-
171
- // Reuse a locally present image instead of re-downloading it when the
172
- // operator opts in. Omitted by default so Docker's "missing" policy applies.
173
- const pullPolicy = getDockerIsolationPullPolicy({ env });
174
- if (pullPolicy) {
175
- dockerArgs.push('--pull', pullPolicy);
176
- }
177
182
 
178
- dockerArgs.push('--name', makeDockerContainerName(sessionId), '--workdir', DOCKER_CONTAINER_HOME, '-e', `HOME=${DOCKER_CONTAINER_HOME}`, '-e', `HIVE_MIND_PARENT_SESSION_ID=${sessionId || ''}`);
183
+ const startArgs = ['--isolated', 'docker', '--image', image];
179
184
 
180
185
  if (shouldRunPrivilegedDockerIsolation(image, env)) {
181
- dockerArgs.push('--privileged');
186
+ startArgs.push('--privileged');
182
187
  }
183
188
 
184
- const imageVariant = image.includes('hive-mind-dind') ? 'dind' : env.HIVE_MIND_IMAGE_VARIANT || 'regular';
185
- dockerArgs.push('-e', `HIVE_MIND_IMAGE_VARIANT=${imageVariant}`);
189
+ // Force the inner shell so start-command does not probe the image to detect
190
+ // one (see DOCKER_ISOLATION_SHELL).
191
+ startArgs.push('--shell', DOCKER_ISOLATION_SHELL);
192
+
193
+ // The image already sets HOME=/home/box and WORKDIR /home/box; pass HOME
194
+ // explicitly anyway so the credential mounts under /home/box resolve even if
195
+ // a future image forgets to. start-command has no --workdir flag, so the
196
+ // working directory comes from the image's WORKDIR.
197
+ startArgs.push('-e', `HOME=${DOCKER_CONTAINER_HOME}`, '-e', `HIVE_MIND_PARENT_SESSION_ID=${sessionId || ''}`, '-e', `HIVE_MIND_IMAGE_VARIANT=${resolveImageVariant(image, env)}`);
186
198
 
187
199
  for (const mount of getDockerIsolationAuthMounts({ tool, env, homeDir, existsSync })) {
188
- dockerArgs.push('--volume', `${mount.source}:${mount.target}`);
200
+ startArgs.push('--volume', `${mount.source}:${mount.target}`);
189
201
  }
190
202
 
191
- dockerArgs.push(image, 'bash', '-lc');
192
-
193
- return [...dockerArgs.map(shellQuote), shellDoubleQuote(innerCommand)].join(' ');
203
+ startArgs.push('--detached', '--session', sessionId, '--', buildShellCommand(command, args));
204
+ return startArgs;
194
205
  }
195
206
 
196
207
  export function buildStartCommandArgs(command, args = [], options = {}) {
197
208
  const { backend, sessionId } = options;
198
209
  if (backend === 'docker') {
199
- return ['--isolated', DOCKER_ISOLATION_TRACKING_BACKEND, '--detached', '--session', sessionId, '--', buildDockerIsolationCommand(command, args, options)];
210
+ return buildDockerIsolationStartArgs(command, args, { ...options, sessionId });
200
211
  }
201
212
  return ['--isolated', backend, '--detached', '--session', sessionId, '--', buildShellCommand(command, args)];
202
213
  }
@@ -413,10 +424,11 @@ export async function executeWithIsolation(command, args, options = {}) {
413
424
  if (backend === 'docker') {
414
425
  const env = options.env || process.env;
415
426
  const image = getDockerIsolationImage({ env });
416
- const pullPolicy = getDockerIsolationPullPolicy({ env });
417
427
  const mounts = getDockerIsolationAuthMounts({ tool: options.tool, env, homeDir: options.homeDir || os.homedir(), existsSync: options.existsSync || fs.existsSync });
428
+ console.log('[VERBOSE] isolation-runner: Docker isolation backend: native ($ --isolated docker)');
418
429
  console.log(`[VERBOSE] isolation-runner: Docker isolation image: ${image}`);
419
- console.log(`[VERBOSE] isolation-runner: Docker isolation pull policy: ${pullPolicy || '(docker default: missing — reuse local image if present)'}`);
430
+ console.log(`[VERBOSE] isolation-runner: Docker isolation privileged: ${shouldRunPrivilegedDockerIsolation(image, env)}`);
431
+ console.log('[VERBOSE] isolation-runner: Docker isolation pull: reuse local image if present, pull only if missing (start-command default)');
420
432
  console.log(`[VERBOSE] isolation-runner: Docker isolation mounts: ${mounts.map(m => m.target).join(', ') || '(none)'}`);
421
433
  }
422
434
  }
@@ -553,12 +565,126 @@ export async function checkScreenSessionRunning(sessionName, verbose = false) {
553
565
  }
554
566
  }
555
567
 
568
+ /**
569
+ * Check whether the Docker container backing a native `$ --isolated docker`
570
+ * session is still running.
571
+ *
572
+ * start-command names the container after the `--session` value, so the
573
+ * (possibly nested) Docker daemon can be queried directly. This is the
574
+ * native-Docker analogue of the `screen -ls` fallback: it is consulted only
575
+ * when `$ --status` has no usable record. The bot runs inside a Docker-in-
576
+ * Docker container, so `docker` here talks to the same nested daemon that
577
+ * start-command launched the task container on. See issue #1914.
578
+ *
579
+ * @param {string} containerName - Container name (the session UUID)
580
+ * @param {boolean} [verbose] - Enable verbose logging
581
+ * @returns {Promise<boolean>} True if the container exists and is running
582
+ */
583
+ export async function checkDockerContainerRunning(containerName, verbose = false) {
584
+ try {
585
+ const result = await $({ mirror: false })`docker inspect -f ${'{{.State.Running}}'} ${containerName}`;
586
+ const running = (result.stdout?.toString() || '').trim() === 'true';
587
+ if (verbose) {
588
+ console.log(`[VERBOSE] isolation-runner: docker inspect for '${containerName}': ${running ? 'running' : 'not running'}`);
589
+ }
590
+ return running;
591
+ } catch {
592
+ // `docker inspect` exits non-zero when no such container exists.
593
+ return false;
594
+ }
595
+ }
596
+
597
+ /**
598
+ * Check whether an image is present in the local Docker daemon.
599
+ *
600
+ * Inside a Docker-in-Docker container "local" is the NESTED daemon. `docker
601
+ * image inspect` exits 0 only when the image exists, so a non-zero exit (or a
602
+ * missing docker binary) is treated as absent. Used by the startup preflight to
603
+ * predict whether the first isolated task will trigger a full image pull.
604
+ * See issue #1914.
605
+ *
606
+ * @param {string} image - Image reference (repo:tag)
607
+ * @param {boolean} [verbose] - Enable verbose logging
608
+ * @returns {Promise<boolean>} True if the image is present locally
609
+ */
610
+ export async function checkDockerImagePresent(image, verbose = false) {
611
+ try {
612
+ await $({ mirror: false })`docker image inspect ${image}`;
613
+ if (verbose) console.log(`[VERBOSE] isolation-runner: docker image inspect '${image}': present`);
614
+ return true;
615
+ } catch {
616
+ if (verbose) console.log(`[VERBOSE] isolation-runner: docker image inspect '${image}': absent`);
617
+ return false;
618
+ }
619
+ }
620
+
621
+ /**
622
+ * Startup preflight for `--isolation docker`.
623
+ *
624
+ * The bot usually runs inside a Docker-in-Docker container whose NESTED daemon
625
+ * starts with an empty image store. If the isolation image is not already in
626
+ * that nested daemon, the first isolated task makes `docker run` pull a fresh
627
+ * copy — which for the Hive Mind images is multiple gigabytes (issues #1914,
628
+ * #1879). box can seed the nested daemon automatically (host-image passthrough)
629
+ * but only when the host Docker socket is bind-mounted into the container; if it
630
+ * is not mounted, passthrough is a SILENT no-op and the re-download is the first
631
+ * symptom an operator sees.
632
+ *
633
+ * This preflight makes that condition observable at startup instead: it reports
634
+ * whether the image is already present (reuse, no pull) and, when it is absent,
635
+ * warns loudly with the exact remediation (mount the host socket / set the
636
+ * passthrough allowlist, or run the preload script). It never throws and never
637
+ * blocks startup — a misconfigured passthrough should degrade to a slow first
638
+ * task, not a dead bot.
639
+ *
640
+ * @param {Object} [options]
641
+ * @param {Object} [options.env] - Environment (defaults to process.env)
642
+ * @param {Function} [options.existsSync] - fs.existsSync (injectable for tests)
643
+ * @param {boolean} [options.verbose] - Enable verbose logging
644
+ * @param {Object} [options.logger] - Logger with .log/.warn (defaults to console)
645
+ * @param {Function} [options.checkImagePresent] - Image-presence probe (injectable for tests)
646
+ * @returns {Promise<{image: string, sock: string, socketMounted: boolean, imagePresent: boolean, isDind: boolean, ok: boolean, warnings: string[]}>}
647
+ */
648
+ export async function preflightDockerIsolation(options = {}) {
649
+ const { env = process.env, existsSync = fs.existsSync, verbose = false, logger = console, checkImagePresent = checkDockerImagePresent } = options;
650
+
651
+ const image = getDockerIsolationImage({ env });
652
+ const sock = resolveHostDockerSock({ env });
653
+ const isDind = shouldRunPrivilegedDockerIsolation(image, env);
654
+ const socketMounted = Boolean(existsSync(sock));
655
+ const imagePresent = Boolean(await checkImagePresent(image, verbose));
656
+
657
+ const result = { image, sock, socketMounted, imagePresent, isDind, ok: imagePresent, warnings: [] };
658
+ const info = typeof logger.log === 'function' ? logger.log.bind(logger) : () => {};
659
+ const warn = typeof logger.warn === 'function' ? logger.warn.bind(logger) : info;
660
+
661
+ if (imagePresent) {
662
+ info(`✅ Docker isolation image '${image}' is already present locally — isolated tasks reuse it (no multi-GB pull). See issue #1914.`);
663
+ return result;
664
+ }
665
+
666
+ // Image absent: the first isolated task will pull the full image. Explain the
667
+ // most likely cause and the exact fix instead of letting the operator first
668
+ // discover it as a surprise multi-gigabyte download mid-task.
669
+ const preload = `node scripts/preload-dind-isolation-image.mjs --image ${image}`;
670
+ if (isDind && !socketMounted) {
671
+ result.warnings.push(`Docker isolation image '${image}' is NOT in the nested Docker daemon and the host Docker socket is not mounted at ${sock}. ` + `box host-image passthrough cannot seed the nested daemon, so the FIRST isolated task will pull the full image (the Hive Mind images are multiple GB). ` + `Fix the deployment: add '-v /var/run/docker.sock:${sock}:ro' and '-e DIND_HOST_PASSTHROUGH_IMAGES="konard/hive-mind konard/hive-mind-dind"' to the bot container's 'docker run', or seed it now with: ${preload}`);
672
+ } else if (isDind && socketMounted) {
673
+ result.warnings.push(`Docker isolation image '${image}' is NOT in the nested Docker daemon even though the host Docker socket is mounted at ${sock}. ` + `box host-image passthrough may have skipped it (check DIND_HOST_PASSTHROUGH mode, the DIND_HOST_PASSTHROUGH_IMAGES allowlist, and that the host actually has '${image}' with a registry digest). ` + `The first isolated task will pull the full image. Seed it now with: ${preload}`);
674
+ } else {
675
+ result.warnings.push(`Docker isolation image '${image}' is not present locally; the first isolated task will pull it. ` + `If this host already has it under a different tag, pin HIVE_MIND_DOCKER_ISOLATION_IMAGE_TAG, or seed it with: ${preload}`);
676
+ }
677
+ for (const w of result.warnings) warn(`⚠️ ${w}`);
678
+ return result;
679
+ }
680
+
556
681
  /**
557
682
  * Check if an isolated session is still running.
558
- * Uses `$ --status` first, with a `screen -ls` fallback for screen-backend
559
- * sessions to work around start-command UUID mismatch issues.
683
+ * Uses `$ --status` first, with a backend-specific fallback (screen -ls for
684
+ * screen, docker inspect for docker) to work around start-command UUID
685
+ * mismatch issues.
560
686
  *
561
- * @param {string} sessionId - UUID of the session (used for both $ --status and screen session name)
687
+ * @param {string} sessionId - UUID of the session (also the screen session name / docker container name)
562
688
  * @param {Object} [options] - Options
563
689
  * @param {string} [options.backend] - Isolation backend ('screen', 'tmux', 'docker')
564
690
  * @param {boolean} [options.verbose] - Enable verbose logging
@@ -579,19 +705,29 @@ export async function isSessionRunning(sessionId, options = {}) {
579
705
  }
580
706
  }
581
707
 
582
- // Fallback: for screen-backed sessions, check screen -ls directly.
583
- // Docker isolation is also tracked through a screen wrapper so Hive Mind can
584
- // control image selection and credential mounts while preserving logs/status.
585
- // Only use this when $ --status has no usable record. This works around
586
- // older start-command bugs where:
587
- // 1. $ --status can't find session by --session name (only by internal UUID)
588
- // See: https://github.com/link-assistant/hive-mind/issues/1545
589
- if ((backend === 'screen' || backend === 'docker') && shouldFallbackToScreenStatus(result)) {
590
- const screenRunning = await checkScreenSessionRunning(sessionId, verbose);
591
- if (screenRunning && verbose) {
592
- console.log(`[VERBOSE] isolation-runner: $ --status says not running, but screen -ls confirms session '${sessionId}' is still active`);
708
+ // Fallback used only when `$ --status` has no usable record. This works
709
+ // around older start-command bugs where `$ --status` can't resolve a session
710
+ // by its --session name (only by an internal UUID). See issue #1545.
711
+ // - screen sessions: confirm via `screen -ls`.
712
+ // - docker sessions: confirm via `docker inspect` on the container that
713
+ // start-command named after the session UUID. Native Docker isolation
714
+ // (issue #1914) is a real container, not a screen wrapper, so the screen
715
+ // check no longer applies to it.
716
+ if (shouldFallbackToScreenStatus(result)) {
717
+ if (backend === 'screen') {
718
+ const screenRunning = await checkScreenSessionRunning(sessionId, verbose);
719
+ if (screenRunning && verbose) {
720
+ console.log(`[VERBOSE] isolation-runner: $ --status says not running, but screen -ls confirms session '${sessionId}' is still active`);
721
+ }
722
+ return screenRunning;
723
+ }
724
+ if (backend === 'docker') {
725
+ const containerRunning = await checkDockerContainerRunning(sessionId, verbose);
726
+ if (containerRunning && verbose) {
727
+ console.log(`[VERBOSE] isolation-runner: $ --status says not running, but docker inspect confirms container '${sessionId}' is still active`);
728
+ }
729
+ return containerRunning;
593
730
  }
594
- return screenRunning;
595
731
  }
596
732
 
597
733
  return false;
@@ -180,6 +180,16 @@ if (ISOLATION_BACKEND) {
180
180
  }
181
181
  console.log(`🔒 Isolation mode enabled: ${ISOLATION_BACKEND} (experimental)`);
182
182
  isolationRunner = await import('./isolation-runner.lib.mjs');
183
+ // For docker isolation, run a startup preflight so a missing/un-passed-through
184
+ // image surfaces as a loud, actionable warning instead of a surprise multi-GB
185
+ // pull on the first isolated task (issues #1914, #1879). Never throws.
186
+ if (ISOLATION_BACKEND === 'docker' && typeof isolationRunner.preflightDockerIsolation === 'function') {
187
+ try {
188
+ await isolationRunner.preflightDockerIsolation({ verbose: VERBOSE });
189
+ } catch (preflightError) {
190
+ console.error(`⚠️ Docker isolation preflight failed (continuing): ${preflightError?.message || preflightError}`);
191
+ }
192
+ }
183
193
  }
184
194
 
185
195
  // Validate solve overrides early using solve's yargs config