@link-assistant/hive-mind 2.0.2 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,51 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 2.0.3
4
+
5
+ ### Patch Changes
6
+
7
+ - 40fbf3d: fix(isolation): mount git identity into docker-isolated containers and stop trusting premature terminal status (#1939)
8
+
9
+ A `solve` task launched with `--isolation docker` inside a Docker-in-Docker host
10
+ (`konard/hive-mind-dind:2.0.2`) failed at the system-check stage with
11
+ `❌ Git identity not configured`, even though `gh` was fully authenticated
12
+ (account `konard`). The captured terminal log shows the native start-command
13
+ (`$`) invocation mounting only `~/.config/gh`, `~/.claude`, and `~/.claude.json`
14
+ — **no git identity** — so `git config user.name`/`user.email` were unset inside
15
+ the container and `solve` aborted before doing any work.
16
+
17
+ Root cause: `getDockerIsolationAuthMounts` (`src/isolation-runner.lib.mjs`)
18
+ mounted `gh` and the per-tool credentials but never the git identity. `gh`
19
+ authentication is not a git identity. The fix mounts the host git identity
20
+ (`~/.gitconfig` and the XDG `~/.config/git`, honoring `GIT_CONFIG_GLOBAL` /
21
+ `XDG_CONFIG_HOME`) for **every** tool, alongside `gh`, so the fix applies to all
22
+ isolation callers at once. A new self-healing preflight,
23
+ `ensureHostGitIdentityForIsolation`, ensures there is an identity to mount: when the
24
+ host has no git identity it derives one from the authenticated `gh` account
25
+ (`gh-setup-git-identity` / `repairGitIdentity`) and, if that is impossible, emits
26
+ one actionable warning naming the exact downstream failure.
27
+
28
+ The same run also exposed a second problem: `$ --list` reported the detached
29
+ session as `status executed` with `exitCode -1` (and no `containerId`) while the
30
+ container was still running, masking the live container and its real exit code.
31
+ `isUnknownDockerExitCode` plus a docker-only cross-check in `isSessionRunning`
32
+ and `getIsolationSessionState` (`src/session-monitor.lib.mjs`) keep an ambiguous
33
+ `terminal + -1` docker session "running" until `docker inspect` confirms the
34
+ container has actually exited; real exit codes and non-docker backends are
35
+ unaffected. A verbose post-launch diagnostic now records `$ --status`, container
36
+ state, and local image presence so the next iteration can confirm the premature
37
+ status and the image re-pull from the recorded data.
38
+
39
+ The premature-terminal-status behaviour was reported upstream to
40
+ link-foundation/start and fixed there in `start-command@0.29.1`
41
+ (link-foundation/start#136); `Dockerfile` and `Dockerfile.dind` now pin
42
+ `start-command@0.29.1` so the fixed `$` binary ships in the images, while the
43
+ downstream cross-check stays as defense-in-depth for older hosts.
44
+
45
+ Added `tests/test-issue-1939-docker-isolation.mjs` (25 assertions) and a full
46
+ case study with timeline, root-cause analysis, and the captured logs under
47
+ `docs/case-studies/issue-1939`.
48
+
3
49
  ## 2.0.2
4
50
 
5
51
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "2.0.2",
3
+ "version": "2.0.3",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -53,6 +53,15 @@ const DOCKER_ISOLATION_SHELL = 'sh';
53
53
  // less headroom than this cannot safely pull one. Diagnostic only — never
54
54
  // blocks startup. See issue #1914.
55
55
  const DOCKER_ISOLATION_LOW_DISK_GIB = 40;
56
+ // Sentinel that start-command's detached docker logger records when it cannot capture
57
+ // the container's real exit code. A terminal `$ --status` carrying this value is
58
+ // ambiguous — the container may still be running — so we cross-check it against
59
+ // a live `docker inspect` before concluding the session finished. See #1939.
60
+ // The upstream emission of this premature sentinel was fixed in
61
+ // start-command 0.29.1 (link-foundation/start#136), which the Hive Mind images
62
+ // now pin; this cross-check is retained as defense-in-depth so an older `$` on
63
+ // an operator's PATH cannot resurrect the bug.
64
+ const DOCKER_UNKNOWN_EXIT_CODE = -1;
56
65
 
57
66
  function normalizeProcessIds(value) {
58
67
  if (!value || typeof value !== 'object') return {};
@@ -137,9 +146,14 @@ export function resolveHostDockerSock({ env = process.env } = {}) {
137
146
  /**
138
147
  * Build host auth mounts for a Docker-isolated task.
139
148
  *
140
- * GitHub auth is mounted for every task because solve/hive/task need gh. Tool
141
- * credentials are deliberately scoped: Codex sessions do not receive Claude
142
- * files and Claude sessions do not receive Codex files.
149
+ * GitHub auth is mounted for every task because solve/hive/task need gh. Git
150
+ * identity (`~/.gitconfig` and the XDG `~/.config/git` directory) is mounted for
151
+ * every task too: it is tool-agnostic and `solve` aborts early with "Git
152
+ * identity not configured" when `user.name`/`user.email` are absent, so a child
153
+ * container that authenticates with gh but inherits no git identity still cannot
154
+ * commit. See issue #1939. Tool credentials are deliberately scoped: Codex
155
+ * sessions do not receive Claude files and Claude sessions do not receive Codex
156
+ * files.
143
157
  */
144
158
  export function getDockerIsolationAuthMounts({ tool = 'claude', env = process.env, homeDir = os.homedir(), existsSync = fs.existsSync } = {}) {
145
159
  const mounts = [];
@@ -147,6 +161,14 @@ export function getDockerIsolationAuthMounts({ tool = 'claude', env = process.en
147
161
 
148
162
  maybeAddMount(mounts, env.GH_CONFIG_DIR || path.join(homeDir, '.config', 'gh'), path.join(DOCKER_CONTAINER_HOME, '.config', 'gh'), existsSync);
149
163
 
164
+ // Git identity (tool-agnostic, required for commits). Honor the same env vars
165
+ // git itself reads for an alternate global config location (GIT_CONFIG_GLOBAL)
166
+ // and the XDG base dir, falling back to the conventional `~/.gitconfig` and
167
+ // `~/.config/git`. Missing host paths are skipped, so a container image that
168
+ // already bakes a git identity is left untouched. See issue #1939.
169
+ maybeAddMount(mounts, env.GIT_CONFIG_GLOBAL || path.join(homeDir, '.gitconfig'), path.join(DOCKER_CONTAINER_HOME, '.gitconfig'), existsSync);
170
+ maybeAddMount(mounts, env.XDG_CONFIG_HOME ? path.join(env.XDG_CONFIG_HOME, 'git') : path.join(homeDir, '.config', 'git'), path.join(DOCKER_CONTAINER_HOME, '.config', 'git'), existsSync);
171
+
150
172
  if (normalizedTool === 'codex') {
151
173
  maybeAddMount(mounts, path.join(homeDir, '.codex'), path.join(DOCKER_CONTAINER_HOME, '.codex'), existsSync);
152
174
  } else if (normalizedTool === 'claude') {
@@ -365,6 +387,24 @@ export function isTerminalSessionStatus(status) {
365
387
  return TERMINAL_SESSION_STATUSES.has(String(status || '').toLowerCase());
366
388
  }
367
389
 
390
+ /**
391
+ * Decide whether a detached-docker exit code is "unknown" (not a real result).
392
+ *
393
+ * start-command's detached docker logger writes the exit-code footer only after
394
+ * `docker logs -f` returns, capturing the real code via `docker inspect`. When
395
+ * it cannot capture one it records the sentinel `-1`. A `$ --status` that
396
+ * reports a terminal status ("executed") while still carrying that sentinel — or
397
+ * no exit code at all — is therefore ambiguous: the container may actually still
398
+ * be running. Callers treat such a status as provisional and cross-check the
399
+ * live container before declaring the session finished. See issue #1939.
400
+ *
401
+ * @param {number|null|undefined} exitCode
402
+ * @returns {boolean} True when the exit code carries no real result.
403
+ */
404
+ export function isUnknownDockerExitCode(exitCode) {
405
+ return exitCode === null || exitCode === undefined || Number(exitCode) === DOCKER_UNKNOWN_EXIT_CODE;
406
+ }
407
+
368
408
  export function shouldFallbackToScreenStatus(statusResult) {
369
409
  return !statusResult?.exists || !statusResult?.status;
370
410
  }
@@ -383,6 +423,41 @@ async function findStartCommandBinary() {
383
423
  }
384
424
  }
385
425
 
426
+ /**
427
+ * Verbose post-launch diagnostics for a native docker-isolated session.
428
+ *
429
+ * Logs, side by side: what `$ --status` reports (status + exit code) and what
430
+ * the nested Docker daemon reports for the container (running state + image
431
+ * presence). The two together make problems #1 and #2 of issue #1939
432
+ * observable on the next run — a status of "executed"/-1 while `docker inspect`
433
+ * says the container is running is the premature-completion symptom (problem
434
+ * #1); an isolation image that is absent right after launch points at a missing
435
+ * host-image passthrough that forced a re-pull (problem #2). Best-effort: any
436
+ * probe failure is swallowed so diagnostics never disrupt the task.
437
+ *
438
+ * @param {string} sessionId - Session UUID (also the container name)
439
+ * @param {Object} [env] - Environment used to resolve the isolation image
440
+ */
441
+ async function logDockerIsolationPostLaunchDiagnostics(sessionId, env = process.env) {
442
+ try {
443
+ const status = await querySessionStatus(sessionId, false);
444
+ console.log(`[VERBOSE] isolation-runner: Docker post-launch $ --status: status=${status.status ?? '(none)'} exitCode=${status.exitCode ?? '(none)'} exists=${status.exists} (issue #1939)`);
445
+ const containerRunning = await checkDockerContainerRunning(sessionId, false);
446
+ console.log(`[VERBOSE] isolation-runner: Docker post-launch container '${sessionId}' running=${containerRunning} (issue #1939)`);
447
+ if (status.exists && isTerminalSessionStatus(status.status) && isUnknownDockerExitCode(status.exitCode) && containerRunning) {
448
+ console.log(`[VERBOSE] isolation-runner: ⚠️ Docker session '${sessionId}' reports a terminal status with the unknown exit-code sentinel while its container is still running — premature-completion symptom (issue #1939, problem #1)`);
449
+ }
450
+ const image = getDockerIsolationImage({ env });
451
+ const imagePresent = await checkDockerImagePresent(image, false);
452
+ console.log(`[VERBOSE] isolation-runner: Docker post-launch isolation image '${image}' present=${imagePresent} (issue #1939)`);
453
+ if (!imagePresent) {
454
+ console.log(`[VERBOSE] isolation-runner: ⚠️ Docker isolation image '${image}' is absent right after launch — host-image passthrough likely did not seed the nested daemon, so the task re-pulled it (issue #1939, problem #2)`);
455
+ }
456
+ } catch {
457
+ // Diagnostics are best-effort; never let a probe failure affect the task.
458
+ }
459
+ }
460
+
386
461
  /**
387
462
  * Execute a command with isolation via `$` from start-command
388
463
  *
@@ -437,6 +512,8 @@ export async function executeWithIsolation(command, args, options = {}) {
437
512
  console.log(`[VERBOSE] isolation-runner: Docker isolation privileged: ${shouldRunPrivilegedDockerIsolation(image, env)}`);
438
513
  console.log('[VERBOSE] isolation-runner: Docker isolation pull: reuse local image if present, pull only if missing (start-command default)');
439
514
  console.log(`[VERBOSE] isolation-runner: Docker isolation mounts: ${mounts.map(m => m.target).join(', ') || '(none)'}`);
515
+ const gitIdentityMounted = mounts.some(m => m.target === path.join(DOCKER_CONTAINER_HOME, '.gitconfig') || m.target === path.join(DOCKER_CONTAINER_HOME, '.config', 'git'));
516
+ console.log(`[VERBOSE] isolation-runner: Docker isolation git identity propagated: ${gitIdentityMounted ? 'yes' : 'no (host ~/.gitconfig missing — child may fail with "Git identity not configured", issue #1939)'}`);
440
517
  }
441
518
  }
442
519
 
@@ -448,6 +525,14 @@ export async function executeWithIsolation(command, args, options = {}) {
448
525
  if (result.error) stream(`[VERBOSE] isolation-runner: Error: ${result.error}`);
449
526
  }
450
527
 
528
+ // Issue #1939: capture the freshly-launched docker session's reported status
529
+ // and the live container state together, so the next iteration has the data to
530
+ // diagnose a premature "executed/-1" status (problem #1) or a surprise image
531
+ // re-pull (problem #2). Best-effort and verbose-only — never affects the run.
532
+ if (verbose && backend === 'docker' && result.success) {
533
+ await logDockerIsolationPostLaunchDiagnostics(sessionId, options.env || process.env);
534
+ }
535
+
451
536
  if (result.success) {
452
537
  return {
453
538
  success: true,
@@ -795,6 +880,89 @@ export async function preflightDockerIsolation(options = {}) {
795
880
  return result;
796
881
  }
797
882
 
883
+ /**
884
+ * Host paths that, when present, propagate a git identity into a docker-isolated
885
+ * container via getDockerIsolationAuthMounts. Honors the same env vars git reads
886
+ * for an alternate global config (GIT_CONFIG_GLOBAL) and the XDG base dir, then
887
+ * the conventional `~/.gitconfig` and `~/.config/git`. See issue #1939.
888
+ */
889
+ export function resolveHostGitIdentityPaths({ env = process.env, homeDir = os.homedir() } = {}) {
890
+ return [env.GIT_CONFIG_GLOBAL || path.join(homeDir, '.gitconfig'), env.XDG_CONFIG_HOME ? path.join(env.XDG_CONFIG_HOME, 'git') : path.join(homeDir, '.config', 'git')];
891
+ }
892
+
893
+ /**
894
+ * True when at least one host git config path exists that getDockerIsolationAuthMounts can
894
+ * mount into an isolated container (existence check only; contents are not read). See issue #1939.
896
+ */
897
+ export function hostHasMountableGitIdentity({ env = process.env, homeDir = os.homedir(), existsSync = fs.existsSync } = {}) {
898
+ return resolveHostGitIdentityPaths({ env, homeDir }).some(p => Boolean(existsSync(p)));
899
+ }
900
+
901
+ /**
902
+ * Startup git-identity preflight for `--isolation docker`.
903
+ *
904
+ * A docker-isolated child container starts from a clean image and inherits the
905
+ * host's git identity ONLY through the mounted `~/.gitconfig` / `~/.config/git`
906
+ * (getDockerIsolationAuthMounts). If the host has no git identity to mount, the
907
+ * child `solve` aborts with "Git identity not configured" even though gh is
908
+ * authenticated — the exact failure in issue #1939.
909
+ *
910
+ * This makes the deployment self-healing: when the host has no mountable git
911
+ * identity but `gh-setup-git-identity` is installed (the Hive Mind images bake
912
+ * it in) and gh is authenticated, it derives an identity from the gh account so
913
+ * the mount has something to propagate. The repair is idempotent — it runs only
914
+ * when no identity exists, so it never overwrites a configured one — and
915
+ * best-effort: any failure degrades to a loud, actionable warning rather than a
916
+ * thrown error. When neither a host identity nor a repair is possible, the
917
+ * warning tells the operator exactly how to fix it.
918
+ *
919
+ * @param {Object} [options]
920
+ * @param {Object} [options.env] - Environment (defaults to process.env)
921
+ * @param {string} [options.homeDir] - Home dir (injectable for tests)
922
+ * @param {Function} [options.existsSync] - fs.existsSync (injectable for tests)
923
+ * @param {Object} [options.logger] - Logger with .log/.warn (defaults to console)
924
+ * @param {Function} [options.repair] - repairGitIdentity-style probe (injectable for tests)
925
+ * @returns {Promise<{present: boolean, repaired: boolean, warnings: string[]}>}
926
+ */
927
+ export async function ensureHostGitIdentityForIsolation(options = {}) {
928
+ const { env = process.env, homeDir = os.homedir(), existsSync = fs.existsSync, logger = console, repair = null } = options;
929
+ const info = typeof logger.log === 'function' ? logger.log.bind(logger) : () => {};
930
+ const warn = typeof logger.warn === 'function' ? logger.warn.bind(logger) : info;
931
+ const result = { present: false, repaired: false, warnings: [] };
932
+
933
+ if (hostHasMountableGitIdentity({ env, homeDir, existsSync })) {
934
+ result.present = true;
935
+ info('✅ Host git identity present — docker-isolated tasks inherit it via the mounted ~/.gitconfig (issue #1939).');
936
+ return result;
937
+ }
938
+
939
+ // No mountable identity. Try to derive one from the authenticated gh account
940
+ // so the next isolated task does not fail with "Git identity not configured".
941
+ const repairFn =
942
+ repair ||
943
+ (async () => {
944
+ const gitLib = await import('./git.lib.mjs');
945
+ return gitLib.repairGitIdentity();
946
+ });
947
+ let repairOutcome = null;
948
+ try {
949
+ repairOutcome = await repairFn();
950
+ } catch (error) {
951
+ repairOutcome = { success: false, error: error?.message || String(error) };
952
+ }
953
+
954
+ if (repairOutcome?.success && hostHasMountableGitIdentity({ env, homeDir, existsSync })) {
955
+ result.present = true;
956
+ result.repaired = true;
957
+ info('✅ Host git identity was missing; derived it from the authenticated gh account via gh-setup-git-identity so docker-isolated tasks can mount it (issue #1939).');
958
+ return result;
959
+ }
960
+
961
+ result.warnings.push(`No host git identity (~/.gitconfig) to mount into docker-isolated containers, so isolated 'solve' tasks will fail with "Git identity not configured" even though gh is authenticated (issue #1939). ` + `Configure one on the bot host: run 'gh-setup-git-identity' (derives it from the authenticated gh account), set 'git config --global user.name/.email', or pass '--auto-gh-configuration-repair' to solve.` + (repairOutcome?.error ? ` Auto-repair attempt failed: ${repairOutcome.error}` : ''));
962
+ for (const w of result.warnings) warn(`⚠️ ${w}`);
963
+ return result;
964
+ }
965
+
798
966
  /**
799
967
  * Check if an isolated session is still running.
800
968
  * Uses `$ --status` first, with a backend-specific fallback (screen -ls for
@@ -818,6 +986,21 @@ export async function isSessionRunning(sessionId, options = {}) {
818
986
  return true;
819
987
  }
820
988
  if (isTerminalSessionStatus(result.status)) {
989
+ // Issue #1939: a native docker session can report a terminal status
990
+ // ("executed") while the container is still alive, carrying the unknown
991
+ // exit-code sentinel (-1) because start-command's detached logger marks
992
+ // the launcher process executed before the container exits. Trust the
993
+ // terminal status only when a real exit code was captured; otherwise
994
+ // cross-check the live container before declaring the session finished.
995
+ if (backend === 'docker' && isUnknownDockerExitCode(result.exitCode)) {
996
+ const containerRunning = await checkDockerContainerRunning(sessionId, verbose);
997
+ if (containerRunning) {
998
+ if (verbose) {
999
+ console.log(`[VERBOSE] isolation-runner: $ --status reports '${result.status}' (exitCode ${result.exitCode}) for docker session '${sessionId}', but docker inspect shows the container is still running — treating as active (issue #1939)`);
1000
+ }
1001
+ return true;
1002
+ }
1003
+ }
821
1004
  return false;
822
1005
  }
823
1006
  }
@@ -40,6 +40,24 @@ export function resetSessionMonitorForTests() {
40
40
  activeSessions.clear();
41
41
  }
42
42
 
43
+ /**
44
+ * Inject a stub isolation runner so tests can drive getIsolationSessionState
45
+ * without spawning real `$ --status` / docker probes. Pass `null` to restore the
46
+ * lazy real import on the next call. See issue #1939.
47
+ */
48
+ export function __setIsolationRunnerForTests(runner) {
49
+ _isolationRunner = runner;
50
+ }
51
+
52
+ /**
53
+ * Test-only accessor for getIsolationSessionState (otherwise module-private).
54
+ * Used by tests/test-issue-1939-docker-isolation.mjs to verify that an ambiguous
55
+ * docker terminal status falls through to the live container cross-check.
56
+ */
57
+ export function getIsolationSessionStateForTests(sessionName, sessionInfo, options = {}) {
58
+ return getIsolationSessionState(sessionName, sessionInfo, options);
59
+ }
60
+
43
61
  /**
44
62
  * Issue #1586: Timeout for non-isolation sessions.
45
63
  * Non-isolation (plain start-screen) sessions cannot reliably detect completion
@@ -227,12 +245,20 @@ async function getIsolationSessionState(sessionName, sessionInfo, options = {})
227
245
  return { running: true, exitCode: null, status: statusResult.status, statusResult };
228
246
  }
229
247
  if (runner.isTerminalSessionStatus(statusResult.status)) {
230
- return {
231
- running: false,
232
- exitCode: statusResult.exitCode !== undefined ? statusResult.exitCode : null,
233
- status: statusResult.status,
234
- statusResult,
235
- };
248
+ // Issue #1939: a native docker session can report a terminal status
249
+ // ("executed") with the unknown exit-code sentinel (-1) while the
250
+ // container is still running. Such a status is provisional — fall
251
+ // through to isSessionRunning(), which cross-checks the live container
252
+ // via `docker inspect` before we notify the user the work finished.
253
+ const ambiguousDockerTerminal = sessionInfo.isolationBackend === 'docker' && typeof runner.isUnknownDockerExitCode === 'function' && runner.isUnknownDockerExitCode(statusResult.exitCode);
254
+ if (!ambiguousDockerTerminal) {
255
+ return {
256
+ running: false,
257
+ exitCode: statusResult.exitCode !== undefined ? statusResult.exitCode : null,
258
+ status: statusResult.status,
259
+ statusResult,
260
+ };
261
+ }
236
262
  }
237
263
  }
238
264
 
@@ -174,6 +174,17 @@ if (ISOLATION_BACKEND) {
174
174
  } catch (preflightError) {
175
175
  console.error(`⚠️ Docker isolation preflight failed (continuing): ${preflightError?.message || preflightError}`);
176
176
  }
177
+ // A docker-isolated child inherits the host git identity only through the
178
+ // mounted ~/.gitconfig (or ~/.config/git). Ensure the host has one (deriving it from the authenticated
179
+ // gh account when missing) so isolated `solve` does not fail with "Git
180
+ // identity not configured". Never throws. See issue #1939.
181
+ if (typeof isolationRunner.ensureHostGitIdentityForIsolation === 'function') {
182
+ try {
183
+ await isolationRunner.ensureHostGitIdentityForIsolation({});
184
+ } catch (gitIdentityError) {
185
+ console.error(`⚠️ Docker isolation git-identity preflight failed (continuing): ${gitIdentityError?.message || gitIdentityError}`);
186
+ }
187
+ }
177
188
  }
178
189
  }
179
190