@link-assistant/hive-mind 2.0.2 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +138 -0
- package/package.json +1 -1
- package/src/bot-lifecycle.lib.mjs +128 -0
- package/src/bot-logger.lib.mjs +253 -0
- package/src/cleanup.lib.mjs +22 -4
- package/src/cleanup.mjs +15 -2
- package/src/cleanup.os.lib.mjs +94 -8
- package/src/isolation-runner.lib.mjs +378 -11
- package/src/session-monitor.lib.mjs +389 -18
- package/src/session-resume.lib.mjs +269 -0
- package/src/session-status.lib.mjs +141 -0
- package/src/session-store.lib.mjs +232 -0
- package/src/telegram-bot.mjs +65 -13
- package/src/telegram-command-execution.lib.mjs +3 -1
- package/src/telegram-terminal-watch-command.lib.mjs +47 -6
- package/src/work-session-formatting.lib.mjs +44 -11
package/src/cleanup.os.lib.mjs
CHANGED
|
@@ -570,6 +570,12 @@ export function listActiveTaskRefsFromProc() {
|
|
|
570
570
|
* Discover currently-running isolation session UUIDs from start-command's live
|
|
571
571
|
* session managers (screen / tmux). These names are the session UUIDs.
|
|
572
572
|
*
|
|
573
|
+
* @deprecated Superseded by {@link listSessionTasks}, which sources every
|
|
574
|
+
* session (active *and* finished) from the single `$ --list` catalog rather
|
|
575
|
+
* than re-deriving liveness from `screen -ls`/`tmux ls`. Retained as a
|
|
576
|
+
* documented building block (issue #1848 case study) and for callers that only
|
|
577
|
+
* want live screen/tmux UUIDs without start-command.
|
|
578
|
+
*
|
|
573
579
|
* @returns {string[]}
|
|
574
580
|
*/
|
|
575
581
|
export function listLiveSessionIds() {
|
|
@@ -597,6 +603,11 @@ export function listLiveSessionIds() {
|
|
|
597
603
|
* Query `$ --status <uuid>` for each live session and extract task references
|
|
598
604
|
* from executing sessions' command lines. Optional; reuses isolation-runner.
|
|
599
605
|
*
|
|
606
|
+
* @deprecated Superseded by {@link listSessionTasks} (issue #1927 review), which
|
|
607
|
+
* reads the whole catalog from one `$ --list` call instead of N per-session
|
|
608
|
+
* `$ --status` queries and also surfaces finished sessions. Kept for the issue
|
|
609
|
+
* #1848 case study and backward compatibility.
|
|
610
|
+
*
|
|
600
611
|
* @param {string[]} sessionIds
|
|
601
612
|
* @returns {Promise<Array<{owner, repo, type, number}>>}
|
|
602
613
|
*/
|
|
@@ -650,33 +661,108 @@ export function resolvePrHeadBranch(ref) {
|
|
|
650
661
|
return out || null;
|
|
651
662
|
}
|
|
652
663
|
|
|
664
|
+
/**
 * List every task start-command knows about, read from its single `$ --list`
 * catalog (issue #1927 review).
 *
 * Each GitHub issue/PR reference found in a session's command line becomes one
 * record carrying that session's id, name, status, workspace and start time,
 * plus a `terminal` flag (true when the session's status is a terminal one).
 * Finished sessions are included — unlike
 * {@link listActiveTaskRefsFromSessions} — so a stale `gh-issue-solver-*`
 * folder can still be annotated with the PR and session it once belonged to.
 *
 * Records come out newest session first, so when several sessions worked the
 * same issue/PR the most recent one is the first match a caller encounters.
 *
 * A runner module that cannot be imported, or a failing session listing, both
 * degrade to an empty list instead of throwing.
 *
 * @param {Object} [options]
 * @param {boolean} [options.verbose=false] - forwarded to the session listing
 * @param {boolean} [options.resolveBranches=false] - resolve PR head branches via gh
 * @returns {Promise<Array<{owner, repo, type, number, branch: string|null, sessionId: string|null, sessionName: string|null, status: string|null, workspace: string|null, terminal: boolean, startTime: string|null}>>}
 */
export async function listSessionTasks(options = {}) {
  const { verbose = false, resolveBranches = false } = options;

  // The runner is loaded lazily; without it there is no catalog to read.
  let runner;
  try {
    runner = await import('./isolation-runner.lib.mjs');
  } catch {
    return [];
  }
  const { listIsolationSessions, isTerminalSessionStatus } = runner;

  let catalog;
  try {
    catalog = await listIsolationSessions(verbose);
  } catch {
    return [];
  }

  // Sort a copy so the caller-visible catalog order is never disturbed.
  const startedAt = session => new Date(session.startTime || 0).getTime();
  const newestFirst = [...catalog];
  newestFirst.sort((a, b) => startedAt(b) - startedAt(a));

  const tasks = [];
  for (const session of newestFirst) {
    if (!session?.command) continue;
    const terminal = Boolean(session.status && isTerminalSessionStatus(session.status));
    for (const ref of extractTaskRefsFromCommand(session.command)) {
      tasks.push({
        ...ref,
        branch: null,
        sessionId: session.uuid || null,
        sessionName: session.sessionName || null,
        status: session.status || null,
        workspace: session.workingDirectory || null,
        terminal,
        startTime: session.startTime || null,
      });
    }
  }

  if (resolveBranches) {
    // One lookup per distinct PR, shared by every session that referenced it.
    const resolved = new Map();
    for (const task of tasks.filter(candidate => candidate.type === 'pull')) {
      const prKey = `${task.owner}/${task.repo}#${task.number}`;
      let branch;
      if (resolved.has(prKey)) {
        branch = resolved.get(prKey);
      } else {
        branch = resolvePrHeadBranch(task);
        resolved.set(prKey, branch);
      }
      task.branch = branch;
    }
  }

  return tasks;
}
|
|
733
|
+
|
|
653
734
|
/**
|
|
654
735
|
* Build the full active-task list, resolving PR head branches where possible.
|
|
655
736
|
*
|
|
656
737
|
* @param {Object} [options]
|
|
657
|
-
* @param {boolean} [options.useSessions=true] - also
|
|
738
|
+
* @param {boolean} [options.useSessions=true] - also consult `$ --list` sessions
|
|
658
739
|
* @param {boolean} [options.resolveBranches=true] - resolve PR head branches via gh
|
|
740
|
+
* @param {Array} [options.sessionTasks] - pre-fetched `listSessionTasks()` result to reuse
|
|
659
741
|
* @returns {Promise<Array<{owner, repo, type, number, branch: string|null}>>}
|
|
660
742
|
*/
|
|
661
743
|
export async function getActiveTasks(options = {}) {
|
|
662
|
-
const { useSessions = true, resolveBranches = true } = options;
|
|
744
|
+
const { useSessions = true, resolveBranches = true, sessionTasks = null } = options;
|
|
663
745
|
const refs = [...listActiveTaskRefsFromProc()];
|
|
664
746
|
const seen = new Set(refs.map(r => `${r.owner}/${r.repo}#${r.number}:${r.type}`));
|
|
665
747
|
|
|
666
748
|
if (useSessions) {
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
749
|
+
// Active = sessions start-command still reports as non-terminal. Reuse the
|
|
750
|
+
// shared `$ --list` enumeration (optionally pre-fetched by the caller so the
|
|
751
|
+
// catalog is read only once).
|
|
752
|
+
const allSessionTasks = sessionTasks || (await listSessionTasks({ verbose: false, resolveBranches: false }));
|
|
753
|
+
for (const task of allSessionTasks) {
|
|
754
|
+
if (task.terminal) continue;
|
|
755
|
+
const key = `${task.owner}/${task.repo}#${task.number}:${task.type}`;
|
|
670
756
|
if (!seen.has(key)) {
|
|
671
757
|
seen.add(key);
|
|
672
|
-
refs.push(
|
|
758
|
+
refs.push(task);
|
|
673
759
|
}
|
|
674
760
|
}
|
|
675
761
|
}
|
|
676
762
|
|
|
677
763
|
return refs.map(ref => {
|
|
678
|
-
let branch = null;
|
|
679
|
-
if (ref.type === 'pull' && resolveBranches) {
|
|
764
|
+
let branch = ref.branch || null;
|
|
765
|
+
if (!branch && ref.type === 'pull' && resolveBranches) {
|
|
680
766
|
branch = resolvePrHeadBranch(ref);
|
|
681
767
|
}
|
|
682
768
|
return { ...ref, branch };
|
|
@@ -18,6 +18,7 @@ import { spawn } from 'node:child_process';
|
|
|
18
18
|
import fs from 'node:fs';
|
|
19
19
|
import os from 'node:os';
|
|
20
20
|
import path from 'node:path';
|
|
21
|
+
import { isExecutingSessionStatus, isTerminalSessionStatus } from './session-status.lib.mjs';
|
|
21
22
|
|
|
22
23
|
if (typeof use === 'undefined') {
|
|
23
24
|
await ensureUseM();
|
|
@@ -25,10 +26,14 @@ if (typeof use === 'undefined') {
|
|
|
25
26
|
|
|
26
27
|
const { $ } = await use('command-stream');
|
|
27
28
|
|
|
29
|
+
// Re-export the shared status predicates so existing callers that reach them via
|
|
30
|
+
// the isolation-runner module (e.g. session-monitor's `runner.isExecutingSessionStatus`)
|
|
31
|
+
// keep working. The canonical definitions live in session-status.lib.mjs so the
|
|
32
|
+
// killed/terminated/oom vocabulary stays consistent everywhere (issue #1927).
|
|
33
|
+
export { isExecutingSessionStatus, isTerminalSessionStatus, isKilledSessionStatus } from './session-status.lib.mjs';
|
|
34
|
+
|
|
28
35
|
// Valid isolation backends
|
|
29
36
|
const VALID_ISOLATION_BACKENDS = ['screen', 'tmux', 'docker'];
|
|
30
|
-
const RUNNING_SESSION_STATUSES = new Set(['executing', 'running']);
|
|
31
|
-
const TERMINAL_SESSION_STATUSES = new Set(['executed', 'completed', 'failed', 'cancelled', 'canceled', 'error']);
|
|
32
37
|
const HIVE_MIND_IMAGE_REPO = 'konard/hive-mind';
|
|
33
38
|
const HIVE_MIND_DIND_IMAGE_REPO = 'konard/hive-mind-dind';
|
|
34
39
|
const DEFAULT_HIVE_MIND_IMAGE_TAG = 'latest';
|
|
@@ -53,6 +58,15 @@ const DOCKER_ISOLATION_SHELL = 'sh';
|
|
|
53
58
|
// less headroom than this cannot safely pull one. Diagnostic only — never
|
|
54
59
|
// blocks startup. See issue #1914.
|
|
55
60
|
const DOCKER_ISOLATION_LOW_DISK_GIB = 40;
|
|
61
|
+
// Sentinel start-command's detached docker logger records when it cannot capture
|
|
62
|
+
// the container's real exit code. A terminal `$ --status` carrying this value is
|
|
63
|
+
// ambiguous — the container may still be running — so we cross-check it against
|
|
64
|
+
// a live `docker inspect` before concluding the session finished. See #1939.
|
|
65
|
+
// The upstream emission of this premature sentinel was fixed in
|
|
66
|
+
// start-command 0.29.1 (link-foundation/start#136), which the Hive Mind images
|
|
67
|
+
// now pin; this cross-check is retained as defense-in-depth so an older `$` on
|
|
68
|
+
// an operator's PATH cannot resurrect the bug.
|
|
69
|
+
const DOCKER_UNKNOWN_EXIT_CODE = -1;
|
|
56
70
|
|
|
57
71
|
function normalizeProcessIds(value) {
|
|
58
72
|
if (!value || typeof value !== 'object') return {};
|
|
@@ -137,9 +151,14 @@ export function resolveHostDockerSock({ env = process.env } = {}) {
|
|
|
137
151
|
/**
|
|
138
152
|
* Build host auth mounts for a Docker-isolated task.
|
|
139
153
|
*
|
|
140
|
-
* GitHub auth is mounted for every task because solve/hive/task need gh.
|
|
141
|
-
*
|
|
142
|
-
*
|
|
154
|
+
* GitHub auth is mounted for every task because solve/hive/task need gh. Git
|
|
155
|
+
* identity (`~/.gitconfig` and the XDG `~/.config/git` directory) is mounted for
|
|
156
|
+
* every task too: it is tool-agnostic and `solve` aborts early with "Git
|
|
157
|
+
* identity not configured" when `user.name`/`user.email` are absent, so a child
|
|
158
|
+
* container that authenticates with gh but inherits no git identity still cannot
|
|
159
|
+
* commit. See issue #1939. Tool credentials are deliberately scoped: Codex
|
|
160
|
+
* sessions do not receive Claude files and Claude sessions do not receive Codex
|
|
161
|
+
* files.
|
|
143
162
|
*/
|
|
144
163
|
export function getDockerIsolationAuthMounts({ tool = 'claude', env = process.env, homeDir = os.homedir(), existsSync = fs.existsSync } = {}) {
|
|
145
164
|
const mounts = [];
|
|
@@ -147,6 +166,14 @@ export function getDockerIsolationAuthMounts({ tool = 'claude', env = process.en
|
|
|
147
166
|
|
|
148
167
|
maybeAddMount(mounts, env.GH_CONFIG_DIR || path.join(homeDir, '.config', 'gh'), path.join(DOCKER_CONTAINER_HOME, '.config', 'gh'), existsSync);
|
|
149
168
|
|
|
169
|
+
// Git identity (tool-agnostic, required for commits). Honor the same env vars
|
|
170
|
+
// git itself reads for an alternate global config location (GIT_CONFIG_GLOBAL)
|
|
171
|
+
// and the XDG base dir, falling back to the conventional `~/.gitconfig` and
|
|
172
|
+
// `~/.config/git`. Missing host paths are skipped, so a container image that
|
|
173
|
+
// already bakes a git identity is left untouched. See issue #1939.
|
|
174
|
+
maybeAddMount(mounts, env.GIT_CONFIG_GLOBAL || path.join(homeDir, '.gitconfig'), path.join(DOCKER_CONTAINER_HOME, '.gitconfig'), existsSync);
|
|
175
|
+
maybeAddMount(mounts, env.XDG_CONFIG_HOME ? path.join(env.XDG_CONFIG_HOME, 'git') : path.join(homeDir, '.config', 'git'), path.join(DOCKER_CONTAINER_HOME, '.config', 'git'), existsSync);
|
|
176
|
+
|
|
150
177
|
if (normalizedTool === 'codex') {
|
|
151
178
|
maybeAddMount(mounts, path.join(homeDir, '.codex'), path.join(DOCKER_CONTAINER_HOME, '.codex'), existsSync);
|
|
152
179
|
} else if (normalizedTool === 'claude') {
|
|
@@ -357,18 +384,104 @@ export function parseSessionStatusOutput(output) {
|
|
|
357
384
|
};
|
|
358
385
|
}
|
|
359
386
|
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
387
|
+
/**
 * Tell whether a detached-docker exit code is "unknown", i.e. not a real result.
 *
 * start-command's detached docker logger writes the exit-code footer only
 * after `docker logs -f` returns, and records the sentinel `-1` when it cannot
 * capture the real code. A `$ --status` that reports a terminal status while
 * still carrying that sentinel — or no exit code at all — is ambiguous: the
 * container may still be running. Callers treat such a status as provisional
 * and cross-check the live container first. See issue #1939.
 *
 * @param {number|null|undefined} exitCode
 * @returns {boolean} True when the exit code carries no real result.
 */
export function isUnknownDockerExitCode(exitCode) {
  // `== null` matches both null and undefined: no exit code was reported.
  if (exitCode == null) return true;
  return Number(exitCode) === DOCKER_UNKNOWN_EXIT_CODE;
}
|
|
367
404
|
|
|
368
405
|
/**
 * Report whether a status result is unusable: true when the result is missing,
 * says the session does not exist, or carries no status value.
 *
 * @param {{exists?: boolean, status?: string|null}|null|undefined} statusResult
 * @returns {boolean}
 */
export function shouldFallbackToScreenStatus(statusResult) {
  const usable = Boolean(statusResult?.exists && statusResult?.status);
  return !usable;
}
|
|
371
408
|
|
|
409
|
+
/**
 * Extract the exit footer that start-command appends to an execution log once
 * the wrapped command exits. Its presence proves the command terminated, which
 * makes it authoritative about the exit code even when `$ --status` is wrong.
 *
 * Footer shape (see start-command spawn-helpers.js):
 *
 *   ==================================================
 *   Finished: 2026-06-14 19:10:49.822
 *   Exit Code: 137
 *
 * Issue #1927: start-command's `enrichDetachedStatus` can flip a completed
 * `executed/137` record back to `executing` (nulling the exit code) when a
 * lingering shell keeps the screen session alive. Reading the footer lets
 * hive-mind see the real terminal exit regardless of that flip.
 *
 * @param {string} text - Log text (typically the tail of the log file)
 * @returns {{finished: boolean, exitCode: number|null, endTime: string|null}}
 */
export function parseSessionExitFooter(text) {
  const notFinished = { finished: false, exitCode: null, endTime: null };
  if (!text) return notFinished;

  // The `=` separator is part of the pattern so command output that merely
  // prints "Exit Code: N" mid-stream is not mistaken for the footer.
  const footerPattern = /={10,}\s*\r?\nFinished:\s*([^\r\n]+)\r?\nExit Code:\s*(-?\d+)/g;
  const footers = [...String(text).matchAll(footerPattern)];
  if (footers.length === 0) return notFinished;

  // A re-run can append more than one footer; the last one is the current result.
  const [, finishedAt, exitCode] = footers[footers.length - 1];
  return { finished: true, exitCode: Number(exitCode), endTime: finishedAt.trim() };
}
|
|
442
|
+
|
|
443
|
+
/**
 * Recover the terminal exit code from the end of a start-command execution log.
 *
 * The footer sits at the end of the file, so only the trailing `tailBytes` are
 * read; that keeps the probe cheap for multi-megabyte logs. Never throws: a
 * missing, empty or unreadable log yields `{ finished: false }`.
 *
 * @param {string} logPath
 * @param {Object} [options]
 * @param {Object} [options.fsImpl=fs] - Injectable fs (for tests)
 * @param {number} [options.tailBytes=16384] - How many trailing bytes to scan
 * @param {boolean} [options.verbose]
 * @returns {{finished: boolean, exitCode: number|null, endTime: string|null}}
 */
export function readSessionExitFromLog(logPath, options = {}) {
  const { fsImpl = fs, tailBytes = 16384, verbose = false } = options;
  const notFinished = () => ({ finished: false, exitCode: null, endTime: null });

  if (!logPath) return notFinished();

  try {
    const { size } = fsImpl.statSync(logPath);
    if (!size) return notFinished();

    // Window covering at most the last `tailBytes` of the file.
    const offset = Math.max(0, size - tailBytes);
    const tail = Buffer.alloc(size - offset);

    const fd = fsImpl.openSync(logPath, 'r');
    try {
      fsImpl.readSync(fd, tail, 0, tail.length, offset);
    } finally {
      // Release the descriptor even when the read itself fails.
      fsImpl.closeSync(fd);
    }

    const footer = parseSessionExitFooter(tail.toString('utf8'));
    if (verbose && footer.finished) {
      console.log(`[VERBOSE] isolation-runner: log footer for ${logPath} reports exit ${footer.exitCode} (finished ${footer.endTime})`);
    }
    return footer;
  } catch (error) {
    if (verbose) {
      console.log(`[VERBOSE] isolation-runner: could not read exit footer from ${logPath}: ${error.message}`);
    }
    return notFinished();
  }
}
|
|
484
|
+
|
|
372
485
|
/**
|
|
373
486
|
* Find the `$` CLI binary path
|
|
374
487
|
* @returns {Promise<string|null>} Path to `$` binary or null
|
|
@@ -383,6 +496,41 @@ async function findStartCommandBinary() {
|
|
|
383
496
|
}
|
|
384
497
|
}
|
|
385
498
|
|
|
499
|
+
/**
 * Verbose diagnostics emitted right after a native docker-isolated session is
 * launched.
 *
 * Prints, side by side, what `$ --status` reports (status, exit code, exists)
 * and what the Docker daemon reports (whether the container is running and
 * whether the isolation image is present). Two symptoms from issue #1939 are
 * called out explicitly:
 *   - problem #1: a terminal status with the unknown exit-code sentinel while
 *     the container is still running (premature completion);
 *   - problem #2: the isolation image being absent right after launch, which
 *     points at a missing host-image passthrough that forced a re-pull.
 *
 * Best-effort: any probe failure is swallowed so diagnostics never disrupt the
 * task.
 *
 * @param {string} sessionId - Session UUID (also the container name)
 * @param {Object} [env] - Environment used to resolve the isolation image
 * @returns {Promise<void>}
 */
async function logDockerIsolationPostLaunchDiagnostics(sessionId, env = process.env) {
  try {
    const reported = await querySessionStatus(sessionId, false);
    const { status: reportedStatus, exitCode: reportedExitCode, exists } = reported;
    console.log(`[VERBOSE] isolation-runner: Docker post-launch $ --status: status=${reportedStatus ?? '(none)'} exitCode=${reportedExitCode ?? '(none)'} exists=${exists} (issue #1939)`);

    const containerRunning = await checkDockerContainerRunning(sessionId, false);
    console.log(`[VERBOSE] isolation-runner: Docker post-launch container '${sessionId}' running=${containerRunning} (issue #1939)`);

    // Short-circuit order is kept so the predicates only run for an existing session.
    const prematureCompletion = exists && isTerminalSessionStatus(reportedStatus) && isUnknownDockerExitCode(reportedExitCode) && containerRunning;
    if (prematureCompletion) {
      console.log(`[VERBOSE] isolation-runner: ⚠️ Docker session '${sessionId}' reports a terminal status with the unknown exit-code sentinel while its container is still running — premature-completion symptom (issue #1939, problem #1)`);
    }

    const image = getDockerIsolationImage({ env });
    const imagePresent = await checkDockerImagePresent(image, false);
    console.log(`[VERBOSE] isolation-runner: Docker post-launch isolation image '${image}' present=${imagePresent} (issue #1939)`);
    if (!imagePresent) {
      console.log(`[VERBOSE] isolation-runner: ⚠️ Docker isolation image '${image}' is absent right after launch — host-image passthrough likely did not seed the nested daemon, so the task re-pulled it (issue #1939, problem #2)`);
    }
  } catch {
    // Diagnostics are best-effort; never let a probe failure affect the task.
  }
}
|
|
533
|
+
|
|
386
534
|
/**
|
|
387
535
|
* Execute a command with isolation via `$` from start-command
|
|
388
536
|
*
|
|
@@ -437,6 +585,8 @@ export async function executeWithIsolation(command, args, options = {}) {
|
|
|
437
585
|
console.log(`[VERBOSE] isolation-runner: Docker isolation privileged: ${shouldRunPrivilegedDockerIsolation(image, env)}`);
|
|
438
586
|
console.log('[VERBOSE] isolation-runner: Docker isolation pull: reuse local image if present, pull only if missing (start-command default)');
|
|
439
587
|
console.log(`[VERBOSE] isolation-runner: Docker isolation mounts: ${mounts.map(m => m.target).join(', ') || '(none)'}`);
|
|
588
|
+
const gitIdentityMounted = mounts.some(m => m.target === path.join(DOCKER_CONTAINER_HOME, '.gitconfig') || m.target === path.join(DOCKER_CONTAINER_HOME, '.config', 'git'));
|
|
589
|
+
console.log(`[VERBOSE] isolation-runner: Docker isolation git identity propagated: ${gitIdentityMounted ? 'yes' : 'no (host ~/.gitconfig missing — child may fail with "Git identity not configured", issue #1939)'}`);
|
|
440
590
|
}
|
|
441
591
|
}
|
|
442
592
|
|
|
@@ -448,6 +598,14 @@ export async function executeWithIsolation(command, args, options = {}) {
|
|
|
448
598
|
if (result.error) stream(`[VERBOSE] isolation-runner: Error: ${result.error}`);
|
|
449
599
|
}
|
|
450
600
|
|
|
601
|
+
// Issue #1939: capture the freshly-launched docker session's reported status
|
|
602
|
+
// and the live container state together, so the next iteration has the data to
|
|
603
|
+
// diagnose a premature "executed/-1" status (problem #1) or a surprise image
|
|
604
|
+
// re-pull (problem #2). Best-effort and verbose-only — never affects the run.
|
|
605
|
+
if (verbose && backend === 'docker' && result.success) {
|
|
606
|
+
await logDockerIsolationPostLaunchDiagnostics(sessionId, options.env || process.env);
|
|
607
|
+
}
|
|
608
|
+
|
|
451
609
|
if (result.success) {
|
|
452
610
|
return {
|
|
453
611
|
success: true,
|
|
@@ -498,6 +656,78 @@ export async function querySessionStatus(sessionId, verbose = false) {
|
|
|
498
656
|
}
|
|
499
657
|
}
|
|
500
658
|
|
|
659
|
+
/**
 * Parse output from `$ --list --output-format json`.
 *
 * start-command may return a top-level array, an object with an
 * `executions`/`sessions` array, or a single record object. Each entry is
 * normalized to the same shape used by {@link parseSessionStatusOutput}
 * (uuid/status/exitCode/command/isolation/…). Tolerant of unknown layouts:
 * anything unparseable yields an empty list, and non-object entries are
 * dropped.
 *
 * `exitCode` is only ever a finite number or `null`. Values that merely coerce
 * to a number (`""`, `true`, `[]`) or that are not numeric at all are reported
 * as `null`, so a malformed record can never masquerade as exit code 0.
 *
 * @param {string} output - Raw stdout from `$ --list`
 * @returns {Array<{uuid: string|null, status: string|null, exitCode: number|null, startTime: string|null, endTime: string|null, command: string|null, isolation: string|null, workingDirectory: string|null, sessionName: string|null}>}
 */
export function parseSessionListOutput(output) {
  // Coerce instead of assuming a string so a Buffer/number input cannot throw.
  const raw = String(output ?? '').trim();
  if (!raw) return [];

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return [];
  }

  let records;
  if (Array.isArray(parsed)) {
    records = parsed;
  } else if (Array.isArray(parsed?.executions)) {
    records = parsed.executions;
  } else if (Array.isArray(parsed?.sessions)) {
    records = parsed.sessions;
  } else if (parsed && typeof parsed === 'object') {
    // A lone record object is treated as a one-element catalog.
    records = [parsed];
  } else {
    records = [];
  }

  // First candidate that is a non-empty string, else null.
  const firstString = (...candidates) => candidates.find(candidate => typeof candidate === 'string' && candidate) ?? null;

  // Accept real numbers and non-blank numeric strings only; everything else is "no exit code".
  const toExitCode = value => {
    if (typeof value === 'number') return Number.isFinite(value) ? value : null;
    if (typeof value === 'string' && value.trim() !== '') {
      const numeric = Number(value);
      return Number.isFinite(numeric) ? numeric : null;
    }
    return null;
  };

  return records
    .map(data => {
      if (!data || typeof data !== 'object') return null;
      const isolation = firstString(data.isolation, data.options?.isolated, data.options?.isolation);
      return {
        uuid: data.uuid || data.session || data.sessionId || null,
        status: typeof data.status === 'string' ? data.status.toLowerCase() : null,
        exitCode: toExitCode(data.exitCode),
        startTime: data.startTime || null,
        endTime: data.endTime || null,
        command: data.command || null,
        isolation: isolation ? isolation.toLowerCase() : null,
        workingDirectory: data.workingDirectory || null,
        sessionName: data.sessionName || data.options?.sessionName || null,
      };
    })
    .filter(Boolean);
}
|
|
699
|
+
|
|
700
|
+
/**
 * Fetch every execution start-command knows about by running
 * `$ --list --output-format json` and normalizing its output.
 *
 * Unlike `$ --status`, the `--list` path does NOT run start-command's
 * `enrichDetachedStatus` liveness gate, so the status and exit code come back
 * exactly as recorded. The bot's restart-resume scan relies on this to find
 * detached solve/hive/task sessions launched before the bot last started
 * (issue #1927, requirement #2).
 *
 * A missing `$` binary or a failing command both produce an empty list.
 *
 * @param {boolean} [verbose]
 * @returns {Promise<Array<object>>} Normalized session records (see parseSessionListOutput)
 */
export async function listIsolationSessions(verbose = false) {
  const note = message => {
    if (verbose) console.log(message);
  };

  const startCommand = await findStartCommandBinary();
  if (!startCommand) {
    note('[VERBOSE] isolation-runner: Cannot list sessions - $ binary not found');
    return [];
  }

  try {
    const { stdout } = await $({ mirror: false })`${startCommand} --list --output-format json`;
    const listing = stdout?.toString().trim() || '';
    const sessions = parseSessionListOutput(listing);
    note(`[VERBOSE] isolation-runner: $ --list returned ${sessions.length} session(s)`);
    return sessions;
  } catch (error) {
    note(`[VERBOSE] isolation-runner: $ --list error: ${error.message}`);
    return [];
  }
}
|
|
730
|
+
|
|
501
731
|
/**
|
|
502
732
|
* Ask the `$` CLI to gracefully stop an isolated session by sending CTRL+C.
|
|
503
733
|
*
|
|
@@ -601,6 +831,45 @@ export async function checkDockerContainerRunning(containerName, verbose = false
|
|
|
601
831
|
}
|
|
602
832
|
}
|
|
603
833
|
|
|
834
|
+
/**
 * Probe whether a tmux session with the given name still exists.
 *
 * `tmux has-session -t <name>` exits 0 when the session exists and non-zero
 * otherwise, so a throw from command-stream is read as "not found".
 *
 * @param {string} sessionName
 * @param {boolean} [verbose]
 * @returns {Promise<boolean>}
 */
export async function checkTmuxSessionRunning(sessionName, verbose = false) {
  const report = state => {
    if (verbose) console.log(`[VERBOSE] isolation-runner: tmux has-session '${sessionName}': ${state}`);
  };

  try {
    await $({ mirror: false })`tmux has-session -t ${sessionName}`;
    report('running');
    return true;
  } catch {
    report('not found');
    return false;
  }
}
|
|
853
|
+
|
|
854
|
+
/**
 * Ask the backend itself whether a session/container is still alive, without
 * going through `$ --status`.
 *
 * This is the cross-check for a session that start-command still reports as
 * `executing` although its backing process is gone (issue #1927). An
 * unrecognized backend resolves to `null`, letting callers treat the probe as
 * "no signal" rather than "dead".
 *
 * @param {string} sessionId - Session UUID (also the screen name / container name)
 * @param {string} backend - 'screen' | 'tmux' | 'docker'
 * @param {boolean} [verbose]
 * @returns {Promise<boolean|null>}
 */
export async function checkBackendSessionAlive(sessionId, backend, verbose = false) {
  switch (backend) {
    case 'screen':
      return checkScreenSessionRunning(sessionId, verbose);
    case 'tmux':
      return checkTmuxSessionRunning(sessionId, verbose);
    case 'docker':
      return checkDockerContainerRunning(sessionId, verbose);
    default:
      // Indeterminate: the backend is not one this module can probe.
      return null;
  }
}
|
|
872
|
+
|
|
604
873
|
/**
|
|
605
874
|
* Check whether an image is present in the local Docker daemon.
|
|
606
875
|
*
|
|
@@ -795,6 +1064,89 @@ export async function preflightDockerIsolation(options = {}) {
|
|
|
795
1064
|
return result;
|
|
796
1065
|
}
|
|
797
1066
|
|
|
1067
|
+
/**
 * Compute the two host paths that can carry a git identity into a
 * docker-isolated container via getDockerIsolationAuthMounts.
 *
 * The first entry is the global config file: `GIT_CONFIG_GLOBAL` when set,
 * otherwise `~/.gitconfig`. The second is the XDG git directory:
 * `$XDG_CONFIG_HOME/git` when that variable is set, otherwise `~/.config/git`.
 * The paths are only computed here, not checked for existence. See issue #1939.
 *
 * @param {Object} [options]
 * @param {Object} [options.env] - Environment (defaults to process.env)
 * @param {string} [options.homeDir] - Home dir (defaults to os.homedir())
 * @returns {string[]} `[globalConfigFile, xdgGitDir]`
 */
export function resolveHostGitIdentityPaths({ env = process.env, homeDir = os.homedir() } = {}) {
  const globalConfigFile = env.GIT_CONFIG_GLOBAL || path.join(homeDir, '.gitconfig');
  const xdgBase = env.XDG_CONFIG_HOME || path.join(homeDir, '.config');
  return [globalConfigFile, path.join(xdgBase, 'git')];
}
|
|
1076
|
+
|
|
1077
|
+
/**
 * Report whether at least one of the host git-identity paths exists, i.e.
 * whether getDockerIsolationAuthMounts has something to mount into an isolated
 * container. See issue #1939.
 *
 * @param {Object} [options]
 * @param {Object} [options.env] - Environment (defaults to process.env)
 * @param {string} [options.homeDir] - Home dir (defaults to os.homedir())
 * @param {Function} [options.existsSync] - fs.existsSync (injectable for tests)
 * @returns {boolean}
 */
export function hostHasMountableGitIdentity({ env = process.env, homeDir = os.homedir(), existsSync = fs.existsSync } = {}) {
  for (const candidate of resolveHostGitIdentityPaths({ env, homeDir })) {
    // Stop at the first path that is present on the host.
    if (existsSync(candidate)) return true;
  }
  return false;
}
|
|
1084
|
+
|
|
1085
|
+
/**
 * Startup git-identity preflight for `--isolation docker` (issue #1939).
 *
 * Flow:
 *   1. If hostHasMountableGitIdentity reports an identity, log that and return
 *      `{ present: true, repaired: false }`.
 *   2. Otherwise run a repair: the injected `options.repair` when provided,
 *      else `repairGitIdentity()` loaded lazily from `./git.lib.mjs`. An error
 *      thrown by the repair is caught and turned into a failed outcome.
 *   3. If the repair outcome reports success AND the identity is now
 *      mountable, log that and return `{ present: true, repaired: true }`.
 *   4. Otherwise record one actionable warning (including the repair error
 *      text when there is one), emit it through the logger, and return
 *      `{ present: false, repaired: false }`.
 *
 * The repair is attempted only when no mountable identity was found in step 1.
 *
 * @param {Object} [options]
 * @param {Object} [options.env] - Environment (defaults to process.env)
 * @param {string} [options.homeDir] - Home dir (injectable for tests)
 * @param {Function} [options.existsSync] - fs.existsSync (injectable for tests)
 * @param {Object} [options.logger] - Logger with .log/.warn (defaults to console)
 * @param {Function} [options.repair] - repairGitIdentity-style probe (injectable for tests)
 * @returns {Promise<{present: boolean, repaired: boolean, warnings: string[]}>}
 */
export async function ensureHostGitIdentityForIsolation(options = {}) {
  const {
    env = process.env,
    homeDir = os.homedir(),
    existsSync = fs.existsSync,
    logger = console,
    repair = null,
  } = options;

  // Resolve the log sinks: a missing .log becomes a no-op, and a missing .warn
  // falls back to whatever the info sink ended up being.
  let info = () => {};
  if (typeof logger.log === 'function') {
    info = logger.log.bind(logger);
  }
  let warn = info;
  if (typeof logger.warn === 'function') {
    warn = logger.warn.bind(logger);
  }

  // The same probe runs before and after the repair attempt.
  const identityIsMountable = () => hostHasMountableGitIdentity({ env, homeDir, existsSync });

  if (identityIsMountable()) {
    info('✅ Host git identity present — docker-isolated tasks inherit it via the mounted ~/.gitconfig (issue #1939).');
    return { present: true, repaired: false, warnings: [] };
  }

  // Nothing to mount yet. Attempt to derive an identity so the next isolated
  // task does not fail with "Git identity not configured".
  async function repairViaGitLib() {
    // Loaded lazily so the module is only imported when a repair is needed.
    const gitLib = await import('./git.lib.mjs');
    return gitLib.repairGitIdentity();
  }
  const repairFn = repair || repairViaGitLib;

  let repairOutcome;
  try {
    repairOutcome = await repairFn();
  } catch (error) {
    // Best-effort: a throwing repair is downgraded to a failed outcome.
    repairOutcome = { success: false, error: error?.message || String(error) };
  }

  // Re-probe only when the repair claims success; the claim alone is not trusted.
  const repairedAndMountable = Boolean(repairOutcome?.success) && identityIsMountable();
  if (repairedAndMountable) {
    info('✅ Host git identity was missing; derived it from the authenticated gh account via gh-setup-git-identity so docker-isolated tasks can mount it (issue #1939).');
    return { present: true, repaired: true, warnings: [] };
  }

  const repairError = repairOutcome?.error;
  const warningParts = [
    `No host git identity (~/.gitconfig) to mount into docker-isolated containers, so isolated 'solve' tasks will fail with "Git identity not configured" even though gh is authenticated (issue #1939). `,
    `Configure one on the bot host: run 'gh-setup-git-identity' (derives it from the authenticated gh account), set 'git config --global user.name/.email', or pass '--auto-gh-configuration-repair' to solve.`,
    repairError ? ` Auto-repair attempt failed: ${repairError}` : '',
  ];
  const warnings = [warningParts.join('')];

  warnings.forEach(w => {
    warn(`⚠️ ${w}`);
  });

  return { present: false, repaired: false, warnings };
}
|
|
1149
|
+
|
|
798
1150
|
/**
|
|
799
1151
|
* Check if an isolated session is still running.
|
|
800
1152
|
* Uses `$ --status` first, with a backend-specific fallback (screen -ls for
|
|
@@ -818,6 +1170,21 @@ export async function isSessionRunning(sessionId, options = {}) {
|
|
|
818
1170
|
return true;
|
|
819
1171
|
}
|
|
820
1172
|
if (isTerminalSessionStatus(result.status)) {
|
|
1173
|
+
// Issue #1939: a native docker session can report a terminal status
|
|
1174
|
+
// ("executed") while the container is still alive, carrying the unknown
|
|
1175
|
+
// exit-code sentinel (-1) because start-command's detached logger marks
|
|
1176
|
+
// the launcher process executed before the container exits. Trust the
|
|
1177
|
+
// terminal status only when a real exit code was captured; otherwise
|
|
1178
|
+
// cross-check the live container before declaring the session finished.
|
|
1179
|
+
if (backend === 'docker' && isUnknownDockerExitCode(result.exitCode)) {
|
|
1180
|
+
const containerRunning = await checkDockerContainerRunning(sessionId, verbose);
|
|
1181
|
+
if (containerRunning) {
|
|
1182
|
+
if (verbose) {
|
|
1183
|
+
console.log(`[VERBOSE] isolation-runner: $ --status reports '${result.status}' (exitCode ${result.exitCode}) for docker session '${sessionId}', but docker inspect shows the container is still running — treating as active (issue #1939)`);
|
|
1184
|
+
}
|
|
1185
|
+
return true;
|
|
1186
|
+
}
|
|
1187
|
+
}
|
|
821
1188
|
return false;
|
|
822
1189
|
}
|
|
823
1190
|
}
|