@link-assistant/hive-mind 1.78.11 → 1.78.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,65 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.78.13
4
+
5
+ ### Patch Changes
6
+
7
+ - a8035e9: Fail fast when watched GitHub repositories, issues, pull requests, or branches are deleted, closed, or no longer accessible instead of retrying them as unknown CI states.
8
+
9
+ Also fall back to a pinned working `use-m` bootstrap when the upstream latest unpkg entry is missing, so local and CI test startup remains stable.
10
+
11
+ ## 1.78.12
12
+
13
+ ### Patch Changes
14
+
15
+ - 5f60c04: fix(isolation): default nested Docker daemon to fuse-overlayfs so multi-GB images fit on disk + add storage-driver/disk preflight diagnostics (#1914)
16
+
17
+ The `--isolation docker` issue (#1914) was reopened after PR #1915: native Docker isolation and
18
+ host-image passthrough now work, but the first isolated task on the >30 GB
19
+ `konard/hive-mind-dind` image still died with:
20
+
21
+ ```
22
+ failed to register layer: no space left on device
23
+ ```
24
+
25
+ even though most layers reported `Already exists` (the daemon was correctly
26
+ seeded — passthrough is working). The failure was during layer **registration**,
27
+ not download.
28
+
29
+ **Root cause (in this repo).** `Dockerfile.dind` baked `ENV
30
+ DIND_STORAGE_DRIVER="vfs"` (commit 44d2c29e). `vfs` performs **no copy-on-write**:
31
+ it materializes a full, independent copy of the entire filesystem for _every_
32
+ layer, so a multi-GB image's on-disk footprint becomes the _sum_ of all
33
+ cumulative layer sizes — many times the image size — and overflows the disk.
34
+ Worse, pinning the env var **defeated box-dind's storage-driver auto-detection**
35
+ (`overlay2 → fuse-overlayfs → vfs`, with graceful fallback): box would otherwise
36
+ have picked a copy-on-write driver here. `/dev/fuse` is present (the dind
37
+ container runs `--privileged`), the `fuse-overlayfs` binary ships in box-dind,
38
+ and `overlay` is in `/proc/filesystems` — so copy-on-write was available the
39
+ whole time but was being bypassed by the `vfs` pin.
40
+
41
+ **Fix.** `Dockerfile.dind` now pins `ENV DIND_STORAGE_DRIVER="fuse-overlayfs"` — a
42
+ copy-on-write driver that also works overlay-on-overlay (the compatibility reason
43
+ `vfs` was originally chosen; `overlay2` can fail on the overlay-backed hosts our
44
+ deploys run on). Under `fuse-overlayfs`, registering a 498 MB top layer on a
45
+ ~30 GB base costs ~498 MB instead of ~30 GB, so the image fits. Empirically
46
+ verified in the box-dind environment (`docs/case-studies/issue-1914/data/fuse-overlayfs-capability-proof.log`).
47
+
48
+ **Self-diagnosing preflight.** `src/isolation-runner.lib.mjs` gained two probes —
49
+ `checkDockerStorageDriver()` and `checkDockerDiskSpace()` — wired into
50
+ `preflightDockerIsolation()`. Before running an isolated task it now warns, with
51
+ an actionable remedy, when the nested daemon is on `vfs` (even if the image is
52
+ already present) or when the image is absent and free space at the Docker data root is below 40 GiB, so
53
+ the next operator hitting this gets a clear breadcrumb instead of a cryptic
54
+ `no space left on device`. Both probes are best-effort and never throw.
55
+
56
+ Added `tests/test-issue-1914-storage-driver-diagnostics.mjs` (34 assertions),
57
+ extended `tests/test-issue-1914-preflight-passthrough.mjs` and
58
+ `tests/test-docker-dind-variant.mjs`, refreshed `docs/DOCKER*.md`, and expanded
59
+ the `docs/case-studies/issue-1914` case study with the reopen timeline, refined
60
+ root-cause analysis, captured evidence, and an upstream observability request
61
+ (link-foundation/box#104: warn when the nested daemon lands on `vfs`).
62
+
3
63
  ## 1.78.11
4
64
 
5
65
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.78.11",
3
+ "version": "1.78.13",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -19,6 +19,7 @@ const execRaw = promisify(execCallback);
19
19
  import { parseGitHubUrl } from './github.lib.mjs';
20
20
  import { githubLimits } from './config.lib.mjs';
21
21
  import { ghWithRateLimitRetry } from './github-rate-limit.lib.mjs';
22
+ import { getTerminalGitHubEntityErrorMessage, isTerminalGitHubEntityError } from './github-terminal-state.lib.mjs';
22
23
 
23
24
  // Issue #1722: gh api `--paginate --slurp` responses for repos with many
24
25
  // historical workflow runs can easily exceed Node's default 1 MB exec buffer
@@ -411,6 +412,17 @@ export async function checkPRCIStatus(owner, repo, prNumber, verbose = false) {
411
412
  hasPending,
412
413
  };
413
414
  } catch (error) {
415
+ if (isTerminalGitHubEntityError(error)) {
416
+ const terminalError = getTerminalGitHubEntityErrorMessage(error);
417
+ if (verbose) console.log(`[VERBOSE] /merge: Terminal GitHub entity error while checking CI status for PR #${prNumber}: ${terminalError}`);
418
+ return {
419
+ status: 'terminal_github_entity_error',
420
+ checks: [],
421
+ allPassed: false,
422
+ hasPending: false,
423
+ error: terminalError,
424
+ };
425
+ }
414
426
  if (verbose) {
415
427
  console.log(`[VERBOSE] /merge: Error checking CI status: ${error.message}`);
416
428
  }
@@ -434,7 +446,7 @@ export async function checkPRCIStatus(owner, repo, prNumber, verbose = false) {
434
446
  * @param {string} repo - Repository name
435
447
  * @param {number} prNumber - Pull request number
436
448
  * @param {boolean} verbose - Whether to log verbose output
437
- * @returns {Promise<{mergeable: boolean, reason: string|null}>}
449
+ * @returns {Promise<{mergeable: boolean, reason: string|null, terminal?: boolean}>}
438
450
  */
439
451
  export async function checkPRMergeable(owner, repo, prNumber, verbose = false) {
440
452
  // Issue #1339: GitHub computes mergeability asynchronously. When mergeStateStatus is
@@ -495,6 +507,12 @@ export async function checkPRMergeable(owner, repo, prNumber, verbose = false) {
495
507
 
496
508
  return { mergeable, reason };
497
509
  } catch (error) {
510
+ if (isTerminalGitHubEntityError(error)) {
511
+ const terminalError = getTerminalGitHubEntityErrorMessage(error);
512
+ if (verbose) console.log(`[VERBOSE] /merge: Terminal GitHub entity error while checking mergeability for PR #${prNumber}: ${terminalError}`);
513
+ return { mergeable: false, reason: terminalError, terminal: true };
514
+ }
515
+
498
516
  if (verbose) {
499
517
  console.log(`[VERBOSE] /merge: Error checking mergeability: ${error.message}`);
500
518
  }
@@ -652,6 +670,14 @@ export async function waitForCI(owner, repo, prNumber, options = {}, verbose = f
652
670
  return { success: false, status: 'failure', error: 'CI checks failed' };
653
671
  }
654
672
 
673
+ if (ciStatus.status === 'terminal_github_entity_error') {
674
+ return {
675
+ success: false,
676
+ status: 'terminal_github_entity_error',
677
+ error: ciStatus.error || 'GitHub repository, pull request, issue, or branch is no longer accessible',
678
+ };
679
+ }
680
+
655
681
  if (ciStatus.status === 'pending') {
656
682
  if (verbose) {
657
683
  console.log(`[VERBOSE] /merge: Waiting for CI... (${Math.round((Date.now() - startTime) / 1000)}s elapsed)`);
@@ -1240,6 +1266,28 @@ export async function getDetailedCIStatus(owner, repo, prNumber, verbose = false
1240
1266
  passedChecks,
1241
1267
  };
1242
1268
  } catch (error) {
1269
+ if (isTerminalGitHubEntityError(error)) {
1270
+ const terminalError = getTerminalGitHubEntityErrorMessage(error);
1271
+ if (verbose) console.log(`[VERBOSE] /merge: Terminal GitHub entity error while getting detailed CI status for PR #${prNumber}: ${terminalError}`);
1272
+ return {
1273
+ status: 'terminal_github_entity_error',
1274
+ checks: [],
1275
+ sha: null,
1276
+ hasFailures: false,
1277
+ hasCancelled: false,
1278
+ hasStale: false,
1279
+ hasPending: false,
1280
+ hasQueued: false,
1281
+ allPassed: false,
1282
+ failedChecks: [],
1283
+ cancelledChecks: [],
1284
+ staleChecks: [],
1285
+ pendingChecks: [],
1286
+ queuedChecks: [],
1287
+ passedChecks: [],
1288
+ error: terminalError,
1289
+ };
1290
+ }
1243
1291
  if (verbose) {
1244
1292
  console.log(`[VERBOSE] /merge: Error getting detailed CI status: ${error.message}`);
1245
1293
  }
@@ -0,0 +1,266 @@
1
+ #!/usr/bin/env node
2
+ import { ensureUseM } from './use-m-bootstrap.lib.mjs';
3
+
4
+ /**
5
+ * Detect terminal GitHub entity states for long-running watch/merge loops.
6
+ *
7
+ * These checks intentionally treat 404-style repository, PR, issue, and branch
8
+ * responses as terminal. In a solver loop, deleted entities and lost access are
9
+ * not transient CI states; retrying them indefinitely wastes time and tokens.
10
+ *
11
+ * @see https://github.com/link-assistant/hive-mind/issues/1931
12
+ */
13
+
14
+ let defaultCommandRunner = null;
15
+
16
+ const getDefaultCommandRunner = async () => {
17
+ if (defaultCommandRunner) return defaultCommandRunner;
18
+ if (typeof globalThis.use === 'undefined') {
19
+ await ensureUseM();
20
+ }
21
+ const use = globalThis.use;
22
+ const { $: rawDollar } = await use('command-stream');
23
+ const { wrapDollarWithGhRetry } = await import('./github-rate-limit.lib.mjs');
24
+ defaultCommandRunner = wrapDollarWithGhRetry(rawDollar);
25
+ return defaultCommandRunner;
26
+ };
27
+
28
+ const textFrom = value => {
29
+ if (!value) return '';
30
+ if (typeof value === 'string') return value;
31
+ if (value instanceof Error) {
32
+ return [value.message, value.stdout?.toString?.(), value.stderr?.toString?.()].filter(Boolean).join('\n');
33
+ }
34
+ return [value.message, value.stdout?.toString?.(), value.stderr?.toString?.(), value.output?.toString?.()].filter(Boolean).join('\n');
35
+ };
36
+
37
+ export const getGitHubCommandOutput = result => [result?.stdout?.toString?.() || '', result?.stderr?.toString?.() || '', result?.output?.toString?.() || ''].filter(Boolean).join('\n');
38
+
39
+ export const isTerminalGitHubEntityError = value => {
40
+ const text = textFrom(value);
41
+ if (!text) return false;
42
+
43
+ return [/\bHTTP\s+404\b/i, /\bHTTP\s+410\b/i, /\b404\s+Not Found\b/i, /\b410\s+Gone\b/i, /\bNot Found\s+\(HTTP 404\)/i, /"status"\s*:\s*"404"/i, /"status"\s*:\s*"410"/i, /\bstatus['"]?\s*:\s*404\b/i, /\bstatus['"]?\s*:\s*410\b/i, /Could not resolve to a Repository/i, /Could not resolve to a PullRequest/i, /Could not resolve to an Issue/i, /Could not resolve to a Branch/i, /Could not resolve to a Repository with the name/i, /GraphQL:.*Could not resolve.*Repository/i, /\brepository not found\b/i, /\bgh:\s*Not Found\b/i].some(pattern => pattern.test(text));
44
+ };
45
+
46
+ export const getTerminalGitHubEntityErrorMessage = (value, fallback = 'GitHub entity is no longer accessible') => {
47
+ const text = textFrom(value)
48
+ .split('\n')
49
+ .map(line => line.trim())
50
+ .filter(Boolean)
51
+ .join(' ');
52
+ return text || fallback;
53
+ };
54
+
55
+ const terminal = ({ reason, message, details = [], success = false, data = null }) => ({
56
+ terminal: true,
57
+ success,
58
+ reason,
59
+ message,
60
+ details,
61
+ data,
62
+ });
63
+
64
+ const ok = (data = {}) => ({
65
+ terminal: false,
66
+ success: null,
67
+ reason: null,
68
+ message: null,
69
+ details: [],
70
+ data,
71
+ });
72
+
73
+ const safeJsonParse = value => {
74
+ const text = value?.toString?.().trim() || '';
75
+ if (!text) return null;
76
+ try {
77
+ return JSON.parse(text);
78
+ } catch {
79
+ return null;
80
+ }
81
+ };
82
+
83
+ const commandFailedTerminally = result => {
84
+ const code = result?.code ?? 0;
85
+ return code !== 0 && isTerminalGitHubEntityError(getGitHubCommandOutput(result));
86
+ };
87
+
88
+ const toTemplateStrings = strings => Object.assign([...strings], { raw: [...strings] });
89
+
90
+ const runCommand = async (commandRunner, strings, ...values) => {
91
+ try {
92
+ return await commandRunner(toTemplateStrings(strings), ...values);
93
+ } catch (error) {
94
+ return {
95
+ code: error.code || 1,
96
+ stdout: error.stdout || '',
97
+ stderr: error.stderr || error.message || '',
98
+ output: textFrom(error),
99
+ };
100
+ }
101
+ };
102
+
103
+ const checkBranch = async ({ commandRunner, repoFullName, branchName, reason, label }) => {
104
+ if (!repoFullName || !branchName) {
105
+ return ok();
106
+ }
107
+
108
+ const encodedBranchName = encodeURIComponent(branchName);
109
+ const branchResult = await runCommand(commandRunner, ['gh api repos/', '/branches/', ''], repoFullName, encodedBranchName);
110
+
111
+ if (commandFailedTerminally(branchResult)) {
112
+ return terminal({
113
+ reason,
114
+ message: `${label} branch '${branchName}' is no longer accessible in ${repoFullName}.`,
115
+ details: [getTerminalGitHubEntityErrorMessage(branchResult)],
116
+ });
117
+ }
118
+
119
+ return ok();
120
+ };
121
+
122
+ /**
123
+ * Check whether the GitHub entities watched by a long-running operation reached
124
+ * a terminal state.
125
+ *
126
+ * @param {Object} options
127
+ * @param {string} options.owner
128
+ * @param {string} options.repo
129
+ * @param {number|string|null} [options.issueNumber]
130
+ * @param {number|string|null} [options.prNumber]
131
+ * @param {string|null} [options.sourceBranchName]
132
+ * @param {string|null} [options.targetBranchName]
133
+ * @param {Function} [options.commandRunner] command-stream tagged template
134
+ * @returns {Promise<{terminal: boolean, success: boolean|null, reason: string|null, message: string|null, details: string[], data?: Object}>}
135
+ */
136
+ export const checkGitHubTerminalState = async ({ owner, repo, issueNumber = null, prNumber = null, sourceBranchName = null, targetBranchName = null, commandRunner = null }) => {
137
+ const runner = commandRunner || (await getDefaultCommandRunner());
138
+ const repoResult = await runCommand(runner, ['gh api repos/', '/', " --jq '{full_name: .full_name, default_branch: .default_branch}'"], owner, repo);
139
+ if (commandFailedTerminally(repoResult)) {
140
+ return terminal({
141
+ reason: 'repository_unavailable',
142
+ message: `Repository ${owner}/${repo} is no longer accessible.`,
143
+ details: [getTerminalGitHubEntityErrorMessage(repoResult)],
144
+ });
145
+ }
146
+
147
+ const repoData = safeJsonParse(repoResult.stdout) || {};
148
+
149
+ if (prNumber) {
150
+ const prResult = await runCommand(runner, ['gh api repos/', '/', '/pulls/', ''], owner, repo, prNumber);
151
+ if (commandFailedTerminally(prResult)) {
152
+ return terminal({
153
+ reason: 'pull_request_unavailable',
154
+ message: `Pull request #${prNumber} in ${owner}/${repo} is no longer accessible.`,
155
+ details: [getTerminalGitHubEntityErrorMessage(prResult)],
156
+ });
157
+ }
158
+
159
+ const prData = safeJsonParse(prResult.stdout);
160
+ if (prData) {
161
+ const prState = String(prData.state || '').toLowerCase();
162
+ if (prData.merged === true) {
163
+ return terminal({
164
+ reason: 'pull_request_merged',
165
+ message: `Pull request #${prNumber} has been merged.`,
166
+ success: true,
167
+ data: { pr: prData, repo: repoData },
168
+ });
169
+ }
170
+ if (prState === 'closed') {
171
+ return terminal({
172
+ reason: 'pull_request_closed',
173
+ message: `Pull request #${prNumber} has been closed without merging.`,
174
+ data: { pr: prData, repo: repoData },
175
+ });
176
+ }
177
+
178
+ const headRepo = prData.head?.repo?.full_name || null;
179
+ const headRef = prData.head?.ref || sourceBranchName;
180
+ if (!headRepo && headRef) {
181
+ return terminal({
182
+ reason: 'source_branch_unavailable',
183
+ message: `Source repository for branch '${headRef}' is no longer accessible.`,
184
+ details: ['GitHub returned no head repository for the open pull request.'],
185
+ data: { pr: prData, repo: repoData },
186
+ });
187
+ }
188
+
189
+ const sourceBranchState = await checkBranch({
190
+ commandRunner: runner,
191
+ repoFullName: headRepo,
192
+ branchName: headRef,
193
+ reason: 'source_branch_unavailable',
194
+ label: 'Source',
195
+ });
196
+ if (sourceBranchState.terminal) return sourceBranchState;
197
+
198
+ const baseRepo = prData.base?.repo?.full_name || `${owner}/${repo}`;
199
+ const baseRef = prData.base?.ref || targetBranchName || repoData.default_branch;
200
+ if (!baseRepo && baseRef) {
201
+ return terminal({
202
+ reason: 'target_branch_unavailable',
203
+ message: `Target repository for branch '${baseRef}' is no longer accessible.`,
204
+ details: ['GitHub returned no base repository for the open pull request.'],
205
+ data: { pr: prData, repo: repoData },
206
+ });
207
+ }
208
+
209
+ const targetBranchState = await checkBranch({
210
+ commandRunner: runner,
211
+ repoFullName: baseRepo,
212
+ branchName: baseRef,
213
+ reason: 'target_branch_unavailable',
214
+ label: 'Target',
215
+ });
216
+ if (targetBranchState.terminal) return targetBranchState;
217
+ }
218
+ } else {
219
+ const sourceBranchState = await checkBranch({
220
+ commandRunner: runner,
221
+ repoFullName: `${owner}/${repo}`,
222
+ branchName: sourceBranchName,
223
+ reason: 'source_branch_unavailable',
224
+ label: 'Source',
225
+ });
226
+ if (sourceBranchState.terminal) return sourceBranchState;
227
+
228
+ const targetBranchState = await checkBranch({
229
+ commandRunner: runner,
230
+ repoFullName: `${owner}/${repo}`,
231
+ branchName: targetBranchName,
232
+ reason: 'target_branch_unavailable',
233
+ label: 'Target',
234
+ });
235
+ if (targetBranchState.terminal) return targetBranchState;
236
+ }
237
+
238
+ if (issueNumber && String(issueNumber) !== String(prNumber)) {
239
+ const issueResult = await runCommand(runner, ['gh api repos/', '/', '/issues/', ''], owner, repo, issueNumber);
240
+ if (commandFailedTerminally(issueResult)) {
241
+ return terminal({
242
+ reason: 'issue_unavailable',
243
+ message: `Issue #${issueNumber} in ${owner}/${repo} is no longer accessible.`,
244
+ details: [getTerminalGitHubEntityErrorMessage(issueResult)],
245
+ });
246
+ }
247
+
248
+ const issueData = safeJsonParse(issueResult.stdout);
249
+ if (String(issueData?.state || '').toLowerCase() === 'closed') {
250
+ return terminal({
251
+ reason: 'issue_closed',
252
+ message: `Issue #${issueNumber} has been closed.`,
253
+ data: { issue: issueData, repo: repoData },
254
+ });
255
+ }
256
+ }
257
+
258
+ return ok({ repo: repoData });
259
+ };
260
+
261
+ export default {
262
+ checkGitHubTerminalState,
263
+ getGitHubCommandOutput,
264
+ getTerminalGitHubEntityErrorMessage,
265
+ isTerminalGitHubEntityError,
266
+ };
@@ -47,6 +47,12 @@ const DEFAULT_HOST_DOCKER_SOCK = '/var/run/host-docker.sock';
47
47
  // throwaway container — booting the dind image's dockerd entrypoint — purely to
48
48
  // check whether bash exists. See issue #1914.
49
49
  const DOCKER_ISOLATION_SHELL = 'sh';
50
+ // Free-space floor (GiB) below which the preflight warns that an impending
51
+ // isolation-image pull may fail with `no space left on device`. The Hive Mind
52
+ // isolation images are well over 30 GB extracted, so a host/nested daemon with
53
+ // less headroom than this cannot safely pull one. Diagnostic only — never
54
+ // blocks startup. See issue #1914.
55
+ const DOCKER_ISOLATION_LOW_DISK_GIB = 40;
50
56
 
51
57
  function normalizeProcessIds(value) {
52
58
  if (!value || typeof value !== 'object') return {};
@@ -87,12 +93,13 @@ function maybeAddMount(mounts, source, target, existsSync) {
87
93
  /**
88
94
  * Resolve the tag used for the Docker isolation image.
89
95
  *
90
- * Defaults to `latest`, but operators can pin it (e.g. to the exact version
91
- * already present on the host) via `HIVE_MIND_DOCKER_ISOLATION_IMAGE_TAG`.
92
- * Pinning matters for Docker-in-Docker deployments: the nested daemon starts
93
- * with an empty image store, so an unpinned `:latest` whose registry digest has
94
- * drifted from the host copy forces a fresh multi-gigabyte pull on every task.
95
- * A pinned tag lets a pre-seeded image be reused instead. See issue #1879.
96
+ * Release Docker images bake `HIVE_MIND_DOCKER_ISOLATION_IMAGE_TAG` from `HIVE_MIND_VERSION`, so a parent
97
+ * container started via `:latest` still launches child isolation containers from
98
+ * the same immutable release tag. Local/PR builds fall back to `latest`, and
99
+ * operators can override the tag explicitly when using custom images. Pinning
100
+ * matters for Docker-in-Docker deployments: the nested daemon starts with an
101
+ * empty image store, so a `:latest` digest drift from the host copy forces a
102
+ * fresh multi-gigabyte pull. See issue #1879.
96
103
  */
97
104
  export function resolveDockerIsolationImageTag({ env = process.env } = {}) {
98
105
  const explicit = String(env.HIVE_MIND_DOCKER_ISOLATION_IMAGE_TAG || '').trim();
@@ -618,6 +625,80 @@ export async function checkDockerImagePresent(image, verbose = false) {
618
625
  }
619
626
  }
620
627
 
628
+ /**
629
+ * Report the storage driver the (nested) Docker daemon is using.
630
+ *
631
+ * `vfs` performs NO copy-on-write — it stores a full copy of every image layer
632
+ * — so the multi-gigabyte Hive Mind images consume many times their real size
633
+ * on disk and the first isolated `docker run`/pull dies with
634
+ * `failed to register layer: no space left on device` (issue #1914 reopen).
635
+ * The preflight uses this to warn loudly when the daemon is on `vfs` instead of
636
+ * letting the disk silently overflow mid-task.
637
+ *
638
+ * Never throws: returns the lowercased driver name, or `null` when docker is
639
+ * unavailable / the daemon is unreachable.
640
+ *
641
+ * @param {boolean} [verbose] - Enable verbose logging
642
+ * @returns {Promise<string|null>} e.g. 'fuse-overlayfs', 'overlay2', 'vfs', or null
643
+ */
644
+ export async function checkDockerStorageDriver(verbose = false) {
645
+ try {
646
+ const result = await $({ mirror: false })`docker info --format ${'{{.Driver}}'}`;
647
+ const driver = (result.stdout?.toString() || '').trim().toLowerCase() || null;
648
+ if (verbose) console.log(`[VERBOSE] isolation-runner: docker storage driver: ${driver || '(unknown)'}`);
649
+ return driver;
650
+ } catch {
651
+ if (verbose) console.log('[VERBOSE] isolation-runner: docker info unavailable; storage driver unknown');
652
+ return null;
653
+ }
654
+ }
655
+
656
+ /**
657
+ * Report the free space (in GiB) on the Docker daemon's data root.
658
+ *
659
+ * The Hive Mind isolation images are multiple gigabytes; when the nested daemon
660
+ * has to pull one, it needs room for the extracted layers. This lets the
661
+ * preflight predict a `no space left on device` failure (issue #1914) instead
662
+ * of discovering it mid-pull. Resolves the daemon's real data root via
663
+ * `docker info` and falls back to `/var/lib/docker`, then reads `df -Pk`.
664
+ *
665
+ * Never throws: returns `{ availableGiB, dataRoot }`, or `null` when the
666
+ * information cannot be determined (no docker, no df, unparseable output).
667
+ *
668
+ * @param {boolean} [verbose] - Enable verbose logging
669
+ * @returns {Promise<{availableGiB: number, dataRoot: string}|null>}
670
+ */
671
+ export async function checkDockerDiskSpace(verbose = false) {
672
+ try {
673
+ let dataRoot = '/var/lib/docker';
674
+ try {
675
+ const info = await $({ mirror: false })`docker info --format ${'{{.DockerRootDir}}'}`;
676
+ const root = (info.stdout?.toString() || '').trim();
677
+ if (root) dataRoot = root;
678
+ } catch {
679
+ // Daemon unreachable: fall back to the conventional data root. If df then
680
+ // fails on it (e.g. the path does not exist) we return null below.
681
+ }
682
+
683
+ const df = await $({ mirror: false })`df -Pk ${dataRoot}`;
684
+ // `df -P` guarantees one logical line per filesystem (no wrapping). The last
685
+ // line is the data row: Filesystem 1024-blocks Used Available Capacity Mount
686
+ const lines = (df.stdout?.toString() || '').trim().split('\n');
687
+ const cols = (lines[lines.length - 1] || '').trim().split(/\s+/);
688
+ const availableKb = Number(cols[3]);
689
+ if (!Number.isFinite(availableKb)) {
690
+ if (verbose) console.log('[VERBOSE] isolation-runner: could not parse df output for Docker disk space');
691
+ return null;
692
+ }
693
+ const availableGiB = availableKb / (1024 * 1024);
694
+ if (verbose) console.log(`[VERBOSE] isolation-runner: Docker data root '${dataRoot}' has ${availableGiB.toFixed(1)} GiB free`);
695
+ return { availableGiB, dataRoot };
696
+ } catch {
697
+ if (verbose) console.log('[VERBOSE] isolation-runner: df unavailable; Docker disk space unknown');
698
+ return null;
699
+ }
700
+ }
701
+
621
702
  /**
622
703
  * Startup preflight for `--isolation docker`.
623
704
  *
@@ -637,42 +718,78 @@ export async function checkDockerImagePresent(image, verbose = false) {
637
718
  * blocks startup — a misconfigured passthrough should degrade to a slow first
638
719
  * task, not a dead bot.
639
720
  *
721
+ * It also surfaces the two root causes of the issue #1914 reopen
722
+ * (`failed to register layer: no space left on device`): a non-copy-on-write
723
+ * storage driver (`vfs`, which copies every layer in full) and a Docker data
724
+ * root with too little free space to hold the >30 GB image. Both are reported
725
+ * as loud, actionable warnings so the disk overflow is self-diagnosing at
726
+ * startup instead of surfacing mid-task.
727
+ *
640
728
  * @param {Object} [options]
641
729
  * @param {Object} [options.env] - Environment (defaults to process.env)
642
730
  * @param {Function} [options.existsSync] - fs.existsSync (injectable for tests)
643
731
  * @param {boolean} [options.verbose] - Enable verbose logging
644
732
  * @param {Object} [options.logger] - Logger with .log/.warn (defaults to console)
645
733
  * @param {Function} [options.checkImagePresent] - Image-presence probe (injectable for tests)
646
- * @returns {Promise<{image: string, sock: string, socketMounted: boolean, imagePresent: boolean, isDind: boolean, ok: boolean, warnings: string[]}>}
734
+ * @param {Function} [options.checkStorageDriver] - Storage-driver probe (injectable for tests)
735
+ * @param {Function} [options.checkDiskSpace] - Disk-space probe (injectable for tests)
736
+ * @returns {Promise<{image: string, sock: string, socketMounted: boolean, imagePresent: boolean, isDind: boolean, storageDriver: (string|null), storageDriverOk: boolean, diskAvailableGiB: (number|null), ok: boolean, warnings: string[]}>}
647
737
  */
648
738
  export async function preflightDockerIsolation(options = {}) {
649
- const { env = process.env, existsSync = fs.existsSync, verbose = false, logger = console, checkImagePresent = checkDockerImagePresent } = options;
739
+ const { env = process.env, existsSync = fs.existsSync, verbose = false, logger = console, checkImagePresent = checkDockerImagePresent, checkStorageDriver = checkDockerStorageDriver, checkDiskSpace = checkDockerDiskSpace } = options;
650
740
 
651
741
  const image = getDockerIsolationImage({ env });
652
742
  const sock = resolveHostDockerSock({ env });
653
743
  const isDind = shouldRunPrivilegedDockerIsolation(image, env);
654
744
  const socketMounted = Boolean(existsSync(sock));
655
745
  const imagePresent = Boolean(await checkImagePresent(image, verbose));
656
-
657
- const result = { image, sock, socketMounted, imagePresent, isDind, ok: imagePresent, warnings: [] };
746
+ const storageDriver = await checkStorageDriver(verbose);
747
+ const disk = await checkDiskSpace(verbose);
748
+ const diskAvailableGiB = disk && Number.isFinite(disk.availableGiB) ? disk.availableGiB : null;
749
+ // Unknown driver (probe returned null) is treated as ok — we only flag the
750
+ // one driver known to overflow the disk, never block on missing information.
751
+ const storageDriverOk = storageDriver !== 'vfs';
752
+
753
+ const result = { image, sock, socketMounted, imagePresent, isDind, storageDriver, storageDriverOk, diskAvailableGiB, ok: imagePresent, warnings: [] };
658
754
  const info = typeof logger.log === 'function' ? logger.log.bind(logger) : () => {};
659
755
  const warn = typeof logger.warn === 'function' ? logger.warn.bind(logger) : info;
660
756
 
661
- if (imagePresent) {
662
- info(`✅ Docker isolation image '${image}' is already present locally — isolated tasks reuse it (no multi-GB pull). See issue #1914.`);
663
- return result;
757
+ const preload = `node scripts/preload-dind-isolation-image.mjs --image ${image}`;
758
+
759
+ // Root Cause A of the issue #1914 reopen: a non-copy-on-write storage driver.
760
+ // `vfs` stores a full copy of every image layer, so the multi-GB images
761
+ // consume many times their size on disk and any layer write (pull, run,
762
+ // commit) can fail with `failed to register layer: no space left on device`.
763
+ // This is dangerous even when the image is already present — a task that
764
+ // commits or pulls more layers still overflows — so we warn independent of
765
+ // image presence.
766
+ if (storageDriver === 'vfs') {
767
+ result.warnings.push(`The Docker daemon backing '--isolation docker' is using the 'vfs' storage driver, which performs NO copy-on-write: ` + `it stores a full copy of every image layer, so the multi-GB Hive Mind images consume many times their size on disk and isolated tasks can fail with 'failed to register layer: no space left on device' (issue #1914). ` + `Switch to a copy-on-write driver: rebuild/redeploy with the current Dockerfile.dind (it defaults to 'fuse-overlayfs'), or for an already-running container add '-e DIND_STORAGE_DRIVER=fuse-overlayfs' to the bot container's 'docker run' and recreate it.`);
664
768
  }
665
769
 
666
- // Image absent: the first isolated task will pull the full image. Explain the
667
- // most likely cause and the exact fix instead of letting the operator first
668
- // discover it as a surprise multi-gigabyte download mid-task.
669
- const preload = `node scripts/preload-dind-isolation-image.mjs --image ${image}`;
670
- if (isDind && !socketMounted) {
671
- result.warnings.push(`Docker isolation image '${image}' is NOT in the nested Docker daemon and the host Docker socket is not mounted at ${sock}. ` + `box host-image passthrough cannot seed the nested daemon, so the FIRST isolated task will pull the full image (the Hive Mind images are multiple GB). ` + `Fix the deployment: add '-v /var/run/docker.sock:${sock}:ro' and '-e DIND_HOST_PASSTHROUGH_IMAGES="konard/hive-mind konard/hive-mind-dind"' to the bot container's 'docker run', or seed it now with: ${preload}`);
672
- } else if (isDind && socketMounted) {
673
- result.warnings.push(`Docker isolation image '${image}' is NOT in the nested Docker daemon even though the host Docker socket is mounted at ${sock}. ` + `box host-image passthrough may have skipped it (check DIND_HOST_PASSTHROUGH mode, the DIND_HOST_PASSTHROUGH_IMAGES allowlist, and that the host actually has '${image}' with a registry digest). ` + `The first isolated task will pull the full image. Seed it now with: ${preload}`);
674
- } else {
675
- result.warnings.push(`Docker isolation image '${image}' is not present locally; the first isolated task will pull it. ` + `If this host already has it under a different tag, pin HIVE_MIND_DOCKER_ISOLATION_IMAGE_TAG, or seed it with: ${preload}`);
770
+ if (!imagePresent) {
771
+ // Image absent: the first isolated task will pull the full image. Explain
772
+ // the most likely cause and the exact fix instead of letting the operator
773
+ // first discover it as a surprise multi-gigabyte download mid-task.
774
+ if (isDind && !socketMounted) {
775
+ result.warnings.push(`Docker isolation image '${image}' is NOT in the nested Docker daemon and the host Docker socket is not mounted at ${sock}. ` + `box host-image passthrough cannot seed the nested daemon, so the FIRST isolated task will pull the full image (the Hive Mind images are multiple GB). ` + `Fix the deployment: add '-v /var/run/docker.sock:${sock}:ro' and '-e DIND_HOST_PASSTHROUGH_IMAGES="konard/hive-mind konard/hive-mind-dind"' to the bot container's 'docker run', or seed it now with: ${preload}`);
776
+ } else if (isDind && socketMounted) {
777
+ result.warnings.push(`Docker isolation image '${image}' is NOT in the nested Docker daemon even though the host Docker socket is mounted at ${sock}. ` + `box host-image passthrough may have skipped it (check DIND_HOST_PASSTHROUGH mode, the DIND_HOST_PASSTHROUGH_IMAGES allowlist, and that the host actually has '${image}' with a registry digest). ` + `The first isolated task will pull the full image. Seed it now with: ${preload}`);
778
+ } else {
779
+ result.warnings.push(`Docker isolation image '${image}' is not present locally; the first isolated task will pull it. ` + `If this host already has it under a different tag, pin HIVE_MIND_DOCKER_ISOLATION_IMAGE_TAG, or seed it with: ${preload}`);
780
+ }
781
+
782
+ // Root Cause B of the issue #1914 reopen: too little disk for the pull. The
783
+ // image is well over 30 GB extracted; predict the `no space left on device`
784
+ // failure here rather than hitting it mid-pull.
785
+ if (diskAvailableGiB != null && diskAvailableGiB < DOCKER_ISOLATION_LOW_DISK_GIB) {
786
+ const root = disk?.dataRoot || 'the Docker data root';
787
+ result.warnings.push(`Only ~${diskAvailableGiB.toFixed(0)} GiB free on ${root} and the isolation image '${image}' is not present yet. ` + `The Hive Mind isolation image is well over 30 GB extracted, so the first isolated task's pull may fail with 'no space left on device' (issue #1914). ` + `Seed it via host passthrough (mount the host docker socket) or with '${preload}', and free space on the Docker data root.`);
788
+ }
789
+ }
790
+
791
+ if (imagePresent) {
792
+ info(`✅ Docker isolation image '${image}' is already present locally — isolated tasks reuse it (no multi-GB pull). See issue #1914.`);
676
793
  }
677
794
  for (const w of result.warnings) warn(`⚠️ ${w}`);
678
795
  return result;
@@ -870,6 +870,13 @@ export const getMergeBlockers = async (owner, repo, prNumber, verbose = false, c
870
870
  });
871
871
  }
872
872
  }
873
+ } else if (ciStatus.status === 'terminal_github_entity_error') {
874
+ blockers.push({
875
+ type: 'terminal_github_entity_error',
876
+ message: ciStatus.error || 'GitHub repository, pull request, issue, or branch is no longer accessible',
877
+ details: [],
878
+ });
879
+ return { blockers, ciStatus, noCiConfigured: false, noCiTriggered: false, noWorkflowRunsForCommit };
873
880
  } else if (ciStatus.status === 'unknown') {
874
881
  // Unable to determine CI status - treat as pending to be safe
875
882
  // Do NOT treat as mergeable (which would be incorrect)
@@ -882,6 +889,15 @@ export const getMergeBlockers = async (owner, repo, prNumber, verbose = false, c
882
889
 
883
890
  // Check mergeability
884
891
  const mergeStatus = await checkPRMergeable(owner, repo, prNumber, verbose);
892
+ if (mergeStatus.terminal) {
893
+ blockers.push({
894
+ type: 'terminal_github_entity_error',
895
+ message: mergeStatus.reason || 'GitHub repository, pull request, issue, or branch is no longer accessible',
896
+ details: [],
897
+ });
898
+ return { blockers, ciStatus, noCiConfigured: false, noCiTriggered: false, noWorkflowRunsForCommit };
899
+ }
900
+
885
901
  if (!mergeStatus.mergeable) {
886
902
  blockers.push({
887
903
  type: 'not_mergeable',
@@ -43,7 +43,12 @@ const { sanitizeLogContent, attachLogToGitHub } = githubLib;
43
43
 
44
44
  // Import shared utilities from the restart-shared module
45
45
  const restartShared = await import('./solve.restart-shared.lib.mjs');
46
- const { checkPRMerged, checkPRClosed, checkForUncommittedChanges, getUncommittedChangesDetails, executeToolIteration, buildAutoRestartInstructions, isUsageLimitReached } = restartShared;
46
+ const { checkForUncommittedChanges, getUncommittedChangesDetails, executeToolIteration, buildAutoRestartInstructions, isUsageLimitReached } = restartShared;
47
+
48
+ // Issue #1931: deleted/inaccessible repositories, PRs, issues, and branches
49
+ // are terminal states for long-running watch loops, not retryable CI states.
50
+ const terminalStateLib = await import('./github-terminal-state.lib.mjs');
51
+ const { checkGitHubTerminalState } = terminalStateLib;
47
52
 
48
53
  // Import validation functions for time parsing (used for usage limit wait)
49
54
  const validation = await import('./solve.validation.lib.mjs');
@@ -155,24 +160,30 @@ export const watchUntilMergeable = async params => {
155
160
  iteration++;
156
161
  const currentTime = new Date();
157
162
 
158
- // Check if PR is merged
159
- const isMerged = await checkPRMerged(owner, repo, prNumber);
160
- if (isMerged) {
163
+ const terminalState = await checkGitHubTerminalState({
164
+ owner,
165
+ repo,
166
+ issueNumber,
167
+ prNumber,
168
+ sourceBranchName: prBranch || branchName,
169
+ commandRunner: $,
170
+ });
171
+ if (terminalState.terminal && terminalState.success) {
161
172
  await log('');
162
173
  await log(formatAligned('🎉', 'PR MERGED!', 'Stopping auto-restart-until-mergeable mode'));
163
174
  await log(formatAligned('', 'Pull request:', `#${prNumber} has been merged`, 2));
164
175
  await log('');
165
176
  return { success: true, reason: 'merged', latestSessionId, latestAnthropicCost };
166
177
  }
167
-
168
- // Check if PR is closed (not merged)
169
- const isClosed = await checkPRClosed(owner, repo, prNumber);
170
- if (isClosed) {
178
+ if (terminalState.terminal) {
171
179
  await log('');
172
- await log(formatAligned('🚫', 'PR CLOSED!', 'Stopping auto-restart-until-mergeable mode'));
173
- await log(formatAligned('', 'Pull request:', `#${prNumber} has been closed without merging`, 2));
180
+ await log(formatAligned('', 'GITHUB TARGET UNAVAILABLE:', terminalState.message, 2), { level: 'error' });
181
+ for (const detail of terminalState.details || []) {
182
+ await log(formatAligned('', 'Detail:', detail, 4), { level: 'error' });
183
+ }
184
+ await log(formatAligned('', 'Action:', 'Stopping auto-restart-until-mergeable mode', 2), { level: 'error' });
174
185
  await log('');
175
- return { success: false, reason: 'closed', latestSessionId, latestAnthropicCost };
186
+ return { success: false, reason: terminalState.reason, latestSessionId, latestAnthropicCost };
176
187
  }
177
188
 
178
189
  await log(formatAligned('🔍', `Check #${iteration}:`, currentTime.toLocaleTimeString()));
@@ -205,6 +216,18 @@ export const watchUntilMergeable = async params => {
205
216
  // Get merge blockers
206
217
  const { blockers, noCiConfigured, noCiTriggered, workflowRunConclusions, ciStatus, noWorkflowRunsForCommit } = await getMergeBlockers(owner, repo, prNumber, argv.verbose, consecutiveNoRunsChecks, prBranch);
207
218
 
219
+ const terminalGitHubBlocker = blockers.find(b => b.type === 'terminal_github_entity_error');
220
+ if (terminalGitHubBlocker) {
221
+ await log('');
222
+ await log(formatAligned('❌', 'GITHUB TARGET UNAVAILABLE:', terminalGitHubBlocker.message, 2), { level: 'error' });
223
+ for (const detail of terminalGitHubBlocker.details || []) {
224
+ await log(formatAligned('', 'Detail:', detail, 4), { level: 'error' });
225
+ }
226
+ await log(formatAligned('', 'Action:', 'Stopping auto-restart-until-mergeable mode', 2), { level: 'error' });
227
+ await log('');
228
+ return { success: false, reason: 'terminal_github_entity_error', latestSessionId, latestAnthropicCost };
229
+ }
230
+
208
231
  // Issue #1503/#1918: Reset counter when CI checks exist (safety valve only for
209
232
  // consecutive "no runs"). Issue #1918: do NOT reset while getMergeBlockers is still
210
233
  // waiting for PR-triggered workflow runs to register (noWorkflowRunsForCommit). A
@@ -1201,6 +1224,25 @@ export const attemptAutoMerge = async params => {
1201
1224
  await log('');
1202
1225
  await log(formatAligned('🔀', 'AUTO-MERGE:', 'Checking if PR can be merged...'));
1203
1226
 
1227
+ const terminalState = await checkGitHubTerminalState({
1228
+ owner,
1229
+ repo,
1230
+ issueNumber,
1231
+ prNumber,
1232
+ commandRunner: $,
1233
+ });
1234
+ if (terminalState.terminal) {
1235
+ if (terminalState.success) {
1236
+ await log(formatAligned('🎉', 'PR already merged:', `#${prNumber}`, 2));
1237
+ return { success: true, reason: 'merged' };
1238
+ }
1239
+ await log(formatAligned('❌', 'GITHUB TARGET UNAVAILABLE:', terminalState.message, 2), { level: 'error' });
1240
+ for (const detail of terminalState.details || []) {
1241
+ await log(formatAligned('', 'Detail:', detail, 4), { level: 'error' });
1242
+ }
1243
+ return { success: false, reason: terminalState.reason, error: terminalState.message };
1244
+ }
1245
+
1204
1246
  // Issue #1226: Check merge permissions before attempting
1205
1247
  const { canMerge, permission } = await checkMergePermissions(owner, repo, argv.verbose);
1206
1248
  if (!canMerge) {
@@ -1234,6 +1276,11 @@ export const attemptAutoMerge = async params => {
1234
1276
 
1235
1277
  // Check if PR is mergeable
1236
1278
  const mergeStatus = await checkPRMergeable(owner, repo, prNumber, argv.verbose);
1279
+ if (mergeStatus.terminal) {
1280
+ await log(formatAligned('❌', 'GITHUB TARGET UNAVAILABLE:', mergeStatus.reason || 'GitHub repository, pull request, issue, or branch is no longer accessible', 2), { level: 'error' });
1281
+ return { success: false, reason: 'terminal_github_entity_error', error: mergeStatus.reason };
1282
+ }
1283
+
1237
1284
  if (!mergeStatus.mergeable) {
1238
1285
  await log(formatAligned('⚠️', 'PR not mergeable:', mergeStatus.reason || 'Unknown reason', 2));
1239
1286
  return { success: false, reason: 'not_mergeable', error: mergeStatus.reason };
@@ -39,6 +39,11 @@ const { detectAndCountFeedback } = feedbackLib;
39
39
  const restartShared = await import('./solve.restart-shared.lib.mjs');
40
40
  const { checkPRMerged, checkForUncommittedChanges, getUncommittedChangesDetails, executeToolIteration, buildUncommittedChangesFeedback, isApiError } = restartShared;
41
41
 
42
+ // Issue #1931: deleted/inaccessible repositories, PRs, issues, and branches
43
+ // are terminal states for watch mode, not retryable feedback checks.
44
+ const terminalStateLib = await import('./github-terminal-state.lib.mjs');
45
+ const { checkGitHubTerminalState } = terminalStateLib;
46
+
42
47
  // Issue #1574: Interruptible sleep so CTRL+C is never blocked by a lingering timer
43
48
  const { interruptibleSleep } = await import('./interruptible-sleep.lib.mjs');
44
49
  const { formatAutoIterationLimit, hasReachedAutoIterationLimit, normalizeAutoIterationLimit } = await import('./auto-iteration-limits.lib.mjs');
@@ -111,8 +116,27 @@ export const watchForFeedback = async params => {
111
116
  iteration++;
112
117
  const currentTime = new Date();
113
118
 
119
+ const terminalState = await checkGitHubTerminalState({
120
+ owner,
121
+ repo,
122
+ issueNumber,
123
+ prNumber,
124
+ sourceBranchName: prBranch || branchName,
125
+ commandRunner: $,
126
+ });
127
+ if (terminalState.terminal && !terminalState.success) {
128
+ await log('');
129
+ await log(formatAligned('❌', 'GITHUB TARGET UNAVAILABLE:', terminalState.message, 2), { level: 'error' });
130
+ for (const detail of terminalState.details || []) {
131
+ await log(formatAligned('', 'Detail:', detail, 4), { level: 'error' });
132
+ }
133
+ await log(formatAligned('', 'Action:', 'Stopping watch mode', 2), { level: 'error' });
134
+ await log('');
135
+ break;
136
+ }
137
+
114
138
  // Check if PR is merged
115
- const isMerged = await checkPRMerged(owner, repo, prNumber);
139
+ const isMerged = terminalState.terminal && terminalState.success ? true : await checkPRMerged(owner, repo, prNumber);
116
140
  if (isMerged) {
117
141
  await log('');
118
142
  await log(formatAligned('🎉', 'PR MERGED!', 'Stopping watch mode'));
@@ -12,8 +12,6 @@ if (typeof use === 'undefined') {
12
12
  }
13
13
 
14
14
  const { lino } = await import('./lino.lib.mjs');
15
- const { buildUserMention } = await import('./buildUserMention.lib.mjs');
16
- const { reportError, initializeSentry, addBreadcrumb } = await import('./sentry.lib.mjs');
17
15
  const { loadLenvConfig } = await import('./lenv-reader.lib.mjs');
18
16
  const { getLinoYargsFactory, getenv, hideBin } = await import('./cli-arguments.lib.mjs');
19
17
 
@@ -27,26 +25,8 @@ await loadLenvConfig({ override: true, quiet: true });
27
25
  const yargs = getLinoYargsFactory();
28
26
  const { createYargsConfig: createSolveYargsConfig, detectMalformedFlags } = await import('./solve.config.lib.mjs');
29
27
  const { createYargsConfig: createHiveYargsConfig } = await import('./hive.config.lib.mjs');
30
- const { parseGitHubUrl, validateGitHubEntityExistence } = await import('./github.lib.mjs');
31
- const { validateModelName, buildModelOptionDescription } = await import('./models/index.mjs');
32
28
  const { validateBranchInArgs } = await import('./solve.branch.lib.mjs');
33
- const { extractIsolationFromArgs, isValidPerCommandIsolation, resolveIsolation, createIsolationAwareQueueCallback } = await import('./telegram-isolation.lib.mjs');
34
- const limitsLib = await import('./limits.lib.mjs');
35
- const { formatUsageMessage, formatCodexLimitsSection, getAllCachedLimits } = limitsLib;
36
- const { handleShowLimitsFlag, captureStartSnapshotAndAppend } = await import('./telegram-show-limits.lib.mjs'); // #594
37
- const { getVersionInfo, formatVersionMessage } = await import('./version-info.lib.mjs');
38
- const { escapeMarkdown, escapeMarkdownV2, cleanNonPrintableChars, makeSpecialCharsVisible } = await import('./telegram-markdown.lib.mjs');
39
- const { getSolveQueue, createQueueExecuteCallback } = await import('./telegram-solve-queue.lib.mjs');
40
- const { applySolveToolAlias, getFirstParsedPositionalArg, getSolveCommandNameFromText, getSolveToolAliasFromText, moveArgumentToFront, parseArgsWithYargs, parseCommandArgs, SOLVE_COMMAND_NAMES } = await import('./telegram-solve-command.lib.mjs');
41
- const { executeStartScreen: executeStartScreenCommand, buildExecuteAndUpdateMessage } = await import('./telegram-command-execution.lib.mjs');
42
- const { isChatStopped, getChatStopInfo, getStoppedChatRejectMessage, DEFAULT_STOP_REASON } = await import('./telegram-start-stop-command.lib.mjs');
43
- const { isOldMessage: _isOldMessage, isGroupChat: _isGroupChat, isChatAuthorized: _isChatAuthorized, isForwarded: _isForwarded, isForwardedOrReply: _isForwardedOrReply, extractCommandFromText, extractGitHubUrl: _extractGitHubUrl } = await import('./telegram-message-filters.lib.mjs');
44
- const { installTelegramFormattingFallback, isTelegramFormattingError, isTelegramMessageTooLongError, safeEditMessageText, safeReply, TELEGRAM_TEXT_LIMIT } = await import('./telegram-safe-reply.lib.mjs');
45
- const { registerTerminalWatchCommand, startAutoTerminalWatchForSession } = await import('./telegram-terminal-watch-command.lib.mjs');
46
- const { launchBotWithRetry } = await import('./telegram-bot-launcher.lib.mjs');
47
- const { trackSession, startSessionMonitoring, hasActiveSessionForUrlAsync, findStoppableSessionByUrl } = await import('./session-monitor.lib.mjs');
48
- const { formatExecutingWorkSessionMessage, formatStartingWorkSessionMessage } = await import('./work-session-formatting.lib.mjs');
49
- const { buildTelegramHelpMessage, buildTelegramInfoBlock, buildSolveQueuedMessage } = await import('./telegram-ui-messages.lib.mjs');
29
+ const { extractIsolationFromArgs, isValidPerCommandIsolation } = await import('./telegram-isolation.lib.mjs');
50
30
 
51
31
  const config = yargs(hideBin(process.argv))
52
32
  .usage('Usage: hive-telegram-bot [options]')
@@ -179,11 +159,16 @@ if (ISOLATION_BACKEND) {
179
159
  process.exit(1);
180
160
  }
181
161
  console.log(`🔒 Isolation mode enabled: ${ISOLATION_BACKEND} (experimental)`);
182
- isolationRunner = await import('./isolation-runner.lib.mjs');
162
+ // Dry-run mode validates configuration and exits before any command can be
163
+ // executed, so avoid loading start-command/command-stream and their optional
164
+ // native dependencies on parser-only runs.
165
+ if (!config.dryRun) {
166
+ isolationRunner = await import('./isolation-runner.lib.mjs');
167
+ }
183
168
  // For docker isolation, run a startup preflight so a missing/un-passed-through
184
169
  // image surfaces as a loud, actionable warning instead of a surprise multi-GB
185
170
  // pull on the first isolated task (issues #1914, #1879). Never throws.
186
- if (ISOLATION_BACKEND === 'docker' && typeof isolationRunner.preflightDockerIsolation === 'function') {
171
+ if (!config.dryRun && ISOLATION_BACKEND === 'docker' && typeof isolationRunner.preflightDockerIsolation === 'function') {
187
172
  try {
188
173
  await isolationRunner.preflightDockerIsolation({ verbose: VERBOSE });
189
174
  } catch (preflightError) {
@@ -314,6 +299,28 @@ if (config.dryRun) {
314
299
  // === HEAVY DEPENDENCIES LOADED BELOW (skipped in dry-run mode) ===
315
300
  // These imports are after dry-run check to speed up config validation. Telegraf can take 3-8s to load on cold start (issue #801).
316
301
 
302
+ const { buildUserMention } = await import('./buildUserMention.lib.mjs');
303
+ const { reportError, initializeSentry, addBreadcrumb } = await import('./sentry.lib.mjs');
304
+ const { parseGitHubUrl, validateGitHubEntityExistence } = await import('./github.lib.mjs');
305
+ const { validateModelName, buildModelOptionDescription } = await import('./models/index.mjs');
306
+ const { resolveIsolation, createIsolationAwareQueueCallback } = await import('./telegram-isolation.lib.mjs');
307
+ const limitsLib = await import('./limits.lib.mjs');
308
+ const { formatUsageMessage, formatCodexLimitsSection, getAllCachedLimits } = limitsLib;
309
+ const { handleShowLimitsFlag, captureStartSnapshotAndAppend } = await import('./telegram-show-limits.lib.mjs'); // #594
310
+ const { getVersionInfo, formatVersionMessage } = await import('./version-info.lib.mjs');
311
+ const { escapeMarkdown, escapeMarkdownV2, cleanNonPrintableChars, makeSpecialCharsVisible } = await import('./telegram-markdown.lib.mjs');
312
+ const { getSolveQueue, createQueueExecuteCallback } = await import('./telegram-solve-queue.lib.mjs');
313
+ const { applySolveToolAlias, getFirstParsedPositionalArg, getSolveCommandNameFromText, getSolveToolAliasFromText, moveArgumentToFront, parseArgsWithYargs, parseCommandArgs, SOLVE_COMMAND_NAMES } = await import('./telegram-solve-command.lib.mjs');
314
+ const { executeStartScreen: executeStartScreenCommand, buildExecuteAndUpdateMessage } = await import('./telegram-command-execution.lib.mjs');
315
+ const { isChatStopped, getChatStopInfo, getStoppedChatRejectMessage, DEFAULT_STOP_REASON } = await import('./telegram-start-stop-command.lib.mjs');
316
+ const { isOldMessage: _isOldMessage, isGroupChat: _isGroupChat, isChatAuthorized: _isChatAuthorized, isForwarded: _isForwarded, isForwardedOrReply: _isForwardedOrReply, extractCommandFromText, extractGitHubUrl: _extractGitHubUrl } = await import('./telegram-message-filters.lib.mjs');
317
+ const { installTelegramFormattingFallback, isTelegramFormattingError, isTelegramMessageTooLongError, safeEditMessageText, safeReply, TELEGRAM_TEXT_LIMIT } = await import('./telegram-safe-reply.lib.mjs');
318
+ const { registerTerminalWatchCommand, startAutoTerminalWatchForSession } = await import('./telegram-terminal-watch-command.lib.mjs');
319
+ const { launchBotWithRetry } = await import('./telegram-bot-launcher.lib.mjs');
320
+ const { trackSession, startSessionMonitoring, hasActiveSessionForUrlAsync, findStoppableSessionByUrl } = await import('./session-monitor.lib.mjs');
321
+ const { formatExecutingWorkSessionMessage, formatStartingWorkSessionMessage } = await import('./work-session-formatting.lib.mjs');
322
+ const { buildTelegramHelpMessage, buildTelegramInfoBlock, buildSolveQueuedMessage } = await import('./telegram-ui-messages.lib.mjs');
323
+
317
324
  // Initialize Sentry for error tracking
318
325
  await initializeSentry({
319
326
  debug: VERBOSE,
@@ -432,6 +432,14 @@ export class MergeQueueProcessor {
432
432
  item.status = MergeItemStatus.CHECKING_CI;
433
433
  const mergeableCheck = await checkPRMergeable(this.owner, this.repo, item.pr.number, this.verbose);
434
434
 
435
+ if (mergeableCheck.terminal) {
436
+ item.status = MergeItemStatus.FAILED;
437
+ item.error = mergeableCheck.reason || 'GitHub repository, pull request, issue, or branch is no longer accessible';
438
+ this.stats.failed++;
439
+ this.log(`Failed PR #${item.pr.number}: ${item.error}`);
440
+ return;
441
+ }
442
+
435
443
  if (!mergeableCheck.mergeable) {
436
444
  item.status = MergeItemStatus.SKIPPED;
437
445
  item.error = mergeableCheck.reason;
@@ -452,6 +460,14 @@ export class MergeQueueProcessor {
452
460
  return;
453
461
  }
454
462
 
463
+ if (ciStatus.status === 'terminal_github_entity_error') {
464
+ item.status = MergeItemStatus.FAILED;
465
+ item.error = ciStatus.error || 'GitHub repository, pull request, issue, or branch is no longer accessible';
466
+ this.stats.failed++;
467
+ this.log(`Failed PR #${item.pr.number}: ${item.error}`);
468
+ return;
469
+ }
470
+
455
471
  // Step 3: Wait for CI if pending
456
472
  if (ciStatus.status === 'pending') {
457
473
  item.status = MergeItemStatus.WAITING_CI;
@@ -1,6 +1,19 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- const defaultFetchUseMCode = async () => (await fetch('https://unpkg.com/use-m/use.js')).text();
3
+ export const USE_M_BOOTSTRAP_URL = 'https://unpkg.com/use-m/use.js';
4
+ export const USE_M_FALLBACK_BOOTSTRAP_URL = 'https://unpkg.com/use-m@8.13.8/use.js';
5
+
6
+ const fetchUseMCodeFromUrl = async url => {
7
+ const response = await fetch(url);
8
+ const code = await response.text();
9
+ if (!response.ok || /^Not found:/i.test(code.trim())) {
10
+ throw new Error(`failed to load use-m bootstrap from ${url}: ${response.status} ${response.statusText}`);
11
+ }
12
+ return code;
13
+ };
14
+
15
+ const defaultFetchUseMCode = async () => fetchUseMCodeFromUrl(USE_M_BOOTSTRAP_URL);
16
+ const fallbackFetchUseMCode = async () => fetchUseMCodeFromUrl(USE_M_FALLBACK_BOOTSTRAP_URL);
4
17
 
5
18
  /**
6
19
  * Load the shared use-m bootstrap.
@@ -10,9 +23,14 @@ const defaultFetchUseMCode = async () => (await fetch('https://unpkg.com/use-m/u
10
23
  * @returns {Promise<Function>} The global use-m `use` function.
11
24
  */
12
25
  export const ensureUseM = async (options = {}) => {
13
- const { fetchUseMCode = defaultFetchUseMCode } = options;
26
+ const { fetchUseMCode = defaultFetchUseMCode, log = null } = options;
14
27
  if (typeof globalThis.use === 'undefined') {
15
- globalThis.use = (await eval(await fetchUseMCode())).use;
28
+ try {
29
+ globalThis.use = (await eval(await fetchUseMCode())).use;
30
+ } catch (error) {
31
+ if (typeof log === 'function') log(` use-m latest bootstrap failed (${error.message}); trying ${USE_M_FALLBACK_BOOTSTRAP_URL}`);
32
+ globalThis.use = (await eval(await fallbackFetchUseMCode())).use;
33
+ }
16
34
  }
17
35
  return globalThis.use;
18
36
  };