@link-assistant/hive-mind 1.65.1 → 1.65.2

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.65.2
4
+
5
+ ### Patch Changes
6
+
7
+ - 0214c9e: Retry transient 5xx/network errors across all `gh` exec sites. Previously a single 504 from the GitHub GraphQL endpoint could abort `solve` during `gh pr create`. The retry helper now handles HTTP 502/503/504, socket hang up, ECONNRESET, ETIMEDOUT, and TLS handshake timeouts in addition to rate-limit errors, with a separate retry budget and exponential backoff. All direct `execAsync('gh ...')` sites are routed through `execGhWithRetry`.
8
+
3
9
  ## 1.65.1
4
10
 
5
11
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.65.1",
3
+ "version": "1.65.2",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -169,46 +169,98 @@ const sleepWithCountdown = async (ms, log) => {
169
169
  }
170
170
  };
171
171
 
172
+ /**
173
+ * Patterns matched against an error's combined message/stderr/stdout to decide
174
+ * whether the failure is a transient network/edge fault that deserves a retry.
175
+ * Mirrors `isTransientNetworkError` in `src/lib.mjs` (issue #1536); duplicated
176
+ * here to avoid a circular import — `lib.mjs` already imports from this file.
177
+ *
178
+ * Issue #1756: `gh pr create` failed with `HTTP 504: 504 Gateway Timeout
179
+ * (https://api.github.com/graphql)`. `execGhWithRetry`/`ghWithRateLimitRetry`
180
+ * only handled rate-limit errors before — a single 504 was fatal.
181
+ */
182
+ const TRANSIENT_NETWORK_PATTERNS = ['i/o timeout', 'dial tcp', 'connection refused', 'connection reset', 'econnreset', 'etimedout', 'enotfound', 'ehostunreach', 'enetunreach', 'network is unreachable', 'temporary failure', 'http 502', 'http 503', 'http 504', 'bad gateway', 'service unavailable', 'gateway timeout', 'tls handshake timeout', 'ssl_error', 'socket hang up', 'unexpected eof'];
183
+
184
+ const isTransientNetworkError = error => {
185
+ const text = collectErrorText(error).toLowerCase();
186
+ if (!text) return false;
187
+ return TRANSIENT_NETWORK_PATTERNS.some(pattern => text.includes(pattern));
188
+ };
189
+
172
190
  /**
173
191
  * Wrap `fn` so that GitHub rate-limit errors are converted into a sleep until
174
- * (resetTime + bufferMs + jitterMs) followed by a retry. Non-rate-limit errors
175
- * are rethrown immediately so we don't mask programming bugs or 404s.
192
+ * (resetTime + bufferMs + jitterMs) followed by a retry. Transient network
193
+ * errors (504/502/503, socket hang up, TLS timeouts) get exponential backoff
194
+ * and a separate retry budget. Other errors are rethrown immediately so we
195
+ * don't mask programming bugs or 404s.
196
+ *
197
+ * Issue #1726 — rate-limit retry. Issue #1756 — transient network retry.
176
198
  *
177
199
  * @template T
178
200
  * @param {() => Promise<T>} fn
179
201
  * @param {object} [options]
180
202
  * @param {number} [options.maxAttempts] - hard cap on rate-limit retries (default `retryLimits.maxApiRetries`).
203
+ * @param {number} [options.transientMaxAttempts] - hard cap on transient network retries (default `retryLimits.maxApiRetries`).
204
+ * @param {number} [options.transientDelay] - initial transient retry delay in ms (default 1000).
205
+ * @param {number} [options.transientBackoff] - backoff multiplier for transient retries (default 2).
181
206
  * @param {string} [options.label] - prefix for log messages.
182
207
  * @param {(msg: string) => Promise<void>|void} [options.log] - logger. Defaults to console.warn.
183
208
  * @returns {Promise<T>}
184
209
  */
185
210
  export const ghWithRateLimitRetry = async (fn, options = {}) => {
186
211
  const maxAttempts = options.maxAttempts ?? retryLimits.maxApiRetries;
212
+ const transientMaxAttempts = options.transientMaxAttempts ?? retryLimits.maxApiRetries;
213
+ const transientDelay = options.transientDelay ?? 1000;
214
+ const transientBackoff = options.transientBackoff ?? 2;
187
215
  const label = options.label || 'gh';
188
216
  const log = options.log || (msg => console.warn(msg));
189
217
 
218
+ // Two independent retry budgets — a long string of rate-limit responses
219
+ // shouldn't burn the transient-error retries, and vice versa.
220
+ let rateLimitAttempts = 0;
221
+ let transientAttempts = 0;
190
222
  let lastError;
191
- for (let attempt = 1; attempt <= maxAttempts; attempt++) {
223
+ // Hard cap so a permanently broken endpoint can't loop forever — sum of
224
+ // both budgets plus a safety margin.
225
+ const hardCap = maxAttempts + transientMaxAttempts + 1;
226
+
227
+ for (let i = 0; i < hardCap; i++) {
192
228
  try {
193
229
  return await fn();
194
230
  } catch (error) {
195
231
  lastError = error;
196
- if (!isRateLimitError(error)) throw error;
197
232
 
198
- if (attempt === maxAttempts) {
199
- await Promise.resolve(log(`❌ ${label}: rate limit still active after ${attempt} attempts; giving up.`));
200
- throw error;
233
+ if (isRateLimitError(error)) {
234
+ rateLimitAttempts++;
235
+ if (rateLimitAttempts >= maxAttempts) {
236
+ await Promise.resolve(log(`❌ ${label}: rate limit still active after ${rateLimitAttempts} attempts; giving up.`));
237
+ throw error;
238
+ }
239
+ const reset = parseRateLimitReset(error) || (await fetchNextRateLimitReset());
240
+ const { waitMs, deadline, bufferMs, jitterMs } = computeRateLimitWait(reset);
241
+ const waitMinutes = Math.round(waitMs / 60_000);
242
+ const resetSummary = reset ? `reset at ${reset.toISOString()}` : 'reset time unknown (using buffer + jitter only)';
243
+ await Promise.resolve(log(`⏳ ${label}: GitHub API rate limit hit (attempt ${rateLimitAttempts}/${maxAttempts}). Waiting ${waitMinutes} min (${resetSummary}; buffer ${Math.round(bufferMs / 60_000)} min + jitter ${Math.round(jitterMs / 1000)}s) until ${deadline.toISOString()}.`));
244
+ await sleepWithCountdown(waitMs, log);
245
+ continue;
201
246
  }
202
247
 
203
- const reset = parseRateLimitReset(error) || (await fetchNextRateLimitReset());
204
- const { waitMs, deadline, bufferMs, jitterMs } = computeRateLimitWait(reset);
205
- const waitMinutes = Math.round(waitMs / 60_000);
206
- const resetSummary = reset ? `reset at ${reset.toISOString()}` : 'reset time unknown (using buffer + jitter only)';
207
- await Promise.resolve(log(`⏳ ${label}: GitHub API rate limit hit (attempt ${attempt}/${maxAttempts}). Waiting ${waitMinutes} min (${resetSummary}; buffer ${Math.round(bufferMs / 60_000)} min + jitter ${Math.round(jitterMs / 1000)}s) until ${deadline.toISOString()}.`));
208
- await sleepWithCountdown(waitMs, log);
248
+ if (isTransientNetworkError(error)) {
249
+ transientAttempts++;
250
+ if (transientAttempts >= transientMaxAttempts) {
251
+ await Promise.resolve(log(`❌ ${label}: transient network error persisted after ${transientAttempts} attempts; giving up.`));
252
+ throw error;
253
+ }
254
+ const waitMs = transientDelay * Math.pow(transientBackoff, transientAttempts - 1);
255
+ await Promise.resolve(log(`⚠️ ${label}: transient network error (attempt ${transientAttempts}/${transientMaxAttempts}), retrying in ${Math.round(waitMs / 1000)}s...`));
256
+ await sleepWithCountdown(waitMs, log);
257
+ continue;
258
+ }
259
+
260
+ throw error;
209
261
  }
210
262
  }
211
- // Unreachable — loop either returns or throws.
263
+ // Unreachable — loop either returns or throws via the budgets above.
212
264
  throw lastError;
213
265
  };
214
266
 
@@ -265,8 +317,11 @@ export const wrapDollarWithGhRetry = (dollar, options = {}) => {
265
317
  return wrapped;
266
318
  };
267
319
 
320
+ export { isTransientNetworkError };
321
+
268
322
  export default {
269
323
  isRateLimitError,
324
+ isTransientNetworkError,
270
325
  parseRateLimitReset,
271
326
  fetchNextRateLimitReset,
272
327
  computeRateLimitWait,
@@ -11,7 +11,7 @@ if (typeof globalThis.use === 'undefined') {
11
11
  import { log, cleanErrorMessage } from './lib.mjs';
12
12
  import { githubLimits, timeouts } from './config.lib.mjs';
13
13
 
14
- import { wrapDollarWithGhRetry as _wrapDollarWithGhRetry } from './github-rate-limit.lib.mjs'; // rate-limit marker (#1726): gh API calls flow through $ wrapped by caller
14
+ import { wrapDollarWithGhRetry as _wrapDollarWithGhRetry, execGhWithRetry } from './github-rate-limit.lib.mjs'; // rate-limit marker (#1726): gh API calls flow through $ wrapped by caller. execGhWithRetry adds transient-network retry (#1756).
15
15
  /**
16
16
  * Check if a PR body/title indicates it fixes/closes/resolves a specific issue number
17
17
  * GitHub auto-closes issues when PR body contains keywords like "fixes #123", "closes #123", "resolves #123"
@@ -124,14 +124,14 @@ export async function batchCheckPullRequestsForIssues(owner, repo, issueNumbers)
124
124
  await new Promise(resolve => setTimeout(resolve, timeouts.githubRepoDelay));
125
125
  }
126
126
 
127
- // Execute GraphQL query
128
- const { exec } = await import('child_process');
129
- const { promisify } = await import('util');
130
- const execAsync = promisify(exec);
131
- const { stdout } = await execAsync(`gh api graphql -f query='${query}'`, {
132
- encoding: 'utf8',
133
- maxBuffer: githubLimits.bufferMaxSize,
134
- env: process.env,
127
+ // Execute GraphQL query (#1756: route through execGhWithRetry for transient 5xx + rate-limit)
128
+ const { stdout } = await execGhWithRetry(`gh api graphql -f query='${query}'`, {
129
+ execOptions: {
130
+ encoding: 'utf8',
131
+ maxBuffer: githubLimits.bufferMaxSize,
132
+ env: process.env,
133
+ },
134
+ label: 'gh api graphql (batch PR check)',
135
135
  });
136
136
 
137
137
  const data = JSON.parse(stdout);
@@ -191,12 +191,13 @@ export async function batchCheckPullRequestsForIssues(owner, repo, issueNumbers)
191
191
 
192
192
  for (const issueNum of batch) {
193
193
  try {
194
- const { exec } = await import('child_process');
195
- const { promisify } = await import('util');
196
- const execAsync = promisify(exec);
197
194
  const cmd = `gh api repos/${owner}/${repo}/issues/${issueNum}/timeline --paginate --jq '[.[] | select(.event == "cross-referenced" and .source.issue.pull_request != null and .source.issue.state == "open")] | length'`;
198
195
 
199
- const { stdout } = await execAsync(cmd, { encoding: 'utf8', env: process.env });
196
+ // #1756: route REST fallback through execGhWithRetry for transient 5xx + rate-limit
197
+ const { stdout } = await execGhWithRetry(cmd, {
198
+ execOptions: { encoding: 'utf8', env: process.env },
199
+ label: `gh api timeline (issue #${issueNum})`,
200
+ });
200
201
  const openPrCount = parseInt(stdout.trim()) || 0;
201
202
 
202
203
  results[issueNum] = {
@@ -271,14 +272,14 @@ export async function batchCheckArchivedRepositories(repositories) {
271
272
  await new Promise(resolve => setTimeout(resolve, timeouts.githubRepoDelay));
272
273
  }
273
274
 
274
- // Execute GraphQL query
275
- const { exec } = await import('child_process');
276
- const { promisify } = await import('util');
277
- const execAsync = promisify(exec);
278
- const { stdout } = await execAsync(`gh api graphql -f query='${query}'`, {
279
- encoding: 'utf8',
280
- maxBuffer: githubLimits.bufferMaxSize,
281
- env: process.env,
275
+ // Execute GraphQL query (#1756: route through execGhWithRetry for transient 5xx + rate-limit)
276
+ const { stdout } = await execGhWithRetry(`gh api graphql -f query='${query}'`, {
277
+ execOptions: {
278
+ encoding: 'utf8',
279
+ maxBuffer: githubLimits.bufferMaxSize,
280
+ env: process.env,
281
+ },
282
+ label: 'gh api graphql (batch archived check)',
282
283
  });
283
284
 
284
285
  const data = JSON.parse(stdout);
@@ -301,12 +302,13 @@ export async function batchCheckArchivedRepositories(repositories) {
301
302
 
302
303
  for (const repo of batch) {
303
304
  try {
304
- const { exec } = await import('child_process');
305
- const { promisify } = await import('util');
306
- const execAsync = promisify(exec);
307
305
  const cmd = `gh api repos/${repo.owner}/${repo.name} --jq .archived`;
308
306
 
309
- const { stdout } = await execAsync(cmd, { encoding: 'utf8', env: process.env });
307
+ // #1756: route REST fallback through execGhWithRetry for transient 5xx + rate-limit
308
+ const { stdout } = await execGhWithRetry(cmd, {
309
+ execOptions: { encoding: 'utf8', env: process.env },
310
+ label: `gh api repos (${repo.owner}/${repo.name})`,
311
+ });
310
312
  const isArchived = stdout.trim() === 'true';
311
313
 
312
314
  const repoKey = `${repo.owner}/${repo.name}`;
@@ -3,6 +3,8 @@
3
3
  * This module provides functions to fetch issues using GitHub's GraphQL API
4
4
  */
5
5
 
6
+ import { execGhWithRetry } from './github-rate-limit.lib.mjs'; // #1756: route gh exec through transient + rate-limit retry wrapper
7
+
6
8
  /**
7
9
  * Fetch issues from a single repository with pagination support for >100 issues
8
10
  * @param {string} owner - Repository owner
@@ -13,9 +15,6 @@
13
15
  * @returns {Promise<Array>} Array of issues
14
16
  */
15
17
  async function fetchRepositoryIssuesWithPagination(owner, repoName, log, cleanErrorMessage, issueLimit = 100) {
16
- const { exec } = await import('child_process');
17
- const { promisify } = await import('util');
18
- const execAsync = promisify(exec);
19
18
  const allIssues = [];
20
19
  let hasNextPage = true;
21
20
  let cursor = null;
@@ -59,7 +58,10 @@ async function fetchRepositoryIssuesWithPagination(owner, repoName, log, cleanEr
59
58
  // Add delay for rate limiting
60
59
  await new Promise(resolve => setTimeout(resolve, 1000));
61
60
 
62
- const { stdout } = await execAsync(graphqlCmd, { encoding: 'utf8', env: process.env });
61
+ const { stdout } = await execGhWithRetry(graphqlCmd, {
62
+ execOptions: { encoding: 'utf8', env: process.env },
63
+ label: `gh api graphql (issues page ${pageNum} of ${owner}/${repoName})`,
64
+ });
63
65
  const data = JSON.parse(stdout);
64
66
  const issuesData = data.data.repository.issues;
65
67
 
@@ -95,10 +97,6 @@ async function fetchRepositoryIssuesWithPagination(owner, repoName, log, cleanEr
95
97
  * @returns {Promise<{success: boolean, issues: Array, repoCount: number}>}
96
98
  */
97
99
  export async function tryFetchIssuesWithGraphQL(owner, scope, log, cleanErrorMessage, repoLimit = 100, issueLimit = 100) {
98
- const { exec } = await import('child_process');
99
- const { promisify } = await import('util');
100
- const execAsync = promisify(exec);
101
-
102
100
  try {
103
101
  await log(' 🧪 Attempting GraphQL approach with pagination support...', { verbose: true });
104
102
 
@@ -174,7 +172,10 @@ export async function tryFetchIssuesWithGraphQL(owner, scope, log, cleanErrorMes
174
172
  // Add delay for rate limiting
175
173
  await new Promise(resolve => setTimeout(resolve, 2000));
176
174
 
177
- const { stdout } = await execAsync(graphqlCmd, { encoding: 'utf8', env: process.env });
175
+ const { stdout } = await execGhWithRetry(graphqlCmd, {
176
+ execOptions: { encoding: 'utf8', env: process.env },
177
+ label: `gh api graphql (repos page ${repoPageNum} of ${owner})`,
178
+ });
178
179
  const data = JSON.parse(stdout);
179
180
  const repos = isOrg ? data.data.organization.repositories : data.data.user.repositories;
180
181
 
@@ -16,6 +16,8 @@ export { getToolDisplayName }; // Re-export for use by other modules
16
16
  import { buildBudgetStatsString } from './claude.budget-stats.lib.mjs';
17
17
  import { buildCostInfoString } from './github-cost-info.lib.mjs';
18
18
  export { buildCostInfoString };
19
+ // #1756: route gh exec calls through transient + rate-limit retry wrapper
20
+ import { execGhWithRetry } from './github-rate-limit.lib.mjs';
19
21
  // Issue #1625: Named marker constants (single source of truth) + in-memory
20
22
  // tracking for tool-posted comments. See tool-comments.lib.mjs for design.
21
23
  import { SOLUTION_DRAFT_LOG_MARKER, SOLUTION_DRAFT_FAILED_MARKER, SOLUTION_DRAFT_FINISHED_WITH_ERRORS_MARKER, USAGE_LIMIT_REACHED_MARKER, NOW_WORKING_SESSION_IS_ENDED_MARKER, postTrackedComment, postTrackedCommentFromFile } from './tool-comments.lib.mjs';
@@ -858,9 +860,6 @@ export function isRateLimitError(error) {
858
860
  * @returns {Promise<Array>} Array of issues
859
861
  */
860
862
  export async function fetchAllIssuesWithPagination(baseCommand) {
861
- const { exec } = await import('child_process');
862
- const { promisify } = await import('util');
863
- const execAsync = promisify(exec);
864
863
  // Import log and cleanErrorMessage from lib.mjs
865
864
  const { log, cleanErrorMessage } = await import('./lib.mjs');
866
865
  try {
@@ -876,7 +875,11 @@ export async function fetchAllIssuesWithPagination(baseCommand) {
876
875
  const maxPageSize = isSearchCommand ? 100 : 1000;
877
876
  const improvedCommand = `${commandWithoutLimit} --limit ${maxPageSize}`;
878
877
  await log(` 🔎 Executing: ${improvedCommand}`, { verbose: true });
879
- const { stdout } = await execAsync(improvedCommand, { encoding: 'utf8', env: process.env });
878
+ // #1756: use execGhWithRetry so transient 5xx (e.g., 504) auto-retry
879
+ const { stdout } = await execGhWithRetry(improvedCommand, {
880
+ execOptions: { encoding: 'utf8', env: process.env },
881
+ label: 'gh search/list issues (paginated)',
882
+ });
880
883
  const endTime = Date.now();
881
884
  const issues = JSON.parse(stdout || '[]');
882
885
  await log(` ✅ Fetched ${issues.length} issues in ${Math.round((endTime - startTime) / 1000)}s`);
@@ -913,7 +916,11 @@ export async function fetchAllIssuesWithPagination(baseCommand) {
913
916
  await log(' 🔄 Falling back to default behavior...', { verbose: true });
914
917
  const fallbackCommand = baseCommand.includes('--limit') ? baseCommand : `${baseCommand} --limit 100`;
915
918
  await new Promise(resolve => setTimeout(resolve, timeouts.githubRepoDelay)); // Shorter delay for fallback
916
- const { stdout } = await execAsync(fallbackCommand, { encoding: 'utf8', env: process.env });
919
+ // #1756: use execGhWithRetry on fallback too
920
+ const { stdout } = await execGhWithRetry(fallbackCommand, {
921
+ execOptions: { encoding: 'utf8', env: process.env },
922
+ label: 'gh search/list issues (fallback)',
923
+ });
917
924
  const issues = JSON.parse(stdout || '[]');
918
925
  await log(` ⚠️ Fallback: fetched ${issues.length} issues (limited to 100)`, { level: 'warning' });
919
926
  return issues;
package/src/hive.mjs CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  // Import Sentry instrumentation first (must be before other imports)
3
3
  import './instrument.mjs';
4
- import { wrapDollarWithGhRetry as _wrapDollarWithGhRetry } from './github-rate-limit.lib.mjs'; // rate-limit marker (#1726): gh API calls flow through $ wrapped by caller
4
+ import { wrapDollarWithGhRetry as _wrapDollarWithGhRetry, execGhWithRetry } from './github-rate-limit.lib.mjs'; // rate-limit marker (#1726): gh API calls flow through $ wrapped by caller. execGhWithRetry adds transient-network retry (#1756).
5
5
  const earlyArgs = process.argv.slice(2);
6
6
  if (earlyArgs.includes('--version')) {
7
7
  const { getVersion } = await import('./version.lib.mjs');
@@ -112,9 +112,6 @@ if (isRunningDirectly) {
112
112
  * @returns {Promise<Array>} Array of issues
113
113
  */
114
114
  async function fetchIssuesFromRepositories(owner, scope, monitorTag, fetchAllIssues = false) {
115
- const { exec } = await import('child_process');
116
- const { promisify } = await import('util');
117
- const execAsync = promisify(exec);
118
115
  try {
119
116
  await log(` 🔄 Using repository-by-repository fallback for ${scope}: ${owner}`);
120
117
  // Strategy 1: Try GraphQL approach first (faster but has limitations)
@@ -141,7 +138,11 @@ if (isRunningDirectly) {
141
138
 
142
139
  // Add delay for rate limiting
143
140
  await new Promise(resolve => setTimeout(resolve, 2000));
144
- const { stdout: repoOutput } = await execAsync(repoListCmd, { encoding: 'utf8', env: process.env });
141
+ // #1756: route through execGhWithRetry for transient 5xx + rate-limit
142
+ const { stdout: repoOutput } = await execGhWithRetry(repoListCmd, {
143
+ execOptions: { encoding: 'utf8', env: process.env },
144
+ label: `gh api ${scope} repos (paginated)`,
145
+ });
145
146
  // Parse the output line by line, as gh api with --jq outputs one JSON object per line
146
147
  const repoLines = repoOutput
147
148
  .trim()
@@ -12,7 +12,7 @@ import { promisify } from 'node:util';
12
12
  import dayjs from 'dayjs';
13
13
  import utc from 'dayjs/plugin/utc.js';
14
14
 
15
- import { wrapDollarWithGhRetry as _wrapDollarWithGhRetry } from './github-rate-limit.lib.mjs'; // rate-limit marker (#1726): gh API calls flow through $ wrapped by caller
15
+ import { wrapDollarWithGhRetry as _wrapDollarWithGhRetry, execGhWithRetry } from './github-rate-limit.lib.mjs'; // rate-limit marker (#1726): gh API calls flow through $ wrapped by caller. execGhWithRetry adds transient-network retry (#1756).
16
16
  // Initialize dayjs plugins
17
17
  dayjs.extend(utc);
18
18
 
@@ -316,7 +316,8 @@ function getDisplayCpuCoresUsed(loadAvg5, cpuCount) {
316
316
  */
317
317
  export async function getGitHubRateLimits(verbose = false) {
318
318
  try {
319
- const { stdout } = await execAsync('gh api rate_limit 2>/dev/null');
319
+ // #1756: route through execGhWithRetry for transient 5xx; skip rate-limit retry budget (this is the endpoint we'd consult to know about rate limits).
320
+ const { stdout } = await execGhWithRetry('gh api rate_limit 2>/dev/null', { label: 'gh api rate_limit', maxAttempts: 1 });
320
321
  const data = JSON.parse(stdout);
321
322
 
322
323
  if (verbose) {
@@ -5,7 +5,7 @@ const { use } = eval(await (await fetch('https://unpkg.com/use-m/use.js')).text(
5
5
 
6
6
  // Use command-stream for consistent $ behavior across runtimes
7
7
  const { $: __rawDollar$ } = await use('command-stream');
8
- const { wrapDollarWithGhRetry } = await import('./github-rate-limit.lib.mjs');
8
+ const { wrapDollarWithGhRetry, execGhWithRetry } = await import('./github-rate-limit.lib.mjs');
9
9
  const $ = wrapDollarWithGhRetry(__rawDollar$);
10
10
  const { getLinoYargsFactory, hideBin, parseCliArgumentsWithLino } = await import('./cli-arguments.lib.mjs');
11
11
  const path = (await use('path')).default;
@@ -378,20 +378,19 @@ async function reviewer(reviewerId) {
378
378
  // Function to check if a PR already has approvals
379
379
  async function hasApprovals(prUrl) {
380
380
  try {
381
- const { exec } = await import('child_process');
382
- const { promisify } = await import('util');
383
- const execAsync = promisify(exec);
384
-
385
381
  // Extract owner, repo, and PR number from URL
386
382
  const urlMatch = prUrl.match(/github\.com\/([^/]+)\/([^/]+)\/pull\/(\d+)/);
387
383
  if (!urlMatch) return false;
388
384
 
389
385
  const [, prOwner, prRepo, prNumber] = urlMatch;
390
386
 
391
- // Check for reviews using GitHub API
387
+ // Check for reviews using GitHub API (#1756: retry on transient 5xx + rate-limit)
392
388
  const cmd = `gh api repos/${prOwner}/${prRepo}/pulls/${prNumber}/reviews --paginate --jq '[.[] | select(.state == "APPROVED")] | length'`;
393
389
 
394
- const { stdout } = await execAsync(cmd, { encoding: 'utf8', env: process.env });
390
+ const { stdout } = await execGhWithRetry(cmd, {
391
+ execOptions: { encoding: 'utf8', env: process.env },
392
+ label: `gh api reviews (PR #${prNumber})`,
393
+ });
395
394
  const approvalCount = parseInt(stdout.trim()) || 0;
396
395
 
397
396
  if (approvalCount > 0) {
@@ -432,25 +431,24 @@ async function fetchPullRequests() {
432
431
 
433
432
  await log(` 🔎 Command: ${searchCmd}`, { verbose: true });
434
433
 
435
- // Use async exec to avoid escaping issues
436
- const { exec } = await import('child_process');
437
- const { promisify } = await import('util');
438
- const execAsync = promisify(exec);
439
- const { stdout } = await execAsync(searchCmd, { encoding: 'utf8', env: process.env });
434
+ // #1756: route through execGhWithRetry to retry transient 5xx + rate-limit
435
+ const { stdout } = await execGhWithRetry(searchCmd, {
436
+ execOptions: { encoding: 'utf8', env: process.env },
437
+ label: 'gh search prs (all PRs)',
438
+ });
440
439
  prs = JSON.parse(stdout || '[]');
441
440
  } else {
442
- // Use label filter
443
- const { exec } = await import('child_process');
444
- const { promisify } = await import('util');
445
- const execAsync = promisify(exec);
446
-
447
441
  // For repositories, use gh pr list which works better
448
442
  if (scope === 'repository') {
449
443
  const listCmd = `gh pr list --repo ${owner}/${repo} --state open --label "${argv.reviewLabel}" --limit 100 --json url,title,number,isDraft`;
450
444
  await log(` 🔎 Command: ${listCmd}`, { verbose: true });
451
445
 
452
446
  try {
453
- const { stdout } = await execAsync(listCmd, { encoding: 'utf8', env: process.env });
447
+ // #1756: retry on transient 5xx + rate-limit
448
+ const { stdout } = await execGhWithRetry(listCmd, {
449
+ execOptions: { encoding: 'utf8', env: process.env },
450
+ label: 'gh pr list (label filter)',
451
+ });
454
452
  prs = JSON.parse(stdout || '[]');
455
453
  } catch (listError) {
456
454
  await log(` ⚠️ List failed: ${listError.message.split('\n')[0]}`, { verbose: true });
@@ -481,7 +479,11 @@ async function fetchPullRequests() {
481
479
  await log(` 🔎 Command: ${searchCmd}`, { verbose: true });
482
480
 
483
481
  try {
484
- const { stdout } = await execAsync(searchCmd, { encoding: 'utf8', env: process.env });
482
+ // #1756: retry on transient 5xx + rate-limit
483
+ const { stdout } = await execGhWithRetry(searchCmd, {
484
+ execOptions: { encoding: 'utf8', env: process.env },
485
+ label: 'gh search prs (label filter)',
486
+ });
485
487
  prs = JSON.parse(stdout || '[]');
486
488
  } catch (searchError) {
487
489
  await log(` ⚠️ Search failed: ${searchError.message.split('\n')[0]}`, { verbose: true });
@@ -6,7 +6,7 @@
6
6
  import { closingIssueNumbersContain, parseClosingIssueNumbers } from './pr-issue-linking.lib.mjs';
7
7
  import { buildPushRejectionExplanation, getRemoteBranchDivergenceSnapshot, synchronizeExistingIssueBranchBeforeAutoPrCreation } from './solve.branch-divergence.lib.mjs';
8
8
 
9
- import { wrapDollarWithGhRetry as _wrapDollarWithGhRetry } from './github-rate-limit.lib.mjs'; // rate-limit marker (#1726): gh API calls flow through $ wrapped by caller
9
+ import { wrapDollarWithGhRetry as _wrapDollarWithGhRetry, execGhWithRetry } from './github-rate-limit.lib.mjs'; // rate-limit marker (#1726): gh API calls flow through $ wrapped by caller. Issue #1756: execGhWithRetry retries on transient 5xx (504) too.
10
10
 
11
11
  export async function handleAutoPrCreation({ argv, tempDir, branchName, issueNumber, owner, repo, defaultBranch, forkedRepo, isContinueMode, prNumber, log, formatAligned, $, reportError, path, fs }) {
12
12
  // Skip auto-PR creation if:
@@ -903,16 +903,16 @@ Proceed.
903
903
  await log(` Current user: ${currentUser}`, { verbose: true });
904
904
 
905
905
  // Check if user has push access (is a collaborator or owner)
906
- // IMPORTANT: We need to completely suppress the JSON error output
907
- // Using async exec to have full control over stderr
906
+ // IMPORTANT: We need to completely suppress the JSON error output.
907
+ // Issue #1756: route through execGhWithRetry so transient 5xx
908
+ // (504) and rate-limit responses are retried instead of being
909
+ // mistaken for "user is not a collaborator".
908
910
  try {
909
- const { exec } = await import('child_process');
910
- const { promisify } = await import('util');
911
- const execAsync = promisify(exec);
912
911
  // This will throw if user doesn't have access, but won't print anything
913
- await execAsync(`gh api repos/${owner}/${repo}/collaborators/${currentUser} 2>/dev/null`, {
914
- encoding: 'utf8',
915
- env: process.env,
912
+ await execGhWithRetry(`gh api repos/${owner}/${repo}/collaborators/${currentUser} 2>/dev/null`, {
913
+ execOptions: { encoding: 'utf8', env: process.env },
914
+ label: `gh api collaborators (${owner}/${repo}/${currentUser})`,
915
+ log: msg => log(msg, { level: 'warn' }),
916
916
  });
917
917
  canAssign = true;
918
918
  await log(' User has collaborator access', { verbose: true });
@@ -1093,13 +1093,11 @@ ${prBody}`,
1093
1093
  );
1094
1094
  }
1095
1095
 
1096
- // Use async exec for gh pr create to avoid command-stream output issues
1097
- // Similar to how create-test-repo.mjs handles it
1096
+ // Issue #1756: route `gh pr create` through execGhWithRetry so a
1097
+ // single transient 5xx (e.g. `HTTP 504: 504 Gateway Timeout
1098
+ // (https://api.github.com/graphql)`) or rate-limit response retries
1099
+ // instead of aborting the whole solve session.
1098
1100
  try {
1099
- const { exec } = await import('child_process');
1100
- const { promisify } = await import('util');
1101
- const execAsync = promisify(exec);
1102
-
1103
1101
  // Write PR body to temp file to avoid shell escaping issues
1104
1102
  const prBodyFile = `/tmp/pr-body-${Date.now()}.md`;
1105
1103
  await fs.writeFile(prBodyFile, prBody);
@@ -1135,9 +1133,16 @@ ${prBody}`,
1135
1133
  let prCreateStderr = '';
1136
1134
  let assigneeFailed = false;
1137
1135
 
1136
+ const prCreateExecOptions = { encoding: 'utf8', cwd: tempDir, env: process.env };
1137
+ const prCreateRetryLogger = msg => log(msg, { level: 'warn' });
1138
+
1138
1139
  // Try to create PR with assignee first (if specified)
1139
1140
  try {
1140
- const result = await execAsync(command, { encoding: 'utf8', cwd: tempDir, env: process.env });
1141
+ const result = await execGhWithRetry(command, {
1142
+ execOptions: prCreateExecOptions,
1143
+ label: 'gh pr create',
1144
+ log: prCreateRetryLogger,
1145
+ });
1141
1146
  output = result.stdout;
1142
1147
  prCreateStderr = result.stderr || '';
1143
1148
  } catch (firstError) {
@@ -1165,7 +1170,11 @@ ${prBody}`,
1165
1170
  }
1166
1171
 
1167
1172
  // Retry without assignee - if this fails, let the error propagate to outer catch
1168
- const retryResult = await execAsync(command, { encoding: 'utf8', cwd: tempDir, env: process.env });
1173
+ const retryResult = await execGhWithRetry(command, {
1174
+ execOptions: prCreateExecOptions,
1175
+ label: 'gh pr create (no assignee)',
1176
+ log: prCreateRetryLogger,
1177
+ });
1169
1178
  output = retryResult.stdout;
1170
1179
  prCreateStderr = retryResult.stderr || '';
1171
1180
  } else {