bosun 0.33.10 → 0.34.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/.env.example +13 -1
  2. package/README.md +9 -3
  3. package/agent-event-bus.mjs +4 -3
  4. package/agent-pool.mjs +275 -65
  5. package/agent-prompts.mjs +4 -1
  6. package/agent-work-report.mjs +362 -0
  7. package/anomaly-detector.mjs +36 -0
  8. package/cli.mjs +132 -35
  9. package/codex-config.mjs +31 -3
  10. package/codex-shell.mjs +54 -5
  11. package/config-doctor.mjs +3 -3
  12. package/config.mjs +49 -1
  13. package/copilot-shell.mjs +2 -2
  14. package/error-detector.mjs +5 -0
  15. package/github-reconciler.mjs +113 -17
  16. package/kanban-adapter.mjs +308 -47
  17. package/lib/logger.mjs +10 -2
  18. package/maintenance.mjs +134 -17
  19. package/monitor.mjs +527 -73
  20. package/package.json +2 -1
  21. package/preflight.mjs +36 -0
  22. package/primary-agent.mjs +28 -8
  23. package/repo-config.mjs +1 -1
  24. package/shared-state-manager.mjs +14 -2
  25. package/task-executor.mjs +184 -33
  26. package/telegram-bot.mjs +34 -0
  27. package/telegram-sentinel.mjs +27 -2
  28. package/ui/app.js +28 -25
  29. package/ui/components/agent-selector.js +253 -16
  30. package/ui/modules/api.js +7 -2
  31. package/ui/modules/router.js +14 -0
  32. package/ui/modules/streaming.js +1 -1
  33. package/ui/styles/components.css +149 -1
  34. package/ui/styles/layout.css +1 -1
  35. package/ui/styles/variables.css +1 -1
  36. package/ui/tabs/agents.js +307 -66
  37. package/ui/tabs/chat.js +4 -2
  38. package/ui/tabs/dashboard.js +1 -1
  39. package/ui/tabs/library.js +5 -5
  40. package/ui/tabs/logs.js +12 -0
  41. package/ui/tabs/settings.js +1 -2
  42. package/ui/tabs/workflows.js +16 -13
  43. package/ui-server.mjs +93 -36
  44. package/update-check.mjs +194 -35
  45. package/ve-orchestrator.ps1 +223 -4
  46. package/workflow-engine.mjs +19 -14
package/.env.example CHANGED
@@ -39,6 +39,15 @@ TELEGRAM_CHAT_ID=
39
39
  # TELEGRAM_ALLOWED_CHAT_IDS=
40
40
  # Minutes between periodic status summaries (default: 10)
41
41
  TELEGRAM_INTERVAL_MIN=10
42
+ # Weekly report scheduler (default: disabled)
43
+ # Sends the same report as /weekly automatically on a UTC schedule.
44
+ # TELEGRAM_WEEKLY_REPORT_ENABLED=false
45
+ # Day of week in UTC: 0=Sunday ... 6=Saturday (default: 0)
46
+ # TELEGRAM_WEEKLY_REPORT_DAY=0
47
+ # Hour in UTC (0-23) when weekly report is sent (default: 9)
48
+ # TELEGRAM_WEEKLY_REPORT_HOUR=9
49
+ # Lookback window in days for weekly report generation (default: 7)
50
+ # TELEGRAM_WEEKLY_REPORT_DAYS=7
42
51
  # Long-poll timeout for getUpdates in seconds (default: 20)
43
52
  TELEGRAM_COMMAND_POLL_TIMEOUT_SEC=20
44
53
  # Agent execution timeout for Telegram-triggered SDK runs (ms).
@@ -497,7 +506,7 @@ TELEGRAM_MINIAPP_ENABLED=false
497
506
  # Controls which Codex CLI features are enabled in config.toml during setup.
498
507
  # All default to "true" (enabled). Set to "false" to disable a specific feature.
499
508
  # CODEX_FEATURES_CHILD_AGENTS_MD=true # Sub-agent discovery via CODEX.md (KEY for sub-agents)
500
- # CODEX_FEATURES_MEMORY_TOOL=true # Persistent memory across sessions
509
+ # CODEX_FEATURES_MEMORIES=true # Persistent memory across sessions
501
510
  # CODEX_FEATURES_UNDO=true # Undo/rollback support
502
511
  # CODEX_FEATURES_MULTI_AGENT=true # Multi Agent mode
503
512
  # CODEX_FEATURES_COLLABORATION_MODES=true # Mode selection for collaboration
@@ -1033,6 +1042,9 @@ COPILOT_CLOUD_DISABLED=true
1033
1042
  # AGENT_WORK_LOGGING_ENABLED=true
1034
1043
  # Enable/disable live stream analyzer (default: true)
1035
1044
  # AGENT_WORK_ANALYZER_ENABLED=true
1045
+ # Enrich missing task metadata from VK for agent work logs (default: true)
1046
+ # AGENT_WORK_LOGGING_ENRICH_VK=true
1047
+ # Task metadata cache (auto-managed): .cache/agent-work-logs/task-metadata.json
1036
1048
  # Log directory (default: .cache/agent-work-logs)
1037
1049
  # AGENT_WORK_LOG_DIR=.cache/agent-work-logs
1038
1050
  # Session log retention count - keep last N session transcripts (default: 100)
package/README.md CHANGED
@@ -54,6 +54,12 @@ Requires:
54
54
 
55
55
  ---
56
56
 
57
+ ## Telegram weekly report
58
+
59
+ - Run `/weekly` to generate the operator weekly agent-work report on demand.
60
+ - Use `/report weekly` as an alias.
61
+ - Optional scheduler knobs in `.env`: `TELEGRAM_WEEKLY_REPORT_ENABLED`, `TELEGRAM_WEEKLY_REPORT_DAY`, `TELEGRAM_WEEKLY_REPORT_HOUR` (UTC), and `TELEGRAM_WEEKLY_REPORT_DAYS`.
62
+
57
63
  ## Documentation
58
64
 
59
65
  **Published docs (website):** https://bosun.virtengine.com/docs/
@@ -89,13 +95,13 @@ Local commands you can run any time:
89
95
 
90
96
  ```bash
91
97
  # Syntax + tests for bosun package
92
- npm -C scripts/bosun test
98
+ npm test
93
99
 
94
100
  # Prepublish safety checks
95
- npm -C scripts/bosun run prepublishOnly
101
+ npm run prepublishOnly
96
102
 
97
103
  # Install local git hooks (pre-commit + pre-push)
98
- npm -C scripts/bosun run hooks:install
104
+ npm run hooks:install
99
105
  ```
100
106
 
101
107
  ---
package/agent-event-bus.mjs CHANGED
@@ -114,6 +114,7 @@ export class AgentEventBus {
114
114
  options.staleCheckIntervalMs || DEFAULTS.staleCheckIntervalMs;
115
115
  this._maxAutoRetries =
116
116
  options.maxAutoRetries ?? DEFAULTS.maxAutoRetries;
117
+ this._dedupeWindowMs = options.dedupeWindowMs || DEFAULTS.dedupeWindowMs;
117
118
 
118
119
  /** @type {Array<{type: string, taskId: string, payload: object, ts: number}>} ring buffer */
119
120
  this._eventLog = [];
@@ -202,11 +203,11 @@ export class AgentEventBus {
202
203
 
203
204
  // ── Dedup
204
205
  const key = `${type}:${taskId}`;
205
- const last = this._recentEmits.get(key) || 0;
206
- if (ts - last < DEFAULTS.dedupeWindowMs) return;
206
+ const last = this._recentEmits.get(key);
207
+ if (typeof last === "number" && ts - last < this._dedupeWindowMs) return;
207
208
  this._recentEmits.set(key, ts);
208
209
  if (this._recentEmits.size > 200) {
209
- const cutoff = ts - DEFAULTS.dedupeWindowMs * 2;
210
+ const cutoff = ts - this._dedupeWindowMs * 2;
210
211
  for (const [k, v] of this._recentEmits) {
211
212
  if (v < cutoff) this._recentEmits.delete(k);
212
213
  }
package/agent-pool.mjs CHANGED
@@ -110,6 +110,57 @@ function buildCopilotPermissionHandler() {
110
110
  return async () => ({ kind: "approved" });
111
111
  }
112
112
 
113
+ /**
114
+ * Build a per-attempt abort controller that preserves external abort semantics.
115
+ *
116
+ * Internal attempt timeouts must not abort the caller-provided controller.
117
+ * Otherwise retries are immediately pre-aborted and treated as external kills.
118
+ *
119
+ * @param {AbortController|null|undefined} externalAC
120
+ * @param {number} timeoutMs
121
+ * @returns {{ controller: AbortController, cleanup: () => void }}
122
+ */
123
+ function createScopedAbortController(externalAC, timeoutMs) {
124
+ const controller = new AbortController();
125
+ const externalSignal = externalAC?.signal || null;
126
+ const forwardExternalAbort = () => {
127
+ if (!controller.signal.aborted) {
128
+ controller.abort(externalSignal?.reason || "external_abort");
129
+ }
130
+ };
131
+
132
+ if (externalSignal) {
133
+ if (externalSignal.aborted) {
134
+ forwardExternalAbort();
135
+ } else {
136
+ externalSignal.addEventListener("abort", forwardExternalAbort, {
137
+ once: true,
138
+ });
139
+ }
140
+ }
141
+
142
+ const timeoutHandle = setTimeout(() => {
143
+ if (!controller.signal.aborted) {
144
+ controller.abort("timeout");
145
+ }
146
+ }, timeoutMs);
147
+ if (timeoutHandle && typeof timeoutHandle.unref === "function") {
148
+ timeoutHandle.unref();
149
+ }
150
+
151
+ let cleaned = false;
152
+ const cleanup = () => {
153
+ if (cleaned) return;
154
+ cleaned = true;
155
+ clearTimeout(timeoutHandle);
156
+ if (externalSignal) {
157
+ externalSignal.removeEventListener("abort", forwardExternalAbort);
158
+ }
159
+ };
160
+
161
+ return { controller, cleanup };
162
+ }
163
+
113
164
  function shouldFallbackForSdkError(error) {
114
165
  if (!error) return false;
115
166
  const message = String(error).toLowerCase();
@@ -127,13 +178,42 @@ function shouldFallbackForSdkError(error) {
127
178
  if (message.includes("invalid model")) return true;
128
179
  // Auth / key errors — SDK isn't properly configured
129
180
  if (message.includes("unauthorized") || message.includes("401")) return true;
130
- if (message.includes("api key") && (message.includes("invalid") || message.includes("missing") || message.includes("required"))) return true;
131
- if (message.includes("authentication") && (message.includes("failed") || message.includes("required") || message.includes("error"))) return true;
181
+ if (
182
+ message.includes("api key") &&
183
+ (message.includes("invalid") ||
184
+ message.includes("missing") ||
185
+ message.includes("required"))
186
+ ) {
187
+ return true;
188
+ }
189
+ if (
190
+ message.includes("authentication") &&
191
+ (message.includes("failed") ||
192
+ message.includes("required") ||
193
+ message.includes("error"))
194
+ ) {
195
+ return true;
196
+ }
132
197
  if (message.includes("forbidden") || message.includes("403")) return true;
133
198
  // Connection errors — SDK endpoint is unreachable
134
199
  if (message.includes("econnrefused")) return true;
135
200
  if (message.includes("enotfound")) return true;
136
201
  if (message.includes("connection refused")) return true;
202
+ if (message.includes("connection reset")) return true;
203
+ if (message.includes("etimedout")) return true;
204
+ // Runtime/provider instability: fail over to next SDK immediately.
205
+ if (message.includes("timeout")) return true;
206
+ if (message.includes("rate limit") || message.includes("429")) return true;
207
+ if (message.includes("service unavailable") || message.includes("503")) {
208
+ return true;
209
+ }
210
+ if (message.includes("bad gateway") || message.includes("502")) return true;
211
+ if (message.includes("gateway timeout") || message.includes("504")) {
212
+ return true;
213
+ }
214
+ if (message.includes("overloaded") || message.includes("server error")) {
215
+ return true;
216
+ }
137
217
  return false;
138
218
  }
139
219
 
@@ -349,6 +429,64 @@ function isDisabled(name) {
349
429
  return envFlagEnabled(process.env[adapter.envDisableKey]);
350
430
  }
351
431
 
432
+ const DEFAULT_SDK_FAILURE_COOLDOWN_MS = 5 * 60 * 1000;
433
+ const sdkFailureCooldownUntil = new Map();
434
+
435
+ function getSdkFailureCooldownMs() {
436
+ const parsed = Number(process.env.AGENT_POOL_SDK_FAILURE_COOLDOWN_MS);
437
+ if (!Number.isFinite(parsed) || parsed < 0) {
438
+ return DEFAULT_SDK_FAILURE_COOLDOWN_MS;
439
+ }
440
+ return Math.trunc(parsed);
441
+ }
442
+
443
+ function getSdkCooldownRemainingMs(name, nowMs = Date.now()) {
444
+ const untilMs = Number(sdkFailureCooldownUntil.get(name) || 0);
445
+ if (!untilMs || untilMs <= nowMs) {
446
+ sdkFailureCooldownUntil.delete(name);
447
+ return 0;
448
+ }
449
+ return untilMs - nowMs;
450
+ }
451
+
452
+ function shouldApplySdkCooldown(error) {
453
+ if (!error) return false;
454
+ const message = String(error).toLowerCase();
455
+ if (!message) return false;
456
+ if (message.includes("timeout")) return true;
457
+ if (message.includes("rate limit") || message.includes("429")) return true;
458
+ if (message.includes("service unavailable") || message.includes("503")) {
459
+ return true;
460
+ }
461
+ if (message.includes("bad gateway") || message.includes("502")) return true;
462
+ if (message.includes("gateway timeout") || message.includes("504")) {
463
+ return true;
464
+ }
465
+ if (message.includes("overloaded") || message.includes("server error")) {
466
+ return true;
467
+ }
468
+ if (message.includes("econnrefused")) return true;
469
+ if (message.includes("enotfound")) return true;
470
+ if (message.includes("connection reset")) return true;
471
+ if (message.includes("etimedout")) return true;
472
+ return false;
473
+ }
474
+
475
+ function applySdkFailureCooldown(name, error, nowMs = Date.now()) {
476
+ if (!name || !SDK_ADAPTERS[name]) return;
477
+ if (!shouldApplySdkCooldown(error)) return;
478
+ const cooldownMs = getSdkFailureCooldownMs();
479
+ if (cooldownMs <= 0) return;
480
+ const untilMs = nowMs + cooldownMs;
481
+ const previous = Number(sdkFailureCooldownUntil.get(name) || 0);
482
+ if (untilMs <= previous) return;
483
+ sdkFailureCooldownUntil.set(name, untilMs);
484
+ const roundedSec = Math.max(1, Math.ceil(cooldownMs / 1000));
485
+ console.warn(
486
+ `${TAG} SDK "${name}" entering failure cooldown for ${roundedSec}s after fallback-worthy error: ${error}`,
487
+ );
488
+ }
489
+
352
490
  const MONITOR_MONITOR_TASK_KEY = "monitor-monitor";
353
491
  let monitorMonitorTimeoutBoundsWarningKey = "";
354
492
  let monitorMonitorTimeoutAdjustmentKey = "";
@@ -604,7 +742,7 @@ async function launchCodexThread(prompt, cwd, timeoutMs, extra = {}) {
604
742
  features: {
605
743
  child_agents_md: true,
606
744
  multi_agent: true,
607
- memory_tool: true,
745
+ memories: true,
608
746
  undo: true,
609
747
  steer: true,
610
748
  },
@@ -642,8 +780,10 @@ async function launchCodexThread(prompt, cwd, timeoutMs, extra = {}) {
642
780
  }
643
781
 
644
782
  // ── 3. Timeout / abort wiring ────────────────────────────────────────────
645
- const controller = externalAC || new AbortController();
646
- const timer = setTimeout(() => controller.abort("timeout"), timeoutMs);
783
+ const { controller, cleanup: clearAbortScope } = createScopedAbortController(
784
+ externalAC,
785
+ timeoutMs,
786
+ );
647
787
 
648
788
  // Hard timeout: safety net if the SDK's async iterator ignores AbortSignal.
649
789
  // Fires HARD_TIMEOUT_BUFFER_MS after the soft timeout to forcibly break the loop.
@@ -692,7 +832,7 @@ async function launchCodexThread(prompt, cwd, timeoutMs, extra = {}) {
692
832
 
693
833
  await Promise.race([iterateEvents(), hardTimeoutPromise]);
694
834
  clearTimeout(hardTimer);
695
- clearTimeout(timer);
835
+ clearAbortScope();
696
836
 
697
837
  const output =
698
838
  finalResponse.trim() || "(Agent completed with no text output)";
@@ -706,7 +846,7 @@ async function launchCodexThread(prompt, cwd, timeoutMs, extra = {}) {
706
846
  threadId: thread.id || null,
707
847
  };
708
848
  } catch (err) {
709
- clearTimeout(timer);
849
+ clearAbortScope();
710
850
  if (hardTimer) clearTimeout(hardTimer);
711
851
  if (steerKey) unregisterActiveSession(steerKey);
712
852
  const isTimeout =
@@ -841,8 +981,10 @@ async function launchCopilotThread(prompt, cwd, timeoutMs, extra = {}) {
841
981
  const sessionMode = (process.env.COPILOT_SESSION_MODE || "local").trim().toLowerCase();
842
982
  const cliUrl = process.env.COPILOT_CLI_URL || undefined;
843
983
 
844
- const controller = externalAC || new AbortController();
845
- const timer = setTimeout(() => controller.abort("timeout"), timeoutMs);
984
+ const { controller, cleanup: clearAbortScope } = createScopedAbortController(
985
+ externalAC,
986
+ timeoutMs,
987
+ );
846
988
 
847
989
  let client;
848
990
  let unsubscribe = null;
@@ -884,7 +1026,7 @@ async function launchCopilotThread(prompt, cwd, timeoutMs, extra = {}) {
884
1026
  await client.start();
885
1027
  });
886
1028
  } catch (err) {
887
- clearTimeout(timer);
1029
+ clearAbortScope();
888
1030
  return {
889
1031
  success: false,
890
1032
  output: "",
@@ -1131,7 +1273,7 @@ async function launchCopilotThread(prompt, cwd, timeoutMs, extra = {}) {
1131
1273
  threadId: resumeThreadId,
1132
1274
  };
1133
1275
  } finally {
1134
- clearTimeout(timer);
1276
+ clearAbortScope();
1135
1277
  if (steerKey) unregisterActiveSession(steerKey);
1136
1278
  try {
1137
1279
  if (typeof unsubscribe === "function") unsubscribe();
@@ -1221,10 +1363,13 @@ async function launchClaudeThread(prompt, cwd, timeoutMs, extra = {}) {
1221
1363
  undefined;
1222
1364
 
1223
1365
  // ── 3. Build message queue ───────────────────────────────────────────────
1224
- const controller = externalAC || new AbortController();
1225
- const softTimer = setTimeout(() => controller.abort("timeout"), timeoutMs);
1366
+ const { controller, cleanup: clearAbortScope } = createScopedAbortController(
1367
+ externalAC,
1368
+ timeoutMs,
1369
+ );
1226
1370
  // Hard timeout: force-break Promise.race if SDK ignores abort signal
1227
1371
  const hardTimeoutMs = timeoutMs + HARD_TIMEOUT_BUFFER_MS;
1372
+ let hardTimer = null;
1228
1373
 
1229
1374
  /**
1230
1375
  * Minimal async message queue for the Claude SDK streaming interface.
@@ -1416,13 +1561,17 @@ async function launchClaudeThread(prompt, cwd, timeoutMs, extra = {}) {
1416
1561
  }
1417
1562
  })();
1418
1563
 
1419
- const hardTimeout = new Promise((_, reject) =>
1420
- setTimeout(() => reject(new Error("hard-timeout")), hardTimeoutMs),
1421
- );
1564
+ const hardTimeout = new Promise((_, reject) => {
1565
+ hardTimer = setTimeout(() => reject(new Error("hard-timeout")), hardTimeoutMs);
1566
+ if (hardTimer && typeof hardTimer.unref === "function") {
1567
+ hardTimer.unref();
1568
+ }
1569
+ });
1422
1570
 
1423
1571
  await Promise.race([sdkExecution, hardTimeout]);
1572
+ if (hardTimer) clearTimeout(hardTimer);
1424
1573
 
1425
- clearTimeout(softTimer);
1574
+ clearAbortScope();
1426
1575
  msgQueue.close();
1427
1576
  if (steerKey) unregisterActiveSession(steerKey);
1428
1577
 
@@ -1437,7 +1586,8 @@ async function launchClaudeThread(prompt, cwd, timeoutMs, extra = {}) {
1437
1586
  threadId: activeClaudeSessionId,
1438
1587
  };
1439
1588
  } catch (err) {
1440
- clearTimeout(softTimer);
1589
+ clearAbortScope();
1590
+ if (hardTimer) clearTimeout(hardTimer);
1441
1591
  if (steerKey) unregisterActiveSession(steerKey);
1442
1592
  const isTimeout =
1443
1593
  err.name === "AbortError" ||
@@ -1555,71 +1705,124 @@ export async function launchEphemeralThread(
1555
1705
  ? requestedSdk
1556
1706
  : resolvePoolSdkName();
1557
1707
 
1558
- const primaryAdapter = SDK_ADAPTERS[primaryName];
1559
-
1560
- // ── Try primary SDK ──────────────────────────────────────────────────────
1561
- if (primaryAdapter && !isDisabled(primaryName)) {
1562
- const prereq = hasSdkPrerequisites(primaryName);
1563
- if (!prereq.ok) {
1564
- console.warn(
1565
- `${TAG} primary SDK "${primaryName}" missing prerequisites: ${prereq.reason}; trying fallback chain`,
1566
- );
1567
- } else {
1568
- const launcher = await primaryAdapter.load();
1569
- const result = await launcher(prompt, cwd, timeoutMs, extra);
1570
-
1571
- // If it succeeded, or if the error isn't fallback-worthy, return as-is
1572
- if (result.success || !shouldFallbackForSdkError(result.error)) {
1573
- return result;
1574
- }
1708
+ const attemptOrder = extra?.disableFallback
1709
+ ? [primaryName]
1710
+ : [
1711
+ primaryName,
1712
+ ...SDK_FALLBACK_ORDER.filter((name) => name !== primaryName),
1713
+ ];
1575
1714
 
1576
- // Primary SDK not installed — fall through to fallback chain
1577
- console.warn(
1578
- `${TAG} primary SDK "${primaryName}" failed (${result.error}); trying fallback chain`,
1579
- );
1580
- }
1581
- }
1582
-
1583
- // ── Fallback chain ───────────────────────────────────────────────────────
1584
- for (const name of SDK_FALLBACK_ORDER) {
1585
- if (name === primaryName) continue; // already tried
1586
- if (isDisabled(name)) continue;
1715
+ let lastAttemptResult = null;
1716
+ const triedSdkNames = [];
1717
+ const missingPrereqSdks = [];
1718
+ const cooledDownSdks = [];
1719
+ const ignoreSdkCooldown = extra?.ignoreSdkCooldown === true;
1587
1720
 
1721
+ for (const name of attemptOrder) {
1588
1722
  const adapter = SDK_ADAPTERS[name];
1589
1723
  if (!adapter) continue;
1724
+ if (isDisabled(name)) continue;
1725
+
1726
+ const cooldownRemainingMs = ignoreSdkCooldown
1727
+ ? 0
1728
+ : getSdkCooldownRemainingMs(name);
1729
+ if (cooldownRemainingMs > 0) {
1730
+ cooledDownSdks.push({ name, cooldownRemainingMs });
1731
+ const remainingSec = Math.max(1, Math.ceil(cooldownRemainingMs / 1000));
1732
+ if (name === primaryName) {
1733
+ console.warn(
1734
+ `${TAG} primary SDK "${name}" cooling down (${remainingSec}s remaining); trying fallback chain`,
1735
+ );
1736
+ } else {
1737
+ console.log(
1738
+ `${TAG} skipping fallback SDK "${name}" due to cooldown (${remainingSec}s remaining)`,
1739
+ );
1740
+ }
1741
+ continue;
1742
+ }
1590
1743
 
1591
1744
  // Check prerequisites before wasting time trying an unconfigured SDK
1592
1745
  const prereq = hasSdkPrerequisites(name);
1593
1746
  if (!prereq.ok) {
1594
- console.log(
1595
- `${TAG} skipping fallback SDK "${name}": ${prereq.reason}`,
1596
- );
1747
+ missingPrereqSdks.push({ name, reason: prereq.reason });
1748
+ if (name === primaryName) {
1749
+ console.warn(
1750
+ `${TAG} primary SDK "${name}" missing prerequisites: ${prereq.reason}; not attempting fallback`,
1751
+ );
1752
+ return {
1753
+ success: false,
1754
+ output: "",
1755
+ items: [],
1756
+ error: `${TAG} ${name} unavailable: ${prereq.reason}`,
1757
+ sdk: primaryName,
1758
+ threadId: null,
1759
+ };
1760
+ } else {
1761
+ console.log(`${TAG} skipping fallback SDK "${name}": ${prereq.reason}`);
1762
+ }
1597
1763
  continue;
1598
1764
  }
1599
1765
 
1600
- console.log(`${TAG} trying fallback SDK: ${name}`);
1766
+ if (name !== primaryName) {
1767
+ console.log(`${TAG} trying fallback SDK: ${name}`);
1768
+ }
1769
+
1770
+ triedSdkNames.push(name);
1601
1771
  const launcher = await adapter.load();
1602
1772
  const result = await launcher(prompt, cwd, timeoutMs, extra);
1773
+ lastAttemptResult = result;
1603
1774
 
1604
- if (result.success || !shouldFallbackForSdkError(result.error)) {
1775
+ if (result.success) {
1605
1776
  return result;
1606
1777
  }
1778
+
1779
+ if (!shouldFallbackForSdkError(result.error)) {
1780
+ return result;
1781
+ }
1782
+
1783
+ applySdkFailureCooldown(name, result.error);
1784
+
1785
+ if (name === primaryName) {
1786
+ console.warn(
1787
+ `${TAG} primary SDK "${primaryName}" failed (${result.error}); trying fallback chain`,
1788
+ );
1789
+ } else {
1790
+ console.warn(
1791
+ `${TAG} fallback SDK "${name}" failed (${result.error}); trying next fallback`,
1792
+ );
1793
+ }
1607
1794
  }
1608
1795
 
1609
1796
  // ── All SDKs exhausted ───────────────────────────────────────────────────
1610
- const triedSdks = SDK_FALLBACK_ORDER.filter((n) => !isDisabled(n));
1611
- const configuredSdks = triedSdks.filter((n) => hasSdkPrerequisites(n).ok);
1612
- const skippedSdks = triedSdks.filter((n) => !hasSdkPrerequisites(n).ok);
1613
- let errorMsg = `${TAG} no SDK available.`;
1614
- if (configuredSdks.length > 0) {
1615
- errorMsg += ` Tried: ${configuredSdks.join(", ")}.`;
1797
+ if (lastAttemptResult) {
1798
+ return lastAttemptResult;
1616
1799
  }
1617
- if (skippedSdks.length > 0) {
1618
- errorMsg += ` Skipped (missing credentials): ${skippedSdks.map((n) => `${n} (${hasSdkPrerequisites(n).reason})`).join(", ")}.`;
1619
- }
1620
- if (triedSdks.length === 0) {
1800
+
1801
+ const eligibleSdks = Array.from(
1802
+ new Set(attemptOrder.filter((name) => SDK_ADAPTERS[name] && !isDisabled(name))),
1803
+ );
1804
+
1805
+ let errorMsg = `${TAG} no SDK available.`;
1806
+ if (triedSdkNames.length > 0) {
1807
+ errorMsg += ` Tried: ${triedSdkNames.join(", ")}.`;
1808
+ }
1809
+ if (missingPrereqSdks.length > 0) {
1810
+ errorMsg += ` Skipped (missing credentials): ${missingPrereqSdks
1811
+ .map((entry) => `${entry.name} (${entry.reason})`)
1812
+ .join(", ")}.`;
1813
+ }
1814
+ if (cooledDownSdks.length > 0) {
1815
+ errorMsg += ` Cooling down: ${cooledDownSdks
1816
+ .map(
1817
+ (entry) =>
1818
+ `${entry.name} (${Math.max(1, Math.ceil(entry.cooldownRemainingMs / 1000))}s remaining)`,
1819
+ )
1820
+ .join(", ")}.`;
1821
+ }
1822
+ if (eligibleSdks.length === 0) {
1621
1823
  errorMsg += " All SDKs are disabled.";
1622
1824
  }
1825
+
1623
1826
  return {
1624
1827
  success: false,
1625
1828
  output: "",
@@ -2027,8 +2230,10 @@ async function resumeCodexThread(threadId, prompt, cwd, timeoutMs, extra = {}) {
2027
2230
  };
2028
2231
  }
2029
2232
 
2030
- const controller = externalAC || new AbortController();
2031
- const timer = setTimeout(() => controller.abort("timeout"), timeoutMs);
2233
+ const { controller, cleanup: clearAbortScope } = createScopedAbortController(
2234
+ externalAC,
2235
+ timeoutMs,
2236
+ );
2032
2237
  let hardTimer;
2033
2238
 
2034
2239
  try {
@@ -2066,7 +2271,7 @@ async function resumeCodexThread(threadId, prompt, cwd, timeoutMs, extra = {}) {
2066
2271
 
2067
2272
  await Promise.race([iterateEvents(), hardTimeoutPromise]);
2068
2273
  clearTimeout(hardTimer);
2069
- clearTimeout(timer);
2274
+ clearAbortScope();
2070
2275
 
2071
2276
  const newThreadId = thread.id || threadId;
2072
2277
  return {
@@ -2078,7 +2283,7 @@ async function resumeCodexThread(threadId, prompt, cwd, timeoutMs, extra = {}) {
2078
2283
  threadId: newThreadId,
2079
2284
  };
2080
2285
  } catch (err) {
2081
- clearTimeout(timer);
2286
+ clearAbortScope();
2082
2287
  if (hardTimer) clearTimeout(hardTimer);
2083
2288
  const isTimeout =
2084
2289
  err.name === "AbortError" ||
@@ -2163,6 +2368,10 @@ export async function launchOrResumeThread(
2163
2368
  const { taskKey, ...restExtra } = extra;
2164
2369
  // Pass taskKey through as steer key so SDK launchers can register active sessions
2165
2370
  restExtra.taskKey = taskKey;
2371
+ if (restExtra.sdk) {
2372
+ // Task-bound runs with an explicit SDK should stay pinned to that SDK.
2373
+ restExtra.disableFallback = true;
2374
+ }
2166
2375
  timeoutMs = clampMonitorMonitorTimeout(timeoutMs, taskKey);
2167
2376
 
2168
2377
  // No taskKey — pure ephemeral (backward compatible)
@@ -2564,6 +2773,7 @@ export async function execWithRetry(prompt, options = {}) {
2564
2773
  model,
2565
2774
  onEvent,
2566
2775
  abortController,
2776
+ ignoreSdkCooldown: attempt > 1,
2567
2777
  });
2568
2778
 
2569
2779
  // Check post-launch if aborted with idle_continue (race: abort fired during execution)
package/agent-prompts.mjs CHANGED
@@ -760,7 +760,10 @@ function asPathCandidates(pathValue, configDir, repoRoot) {
760
760
  const candidates = [];
761
761
  if (repoRoot) candidates.push(resolve(repoRoot, raw));
762
762
  if (configDir) candidates.push(resolve(configDir, raw));
763
- candidates.push(resolve(process.cwd(), raw));
763
+ // Only fall back to cwd when no explicit roots were provided.
764
+ if (candidates.length === 0) {
765
+ candidates.push(resolve(process.cwd(), raw));
766
+ }
764
767
 
765
768
  return candidates.filter((p, idx, arr) => p && arr.indexOf(p) === idx);
766
769
  }