bosun 0.33.5 → 0.33.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -161,11 +161,16 @@ function extractTaskId(pathname) {
161
161
  }
162
162
 
163
163
  function isAlreadyExitedProcessError(err) {
164
- const detail = String(err?.stderr || err?.message || "").toLowerCase();
164
+ const detail = [err?.stderr, err?.stdout, err?.message]
165
+ .map((part) => String(part || ""))
166
+ .join("\n")
167
+ .toLowerCase();
165
168
  return (
166
169
  detail.includes("no running instance of the task") ||
167
- detail.includes("not found") ||
168
- detail.includes("no such process")
170
+ detail.includes("no running instance") ||
171
+ detail.includes("no such process") ||
172
+ detail.includes("cannot find the process") ||
173
+ detail.includes("esrch")
169
174
  );
170
175
  }
171
176
 
package/agent-hooks.mjs CHANGED
@@ -366,7 +366,7 @@ export function loadHooks(configPath) {
366
366
  *
367
367
  * @example
368
368
  * const id = registerHook("PrePush", {
369
- * command: "scripts/agent-preflight.ps1",
369
+ * command: "node preflight.mjs",
370
370
  * blocking: true,
371
371
  * timeout: 300000,
372
372
  * });
@@ -610,8 +610,7 @@ export async function executeBlockingHooks(event, context = {}) {
610
610
  * that run regardless of config file contents.
611
611
  *
612
612
  * Built-in hooks:
613
- * - **PrePush** — Runs `scripts/agent-preflight.ps1` (Windows) or
614
- * `scripts/agent-preflight.sh` (Unix) to validate quality gates.
613
+ * - **PrePush** — Runs `node preflight.mjs` to validate quality gates.
615
614
  * - **TaskComplete** — Runs a basic acceptance-criteria check via git log.
616
615
  */
617
616
  export function registerBuiltinHooks(options = {}) {
@@ -644,9 +643,7 @@ export function registerBuiltinHooks(options = {}) {
644
643
 
645
644
  // ── PrePush: agent preflight quality gate ──
646
645
  if (!skipPrePush) {
647
- const preflightScript = IS_WINDOWS
648
- ? "powershell -NoProfile -ExecutionPolicy Bypass -File scripts/agent-preflight.ps1"
649
- : "bash scripts/agent-preflight.sh";
646
+ const preflightScript = "node preflight.mjs";
650
647
 
651
648
  registerHook("PrePush", {
652
649
  id: "builtin-prepush-preflight",
package/monitor.mjs CHANGED
@@ -20,6 +20,7 @@ import { clearLine, createInterface, cursorTo } from "node:readline";
20
20
  import net from "node:net";
21
21
  import { resolve } from "node:path";
22
22
  import { fileURLToPath } from "node:url";
23
+ import { isMainThread } from "node:worker_threads";
23
24
 
24
25
  // Node.js Happy Eyeballs (RFC 8305) tries IPv6 first with a 250ms timeout
25
26
  // before falling back to IPv4. On networks where IPv6 is unreachable, the
@@ -566,7 +567,9 @@ let githubReconcile = githubReconcileConfig || {
566
567
  // trusted git directory, preventing "Not inside a trusted directory" errors.
567
568
  // Prefer agentRepoRoot (workspace-aware) over raw repoRoot.
568
569
  const effectiveRepoRoot = agentRepoRoot || repoRoot;
569
- if (effectiveRepoRoot && process.cwd() !== effectiveRepoRoot) {
570
+ if (!isMainThread) {
571
+ // Worker threads cannot call process.chdir(); skip to avoid noisy warnings.
572
+ } else if (effectiveRepoRoot && process.cwd() !== effectiveRepoRoot) {
570
573
  try {
571
574
  process.chdir(effectiveRepoRoot);
572
575
  console.log(`[monitor] changed CWD to repo root: ${effectiveRepoRoot}`);
@@ -11849,16 +11852,32 @@ function attemptSelfRestartAfterQuiet() {
11849
11852
  const protection = getRuntimeRestartProtection();
11850
11853
  if (protection.defer) {
11851
11854
  pendingSelfRestart = filename;
11852
- // Track how many times we've deferred. Never force-restart when internal
11853
- // task agents are active; just keep retrying with periodic reminders.
11854
11855
  const deferCount = (selfRestartDeferCount =
11855
11856
  (selfRestartDeferCount || 0) + 1);
11856
11857
  const retrySec = Math.round(SELF_RESTART_RETRY_MS / 1000);
11857
- if (deferCount >= 20) {
11858
+
11859
+ // Hard cap: after many deferrals the active agent is likely stuck.
11860
+ // Force-stop the task executor and proceed with the restart so the
11861
+ // monitor doesn't hang forever (or crash from resource exhaustion).
11862
+ const SELF_RESTART_DEFER_HARD_CAP = Number(
11863
+ process.env.SELF_RESTART_DEFER_HARD_CAP || "50",
11864
+ );
11865
+ if (deferCount >= SELF_RESTART_DEFER_HARD_CAP) {
11858
11866
  console.warn(
11859
- `[monitor] self-restart deferred ${deferCount} times still waiting for ${protection.reason}; continuing to defer`,
11867
+ `[monitor] self-restart deferred ${deferCount} times (hard cap ${SELF_RESTART_DEFER_HARD_CAP}) force-stopping active agents and restarting`,
11860
11868
  );
11869
+ if (internalTaskExecutor) {
11870
+ internalTaskExecutor.stop().catch(() => {});
11871
+ }
11861
11872
  selfRestartDeferCount = 0;
11873
+ selfRestartForSourceChange(filename);
11874
+ return;
11875
+ }
11876
+
11877
+ if (deferCount % 20 === 0) {
11878
+ console.warn(
11879
+ `[monitor] self-restart deferred ${deferCount} times — still waiting for ${protection.reason}; continuing to defer`,
11880
+ );
11862
11881
  }
11863
11882
  console.log(
11864
11883
  `[monitor] deferring self-restart (${filename}) — ${protection.reason}; retrying in ${retrySec}s (defer #${deferCount})`,
@@ -12456,6 +12475,15 @@ process.on("unhandledRejection", (reason) => {
12456
12475
 
12457
12476
  // ── Singleton guard: prevent ghost monitors ─────────────────────────────────
12458
12477
  if (!process.env.VITEST && !acquireMonitorLock(config.cacheDir)) {
12478
+ // During source-change self-restart, the previous monitor can still be
12479
+ // shutting down and holding the lock briefly. Ask cli.mjs to retry instead
12480
+ // of treating this as a hard crash.
12481
+ if (isSelfRestart) {
12482
+ console.warn(
12483
+ "[monitor] self-restart lock handoff still busy — retrying startup",
12484
+ );
12485
+ process.exit(SELF_RESTART_EXIT_CODE);
12486
+ }
12459
12487
  process.exit(1);
12460
12488
  }
12461
12489
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bosun",
3
- "version": "0.33.5",
3
+ "version": "0.33.6",
4
4
  "description": "AI-powered orchestrator supervisor — manages AI agent executors with failover, auto-restarts on failure, analyzes crashes with Codex SDK, creates PRs via Vibe-Kanban API, and sends Telegram notifications. Supports N executors with weighted distribution, multi-repo projects, and auto-setup.",
5
5
  "type": "module",
6
6
  "license": "Apache 2.0",
@@ -750,12 +750,22 @@ export class SessionTracker {
750
750
  if (!event || !event.type) return null;
751
751
 
752
752
  const ts = new Date().toISOString();
753
+ const toText = (value) => {
754
+ if (value == null) return "";
755
+ if (typeof value === "string") return value;
756
+ try {
757
+ return JSON.stringify(value);
758
+ } catch {
759
+ return String(value);
760
+ }
761
+ };
753
762
 
754
763
  // ── Codex SDK events ──
755
764
  if (event.type === "item.completed" && event.item) {
756
765
  const item = event.item;
766
+ const itemType = String(item.type || "").toLowerCase();
757
767
 
758
- if (item.type === "agent_message" && item.text) {
768
+ if (itemType === "agent_message" && item.text) {
759
769
  return {
760
770
  type: "agent_message",
761
771
  content: item.text.slice(0, MAX_MESSAGE_CHARS),
@@ -763,7 +773,7 @@ export class SessionTracker {
763
773
  };
764
774
  }
765
775
 
766
- if (item.type === "function_call") {
776
+ if (itemType === "function_call") {
767
777
  return {
768
778
  type: "tool_call",
769
779
  content: `${item.name}(${(item.arguments || "").slice(0, 500)})`,
@@ -772,7 +782,7 @@ export class SessionTracker {
772
782
  };
773
783
  }
774
784
 
775
- if (item.type === "function_call_output") {
785
+ if (itemType === "function_call_output") {
776
786
  return {
777
787
  type: "tool_result",
778
788
  content: (item.output || "").slice(0, MAX_MESSAGE_CHARS),
@@ -780,9 +790,108 @@ export class SessionTracker {
780
790
  };
781
791
  }
782
792
 
793
+ if (itemType === "command_execution" || itemType === "commandexecution") {
794
+ const command = toText(item.command || item.input || "").trim();
795
+ const exitCode = Number.isFinite(Number(item.exit_code)) ? Number(item.exit_code) : null;
796
+ const status = toText(item.status || "").trim();
797
+ const statusParts = [];
798
+ if (status) statusParts.push(status);
799
+ if (exitCode !== null) statusParts.push(`exit=${exitCode}`);
800
+ const statusLabel = statusParts.length ? ` [${statusParts.join(", ")}]` : "";
801
+ const output = toText(
802
+ item.aggregated_output || item.output || item.stderr || item.stdout || "",
803
+ ).trim();
804
+ const content = output
805
+ ? `${command || "(command)"}${statusLabel}
806
+ ${output}`
807
+ : `${command || "(command)"}${statusLabel}`;
808
+ return {
809
+ type: "tool_call",
810
+ content: content.slice(0, MAX_MESSAGE_CHARS),
811
+ timestamp: ts,
812
+ meta: { toolName: "command_execution" },
813
+ };
814
+ }
815
+
816
+ if (itemType === "reasoning") {
817
+ const detail = toText(item.text || item.summary || "");
818
+ if (!detail) return null;
819
+ return {
820
+ type: "system",
821
+ content: detail.slice(0, MAX_MESSAGE_CHARS),
822
+ timestamp: ts,
823
+ };
824
+ }
825
+
826
+ if (itemType === "file_change") {
827
+ const changes = Array.isArray(item.changes)
828
+ ? item.changes
829
+ .map((change) => {
830
+ const kind = toText(change?.kind || "update").trim();
831
+ const filePath = toText(change?.path || change?.file || "").trim();
832
+ return filePath ? `${kind} ${filePath}` : kind;
833
+ })
834
+ .filter(Boolean)
835
+ : [];
836
+ const summary = changes.length
837
+ ? `file changes: ${changes.slice(0, 5).join(", ")}`
838
+ : "file changes detected";
839
+ return {
840
+ type: "system",
841
+ content: summary.slice(0, MAX_MESSAGE_CHARS),
842
+ timestamp: ts,
843
+ };
844
+ }
845
+
846
+ if (itemType === "todo_list") {
847
+ const items = Array.isArray(item.items)
848
+ ? item.items
849
+ .map((entry) => {
850
+ const detail = toText(entry?.text || "").trim();
851
+ if (!detail) return "";
852
+ return `${entry?.completed ? "[x]" : "[ ]"} ${detail}`;
853
+ })
854
+ .filter(Boolean)
855
+ : [];
856
+ const summary = items.length ? `todo:
857
+ ${items.join("\n")}` : "todo updated";
858
+ return {
859
+ type: "system",
860
+ content: summary.slice(0, MAX_MESSAGE_CHARS),
861
+ timestamp: ts,
862
+ };
863
+ }
864
+
865
+ if (item.text || item.content) {
866
+ const fallback = toText(item.text || item.content);
867
+ if (fallback) {
868
+ return {
869
+ type: "system",
870
+ content: fallback.slice(0, MAX_MESSAGE_CHARS),
871
+ timestamp: ts,
872
+ };
873
+ }
874
+ }
875
+
783
876
  return null; // Skip other item types
784
877
  }
785
878
 
879
+ if (event.type === "assistant.message" && event.data?.content) {
880
+ return {
881
+ type: "agent_message",
882
+ content: toText(event.data.content).slice(0, MAX_MESSAGE_CHARS),
883
+ timestamp: ts,
884
+ };
885
+ }
886
+
887
+ if (event.type === "assistant.message_delta" && event.data?.deltaContent) {
888
+ return {
889
+ type: "agent_message",
890
+ content: toText(event.data.deltaContent).slice(0, MAX_MESSAGE_CHARS),
891
+ timestamp: ts,
892
+ };
893
+ }
894
+
786
895
  // ── Copilot SDK events ──
787
896
  if (event.type === "message" && event.content) {
788
897
  return {
package/task-claims.mjs CHANGED
@@ -868,7 +868,15 @@ export async function renewClaim(opts = {}) {
868
868
  state.repoRoot
869
869
  );
870
870
  if (!sharedResult.success) {
871
- console.info(`[task-claims] Shared state heartbeat renewal warning for ${taskId}: ${sharedResult.reason}`);
871
+ const reason = sharedResult.reason || "unknown";
872
+ // Token mismatch in shared state means another orchestrator has taken
873
+ // over — surface as a fatal claim renewal failure so the task-executor
874
+ // can abort the now-orphaned agent instead of letting it run forever.
875
+ if (reason === "attempt_token_mismatch" || reason === "owner_mismatch") {
876
+ console.warn(`[task-claims] Shared state heartbeat FATAL for ${taskId}: ${reason} — surfacing as claim failure`);
877
+ return { success: false, error: reason };
878
+ }
879
+ console.info(`[task-claims] Shared state heartbeat renewal warning for ${taskId}: ${reason}`);
872
880
  }
873
881
  } catch (err) {
874
882
  console.warn(`[task-claims] Shared state heartbeat renewal failed for ${taskId}: ${err.message}`);
package/task-executor.mjs CHANGED
@@ -109,6 +109,11 @@ const FATAL_CLAIM_RENEW_ERRORS = new Set([
109
109
  "task_claimed_by_different_instance",
110
110
  "claim_token_mismatch",
111
111
  "task_not_claimed",
112
+ // Shared-state token mismatches — another orchestrator instance has taken
113
+ // over the task in the shared state registry. Treat exactly the same as a
114
+ // direct claim-token mismatch: abort the now-orphaned local agent.
115
+ "attempt_token_mismatch",
116
+ "owner_mismatch",
112
117
  ]);
113
118
  const CODEX_TASK_LABELS = (() => {
114
119
  const raw = String(
package/ui/app.js CHANGED
@@ -1643,7 +1643,6 @@ function App() {
1643
1643
  setSidebarDrawerOpen(false);
1644
1644
  setInspectorDrawerOpen(false);
1645
1645
  }, []);
1646
- const sidebarToggleLabel = sidebarDrawerOpen ? "Close sidebar" : "Open sidebar";
1647
1646
  const inspectorToggleLabel = inspectorDrawerOpen
1648
1647
  ? "Close inspector"
1649
1648
  : "Open inspector";
@@ -1664,9 +1663,9 @@ function App() {
1664
1663
  <button
1665
1664
  class="btn btn-ghost btn-sm tablet-toggle"
1666
1665
  onClick=${toggleMore}
1667
- aria-label=${isMoreOpen ? "Close more menu" : "Open more menu"}
1666
+ aria-label=${isMoreOpen ? "Close navigation menu" : "Open navigation menu"}
1668
1667
  >
1669
- More
1668
+ Navigation
1670
1669
  </button>
1671
1670
  `
1672
1671
  : null;
@@ -1726,13 +1725,6 @@ function App() {
1726
1725
  ${showDrawerToggles
1727
1726
  ? html`
1728
1727
  <div class="tablet-action-bar">
1729
- <button
1730
- class="btn btn-ghost btn-sm tablet-toggle"
1731
- onClick=${toggleSidebar}
1732
- aria-label=${sidebarToggleLabel}
1733
- >
1734
- ☰ Navigation
1735
- </button>
1736
1728
  ${inspectorToggleButton}
1737
1729
  ${moreToggleButton}
1738
1730
  </div>
@@ -786,7 +786,7 @@ const COMMAND_PRESETS = {
786
786
  { label: "Push", cmd: "git push --set-upstream origin HEAD", icon: "rocket" },
787
787
  ],
788
788
  github: [
789
- { label: "Check CI", cmd: "gh pr checks --json name,state,conclusion", icon: "search" },
789
+ { label: "Check CI", cmd: "gh pr checks --json name,state", icon: "search" },
790
790
  { label: "Merge PR (squash)", cmd: "gh pr merge --auto --squash", icon: "git" },
791
791
  { label: "Close PR", cmd: 'gh pr close --comment "{{reason}}"', icon: "ban" },
792
792
  { label: "PR Diff", cmd: "gh pr diff --stat", icon: "chart" },
package/ui-server.mjs CHANGED
@@ -250,6 +250,7 @@ async function handleVendor(req, res, url) {
250
250
  }
251
251
  const statusPath = resolve(repoRoot, ".cache", "ve-orchestrator-status.json");
252
252
  const logsDir = resolve(__dirname, "logs");
253
+ const monitorMonitorLogsDir = resolve(repoRoot, ".cache", "monitor-monitor-logs");
253
254
  const agentLogsDirCandidates = [
254
255
  resolve(__dirname, "logs", "agents"),
255
256
  resolve(repoRoot, ".cache", "agent-logs"),
@@ -2255,15 +2256,55 @@ function normalizeAgentLogName(name) {
2255
2256
  return basename(String(name || "")).trim();
2256
2257
  }
2257
2258
 
2259
+ async function listDirFilesWithMtime(dir, predicate = () => true) {
2260
+ const names = await readdir(dir).catch(() => []);
2261
+ const entries = await Promise.all(
2262
+ names
2263
+ .filter((name) => predicate(name))
2264
+ .map(async (name) => {
2265
+ const fullPath = resolve(dir, name);
2266
+ const info = await stat(fullPath).catch(() => null);
2267
+ if (!info?.isFile?.()) return null;
2268
+ return {
2269
+ name,
2270
+ path: fullPath,
2271
+ mtimeMs: Number(info.mtimeMs || 0),
2272
+ };
2273
+ }),
2274
+ );
2275
+ return entries.filter(Boolean);
2276
+ }
2277
+
2278
+ async function resolvePreferredSystemLogPath() {
2279
+ const rootLogEntries = await listDirFilesWithMtime(
2280
+ logsDir,
2281
+ (name) => name.endsWith(".log"),
2282
+ );
2283
+ const nonDaemonEntries = rootLogEntries.filter((entry) => entry.name !== "daemon.log");
2284
+
2285
+ const monitorPromptEntries = await listDirFilesWithMtime(
2286
+ monitorMonitorLogsDir,
2287
+ (name) =>
2288
+ name.startsWith("monitor-monitor-") &&
2289
+ (name.endsWith(".prompt.md") || name.endsWith(".md")),
2290
+ );
2291
+
2292
+ const preferredEntries = [...nonDaemonEntries, ...monitorPromptEntries].sort(
2293
+ (a, b) => b.mtimeMs - a.mtimeMs,
2294
+ );
2295
+ if (preferredEntries.length > 0) return preferredEntries[0].path;
2296
+
2297
+ const daemonEntry = rootLogEntries.find((entry) => entry.name === "daemon.log");
2298
+ return daemonEntry ? daemonEntry.path : null;
2299
+ }
2300
+
2258
2301
  /**
2259
2302
  * Resolve the log file path for a given logType and optional query.
2260
2303
  * Returns null if no matching file found.
2261
2304
  */
2262
2305
  async function resolveLogPath(logType, query) {
2263
2306
  if (logType === "system") {
2264
- const files = await readdir(logsDir).catch(() => []);
2265
- const logFile = files.filter((f) => f.endsWith(".log")).sort().pop();
2266
- return logFile ? resolve(logsDir, logFile) : null;
2307
+ return resolvePreferredSystemLogPath();
2267
2308
  }
2268
2309
  if (logType === "agent") {
2269
2310
  const agentLogsDir = await resolveAgentLogsDir();
@@ -3252,17 +3293,10 @@ function normalizeBranchInput(input) {
3252
3293
  }
3253
3294
 
3254
3295
  async function getLatestLogTail(lineCount) {
3255
- const files = await readdir(logsDir).catch(() => []);
3256
- const logFile = files
3257
- .filter((f) => f.endsWith(".log"))
3258
- .sort()
3259
- .pop();
3260
- if (!logFile) return { file: null, lines: [] };
3261
- const logPath = resolve(logsDir, logFile);
3262
- const content = await readFile(logPath, "utf8");
3263
- const lines = content.split("\n").filter(Boolean);
3264
- const tail = lines.slice(-lineCount);
3265
- return { file: logFile, lines: tail };
3296
+ const logPath = await resolvePreferredSystemLogPath();
3297
+ if (!logPath) return { file: null, lines: [] };
3298
+ const tail = await tailFile(logPath, lineCount);
3299
+ return { file: basename(logPath), lines: tail.lines || [] };
3266
3300
  }
3267
3301
 
3268
3302
  async function tailFile(filePath, lineCount, maxBytes = 1_000_000) {
package/update-check.mjs CHANGED
@@ -26,6 +26,11 @@ import os from "node:os";
26
26
  const __dirname = dirname(fileURLToPath(import.meta.url));
27
27
  const PKG_NAME = "bosun";
28
28
  const CACHE_FILE = resolve(__dirname, "logs", ".update-check-cache.json");
29
+ const AUTO_UPDATE_STATE_FILE = resolve(__dirname, ".cache", "auto-update-state.json");
30
+ const AUTO_UPDATE_FAILURE_LIMIT =
31
+ Number(process.env.BOSUN_AUTO_UPDATE_FAILURE_LIMIT) || 3;
32
+ const AUTO_UPDATE_DISABLE_WINDOW_MS =
33
+ Number(process.env.BOSUN_AUTO_UPDATE_DISABLE_WINDOW_MS) || 24 * 60 * 60 * 1000;
29
34
  const STARTUP_CHECK_INTERVAL_MS = 60 * 60 * 1000; // 1 hour (startup notice)
30
35
  const AUTO_UPDATE_INTERVAL_MS = 10 * 60 * 1000; // 10 minutes (polling loop)
31
36
 
@@ -111,6 +116,79 @@ async function writeCache(data) {
111
116
  }
112
117
  }
113
118
 
119
+ const defaultAutoUpdateState = {
120
+ failureCount: 0,
121
+ lastFailureReason: null,
122
+ disabledUntil: 0,
123
+ lastNotifiedAt: 0,
124
+ };
125
+
126
+ async function readAutoUpdateState() {
127
+ try {
128
+ const raw = await readFile(AUTO_UPDATE_STATE_FILE, "utf8");
129
+ const parsed = JSON.parse(raw);
130
+ return { ...defaultAutoUpdateState, ...parsed };
131
+ } catch {
132
+ return { ...defaultAutoUpdateState };
133
+ }
134
+ }
135
+
136
+ async function writeAutoUpdateState(state) {
137
+ try {
138
+ await mkdir(dirname(AUTO_UPDATE_STATE_FILE), { recursive: true });
139
+ await writeFile(
140
+ AUTO_UPDATE_STATE_FILE,
141
+ JSON.stringify({ ...defaultAutoUpdateState, ...state }, null, 2),
142
+ );
143
+ } catch {
144
+ // non-critical
145
+ }
146
+ }
147
+
148
+ async function resetAutoUpdateState() {
149
+ await writeAutoUpdateState({ ...defaultAutoUpdateState });
150
+ return { ...defaultAutoUpdateState };
151
+ }
152
+
153
+ function classifyInstallError(err) {
154
+ const message = err?.message || String(err || "");
155
+ const code = err?.code || "";
156
+ if (code === "EINVAL" || message.includes("EINVAL")) return "EINVAL";
157
+ if (code) return code;
158
+ return message.slice(0, 160) || "unknown";
159
+ }
160
+
161
+ async function recordAutoUpdateFailure(state, reason) {
162
+ const now = Date.now();
163
+ const next = {
164
+ ...defaultAutoUpdateState,
165
+ ...state,
166
+ failureCount: (state?.failureCount || 0) + 1,
167
+ lastFailureReason: reason,
168
+ };
169
+
170
+ if (!next.disabledUntil && next.failureCount >= AUTO_UPDATE_FAILURE_LIMIT) {
171
+ next.disabledUntil = now + AUTO_UPDATE_DISABLE_WINDOW_MS;
172
+ next.lastNotifiedAt = 0;
173
+ }
174
+
175
+ await writeAutoUpdateState(next);
176
+ return next;
177
+ }
178
+
179
+ function isAutoUpdateDisabled(state, now = Date.now()) {
180
+ return Boolean(state?.disabledUntil && now < state.disabledUntil);
181
+ }
182
+
183
+ function buildDisableNotice(state) {
184
+ const hours = Math.round(AUTO_UPDATE_DISABLE_WINDOW_MS / (60 * 60 * 1000));
185
+ const reason = state?.lastFailureReason || "unknown";
186
+ return [
187
+ `[auto-update] ⛔ Disabled for ${hours}h after ${state?.failureCount || 0} failures (last: ${reason}).`,
188
+ "Recovery: set BOSUN_SKIP_AUTO_UPDATE=1 or delete .cache/auto-update-state.json then restart.",
189
+ ].join(' ');
190
+ }
191
+
114
192
  // ── Registry query ───────────────────────────────────────────────────────────
115
193
 
116
194
  async function fetchLatestVersion() {
@@ -324,6 +402,7 @@ export function startAutoUpdateLoop(opts = {}) {
324
402
  `[auto-update] Polling every ${Math.round(intervalMs / 1000 / 60)} min for upstream changes`,
325
403
  );
326
404
 
405
+
327
406
  async function poll() {
328
407
  // Safety check: Is parent process still alive?
329
408
  if (!isParentAlive()) {
@@ -336,17 +415,34 @@ export function startAutoUpdateLoop(opts = {}) {
336
415
 
337
416
  if (autoUpdateRunning) return;
338
417
  autoUpdateRunning = true;
418
+
419
+ let state = await readAutoUpdateState();
420
+ const now = Date.now();
421
+
339
422
  try {
423
+ if (isAutoUpdateDisabled(state, now)) {
424
+ if (!state.lastNotifiedAt) {
425
+ const notice = buildDisableNotice(state);
426
+ onNotify(notice);
427
+ console.log(notice);
428
+ state = { ...state, lastNotifiedAt: now };
429
+ await writeAutoUpdateState(state);
430
+ }
431
+ return;
432
+ }
433
+
434
+ if (state.disabledUntil && now >= state.disabledUntil) {
435
+ state = await resetAutoUpdateState();
436
+ }
437
+
340
438
  const currentVersion = getCurrentVersion();
341
439
  const latest = await fetchLatestVersion();
342
440
 
343
441
  if (!latest) {
344
- autoUpdateRunning = false;
345
442
  return; // registry unreachable — try again next cycle
346
443
  }
347
444
 
348
445
  if (!isNewer(latest, currentVersion)) {
349
- autoUpdateRunning = false;
350
446
  return; // already up to date
351
447
  }
352
448
 
@@ -364,7 +460,19 @@ export function startAutoUpdateLoop(opts = {}) {
364
460
  const errMsg = `[auto-update] ❌ Install failed: ${installErr.message || installErr}`;
365
461
  console.error(errMsg);
366
462
  onNotify(errMsg);
367
- autoUpdateRunning = false;
463
+
464
+ const updatedState = await recordAutoUpdateFailure(
465
+ state,
466
+ classifyInstallError(installErr),
467
+ );
468
+
469
+ if (updatedState.disabledUntil && !updatedState.lastNotifiedAt) {
470
+ const notice = buildDisableNotice(updatedState);
471
+ onNotify(notice);
472
+ console.log(notice);
473
+ updatedState.lastNotifiedAt = Date.now();
474
+ await writeAutoUpdateState(updatedState);
475
+ }
368
476
  return;
369
477
  }
370
478
 
@@ -374,11 +482,11 @@ export function startAutoUpdateLoop(opts = {}) {
374
482
  const errMsg = `[auto-update] ⚠️ Install ran but version unchanged (${newVersion}). Skipping restart.`;
375
483
  console.warn(errMsg);
376
484
  onNotify(errMsg);
377
- autoUpdateRunning = false;
378
485
  return;
379
486
  }
380
487
 
381
488
  await writeCache({ lastCheck: Date.now(), latestVersion: latest });
489
+ await resetAutoUpdateState();
382
490
 
383
491
  const successMsg = `[auto-update] ✅ Updated to v${latest}. Restarting...`;
384
492
  console.log(successMsg);
@@ -519,3 +627,17 @@ function promptConfirm(question) {
519
627
  });
520
628
  });
521
629
  }
630
+
631
+
632
+ export const __autoUpdateTestHooks = {
633
+ readAutoUpdateState,
634
+ writeAutoUpdateState,
635
+ resetAutoUpdateState,
636
+ recordAutoUpdateFailure,
637
+ isAutoUpdateDisabled,
638
+ classifyInstallError,
639
+ buildDisableNotice,
640
+ AUTO_UPDATE_STATE_FILE,
641
+ AUTO_UPDATE_FAILURE_LIMIT,
642
+ AUTO_UPDATE_DISABLE_WINDOW_MS,
643
+ };
@@ -39,7 +39,7 @@ export const PR_MERGE_STRATEGY_TEMPLATE = {
39
39
  }, { x: 400, y: 50 }),
40
40
 
41
41
  node("check-ci", "validation.build", "Check CI Status", {
42
- command: "gh pr checks {{prNumber}} --json name,state,conclusion",
42
+ command: "gh pr checks {{prNumber}} --json name,state",
43
43
  }, { x: 150, y: 200 }),
44
44
 
45
45
  node("get-diff", "action.run_command", "Get Diff Stats", {
@@ -47,7 +47,8 @@ export const PR_MERGE_STRATEGY_TEMPLATE = {
47
47
  }, { x: 650, y: 200 }),
48
48
 
49
49
  node("ci-passed", "condition.expression", "CI Passed?", {
50
- expression: "$ctx.getNodeOutput('check-ci')?.passed === true",
50
+ expression:
51
+ "(() => { const out = $ctx.getNodeOutput('check-ci'); if (!out || out.passed !== true) return false; let checks = []; try { checks = JSON.parse(out.output || '[]'); } catch { return false; } if (!Array.isArray(checks) || checks.length === 0) return false; const ok = new Set(['SUCCESS', 'PASSED', 'PASS', 'COMPLETED', 'NEUTRAL', 'SKIPPED']); return checks.every((c) => ok.has(String(c?.state || '').toUpperCase())); })()",
51
52
  }, { x: 150, y: 350, outputs: ["yes", "no"] }),
52
53
 
53
54
  node("wait-for-ci", "action.delay", "Wait for CI", {
@@ -294,7 +295,7 @@ Only fix conflicts, do NOT change any logic. Keep changes minimal.`,
294
295
  }, { x: 200, y: 500 }),
295
296
 
296
297
  node("verify-ci", "action.run_command", "Verify CI Green", {
297
- command: "gh pr checks --json name,state,conclusion | head -20",
298
+ command: "gh pr checks --json name,state",
298
299
  }, { x: 200, y: 660 }),
299
300
 
300
301
  node("auto-merge", "condition.expression", "CI Passed?", {
@@ -301,17 +301,17 @@ export const HEALTH_CHECK_TEMPLATE = {
301
301
  }, { x: 150, y: 200 }),
302
302
 
303
303
  node("check-git", "action.run_command", "Check Git State", {
304
- command: "git status --porcelain && git worktree list --porcelain | grep -c worktree",
304
+ command: "node -e \"const cp=require('node:child_process');const status=cp.execSync('git status --porcelain',{encoding:'utf8'});const wt=cp.execSync('git worktree list --porcelain',{encoding:'utf8'});const count=(wt.match(/^worktree /gm)||[]).length;process.stdout.write(status + (status.endsWith('\\\\n') ? '' : '\\\\n') + count + '\\\\n');\"",
305
305
  continueOnError: true,
306
306
  }, { x: 400, y: 200 }),
307
307
 
308
308
  node("check-agents", "action.run_command", "Check Agent Status", {
309
- command: "bosun --daemon-status 2>/dev/null || echo 'daemon not running'",
309
+ command: "node -e \"const cp=require('node:child_process');try{process.stdout.write(cp.execSync('bosun --daemon-status',{encoding:'utf8'}));}catch{process.stdout.write('daemon not running\\\\n');}\"",
310
310
  continueOnError: true,
311
311
  }, { x: 650, y: 200 }),
312
312
 
313
313
  node("has-issues", "condition.expression", "Any Issues?", {
314
- expression: "($ctx.getNodeOutput('check-config')?.output || '').includes('ERROR') || ($ctx.getNodeOutput('check-config')?.output || '').includes('CRITICAL')",
314
+ expression: "($ctx.getNodeOutput('check-config')?.success === false) || (($ctx.getNodeOutput('check-config')?.output || '').includes('ERROR')) || (($ctx.getNodeOutput('check-config')?.output || '').includes('CRITICAL')) || ($ctx.getNodeOutput('check-git')?.success === false) || ($ctx.getNodeOutput('check-agents')?.success === false)",
315
315
  }, { x: 400, y: 380 }),
316
316
 
317
317
  node("alert", "notify.telegram", "Alert Issues Found", {
@@ -349,4 +349,3 @@ export const HEALTH_CHECK_TEMPLATE = {
349
349
  },
350
350
  },
351
351
  };
352
-