bosun 0.33.5 → 0.33.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agent-endpoint.mjs +8 -3
- package/agent-hooks.mjs +3 -6
- package/monitor.mjs +33 -5
- package/package.json +1 -1
- package/session-tracker.mjs +112 -3
- package/task-claims.mjs +9 -1
- package/task-executor.mjs +5 -0
- package/ui/app.js +2 -10
- package/ui/tabs/workflows.js +1 -1
- package/ui-server.mjs +48 -14
- package/update-check.mjs +126 -4
- package/workflow-templates/github.mjs +4 -3
- package/workflow-templates/reliability.mjs +3 -4
package/agent-endpoint.mjs
CHANGED
|
@@ -161,11 +161,16 @@ function extractTaskId(pathname) {
|
|
|
161
161
|
}
|
|
162
162
|
|
|
163
163
|
function isAlreadyExitedProcessError(err) {
|
|
164
|
-
const detail =
|
|
164
|
+
const detail = [err?.stderr, err?.stdout, err?.message]
|
|
165
|
+
.map((part) => String(part || ""))
|
|
166
|
+
.join("\n")
|
|
167
|
+
.toLowerCase();
|
|
165
168
|
return (
|
|
166
169
|
detail.includes("no running instance of the task") ||
|
|
167
|
-
detail.includes("
|
|
168
|
-
detail.includes("no such process")
|
|
170
|
+
detail.includes("no running instance") ||
|
|
171
|
+
detail.includes("no such process") ||
|
|
172
|
+
detail.includes("cannot find the process") ||
|
|
173
|
+
detail.includes("esrch")
|
|
169
174
|
);
|
|
170
175
|
}
|
|
171
176
|
|
package/agent-hooks.mjs
CHANGED
|
@@ -366,7 +366,7 @@ export function loadHooks(configPath) {
|
|
|
366
366
|
*
|
|
367
367
|
* @example
|
|
368
368
|
* const id = registerHook("PrePush", {
|
|
369
|
-
* command: "
|
|
369
|
+
* command: "node preflight.mjs",
|
|
370
370
|
* blocking: true,
|
|
371
371
|
* timeout: 300000,
|
|
372
372
|
* });
|
|
@@ -610,8 +610,7 @@ export async function executeBlockingHooks(event, context = {}) {
|
|
|
610
610
|
* that run regardless of config file contents.
|
|
611
611
|
*
|
|
612
612
|
* Built-in hooks:
|
|
613
|
-
* - **PrePush** — Runs `
|
|
614
|
-
* `scripts/agent-preflight.sh` (Unix) to validate quality gates.
|
|
613
|
+
* - **PrePush** — Runs `node preflight.mjs` to validate quality gates.
|
|
615
614
|
* - **TaskComplete** — Runs a basic acceptance-criteria check via git log.
|
|
616
615
|
*/
|
|
617
616
|
export function registerBuiltinHooks(options = {}) {
|
|
@@ -644,9 +643,7 @@ export function registerBuiltinHooks(options = {}) {
|
|
|
644
643
|
|
|
645
644
|
// ── PrePush: agent preflight quality gate ──
|
|
646
645
|
if (!skipPrePush) {
|
|
647
|
-
const preflightScript =
|
|
648
|
-
? "powershell -NoProfile -ExecutionPolicy Bypass -File scripts/agent-preflight.ps1"
|
|
649
|
-
: "bash scripts/agent-preflight.sh";
|
|
646
|
+
const preflightScript = "node preflight.mjs";
|
|
650
647
|
|
|
651
648
|
registerHook("PrePush", {
|
|
652
649
|
id: "builtin-prepush-preflight",
|
package/monitor.mjs
CHANGED
|
@@ -20,6 +20,7 @@ import { clearLine, createInterface, cursorTo } from "node:readline";
|
|
|
20
20
|
import net from "node:net";
|
|
21
21
|
import { resolve } from "node:path";
|
|
22
22
|
import { fileURLToPath } from "node:url";
|
|
23
|
+
import { isMainThread } from "node:worker_threads";
|
|
23
24
|
|
|
24
25
|
// Node.js Happy Eyeballs (RFC 8305) tries IPv6 first with a 250ms timeout
|
|
25
26
|
// before falling back to IPv4. On networks where IPv6 is unreachable, the
|
|
@@ -566,7 +567,9 @@ let githubReconcile = githubReconcileConfig || {
|
|
|
566
567
|
// trusted git directory, preventing "Not inside a trusted directory" errors.
|
|
567
568
|
// Prefer agentRepoRoot (workspace-aware) over raw repoRoot.
|
|
568
569
|
const effectiveRepoRoot = agentRepoRoot || repoRoot;
|
|
569
|
-
if (effectiveRepoRoot && process.cwd() !== effectiveRepoRoot) {
|
|
570
|
+
if (!isMainThread) {
|
|
571
|
+
// Worker threads cannot call process.chdir(); skip to avoid noisy warnings.
|
|
572
|
+
} else if (effectiveRepoRoot && process.cwd() !== effectiveRepoRoot) {
|
|
570
573
|
try {
|
|
571
574
|
process.chdir(effectiveRepoRoot);
|
|
572
575
|
console.log(`[monitor] changed CWD to repo root: ${effectiveRepoRoot}`);
|
|
@@ -11849,16 +11852,32 @@ function attemptSelfRestartAfterQuiet() {
|
|
|
11849
11852
|
const protection = getRuntimeRestartProtection();
|
|
11850
11853
|
if (protection.defer) {
|
|
11851
11854
|
pendingSelfRestart = filename;
|
|
11852
|
-
// Track how many times we've deferred. Never force-restart when internal
|
|
11853
|
-
// task agents are active; just keep retrying with periodic reminders.
|
|
11854
11855
|
const deferCount = (selfRestartDeferCount =
|
|
11855
11856
|
(selfRestartDeferCount || 0) + 1);
|
|
11856
11857
|
const retrySec = Math.round(SELF_RESTART_RETRY_MS / 1000);
|
|
11857
|
-
|
|
11858
|
+
|
|
11859
|
+
// Hard cap: after many deferrals the active agent is likely stuck.
|
|
11860
|
+
// Force-stop the task executor and proceed with the restart so the
|
|
11861
|
+
// monitor doesn't hang forever (or crash from resource exhaustion).
|
|
11862
|
+
const SELF_RESTART_DEFER_HARD_CAP = Number(
|
|
11863
|
+
process.env.SELF_RESTART_DEFER_HARD_CAP || "50",
|
|
11864
|
+
);
|
|
11865
|
+
if (deferCount >= SELF_RESTART_DEFER_HARD_CAP) {
|
|
11858
11866
|
console.warn(
|
|
11859
|
-
`[monitor] self-restart deferred ${deferCount} times
|
|
11867
|
+
`[monitor] self-restart deferred ${deferCount} times (hard cap ${SELF_RESTART_DEFER_HARD_CAP}) — force-stopping active agents and restarting`,
|
|
11860
11868
|
);
|
|
11869
|
+
if (internalTaskExecutor) {
|
|
11870
|
+
internalTaskExecutor.stop().catch(() => {});
|
|
11871
|
+
}
|
|
11861
11872
|
selfRestartDeferCount = 0;
|
|
11873
|
+
selfRestartForSourceChange(filename);
|
|
11874
|
+
return;
|
|
11875
|
+
}
|
|
11876
|
+
|
|
11877
|
+
if (deferCount % 20 === 0) {
|
|
11878
|
+
console.warn(
|
|
11879
|
+
`[monitor] self-restart deferred ${deferCount} times — still waiting for ${protection.reason}; continuing to defer`,
|
|
11880
|
+
);
|
|
11862
11881
|
}
|
|
11863
11882
|
console.log(
|
|
11864
11883
|
`[monitor] deferring self-restart (${filename}) — ${protection.reason}; retrying in ${retrySec}s (defer #${deferCount})`,
|
|
@@ -12456,6 +12475,15 @@ process.on("unhandledRejection", (reason) => {
|
|
|
12456
12475
|
|
|
12457
12476
|
// ── Singleton guard: prevent ghost monitors ─────────────────────────────────
|
|
12458
12477
|
if (!process.env.VITEST && !acquireMonitorLock(config.cacheDir)) {
|
|
12478
|
+
// During source-change self-restart, the previous monitor can still be
|
|
12479
|
+
// shutting down and holding the lock briefly. Ask cli.mjs to retry instead
|
|
12480
|
+
// of treating this as a hard crash.
|
|
12481
|
+
if (isSelfRestart) {
|
|
12482
|
+
console.warn(
|
|
12483
|
+
"[monitor] self-restart lock handoff still busy — retrying startup",
|
|
12484
|
+
);
|
|
12485
|
+
process.exit(SELF_RESTART_EXIT_CODE);
|
|
12486
|
+
}
|
|
12459
12487
|
process.exit(1);
|
|
12460
12488
|
}
|
|
12461
12489
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bosun",
|
|
3
|
-
"version": "0.33.5",
|
|
3
|
+
"version": "0.33.6",
|
|
4
4
|
"description": "AI-powered orchestrator supervisor — manages AI agent executors with failover, auto-restarts on failure, analyzes crashes with Codex SDK, creates PRs via Vibe-Kanban API, and sends Telegram notifications. Supports N executors with weighted distribution, multi-repo projects, and auto-setup.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "Apache 2.0",
|
package/session-tracker.mjs
CHANGED
|
@@ -750,12 +750,22 @@ export class SessionTracker {
|
|
|
750
750
|
if (!event || !event.type) return null;
|
|
751
751
|
|
|
752
752
|
const ts = new Date().toISOString();
|
|
753
|
+
const toText = (value) => {
|
|
754
|
+
if (value == null) return "";
|
|
755
|
+
if (typeof value === "string") return value;
|
|
756
|
+
try {
|
|
757
|
+
return JSON.stringify(value);
|
|
758
|
+
} catch {
|
|
759
|
+
return String(value);
|
|
760
|
+
}
|
|
761
|
+
};
|
|
753
762
|
|
|
754
763
|
// ── Codex SDK events ──
|
|
755
764
|
if (event.type === "item.completed" && event.item) {
|
|
756
765
|
const item = event.item;
|
|
766
|
+
const itemType = String(item.type || "").toLowerCase();
|
|
757
767
|
|
|
758
|
-
if (
|
|
768
|
+
if (itemType === "agent_message" && item.text) {
|
|
759
769
|
return {
|
|
760
770
|
type: "agent_message",
|
|
761
771
|
content: item.text.slice(0, MAX_MESSAGE_CHARS),
|
|
@@ -763,7 +773,7 @@ export class SessionTracker {
|
|
|
763
773
|
};
|
|
764
774
|
}
|
|
765
775
|
|
|
766
|
-
if (
|
|
776
|
+
if (itemType === "function_call") {
|
|
767
777
|
return {
|
|
768
778
|
type: "tool_call",
|
|
769
779
|
content: `${item.name}(${(item.arguments || "").slice(0, 500)})`,
|
|
@@ -772,7 +782,7 @@ export class SessionTracker {
|
|
|
772
782
|
};
|
|
773
783
|
}
|
|
774
784
|
|
|
775
|
-
if (
|
|
785
|
+
if (itemType === "function_call_output") {
|
|
776
786
|
return {
|
|
777
787
|
type: "tool_result",
|
|
778
788
|
content: (item.output || "").slice(0, MAX_MESSAGE_CHARS),
|
|
@@ -780,9 +790,108 @@ export class SessionTracker {
|
|
|
780
790
|
};
|
|
781
791
|
}
|
|
782
792
|
|
|
793
|
+
if (itemType === "command_execution" || itemType === "commandexecution") {
|
|
794
|
+
const command = toText(item.command || item.input || "").trim();
|
|
795
|
+
const exitCode = Number.isFinite(Number(item.exit_code)) ? Number(item.exit_code) : null;
|
|
796
|
+
const status = toText(item.status || "").trim();
|
|
797
|
+
const statusParts = [];
|
|
798
|
+
if (status) statusParts.push(status);
|
|
799
|
+
if (exitCode !== null) statusParts.push(`exit=${exitCode}`);
|
|
800
|
+
const statusLabel = statusParts.length ? ` [${statusParts.join(", ")}]` : "";
|
|
801
|
+
const output = toText(
|
|
802
|
+
item.aggregated_output || item.output || item.stderr || item.stdout || "",
|
|
803
|
+
).trim();
|
|
804
|
+
const content = output
|
|
805
|
+
? `${command || "(command)"}${statusLabel}
|
|
806
|
+
${output}`
|
|
807
|
+
: `${command || "(command)"}${statusLabel}`;
|
|
808
|
+
return {
|
|
809
|
+
type: "tool_call",
|
|
810
|
+
content: content.slice(0, MAX_MESSAGE_CHARS),
|
|
811
|
+
timestamp: ts,
|
|
812
|
+
meta: { toolName: "command_execution" },
|
|
813
|
+
};
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
if (itemType === "reasoning") {
|
|
817
|
+
const detail = toText(item.text || item.summary || "");
|
|
818
|
+
if (!detail) return null;
|
|
819
|
+
return {
|
|
820
|
+
type: "system",
|
|
821
|
+
content: detail.slice(0, MAX_MESSAGE_CHARS),
|
|
822
|
+
timestamp: ts,
|
|
823
|
+
};
|
|
824
|
+
}
|
|
825
|
+
|
|
826
|
+
if (itemType === "file_change") {
|
|
827
|
+
const changes = Array.isArray(item.changes)
|
|
828
|
+
? item.changes
|
|
829
|
+
.map((change) => {
|
|
830
|
+
const kind = toText(change?.kind || "update").trim();
|
|
831
|
+
const filePath = toText(change?.path || change?.file || "").trim();
|
|
832
|
+
return filePath ? `${kind} ${filePath}` : kind;
|
|
833
|
+
})
|
|
834
|
+
.filter(Boolean)
|
|
835
|
+
: [];
|
|
836
|
+
const summary = changes.length
|
|
837
|
+
? `file changes: ${changes.slice(0, 5).join(", ")}`
|
|
838
|
+
: "file changes detected";
|
|
839
|
+
return {
|
|
840
|
+
type: "system",
|
|
841
|
+
content: summary.slice(0, MAX_MESSAGE_CHARS),
|
|
842
|
+
timestamp: ts,
|
|
843
|
+
};
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
if (itemType === "todo_list") {
|
|
847
|
+
const items = Array.isArray(item.items)
|
|
848
|
+
? item.items
|
|
849
|
+
.map((entry) => {
|
|
850
|
+
const detail = toText(entry?.text || "").trim();
|
|
851
|
+
if (!detail) return "";
|
|
852
|
+
return `${entry?.completed ? "[x]" : "[ ]"} ${detail}`;
|
|
853
|
+
})
|
|
854
|
+
.filter(Boolean)
|
|
855
|
+
: [];
|
|
856
|
+
const summary = items.length ? `todo:
|
|
857
|
+
${items.join("\n")}` : "todo updated";
|
|
858
|
+
return {
|
|
859
|
+
type: "system",
|
|
860
|
+
content: summary.slice(0, MAX_MESSAGE_CHARS),
|
|
861
|
+
timestamp: ts,
|
|
862
|
+
};
|
|
863
|
+
}
|
|
864
|
+
|
|
865
|
+
if (item.text || item.content) {
|
|
866
|
+
const fallback = toText(item.text || item.content);
|
|
867
|
+
if (fallback) {
|
|
868
|
+
return {
|
|
869
|
+
type: "system",
|
|
870
|
+
content: fallback.slice(0, MAX_MESSAGE_CHARS),
|
|
871
|
+
timestamp: ts,
|
|
872
|
+
};
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
|
|
783
876
|
return null; // Skip other item types
|
|
784
877
|
}
|
|
785
878
|
|
|
879
|
+
if (event.type === "assistant.message" && event.data?.content) {
|
|
880
|
+
return {
|
|
881
|
+
type: "agent_message",
|
|
882
|
+
content: toText(event.data.content).slice(0, MAX_MESSAGE_CHARS),
|
|
883
|
+
timestamp: ts,
|
|
884
|
+
};
|
|
885
|
+
}
|
|
886
|
+
|
|
887
|
+
if (event.type === "assistant.message_delta" && event.data?.deltaContent) {
|
|
888
|
+
return {
|
|
889
|
+
type: "agent_message",
|
|
890
|
+
content: toText(event.data.deltaContent).slice(0, MAX_MESSAGE_CHARS),
|
|
891
|
+
timestamp: ts,
|
|
892
|
+
};
|
|
893
|
+
}
|
|
894
|
+
|
|
786
895
|
// ── Copilot SDK events ──
|
|
787
896
|
if (event.type === "message" && event.content) {
|
|
788
897
|
return {
|
package/task-claims.mjs
CHANGED
|
@@ -868,7 +868,15 @@ export async function renewClaim(opts = {}) {
|
|
|
868
868
|
state.repoRoot
|
|
869
869
|
);
|
|
870
870
|
if (!sharedResult.success) {
|
|
871
|
-
|
|
871
|
+
const reason = sharedResult.reason || "unknown";
|
|
872
|
+
// Token mismatch in shared state means another orchestrator has taken
|
|
873
|
+
// over — surface as a fatal claim renewal failure so the task-executor
|
|
874
|
+
// can abort the now-orphaned agent instead of letting it run forever.
|
|
875
|
+
if (reason === "attempt_token_mismatch" || reason === "owner_mismatch") {
|
|
876
|
+
console.warn(`[task-claims] Shared state heartbeat FATAL for ${taskId}: ${reason} — surfacing as claim failure`);
|
|
877
|
+
return { success: false, error: reason };
|
|
878
|
+
}
|
|
879
|
+
console.info(`[task-claims] Shared state heartbeat renewal warning for ${taskId}: ${reason}`);
|
|
872
880
|
}
|
|
873
881
|
} catch (err) {
|
|
874
882
|
console.warn(`[task-claims] Shared state heartbeat renewal failed for ${taskId}: ${err.message}`);
|
package/task-executor.mjs
CHANGED
|
@@ -109,6 +109,11 @@ const FATAL_CLAIM_RENEW_ERRORS = new Set([
|
|
|
109
109
|
"task_claimed_by_different_instance",
|
|
110
110
|
"claim_token_mismatch",
|
|
111
111
|
"task_not_claimed",
|
|
112
|
+
// Shared-state token mismatches — another orchestrator instance has taken
|
|
113
|
+
// over the task in the shared state registry. Treat exactly the same as a
|
|
114
|
+
// direct claim-token mismatch: abort the now-orphaned local agent.
|
|
115
|
+
"attempt_token_mismatch",
|
|
116
|
+
"owner_mismatch",
|
|
112
117
|
]);
|
|
113
118
|
const CODEX_TASK_LABELS = (() => {
|
|
114
119
|
const raw = String(
|
package/ui/app.js
CHANGED
|
@@ -1643,7 +1643,6 @@ function App() {
|
|
|
1643
1643
|
setSidebarDrawerOpen(false);
|
|
1644
1644
|
setInspectorDrawerOpen(false);
|
|
1645
1645
|
}, []);
|
|
1646
|
-
const sidebarToggleLabel = sidebarDrawerOpen ? "Close sidebar" : "Open sidebar";
|
|
1647
1646
|
const inspectorToggleLabel = inspectorDrawerOpen
|
|
1648
1647
|
? "Close inspector"
|
|
1649
1648
|
: "Open inspector";
|
|
@@ -1664,9 +1663,9 @@ function App() {
|
|
|
1664
1663
|
<button
|
|
1665
1664
|
class="btn btn-ghost btn-sm tablet-toggle"
|
|
1666
1665
|
onClick=${toggleMore}
|
|
1667
|
-
aria-label=${isMoreOpen ? "Close
|
|
1666
|
+
aria-label=${isMoreOpen ? "Close navigation menu" : "Open navigation menu"}
|
|
1668
1667
|
>
|
|
1669
|
-
⋯
|
|
1668
|
+
⋯ Navigation
|
|
1670
1669
|
</button>
|
|
1671
1670
|
`
|
|
1672
1671
|
: null;
|
|
@@ -1726,13 +1725,6 @@ function App() {
|
|
|
1726
1725
|
${showDrawerToggles
|
|
1727
1726
|
? html`
|
|
1728
1727
|
<div class="tablet-action-bar">
|
|
1729
|
-
<button
|
|
1730
|
-
class="btn btn-ghost btn-sm tablet-toggle"
|
|
1731
|
-
onClick=${toggleSidebar}
|
|
1732
|
-
aria-label=${sidebarToggleLabel}
|
|
1733
|
-
>
|
|
1734
|
-
☰ Navigation
|
|
1735
|
-
</button>
|
|
1736
1728
|
${inspectorToggleButton}
|
|
1737
1729
|
${moreToggleButton}
|
|
1738
1730
|
</div>
|
package/ui/tabs/workflows.js
CHANGED
|
@@ -786,7 +786,7 @@ const COMMAND_PRESETS = {
|
|
|
786
786
|
{ label: "Push", cmd: "git push --set-upstream origin HEAD", icon: "rocket" },
|
|
787
787
|
],
|
|
788
788
|
github: [
|
|
789
|
-
{ label: "Check CI", cmd: "gh pr checks --json name,state
|
|
789
|
+
{ label: "Check CI", cmd: "gh pr checks --json name,state", icon: "search" },
|
|
790
790
|
{ label: "Merge PR (squash)", cmd: "gh pr merge --auto --squash", icon: "git" },
|
|
791
791
|
{ label: "Close PR", cmd: 'gh pr close --comment "{{reason}}"', icon: "ban" },
|
|
792
792
|
{ label: "PR Diff", cmd: "gh pr diff --stat", icon: "chart" },
|
package/ui-server.mjs
CHANGED
|
@@ -250,6 +250,7 @@ async function handleVendor(req, res, url) {
|
|
|
250
250
|
}
|
|
251
251
|
const statusPath = resolve(repoRoot, ".cache", "ve-orchestrator-status.json");
|
|
252
252
|
const logsDir = resolve(__dirname, "logs");
|
|
253
|
+
const monitorMonitorLogsDir = resolve(repoRoot, ".cache", "monitor-monitor-logs");
|
|
253
254
|
const agentLogsDirCandidates = [
|
|
254
255
|
resolve(__dirname, "logs", "agents"),
|
|
255
256
|
resolve(repoRoot, ".cache", "agent-logs"),
|
|
@@ -2255,15 +2256,55 @@ function normalizeAgentLogName(name) {
|
|
|
2255
2256
|
return basename(String(name || "")).trim();
|
|
2256
2257
|
}
|
|
2257
2258
|
|
|
2259
|
+
async function listDirFilesWithMtime(dir, predicate = () => true) {
|
|
2260
|
+
const names = await readdir(dir).catch(() => []);
|
|
2261
|
+
const entries = await Promise.all(
|
|
2262
|
+
names
|
|
2263
|
+
.filter((name) => predicate(name))
|
|
2264
|
+
.map(async (name) => {
|
|
2265
|
+
const fullPath = resolve(dir, name);
|
|
2266
|
+
const info = await stat(fullPath).catch(() => null);
|
|
2267
|
+
if (!info?.isFile?.()) return null;
|
|
2268
|
+
return {
|
|
2269
|
+
name,
|
|
2270
|
+
path: fullPath,
|
|
2271
|
+
mtimeMs: Number(info.mtimeMs || 0),
|
|
2272
|
+
};
|
|
2273
|
+
}),
|
|
2274
|
+
);
|
|
2275
|
+
return entries.filter(Boolean);
|
|
2276
|
+
}
|
|
2277
|
+
|
|
2278
|
+
async function resolvePreferredSystemLogPath() {
|
|
2279
|
+
const rootLogEntries = await listDirFilesWithMtime(
|
|
2280
|
+
logsDir,
|
|
2281
|
+
(name) => name.endsWith(".log"),
|
|
2282
|
+
);
|
|
2283
|
+
const nonDaemonEntries = rootLogEntries.filter((entry) => entry.name !== "daemon.log");
|
|
2284
|
+
|
|
2285
|
+
const monitorPromptEntries = await listDirFilesWithMtime(
|
|
2286
|
+
monitorMonitorLogsDir,
|
|
2287
|
+
(name) =>
|
|
2288
|
+
name.startsWith("monitor-monitor-") &&
|
|
2289
|
+
(name.endsWith(".prompt.md") || name.endsWith(".md")),
|
|
2290
|
+
);
|
|
2291
|
+
|
|
2292
|
+
const preferredEntries = [...nonDaemonEntries, ...monitorPromptEntries].sort(
|
|
2293
|
+
(a, b) => b.mtimeMs - a.mtimeMs,
|
|
2294
|
+
);
|
|
2295
|
+
if (preferredEntries.length > 0) return preferredEntries[0].path;
|
|
2296
|
+
|
|
2297
|
+
const daemonEntry = rootLogEntries.find((entry) => entry.name === "daemon.log");
|
|
2298
|
+
return daemonEntry ? daemonEntry.path : null;
|
|
2299
|
+
}
|
|
2300
|
+
|
|
2258
2301
|
/**
|
|
2259
2302
|
* Resolve the log file path for a given logType and optional query.
|
|
2260
2303
|
* Returns null if no matching file found.
|
|
2261
2304
|
*/
|
|
2262
2305
|
async function resolveLogPath(logType, query) {
|
|
2263
2306
|
if (logType === "system") {
|
|
2264
|
-
|
|
2265
|
-
const logFile = files.filter((f) => f.endsWith(".log")).sort().pop();
|
|
2266
|
-
return logFile ? resolve(logsDir, logFile) : null;
|
|
2307
|
+
return resolvePreferredSystemLogPath();
|
|
2267
2308
|
}
|
|
2268
2309
|
if (logType === "agent") {
|
|
2269
2310
|
const agentLogsDir = await resolveAgentLogsDir();
|
|
@@ -3252,17 +3293,10 @@ function normalizeBranchInput(input) {
|
|
|
3252
3293
|
}
|
|
3253
3294
|
|
|
3254
3295
|
async function getLatestLogTail(lineCount) {
|
|
3255
|
-
const
|
|
3256
|
-
|
|
3257
|
-
|
|
3258
|
-
|
|
3259
|
-
.pop();
|
|
3260
|
-
if (!logFile) return { file: null, lines: [] };
|
|
3261
|
-
const logPath = resolve(logsDir, logFile);
|
|
3262
|
-
const content = await readFile(logPath, "utf8");
|
|
3263
|
-
const lines = content.split("\n").filter(Boolean);
|
|
3264
|
-
const tail = lines.slice(-lineCount);
|
|
3265
|
-
return { file: logFile, lines: tail };
|
|
3296
|
+
const logPath = await resolvePreferredSystemLogPath();
|
|
3297
|
+
if (!logPath) return { file: null, lines: [] };
|
|
3298
|
+
const tail = await tailFile(logPath, lineCount);
|
|
3299
|
+
return { file: basename(logPath), lines: tail.lines || [] };
|
|
3266
3300
|
}
|
|
3267
3301
|
|
|
3268
3302
|
async function tailFile(filePath, lineCount, maxBytes = 1_000_000) {
|
package/update-check.mjs
CHANGED
|
@@ -26,6 +26,11 @@ import os from "node:os";
|
|
|
26
26
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
27
27
|
const PKG_NAME = "bosun";
|
|
28
28
|
const CACHE_FILE = resolve(__dirname, "logs", ".update-check-cache.json");
|
|
29
|
+
const AUTO_UPDATE_STATE_FILE = resolve(__dirname, ".cache", "auto-update-state.json");
|
|
30
|
+
const AUTO_UPDATE_FAILURE_LIMIT =
|
|
31
|
+
Number(process.env.BOSUN_AUTO_UPDATE_FAILURE_LIMIT) || 3;
|
|
32
|
+
const AUTO_UPDATE_DISABLE_WINDOW_MS =
|
|
33
|
+
Number(process.env.BOSUN_AUTO_UPDATE_DISABLE_WINDOW_MS) || 24 * 60 * 60 * 1000;
|
|
29
34
|
const STARTUP_CHECK_INTERVAL_MS = 60 * 60 * 1000; // 1 hour (startup notice)
|
|
30
35
|
const AUTO_UPDATE_INTERVAL_MS = 10 * 60 * 1000; // 10 minutes (polling loop)
|
|
31
36
|
|
|
@@ -111,6 +116,79 @@ async function writeCache(data) {
|
|
|
111
116
|
}
|
|
112
117
|
}
|
|
113
118
|
|
|
119
|
+
const defaultAutoUpdateState = {
|
|
120
|
+
failureCount: 0,
|
|
121
|
+
lastFailureReason: null,
|
|
122
|
+
disabledUntil: 0,
|
|
123
|
+
lastNotifiedAt: 0,
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
async function readAutoUpdateState() {
|
|
127
|
+
try {
|
|
128
|
+
const raw = await readFile(AUTO_UPDATE_STATE_FILE, "utf8");
|
|
129
|
+
const parsed = JSON.parse(raw);
|
|
130
|
+
return { ...defaultAutoUpdateState, ...parsed };
|
|
131
|
+
} catch {
|
|
132
|
+
return { ...defaultAutoUpdateState };
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
async function writeAutoUpdateState(state) {
|
|
137
|
+
try {
|
|
138
|
+
await mkdir(dirname(AUTO_UPDATE_STATE_FILE), { recursive: true });
|
|
139
|
+
await writeFile(
|
|
140
|
+
AUTO_UPDATE_STATE_FILE,
|
|
141
|
+
JSON.stringify({ ...defaultAutoUpdateState, ...state }, null, 2),
|
|
142
|
+
);
|
|
143
|
+
} catch {
|
|
144
|
+
// non-critical
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
async function resetAutoUpdateState() {
|
|
149
|
+
await writeAutoUpdateState({ ...defaultAutoUpdateState });
|
|
150
|
+
return { ...defaultAutoUpdateState };
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
function classifyInstallError(err) {
|
|
154
|
+
const message = err?.message || String(err || "");
|
|
155
|
+
const code = err?.code || "";
|
|
156
|
+
if (code === "EINVAL" || message.includes("EINVAL")) return "EINVAL";
|
|
157
|
+
if (code) return code;
|
|
158
|
+
return message.slice(0, 160) || "unknown";
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
async function recordAutoUpdateFailure(state, reason) {
|
|
162
|
+
const now = Date.now();
|
|
163
|
+
const next = {
|
|
164
|
+
...defaultAutoUpdateState,
|
|
165
|
+
...state,
|
|
166
|
+
failureCount: (state?.failureCount || 0) + 1,
|
|
167
|
+
lastFailureReason: reason,
|
|
168
|
+
};
|
|
169
|
+
|
|
170
|
+
if (!next.disabledUntil && next.failureCount >= AUTO_UPDATE_FAILURE_LIMIT) {
|
|
171
|
+
next.disabledUntil = now + AUTO_UPDATE_DISABLE_WINDOW_MS;
|
|
172
|
+
next.lastNotifiedAt = 0;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
await writeAutoUpdateState(next);
|
|
176
|
+
return next;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
function isAutoUpdateDisabled(state, now = Date.now()) {
|
|
180
|
+
return Boolean(state?.disabledUntil && now < state.disabledUntil);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
function buildDisableNotice(state) {
|
|
184
|
+
const hours = Math.round(AUTO_UPDATE_DISABLE_WINDOW_MS / (60 * 60 * 1000));
|
|
185
|
+
const reason = state?.lastFailureReason || "unknown";
|
|
186
|
+
return [
|
|
187
|
+
`[auto-update] ⛔ Disabled for ${hours}h after ${state?.failureCount || 0} failures (last: ${reason}).`,
|
|
188
|
+
"Recovery: set BOSUN_SKIP_AUTO_UPDATE=1 or delete .cache/auto-update-state.json then restart.",
|
|
189
|
+
].join(' ');
|
|
190
|
+
}
|
|
191
|
+
|
|
114
192
|
// ── Registry query ───────────────────────────────────────────────────────────
|
|
115
193
|
|
|
116
194
|
async function fetchLatestVersion() {
|
|
@@ -324,6 +402,7 @@ export function startAutoUpdateLoop(opts = {}) {
|
|
|
324
402
|
`[auto-update] Polling every ${Math.round(intervalMs / 1000 / 60)} min for upstream changes`,
|
|
325
403
|
);
|
|
326
404
|
|
|
405
|
+
|
|
327
406
|
async function poll() {
|
|
328
407
|
// Safety check: Is parent process still alive?
|
|
329
408
|
if (!isParentAlive()) {
|
|
@@ -336,17 +415,34 @@ export function startAutoUpdateLoop(opts = {}) {
|
|
|
336
415
|
|
|
337
416
|
if (autoUpdateRunning) return;
|
|
338
417
|
autoUpdateRunning = true;
|
|
418
|
+
|
|
419
|
+
let state = await readAutoUpdateState();
|
|
420
|
+
const now = Date.now();
|
|
421
|
+
|
|
339
422
|
try {
|
|
423
|
+
if (isAutoUpdateDisabled(state, now)) {
|
|
424
|
+
if (!state.lastNotifiedAt) {
|
|
425
|
+
const notice = buildDisableNotice(state);
|
|
426
|
+
onNotify(notice);
|
|
427
|
+
console.log(notice);
|
|
428
|
+
state = { ...state, lastNotifiedAt: now };
|
|
429
|
+
await writeAutoUpdateState(state);
|
|
430
|
+
}
|
|
431
|
+
return;
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
if (state.disabledUntil && now >= state.disabledUntil) {
|
|
435
|
+
state = await resetAutoUpdateState();
|
|
436
|
+
}
|
|
437
|
+
|
|
340
438
|
const currentVersion = getCurrentVersion();
|
|
341
439
|
const latest = await fetchLatestVersion();
|
|
342
440
|
|
|
343
441
|
if (!latest) {
|
|
344
|
-
autoUpdateRunning = false;
|
|
345
442
|
return; // registry unreachable — try again next cycle
|
|
346
443
|
}
|
|
347
444
|
|
|
348
445
|
if (!isNewer(latest, currentVersion)) {
|
|
349
|
-
autoUpdateRunning = false;
|
|
350
446
|
return; // already up to date
|
|
351
447
|
}
|
|
352
448
|
|
|
@@ -364,7 +460,19 @@ export function startAutoUpdateLoop(opts = {}) {
|
|
|
364
460
|
const errMsg = `[auto-update] ❌ Install failed: ${installErr.message || installErr}`;
|
|
365
461
|
console.error(errMsg);
|
|
366
462
|
onNotify(errMsg);
|
|
367
|
-
|
|
463
|
+
|
|
464
|
+
const updatedState = await recordAutoUpdateFailure(
|
|
465
|
+
state,
|
|
466
|
+
classifyInstallError(installErr),
|
|
467
|
+
);
|
|
468
|
+
|
|
469
|
+
if (updatedState.disabledUntil && !updatedState.lastNotifiedAt) {
|
|
470
|
+
const notice = buildDisableNotice(updatedState);
|
|
471
|
+
onNotify(notice);
|
|
472
|
+
console.log(notice);
|
|
473
|
+
updatedState.lastNotifiedAt = Date.now();
|
|
474
|
+
await writeAutoUpdateState(updatedState);
|
|
475
|
+
}
|
|
368
476
|
return;
|
|
369
477
|
}
|
|
370
478
|
|
|
@@ -374,11 +482,11 @@ export function startAutoUpdateLoop(opts = {}) {
|
|
|
374
482
|
const errMsg = `[auto-update] ⚠️ Install ran but version unchanged (${newVersion}). Skipping restart.`;
|
|
375
483
|
console.warn(errMsg);
|
|
376
484
|
onNotify(errMsg);
|
|
377
|
-
autoUpdateRunning = false;
|
|
378
485
|
return;
|
|
379
486
|
}
|
|
380
487
|
|
|
381
488
|
await writeCache({ lastCheck: Date.now(), latestVersion: latest });
|
|
489
|
+
await resetAutoUpdateState();
|
|
382
490
|
|
|
383
491
|
const successMsg = `[auto-update] ✅ Updated to v${latest}. Restarting...`;
|
|
384
492
|
console.log(successMsg);
|
|
@@ -519,3 +627,17 @@ function promptConfirm(question) {
|
|
|
519
627
|
});
|
|
520
628
|
});
|
|
521
629
|
}
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
export const __autoUpdateTestHooks = {
|
|
633
|
+
readAutoUpdateState,
|
|
634
|
+
writeAutoUpdateState,
|
|
635
|
+
resetAutoUpdateState,
|
|
636
|
+
recordAutoUpdateFailure,
|
|
637
|
+
isAutoUpdateDisabled,
|
|
638
|
+
classifyInstallError,
|
|
639
|
+
buildDisableNotice,
|
|
640
|
+
AUTO_UPDATE_STATE_FILE,
|
|
641
|
+
AUTO_UPDATE_FAILURE_LIMIT,
|
|
642
|
+
AUTO_UPDATE_DISABLE_WINDOW_MS,
|
|
643
|
+
};
|
package/workflow-templates/github.mjs
CHANGED
|
@@ -39,7 +39,7 @@ export const PR_MERGE_STRATEGY_TEMPLATE = {
|
|
|
39
39
|
}, { x: 400, y: 50 }),
|
|
40
40
|
|
|
41
41
|
node("check-ci", "validation.build", "Check CI Status", {
|
|
42
|
-
command: "gh pr checks {{prNumber}} --json name,state
|
|
42
|
+
command: "gh pr checks {{prNumber}} --json name,state",
|
|
43
43
|
}, { x: 150, y: 200 }),
|
|
44
44
|
|
|
45
45
|
node("get-diff", "action.run_command", "Get Diff Stats", {
|
|
@@ -47,7 +47,8 @@ export const PR_MERGE_STRATEGY_TEMPLATE = {
|
|
|
47
47
|
}, { x: 650, y: 200 }),
|
|
48
48
|
|
|
49
49
|
node("ci-passed", "condition.expression", "CI Passed?", {
|
|
50
|
-
expression:
|
|
50
|
+
expression:
|
|
51
|
+
"(() => { const out = $ctx.getNodeOutput('check-ci'); if (!out || out.passed !== true) return false; let checks = []; try { checks = JSON.parse(out.output || '[]'); } catch { return false; } if (!Array.isArray(checks) || checks.length === 0) return false; const ok = new Set(['SUCCESS', 'PASSED', 'PASS', 'COMPLETED', 'NEUTRAL', 'SKIPPED']); return checks.every((c) => ok.has(String(c?.state || '').toUpperCase())); })()",
|
|
51
52
|
}, { x: 150, y: 350, outputs: ["yes", "no"] }),
|
|
52
53
|
|
|
53
54
|
node("wait-for-ci", "action.delay", "Wait for CI", {
|
|
@@ -294,7 +295,7 @@ Only fix conflicts, do NOT change any logic. Keep changes minimal.`,
|
|
|
294
295
|
}, { x: 200, y: 500 }),
|
|
295
296
|
|
|
296
297
|
node("verify-ci", "action.run_command", "Verify CI Green", {
|
|
297
|
-
command: "gh pr checks --json name,state
|
|
298
|
+
command: "gh pr checks --json name,state",
|
|
298
299
|
}, { x: 200, y: 660 }),
|
|
299
300
|
|
|
300
301
|
node("auto-merge", "condition.expression", "CI Passed?", {
|
package/workflow-templates/reliability.mjs
CHANGED
|
@@ -301,17 +301,17 @@ export const HEALTH_CHECK_TEMPLATE = {
|
|
|
301
301
|
}, { x: 150, y: 200 }),
|
|
302
302
|
|
|
303
303
|
node("check-git", "action.run_command", "Check Git State", {
|
|
304
|
-
command: "git status --porcelain
|
|
304
|
+
command: "node -e \"const cp=require('node:child_process');const status=cp.execSync('git status --porcelain',{encoding:'utf8'});const wt=cp.execSync('git worktree list --porcelain',{encoding:'utf8'});const count=(wt.match(/^worktree /gm)||[]).length;process.stdout.write(status + (status.endsWith('\\\\n') ? '' : '\\\\n') + count + '\\\\n');\"",
|
|
305
305
|
continueOnError: true,
|
|
306
306
|
}, { x: 400, y: 200 }),
|
|
307
307
|
|
|
308
308
|
node("check-agents", "action.run_command", "Check Agent Status", {
|
|
309
|
-
command: "bosun --daemon-status
|
|
309
|
+
command: "node -e \"const cp=require('node:child_process');try{process.stdout.write(cp.execSync('bosun --daemon-status',{encoding:'utf8'}));}catch{process.stdout.write('daemon not running\\\\n');}\"",
|
|
310
310
|
continueOnError: true,
|
|
311
311
|
}, { x: 650, y: 200 }),
|
|
312
312
|
|
|
313
313
|
node("has-issues", "condition.expression", "Any Issues?", {
|
|
314
|
-
expression: "($ctx.getNodeOutput('check-config')?.output || '').includes('ERROR') || ($ctx.getNodeOutput('check-config')?.output || '').includes('CRITICAL')",
|
|
314
|
+
expression: "($ctx.getNodeOutput('check-config')?.success === false) || (($ctx.getNodeOutput('check-config')?.output || '').includes('ERROR')) || (($ctx.getNodeOutput('check-config')?.output || '').includes('CRITICAL')) || ($ctx.getNodeOutput('check-git')?.success === false) || ($ctx.getNodeOutput('check-agents')?.success === false)",
|
|
315
315
|
}, { x: 400, y: 380 }),
|
|
316
316
|
|
|
317
317
|
node("alert", "notify.telegram", "Alert Issues Found", {
|
|
@@ -349,4 +349,3 @@ export const HEALTH_CHECK_TEMPLATE = {
|
|
|
349
349
|
},
|
|
350
350
|
},
|
|
351
351
|
};
|
|
352
|
-
|