bosun 0.40.16 → 0.40.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/agent/agent-pool.mjs
CHANGED
|
@@ -40,6 +40,8 @@
|
|
|
40
40
|
*/
|
|
41
41
|
|
|
42
42
|
import { resolve, dirname } from "node:path";
|
|
43
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
44
|
+
import { homedir } from "node:os";
|
|
43
45
|
import { fileURLToPath } from "node:url";
|
|
44
46
|
import { loadConfig } from "../config/config.mjs";
|
|
45
47
|
import { resolveRepoRoot, resolveAgentRepoRoot } from "../config/repo-root.mjs";
|
|
@@ -566,16 +568,28 @@ function hasSdkPrerequisites(name, runtimeEnv = process.env) {
|
|
|
566
568
|
}
|
|
567
569
|
|
|
568
570
|
if (name === "codex") {
|
|
569
|
-
// Codex needs an OpenAI API key (or Azure key, or profile-specific key)
|
|
571
|
+
// Codex needs an OpenAI API key (or Azure key, or profile-specific key),
|
|
572
|
+
// OR a valid ~/.codex/config.toml where an env_key reference is satisfied.
|
|
570
573
|
const hasKey =
|
|
571
574
|
runtimeEnv.OPENAI_API_KEY ||
|
|
572
575
|
runtimeEnv.AZURE_OPENAI_API_KEY ||
|
|
573
576
|
runtimeEnv.CODEX_MODEL_PROFILE_XL_API_KEY ||
|
|
574
577
|
runtimeEnv.CODEX_MODEL_PROFILE_M_API_KEY;
|
|
575
|
-
if (
|
|
576
|
-
|
|
578
|
+
if (hasKey) return { ok: true, reason: null };
|
|
579
|
+
// Check ~/.codex/config.toml — Codex CLI SDK reads auth env_key refs from there
|
|
580
|
+
try {
|
|
581
|
+
const configToml = resolve(homedir(), ".codex", "config.toml");
|
|
582
|
+
if (existsSync(configToml)) {
|
|
583
|
+
const tomlText = readFileSync(configToml, "utf8");
|
|
584
|
+
// Extract all env_key = "VAR_NAME" entries and check if any are set
|
|
585
|
+
for (const match of tomlText.matchAll(/env_key\s*=\s*"([^"]+)"/g)) {
|
|
586
|
+
if (runtimeEnv[match[1]]) return { ok: true, reason: null };
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
} catch {
|
|
590
|
+
// best effort — fall through to failure
|
|
577
591
|
}
|
|
578
|
-
return { ok: false, reason: "no API key (OPENAI_API_KEY / AZURE_OPENAI_API_KEY)" };
|
|
592
|
+
return { ok: false, reason: "no API key (OPENAI_API_KEY / AZURE_OPENAI_API_KEY) and no satisfied env_key in ~/.codex/config.toml" };
|
|
579
593
|
}
|
|
580
594
|
if (name === "copilot") {
|
|
581
595
|
// Copilot auth can come from multiple sources (OAuth manager, gh auth,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "bosun",
|
|
3
|
-
"version": "0.40.
|
|
3
|
+
"version": "0.40.18",
|
|
4
4
|
"description": "Bosun Autonomous Engineering — manages AI agent executors with failover, extremely powerful workflow builder, and a massive amount of included default workflow templates for autonomous engineering, creates PRs via Vibe-Kanban API, and sends Telegram notifications. Supports N executors with weighted distribution, multi-repo projects, and auto-setup.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -2675,7 +2675,59 @@ export class WorkflowEngine extends EventEmitter {
|
|
|
2675
2675
|
|
|
2676
2676
|
console.log(`${TAG} Resuming ${runs.length} interrupted run(s)...`);
|
|
2677
2677
|
|
|
2678
|
+
// ── Deduplicate by taskId: keep only the most recent run per task ────
|
|
2679
|
+
// After N crash/restart cycles, N run entries accumulate for the same
|
|
2680
|
+
// taskId. Resuming all of them causes competing workflow runs that race
|
|
2681
|
+
// to claim the task → "claim was stolen" errors on every restart.
|
|
2682
|
+
// Solution: pre-scan detail files, keep latest startedAt per taskId,
|
|
2683
|
+
// and mark older duplicates as not-resumable before we even try them.
|
|
2684
|
+
const runDetailCache = new Map(); // runId → parsed detail
|
|
2685
|
+
const latestByTaskId = new Map(); // taskId → run entry (highest startedAt)
|
|
2686
|
+
|
|
2687
|
+
for (const run of runs) {
|
|
2688
|
+
const dp = resolve(this.runsDir, `${run.runId}.json`);
|
|
2689
|
+
if (!existsSync(dp)) continue;
|
|
2690
|
+
try {
|
|
2691
|
+
const d = JSON.parse(readFileSync(dp, "utf8"));
|
|
2692
|
+
runDetailCache.set(run.runId, d);
|
|
2693
|
+
const tid = d.data?.taskId || d.inputData?.taskId;
|
|
2694
|
+
if (!tid) continue;
|
|
2695
|
+
const prev = latestByTaskId.get(tid);
|
|
2696
|
+
if (!prev || (run.startedAt || 0) >= (prev.startedAt || 0)) {
|
|
2697
|
+
latestByTaskId.set(tid, run);
|
|
2698
|
+
}
|
|
2699
|
+
} catch {
|
|
2700
|
+
/* unreadable detail — handled in the main loop below */
|
|
2701
|
+
}
|
|
2702
|
+
}
|
|
2703
|
+
|
|
2704
|
+
// Mark older duplicate runs as not-resumable before entering the loop
|
|
2705
|
+
let dedupedCount = 0;
|
|
2706
|
+
for (const run of runs) {
|
|
2707
|
+
const d = runDetailCache.get(run.runId);
|
|
2708
|
+
const tid = d?.data?.taskId || d?.inputData?.taskId;
|
|
2709
|
+
if (!tid) continue;
|
|
2710
|
+
const latest = latestByTaskId.get(tid);
|
|
2711
|
+
if (latest && latest.runId !== run.runId) {
|
|
2712
|
+
this._markRunUnresumable(run.runId, "duplicate_task_run");
|
|
2713
|
+
dedupedCount++;
|
|
2714
|
+
}
|
|
2715
|
+
}
|
|
2716
|
+
if (dedupedCount > 0) {
|
|
2717
|
+
console.log(
|
|
2718
|
+
`${TAG} Skipped ${dedupedCount} duplicate interrupted run(s) (kept latest per taskId)`,
|
|
2719
|
+
);
|
|
2720
|
+
}
|
|
2721
|
+
|
|
2678
2722
|
for (const run of runs) {
|
|
2723
|
+
// Skip runs that were marked as duplicates above
|
|
2724
|
+
const _runDetail = runDetailCache.get(run.runId);
|
|
2725
|
+
const _tid = _runDetail?.data?.taskId || _runDetail?.inputData?.taskId;
|
|
2726
|
+
if (_tid) {
|
|
2727
|
+
const latest = latestByTaskId.get(_tid);
|
|
2728
|
+
if (latest && latest.runId !== run.runId) continue;
|
|
2729
|
+
}
|
|
2730
|
+
|
|
2679
2731
|
try {
|
|
2680
2732
|
// Check if the workflow definition still exists
|
|
2681
2733
|
const def = this.get(run.workflowId);
|
|
@@ -2693,7 +2745,8 @@ export class WorkflowEngine extends EventEmitter {
|
|
|
2693
2745
|
continue;
|
|
2694
2746
|
}
|
|
2695
2747
|
|
|
2696
|
-
|
|
2748
|
+
// Reuse cached detail if available (already parsed above)
|
|
2749
|
+
const detail = runDetailCache.get(run.runId) ?? JSON.parse(readFileSync(detailPath, "utf8"));
|
|
2697
2750
|
const nodeStatuses = detail.nodeStatuses || {};
|
|
2698
2751
|
const hasCompletedNodes = Object.values(nodeStatuses).some(
|
|
2699
2752
|
(s) => s === NodeStatus.COMPLETED,
|
|
@@ -131,15 +131,22 @@ async function loadRegistry(registryPath) {
|
|
|
131
131
|
const registry = JSON.parse(content);
|
|
132
132
|
|
|
133
133
|
// Validate structure
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
) {
|
|
134
|
+
// Repair instead of wipe: preserve any valid task entries while fixing
|
|
135
|
+
// missing/invalid structural fields. Wiping on minor corruption was causing
|
|
136
|
+
// active claims to be lost, leading to cascading "claim was stolen" failures.
|
|
137
|
+
let repaired = false;
|
|
138
|
+
if (!registry.version) {
|
|
139
|
+
registry.version = REGISTRY_VERSION;
|
|
140
|
+
repaired = true;
|
|
141
|
+
}
|
|
142
|
+
if (!registry.tasks || typeof registry.tasks !== "object" || Array.isArray(registry.tasks)) {
|
|
143
|
+
registry.tasks = {};
|
|
144
|
+
repaired = true;
|
|
145
|
+
}
|
|
146
|
+
if (repaired) {
|
|
139
147
|
console.warn(
|
|
140
|
-
"[SharedStateManager] Invalid registry structure,
|
|
148
|
+
"[SharedStateManager] Invalid registry structure, repaired (preserved existing task entries)",
|
|
141
149
|
);
|
|
142
|
-
return createEmptyRegistry();
|
|
143
150
|
}
|
|
144
151
|
|
|
145
152
|
return registry;
|