npm - bosun - Versions diffs - 0.40.16 → 0.40.18 - Mend

bosun 0.40.16 → 0.40.18

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (4)

package/agent/agent-pool.mjs +18 -4
package/package.json +1 -1
package/workflow/workflow-engine.mjs +54 -1
package/workspace/shared-state-manager.mjs +14 -7

package/agent/agent-pool.mjs CHANGED Viewed

@@ -40,6 +40,8 @@
  */
 import { resolve, dirname } from "node:path";
+import { existsSync, readFileSync } from "node:fs";
+import { homedir } from "node:os";
 import { fileURLToPath } from "node:url";
 import { loadConfig } from "../config/config.mjs";
 import { resolveRepoRoot, resolveAgentRepoRoot } from "../config/repo-root.mjs";
@@ -566,16 +568,28 @@ function hasSdkPrerequisites(name, runtimeEnv = process.env) {
   }
   if (name === "codex") {
-    // Codex needs an OpenAI API key (or Azure key, or profile-specific key)
+    // Codex needs an OpenAI API key (or Azure key, or profile-specific key),
+    // OR a valid ~/.codex/config.toml where an env_key reference is satisfied.
     const hasKey =
       runtimeEnv.OPENAI_API_KEY ||
       runtimeEnv.AZURE_OPENAI_API_KEY ||
       runtimeEnv.CODEX_MODEL_PROFILE_XL_API_KEY ||
       runtimeEnv.CODEX_MODEL_PROFILE_M_API_KEY;
-    if (!hasKey) {
-      return { ok: false, reason: "no API key (OPENAI_API_KEY / AZURE_OPENAI_API_KEY)" };
+    if (hasKey) return { ok: true, reason: null };
+    // Check ~/.codex/config.toml — Codex CLI SDK reads auth env_key refs from there
+    try {
+      const configToml = resolve(homedir(), ".codex", "config.toml");
+      if (existsSync(configToml)) {
+        const tomlText = readFileSync(configToml, "utf8");
+        // Extract all env_key = "VAR_NAME" entries and check if any are set
+        for (const match of tomlText.matchAll(/env_key\s*=\s*"([^"]+)"/g)) {
+          if (runtimeEnv[match[1]]) return { ok: true, reason: null };
+        }
+      }
+    } catch {
+      // best effort — fall through to failure
     }
-    return { ok: true, reason: null };
+    return { ok: false, reason: "no API key (OPENAI_API_KEY / AZURE_OPENAI_API_KEY) and no satisfied env_key in ~/.codex/config.toml" };
   }
   if (name === "copilot") {
     // Copilot auth can come from multiple sources (OAuth manager, gh auth,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bosun",
-  "version": "0.40.16",
+  "version": "0.40.18",
   "description": "Bosun Autonomous Engineering — manages AI agent executors with failover, extremely powerful workflow builder, and a massive amount of included default workflow templates for autonomous engineering, creates PRs via Vibe-Kanban API, and sends Telegram notifications. Supports N executors with weighted distribution, multi-repo projects, and auto-setup.",
   "type": "module",
   "license": "Apache-2.0",

package/workflow/workflow-engine.mjs CHANGED Viewed

@@ -2675,7 +2675,59 @@ export class WorkflowEngine extends EventEmitter {
       console.log(`${TAG} Resuming ${runs.length} interrupted run(s)...`);
+      // ── Deduplicate by taskId: keep only the most recent run per task ────
+      // After N crash/restart cycles, N run entries accumulate for the same
+      // taskId. Resuming all of them causes competing workflow runs that race
+      // to claim the task → "claim was stolen" errors on every restart.
+      // Solution: pre-scan detail files, keep latest startedAt per taskId,
+      // and mark older duplicates as not-resumable before we even try them.
+      const runDetailCache = new Map(); // runId → parsed detail
+      const latestByTaskId = new Map(); // taskId → run entry (highest startedAt)
+      for (const run of runs) {
+        const dp = resolve(this.runsDir, `${run.runId}.json`);
+        if (!existsSync(dp)) continue;
+        try {
+          const d = JSON.parse(readFileSync(dp, "utf8"));
+          runDetailCache.set(run.runId, d);
+          const tid = d.data?.taskId || d.inputData?.taskId;
+          if (!tid) continue;
+          const prev = latestByTaskId.get(tid);
+          if (!prev || (run.startedAt || 0) >= (prev.startedAt || 0)) {
+            latestByTaskId.set(tid, run);
+          }
+        } catch {
+          /* unreadable detail — handled in the main loop below */
+        }
+      }
+      // Mark older duplicate runs as not-resumable before entering the loop
+      let dedupedCount = 0;
+      for (const run of runs) {
+        const d = runDetailCache.get(run.runId);
+        const tid = d?.data?.taskId || d?.inputData?.taskId;
+        if (!tid) continue;
+        const latest = latestByTaskId.get(tid);
+        if (latest && latest.runId !== run.runId) {
+          this._markRunUnresumable(run.runId, "duplicate_task_run");
+          dedupedCount++;
+        }
+      }
+      if (dedupedCount > 0) {
+        console.log(
+          `${TAG} Skipped ${dedupedCount} duplicate interrupted run(s) (kept latest per taskId)`,
+        );
+      }
       for (const run of runs) {
+        // Skip runs that were marked as duplicates above
+        const _runDetail = runDetailCache.get(run.runId);
+        const _tid = _runDetail?.data?.taskId || _runDetail?.inputData?.taskId;
+        if (_tid) {
+          const latest = latestByTaskId.get(_tid);
+          if (latest && latest.runId !== run.runId) continue;
+        }
         try {
           // Check if the workflow definition still exists
           const def = this.get(run.workflowId);
@@ -2693,7 +2745,8 @@ export class WorkflowEngine extends EventEmitter {
             continue;
           }
-          const detail = JSON.parse(readFileSync(detailPath, "utf8"));
+          // Reuse cached detail if available (already parsed above)
+          const detail = runDetailCache.get(run.runId) ?? JSON.parse(readFileSync(detailPath, "utf8"));
           const nodeStatuses = detail.nodeStatuses || {};
           const hasCompletedNodes = Object.values(nodeStatuses).some(
             (s) => s === NodeStatus.COMPLETED,

package/workspace/shared-state-manager.mjs CHANGED Viewed

@@ -131,15 +131,22 @@ async function loadRegistry(registryPath) {
     const registry = JSON.parse(content);
     // Validate structure
-    if (
-      !registry.version ||
-      !registry.tasks ||
-      typeof registry.tasks !== "object"
-    ) {
+    // Repair instead of wipe: preserve any valid task entries while fixing
+    // missing/invalid structural fields. Wiping on minor corruption was causing
+    // active claims to be lost, leading to cascading "claim was stolen" failures.
+    let repaired = false;
+    if (!registry.version) {
+      registry.version = REGISTRY_VERSION;
+      repaired = true;
+    }
+    if (!registry.tasks || typeof registry.tasks !== "object" || Array.isArray(registry.tasks)) {
+      registry.tasks = {};
+      repaired = true;
+    }
+    if (repaired) {
       console.warn(
-        "[SharedStateManager] Invalid registry structure, resetting",
+        "[SharedStateManager] Invalid registry structure, repaired (preserved existing task entries)",
       );
-      return createEmptyRegistry();
     }
     return registry;