npm - brainclaw - Versions diffs - 1.7.1 → 1.7.3 - Mend

brainclaw 1.7.1 → 1.7.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (28)

package/README.md +116 -94
package/dist/brainclaw-vscode.vsix +0 -0
package/dist/cli.js +25 -3
package/dist/commands/dispatch.js +2 -0
package/dist/commands/doctor.js +17 -0
package/dist/commands/harvest.js +124 -1
package/dist/commands/mcp.js +32 -8
package/dist/core/agent-capability.js +67 -0
package/dist/core/agent-inventory.js +54 -7
package/dist/core/agentrun-reconciler.js +126 -52
package/dist/core/coordination.js +10 -9
package/dist/core/dirty-scope.js +11 -5
package/dist/core/dispatcher.js +109 -29
package/dist/core/entity-operations.js +54 -1
package/dist/core/execution-adapters.js +32 -51
package/dist/core/execution.js +14 -8
package/dist/core/instruction-templates.js +5 -4
package/dist/core/runtime-signals.js +102 -0
package/dist/core/schema.js +18 -0
package/dist/core/spawn-check.js +125 -0
package/dist/core/worktree.js +146 -7
package/dist/facts.js +3 -3
package/dist/facts.json +2 -2
package/docs/cli.md +8 -4
package/docs/integrations/mcp.md +48 -15
package/docs/mcp-schema-changelog.md +16 -5
package/docs/playbooks/team/index.md +7 -5
package/package.json +1 -1

package/dist/core/agent-capability.js CHANGED Viewed

@@ -46,6 +46,10 @@ const PROFILES = {
         invoke_binary: 'claude',
         invoke_review_template: 'claude -p --allowedTools "Read,Glob,Grep" {prompt}',
         invoke_consult_template: 'claude -p --allowedTools "Read,Glob,Grep" {prompt}',
+        // pln#520 step 3: model is selectable via `--model` — no need for a
+        // per-model pseudo-identity. `claude-sonnet` below is now redundant
+        // (run `claude-code --model sonnet`) and kept only for back-compat.
+        model_flag: '--model',
     },
     'claude-sonnet': {
         name: 'claude-sonnet', category: 'code-agent', workflowModel: 'interactive',
@@ -323,6 +327,63 @@ export function getCapabilityProfile(name) {
     const resolved = resolveAgentAlias(name);
     return _customProfiles.get(resolved) ?? PROFILES[resolved];
 }
+/**
+ * pln#520 step 3 — concurrency is a resolvable execution-config value, NOT a
+ * structural constant baked into agent identity.
+ *
+ * The host resource a concurrency cap actually protects is the binary on the
+ * machine (its API quota / its RAM/CPU footprint), not the agent label.
+ * `resolveResourceKey` returns that shared key so callers count usage across
+ * every identity that drives one binary. This kills the can_dc4e4a11 bug:
+ * `claude-code` and `claude-sonnet` are the SAME `claude` binary on the SAME
+ * host but were counted separately (3 + 6 → up to 9 concurrent `claude`
+ * processes, oversubscribing the machine + API).
+ */
+export function resolveResourceKey(name) {
+    const profile = getCapabilityProfile(name);
+    return profile?.invoke_binary ?? resolveAgentAlias(name);
+}
+/**
+ * Resolve the concurrency limit for an agent. `Infinity` = unlimited.
+ *
+ * Resolution chain (highest priority first), decoupled from agent identity:
+ *   1. explicit `override` (e.g. `brainclaw dispatch --max-concurrency N`)
+ *   2. host opt-in cap via `BRAINCLAW_MAX_CONCURRENCY` (protect one machine / quota)
+ *   3. structural floor — agents that cannot run headless in parallel
+ *      (IDE / desktop agents, i.e. not CLI-spawnable) stay hard-capped at their
+ *      profile `max_concurrent_tasks` (you can't spawn N IDE windows headlessly)
+ *   4. default for parallelizable CLI agents: UNLIMITED. There is no arbitrary
+ *      per-identity throttle — the operator opts into a cap when they want one.
+ *
+ * When a finite cap applies it is enforced per host-binary resource
+ * (see `resolveResourceKey`), so all variants of one binary share the pool.
+ */
+export function resolveConcurrencyLimit(name, opts = {}) {
+    if (opts.override !== undefined && opts.override > 0)
+        return opts.override;
+    const envCap = Number(process.env.BRAINCLAW_MAX_CONCURRENCY);
+    if (Number.isFinite(envCap) && envCap > 0)
+        return envCap;
+    const profile = getCapabilityProfile(name);
+    if (!profile?.runtime?.canBeSpawnedCli)
+        return profile?.max_concurrent_tasks ?? 1;
+    return Infinity;
+}
+/** JSON-safe rendering of a concurrency limit: `Infinity` → `null` (= unlimited). */
+export function serializeConcurrencyLimit(limit) {
+    return Number.isFinite(limit) ? limit : null;
+}
+/**
+ * pln#520 step 3 — resolve the model for a dispatch, decoupled from agent
+ * identity. Chain (highest priority first): explicit override (e.g.
+ * `dispatch --model`) → lane model → identity model → profile default.
+ * Returns `undefined` when nothing in the chain specifies one (the agent's
+ * template default applies).
+ */
+export function resolveModel(name, opts = {}) {
+    const profile = getCapabilityProfile(name);
+    return opts.override ?? opts.lane ?? opts.identity ?? profile?.default_model;
+}
 /**
  * Escape a string for safe use as a double-quoted shell argument.
  * Escapes characters that have special meaning inside double-quotes
@@ -490,6 +551,12 @@ export function buildInvokeCommand(name, prompt, options = {}) {
     const rawTokens = parseTemplateString(templateStr);
     if (rawTokens.length === 0)
         return undefined;
+    // pln#520 step 3: inject the resolved model right after the binary so model
+    // choice is decoupled from agent identity. Only when the profile declares a
+    // `model_flag` and the template doesn't already pin a model (don't double it).
+    if (options.model && profile.model_flag && !rawTokens.includes(profile.model_flag)) {
+        rawTokens.splice(1, 0, profile.model_flag, options.model);
+    }
     const executable = rawTokens[0];
     const interpolatedTokens = rawTokens.slice(1).map((tok) => tok === '{prompt}' ? embeddedPrompt : tok);
     // ── 5. Build the args array ───────────────────────────────────────────────

package/dist/core/agent-inventory.js CHANGED Viewed

@@ -5,6 +5,14 @@ import { spawnSync } from 'node:child_process';
 import yaml from 'yaml';
 import { MEMORY_DIR } from './io.js';
 import { detectHostExecutionProfile, } from './execution-profile.js';
+import { getCapabilityProfile } from './agent-capability.js';
+/**
+ * trp#427 — cold-start CLI `--version` probes need headroom; a 3s timeout
+ * false-negatived claude-code on first launch. The spawnable check (binary on
+ * PATH) is the robust signal, so this only affects version-string capture
+ * latency, not the installed/spawnable decision.
+ */
+const VERSION_PROBE_TIMEOUT_MS = 8000;
 function tryCommand(command, args, timeout = 5000) {
     try {
         const r = spawnSync(command, args, { encoding: 'utf-8', timeout, windowsHide: true });
@@ -14,12 +22,40 @@ function tryCommand(command, args, timeout = 5000) {
         return { ok: false, stdout: '' };
     }
 }
+/**
+ * trp#427 — fast PATH resolution for a binary (no process launch, unlike a
+ * `--version` probe). Uses `where` (Windows) / `which` (POSIX).
+ */
+function isBinaryOnPath(binary) {
+    if (!binary)
+        return false;
+    try {
+        const cmd = process.platform === 'win32' ? 'where' : 'which';
+        const r = spawnSync(cmd, [binary], { encoding: 'utf-8', timeout: 3000, windowsHide: true });
+        return r.status === 0 && (r.stdout ?? '').trim().length > 0;
+    }
+    catch {
+        return false;
+    }
+}
+/**
+ * trp#427 — an agent is SPAWNABLE when its capability profile is CLI-spawnable,
+ * declares an invoke binary, and that binary resolves on PATH. Decoupled from
+ * the `--version` health probe so a slow cold-start CLI is never misreported as
+ * "not installed" / undispatchable.
+ */
+export function detectSpawnable(agentName) {
+    const profile = getCapabilityProfile(agentName);
+    if (!profile || !profile.runtime?.canBeSpawnedCli || !profile.invoke_binary)
+        return false;
+    return isBinaryOnPath(profile.invoke_binary);
+}
 const AGENT_DEFINITIONS = [
     {
         name: 'claude-code',
         detect: (_home, env) => {
             // Check if claude CLI is available
-            const cli = tryCommand('claude', ['--version'], 3000);
+            const cli = tryCommand('claude', ['--version'], VERSION_PROBE_TIMEOUT_MS);
             if (cli.ok) {
                 const ver = cli.stdout.trim().match(/(\d+\.\d+\.\d+)/)?.[1];
                 return { installed: true, method: 'claude CLI', version: ver };
@@ -81,7 +117,7 @@ const AGENT_DEFINITIONS = [
             if (fs.existsSync(codexDir)) {
                 return { installed: true, method: '~/.codex directory' };
             }
-            const cli = tryCommand('codex', ['--version'], 3000);
+            const cli = tryCommand('codex', ['--version'], VERSION_PROBE_TIMEOUT_MS);
             if (cli.ok) {
                 const ver = cli.stdout.trim().match(/(\d+\.\d+\.\d+)/)?.[1];
                 return { installed: true, method: 'codex CLI', version: ver };
@@ -252,7 +288,7 @@ const AGENT_DEFINITIONS = [
             if (fs.existsSync(path.join(home, '.gemini', 'antigravity'))) {
                 return { installed: true, method: '~/.gemini/antigravity directory' };
             }
-            const cli = tryCommand('gemini', ['--version'], 3000);
+            const cli = tryCommand('gemini', ['--version'], VERSION_PROBE_TIMEOUT_MS);
             if (cli.ok) {
                 return { installed: true, method: 'gemini CLI', version: cli.stdout.trim() };
             }
@@ -309,7 +345,7 @@ const AGENT_DEFINITIONS = [
             if (fs.existsSync(path.join(home, '.hermes'))) {
                 return { installed: true, method: '~/.hermes directory' };
             }
-            const cli = tryCommand('hermes', ['--version'], 3000);
+            const cli = tryCommand('hermes', ['--version'], VERSION_PROBE_TIMEOUT_MS);
             if (cli.ok) {
                 return { installed: true, method: 'hermes CLI', version: cli.stdout.trim() };
             }
@@ -332,14 +368,23 @@ const AGENT_DEFINITIONS = [
 /**
  * Detect ALL installed agents on this machine (not just the running one).
  */
-export function buildAgentInventory(homeDir = os.homedir(), env = process.env) {
+export function buildAgentInventory(homeDir = os.homedir(), env = process.env, opts = {}) {
+    const spawnableResolver = opts.spawnableResolver ?? detectSpawnable;
     const agents = AGENT_DEFINITIONS.map(def => {
         const detection = def.detect(homeDir, env);
+        const spawnable = spawnableResolver(def.name);
+        // trp#427: an agent brainclaw can spawn (invoke binary on PATH) IS installed,
+        // even when the cold-start `--version` probe timed out. This decouples the
+        // dispatch decision (getInstalledAgentNames) from probe latency.
+        const installed = detection.installed || spawnable;
         return {
             name: def.name,
-            installed: detection.installed,
-            detection_method: detection.method,
+            installed,
+            detection_method: detection.installed
+                ? detection.method
+                : (spawnable ? 'spawnable: invoke binary on PATH' : detection.method),
             version: detection.version,
+            spawnable,
             models: def.models,
             native_tools: def.native_tools,
             mcp_support: def.mcp_support,
@@ -415,6 +460,8 @@ export function renderAgentInventorySummary(inventory) {
             features.push('Rules');
         if (agent.hooks_support)
             features.push('Hooks');
+        if (agent.spawnable)
+            features.push('Spawnable');
         lines.push(`  Features: ${features.join(', ') || 'none'}`);
         if (agent.instruction_file) {
             lines.push(`  Instructions: ${agent.instruction_file}`);

package/dist/core/agentrun-reconciler.js CHANGED Viewed

@@ -38,6 +38,7 @@ import { loadClaim } from './claims.js';
 import { loadAssignment } from './assignments.js';
 import { createRuntimeEvent } from './events.js';
 import { nowISO } from './ids.js';
+import { readHeartbeat, readLogTail, signalExists } from './runtime-signals.js';
 // ── Constants ──────────────────────────────────────────────────────────────
 /**
  * Minimum age before a run is eligible for reconciliation. Below this, the
@@ -52,6 +53,11 @@ export const DEFAULT_HEALTH_CHECK_GRACE_MS = 60_000;
 export const DEFAULT_STALE_AFTER_MS = 30 * 60_000;
 export const DEFAULT_DEAD_PID_READ_SWEEP_AGE_MS = 5 * 60_000;
 export const DEFAULT_DEAD_PID_READ_SWEEP_LIMIT = 50;
+/**
+ * pln#520 step 1 — a heartbeat older than this (with no completion signal) means
+ * the worker reached its loop then went silent: `stalled`. Default 10 min.
+ */
+export const DEFAULT_HEARTBEAT_STALE_MS = 10 * 60_000;
 const TERMINAL_STATUSES = new Set([
     'completed', 'failed', 'cancelled', 'timed_out', 'interrupted',
 ]);
@@ -152,15 +158,51 @@ export function collectEvidence(run, cwd, options) {
     }
     catch { /* defensive */ }
     const process_alive = isProcessAlive(run.pid);
-    return { age_ms, has_post_start_commit, claim_released, assignment_completed, process_alive };
+    // pln#520 step 1 — sentinel evidence. Signals live under the project
+    // coordination dir (the dispatcher's ackRoot), which is `cwd` for the
+    // reconciler. Keyed by assignment_id.
+    const signalRoot = cwd ?? process.cwd();
+    let completed_signal = false;
+    let failed_signal = false;
+    let heartbeat_exists = false;
+    let heartbeat_age_ms;
+    try {
+        completed_signal = signalExists(signalRoot, run.assignment_id, 'completed');
+        failed_signal = signalExists(signalRoot, run.assignment_id, 'failed');
+        const hb = readHeartbeat(signalRoot, run.assignment_id);
+        heartbeat_exists = hb.exists;
+        if (hb.exists && hb.mtimeMs !== undefined)
+            heartbeat_age_ms = now - hb.mtimeMs;
+    }
+    catch { /* defensive */ }
+    return {
+        age_ms, has_post_start_commit, claim_released, assignment_completed, process_alive,
+        completed_signal, failed_signal, heartbeat_exists, heartbeat_age_ms,
+    };
 }
 function anyCompletionEvidence(evidence) {
-    return evidence.has_post_start_commit
+    return evidence.completed_signal
+        || evidence.has_post_start_commit
         || evidence.claim_released
         || evidence.assignment_completed;
 }
+/**
+ * pln#520 step 1 — a short tail of the captured stderr (or stdout) for
+ * failed_silent / stalled diagnostics, so the verdict carries the worker's
+ * last words instead of just a status code.
+ */
+function logTailSuffix(run, cwd) {
+    const root = cwd ?? process.cwd();
+    const tail = (readLogTail(root, run.assignment_id, 'stderr', 500).trim()
+        || readLogTail(root, run.assignment_id, 'stdout', 500).trim());
+    if (!tail)
+        return '';
+    return ` | log tail: ${tail.replace(/\s+/g, ' ').slice(0, 300)}`;
+}
 function describeEvidence(evidence) {
     const reasons = [];
+    if (evidence.completed_signal)
+        reasons.push('wrapper wrote completed sentinel');
     if (evidence.has_post_start_commit)
         reasons.push('post-start commit on worktree branch');
     if (evidence.claim_released)
@@ -231,6 +273,7 @@ export function reconcileAgentRun(runId, cwd, options = {}) {
         const evidence = {
             age_ms: 0, has_post_start_commit: false, claim_released: false,
             assignment_completed: false, process_alive: undefined,
+            completed_signal: false, failed_signal: false, heartbeat_exists: false,
         };
         return {
             run_id: runId, action: 'no_op', reason: 'run not found', evidence,
@@ -280,18 +323,12 @@ export function reconcileAgentRun(runId, cwd, options = {}) {
             };
         }
     }
-    // Failure inference: stale + dead process + no evidence.
-    if (evidence.age_ms >= stale && evidence.process_alive === false) {
+    // pln#520 step 1 — sentinel-based failure (fast + trustworthy, pid-independent).
+    const heartbeatStale = options.heartbeatStaleMs ?? DEFAULT_HEARTBEAT_STALE_MS;
+    const failHere = (reason) => {
         try {
-            transitionAgentRun(runId, 'failed', {
-                actor,
-                status_reason: 'silent_termination_no_evidence',
-            }, cwd);
-            return {
-                run_id: runId, action: 'inferred_failed',
-                reason: 'silent_termination_no_evidence',
-                evidence, previous_status, current_status: 'failed',
-            };
+            transitionAgentRun(runId, 'failed', { actor, status_reason: reason }, cwd);
+            return { run_id: runId, action: 'inferred_failed', reason, evidence, previous_status, current_status: 'failed' };
         }
         catch (err) {
             return {
@@ -300,6 +337,26 @@ export function reconcileAgentRun(runId, cwd, options = {}) {
                 evidence, previous_status, current_status: run.status,
             };
         }
+    };
+    // `failed` sentinel — the wrapper saw a non-zero agent exit.
+    if (evidence.failed_signal) {
+        return failHere(`failed_silent: wrapper reported non-zero exit${logTailSuffix(run, cwd)}`);
+    }
+    // Heartbeat present but stale → reached the loop then went silent.
+    if (evidence.heartbeat_exists && evidence.heartbeat_age_ms !== undefined && evidence.heartbeat_age_ms >= heartbeatStale) {
+        return failHere(`stalled: heartbeat last seen ${Math.round(evidence.heartbeat_age_ms / 1000)}s ago${logTailSuffix(run, cwd)}`);
+    }
+    // Fresh heartbeat → alive; trust it over the untrustworthy wrapper pid.
+    if (evidence.heartbeat_exists) {
+        return {
+            run_id: runId, action: 'no_op',
+            reason: `heartbeat fresh (${Math.round((evidence.heartbeat_age_ms ?? 0) / 1000)}s) — worker alive, pid untrusted`,
+            evidence, previous_status, current_status: run.status,
+        };
+    }
+    // Failure inference: stale + dead process + no evidence.
+    if (evidence.age_ms >= stale && evidence.process_alive === false) {
+        return failHere('silent_termination_no_evidence');
     }
     // Health-check window: past grace, not yet stale, no evidence either way.
     // Emit a non-mutating event so callers see the uncertainty without
@@ -339,6 +396,7 @@ export function reconcileDeadPidRunningAgentRunAtRead(runId, cwd, options = {})
         const evidence = {
             age_ms: 0, has_post_start_commit: false, claim_released: false,
             assignment_completed: false, process_alive: undefined,
+            completed_signal: false, failed_signal: false, heartbeat_exists: false,
         };
         return {
             run_id: runId, action: 'no_op', reason: 'run not found', evidence,
@@ -352,19 +410,25 @@ export function reconcileDeadPidRunningAgentRunAtRead(runId, cwd, options = {})
             evidence, previous_status: run.status, current_status: run.status,
         };
     }
-    if (evidence.process_alive !== false) {
-        return {
-            run_id: run.id, action: 'no_op',
-            reason: evidence.process_alive === true ? 'process alive' : 'pid liveness unknown',
-            evidence, previous_status: run.status, current_status: run.status,
-        };
-    }
-    // pid reads dead — but the tracked pid is NOT trustworthy (see doc above),
-    // so a bare dead pid NEVER cancels. Evidence of real work wins; otherwise
-    // surface the uncertainty non-destructively and leave the run `running` for
-    // reconcileAgentRun's stale-threshold path to fail it only after a fair,
-    // evidence-based delay.
     const actor = options.actor ?? 'reconciler';
+    const stale = options.staleAfterMs ?? DEFAULT_STALE_AFTER_MS;
+    const heartbeatStale = options.heartbeatStaleMs ?? DEFAULT_HEARTBEAT_STALE_MS;
+    const failRun = (reason) => {
+        try {
+            transitionAgentRun(run.id, 'failed', { actor, status_reason: reason }, cwd);
+            return { run_id: run.id, action: 'inferred_failed', reason, evidence, previous_status: run.status, current_status: 'failed' };
+        }
+        catch (err) {
+            return {
+                run_id: run.id, action: 'no_op',
+                reason: `failure transition rejected: ${err instanceof Error ? err.message : String(err)}`,
+                evidence, previous_status: run.status, current_status: run.status,
+            };
+        }
+    };
+    // ── pln#520 step 1: SENTINELS are authoritative, independent of the
+    // untrustworthy wrapper pid. Check them first. ──────────────────────────
+    // 1. Completion evidence (mechanical `completed` sentinel or work evidence).
     if (anyCompletionEvidence(evidence)) {
         try {
             transitionAgentRun(run.id, 'completed', {
@@ -385,33 +449,43 @@ export function reconcileDeadPidRunningAgentRunAtRead(runId, cwd, options = {})
             };
         }
     }
-    // Stale + provably dead + still no evidence -> genuine silent failure. This
-    // MUST converge HERE: the canonical read path (entity-operations.ts) and the
-    // MCP pre-read sweep route `running` runs through this function, never
-    // through reconcileAgentRun, so deferring would leave a crashed run `running`
-    // forever (trp#292). The 30-min stale window — vs the immediate cancel before
-    // pln#520 — gives a worker behind an untrusted pid ample time to leave
-    // evidence first. Reported as `failed` (it died), not `cancelled`.
-    const stale = options.staleAfterMs ?? DEFAULT_STALE_AFTER_MS;
+    // 2. `failed` sentinel — the wrapper saw a non-zero agent exit. This is the
+    // FAST, TRUSTWORTHY failed_silent detector (vs the pid heuristic that caused
+    // can_f792cacd false negatives). Carries the captured log tail.
+    if (evidence.failed_signal) {
+        return failRun(`failed_silent: wrapper reported non-zero exit${logTailSuffix(run, cwd)}`);
+    }
+    // 3. Heartbeat present but STALE → the worker reached its loop then went
+    // silent (e.g. hung). pid-independent: a hung worker keeps the wrapper alive.
+    if (evidence.heartbeat_exists && evidence.heartbeat_age_ms !== undefined && evidence.heartbeat_age_ms >= heartbeatStale) {
+        return failRun(`stalled: heartbeat last seen ${Math.round(evidence.heartbeat_age_ms / 1000)}s ago${logTailSuffix(run, cwd)}`);
+    }
+    // 4. Fresh heartbeat → the worker is alive and working; trust it OVER the
+    // (untrustworthy) wrapper pid. This is the can_f792cacd fix: never fail a
+    // live, heartbeating worker just because its wrapper pid reads dead.
+    if (evidence.heartbeat_exists) {
+        return {
+            run_id: run.id, action: 'no_op',
+            reason: `heartbeat fresh (${Math.round((evidence.heartbeat_age_ms ?? 0) / 1000)}s) — worker alive, pid untrusted`,
+            evidence, previous_status: run.status, current_status: run.status,
+        };
+    }
+    // ── No sentinel, no heartbeat: fall back to the pid-conservative path. The
+    // wrapper writes completed/failed on any normal exit, so reaching here means
+    // the worker has not exited and never heartbeat. Do NOT fast-fail on a dead
+    // pid (it's the wrapper's, not the worker's). ──────────────────────────────
+    if (evidence.process_alive !== false) {
+        return {
+            run_id: run.id, action: 'no_op',
+            reason: evidence.process_alive === true ? 'process alive' : 'pid liveness unknown',
+            evidence, previous_status: run.status, current_status: run.status,
+        };
+    }
+    // pid dead + no sentinel + no heartbeat: only converge after the long stale
+    // window (trp#292 — must converge HERE since the read path never routes
+    // through reconcileAgentRun), giving an untrusted-pid worker ample time.
     if (evidence.age_ms >= stale) {
-        try {
-            transitionAgentRun(run.id, 'failed', {
-                actor,
-                status_reason: 'silent_termination_no_evidence',
-            }, cwd);
-            return {
-                run_id: run.id, action: 'inferred_failed',
-                reason: 'silent_termination_no_evidence',
-                evidence, previous_status: run.status, current_status: 'failed',
-            };
-        }
-        catch (err) {
-            return {
-                run_id: run.id, action: 'no_op',
-                reason: `failure transition rejected: ${err instanceof Error ? err.message : String(err)}`,
-                evidence, previous_status: run.status, current_status: run.status,
-            };
-        }
+        return failRun('silent_termination_no_evidence');
     }
     emitUnverifiedEvent(run, evidence, actor, cwd);
     return {
@@ -457,7 +531,7 @@ export function reconcileAllOpenRuns(cwd, filter = {}, options = {}) {
             catch {
                 results.push({
                     run_id: run.id, action: 'no_op', reason: 'reconcile threw — skipped',
-                    evidence: { age_ms: 0, has_post_start_commit: false, claim_released: false, assignment_completed: false, process_alive: undefined },
+                    evidence: { age_ms: 0, has_post_start_commit: false, claim_released: false, assignment_completed: false, process_alive: undefined, completed_signal: false, failed_signal: false, heartbeat_exists: false },
                     previous_status: run.status, current_status: run.status,
                 });
             }

package/dist/core/coordination.js CHANGED Viewed

@@ -11,7 +11,7 @@ import { inferProjectFromTarget, loadInstructions, resolveInstructions } from '.
 import { buildReputationSummary, findAgentReputationSummary } from './reputation.js';
 import { listRuntimeNotes } from './runtime.js';
 import { loadState, persistState } from './state.js';
-import { getCapabilityProfile } from './agent-capability.js';
+import { resolveConcurrencyLimit, serializeConcurrencyLimit } from './agent-capability.js';
 import { loadAllSessions } from './identity.js';
 import { countActionable } from './messaging.js';
 import { listCandidates } from './candidates.js';
@@ -176,8 +176,7 @@ function buildOtherAgentsSummary(claims, notes, currentAgent, cwd) {
     for (const identity of listAgentIdentities(cwd)) {
         if (identity.agent_name === currentAgent)
             continue;
-        const profile = getCapabilityProfile(identity.agent_name);
-        const maxTasks = profile?.max_concurrent_tasks ?? 1;
+        const limit = serializeConcurrencyLimit(resolveConcurrencyLimit(identity.agent_name));
         agentMap.set(identity.agent_name, {
             name: identity.agent_name,
             trust_level: identity.trust_level ?? 'contributor',
@@ -185,23 +184,25 @@ function buildOtherAgentsSummary(claims, notes, currentAgent, cwd) {
             scopes: [],
             has_open_session: false,
             instance_count: sessionCounts.get(identity.agent_name) ?? 0,
-            max_tasks: maxTasks,
-            slots_remaining: maxTasks, // will be reduced when claims are counted
+            max_tasks: limit,
+            slots_remaining: limit, // will be reduced when claims are counted (null stays unlimited)
         });
     }
     // Enrich with active claims
     for (const claim of claims) {
         if (claim.agent === currentAgent)
             continue;
-        const profile = getCapabilityProfile(claim.agent);
-        const maxTasks = profile?.max_concurrent_tasks ?? 1;
+        const limit = serializeConcurrencyLimit(resolveConcurrencyLimit(claim.agent));
         const existing = agentMap.get(claim.agent) ?? {
             name: claim.agent, trust_level: 'contributor', claim_count: 0, scopes: [],
             has_open_session: false, instance_count: sessionCounts.get(claim.agent) ?? 0,
-            max_tasks: maxTasks, slots_remaining: maxTasks,
+            max_tasks: limit, slots_remaining: limit,
         };
         existing.claim_count++;
-        existing.slots_remaining = Math.max(0, existing.max_tasks - existing.claim_count);
+        // null max_tasks = unlimited → slots stay unlimited.
+        existing.slots_remaining = existing.max_tasks === null
+            ? null
+            : Math.max(0, existing.max_tasks - existing.claim_count);
         existing.scopes.push(claim.scope);
         if (!existing.last_active || claim.created_at > existing.last_active) {
             existing.last_active = claim.created_at;

package/dist/core/dirty-scope.js CHANGED Viewed

@@ -44,13 +44,19 @@ function defaultRunGit(cwd, args) {
         return { ok: false, stdout: '' };
     }
 }
-/** True for coordination/store paths that are dirty as a side effect of dispatching. */
+/**
+ * Top-level directories that are dirty as a side effect of coordination /
+ * agent tooling, never part of a dispatch's code scope:
+ *   - `.brainclaw`, `.git` — coordination store + VCS metadata.
+ *   - `.claude`, `.cursor`, `.codex` — per-agent local config (trp#371). A
+ *     worker leaving these dirty (Claude Code settings, etc.) must not block an
+ *     otherwise-safe dispatch of an unrelated code scope.
+ */
+const SYSTEM_DIRTY_DIRS = ['.brainclaw', '.git', '.claude', '.cursor', '.codex'];
+/** True for coordination/store/agent-config paths that are dirty as a side effect of tooling. */
 export function isSystemDirtyPath(p) {
     const norm = p.replace(/\\/g, '/');
-    return norm === '.brainclaw'
-        || norm.startsWith('.brainclaw/')
-        || norm === '.git'
-        || norm.startsWith('.git/');
+    return SYSTEM_DIRTY_DIRS.some((dir) => norm === dir || norm.startsWith(dir + '/'));
 }
 /**
  * Parse `git status --porcelain=v1 -z` output into a flat list of paths.