brainclaw 1.7.1 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,7 +43,8 @@ import { memoryDir } from './io.js';
43
43
  import { loadVersionedJsonFile } from './migration.js';
44
44
  import fs from 'node:fs';
45
45
  import path from 'node:path';
46
- import { buildInvokeCommand, resolveBriefMode, getCapabilityProfile } from './agent-capability.js';
46
+ import { buildInvokeCommand, resolveBriefMode, getCapabilityProfile, resolveConcurrencyLimit, resolveResourceKey, resolveModel, serializeConcurrencyLimit } from './agent-capability.js';
47
+ import { getRuntimeSignalPath } from './runtime-signals.js';
47
48
  import { attemptExecution } from './execution.js';
48
49
  import { createAssignment, transitionAssignment, generateAssignmentId, patchAssignmentMessageId } from './assignments.js';
49
50
  import { createAgentRun, transitionAgentRun } from './agentruns.js';
@@ -163,13 +164,20 @@ export function analyzeSequence(cwd) {
163
164
  .map(a => a.agent_name);
164
165
  const agent_capacity = allAgentNames.map(agent => {
165
166
  const active_claims = agentClaimCounts.get(agent) ?? 0;
166
- const profile = getCapabilityProfile(agent);
167
- const max_tasks = profile?.max_concurrent_tasks ?? 1;
168
- return { agent, active_claims, max_tasks, slots_remaining: Math.max(0, max_tasks - active_claims) };
167
+ // pln#520 step 3: limit is resolved (default unlimited for parallelizable
168
+ // CLI agents), not the per-name structural constant.
169
+ const limit = resolveConcurrencyLimit(agent);
170
+ const slots = Number.isFinite(limit) ? Math.max(0, limit - active_claims) : Infinity;
171
+ return {
172
+ agent,
173
+ active_claims,
174
+ max_tasks: serializeConcurrencyLimit(limit),
175
+ slots_remaining: serializeConcurrencyLimit(slots),
176
+ };
169
177
  });
170
- // Available agents: those with remaining capacity (slots_remaining > 0)
178
+ // Available agents: unlimited (null) or with remaining capacity (> 0).
171
179
  const available_agents = agent_capacity
172
- .filter(a => a.slots_remaining > 0)
180
+ .filter(a => a.slots_remaining === null || a.slots_remaining > 0)
173
181
  .map(a => a.agent);
174
182
  return { sequence, ready, active, blocked, done, available_agents, agent_capacity };
175
183
  }
@@ -188,6 +196,37 @@ export function analyzeSequence(cwd) {
188
196
  * Protocol section IS useful to them — `resolveBriefMode` was updated to
189
197
  * return 'full' for that combination.
190
198
  */
199
/**
 * pln#520 step 5 — renders the "liveness" block of a generated brief.
 *
 * The block is an imperative, do-this-first instruction: the worker is told
 * to write a `work_loop_reached` heartbeat with a plain shell redirect (no
 * MCP call needed) and to keep refreshing it while it works. The text also
 * tells the worker that completion is recorded by the spawn wrapper, so it
 * only has to maintain the heartbeat.
 *
 * @param {string} cwd - Root handed to `getRuntimeSignalPath` to locate the
 *   heartbeat sentinel.
 * @param {string} assignmentId - Assignment the heartbeat is keyed by; it is
 *   embedded in both the sentinel path and the heartbeat payload.
 * @returns {string} Markdown text for the section, ending with a newline.
 */
export function buildLivenessSection(cwd, assignmentId) {
    const heartbeatFile = getRuntimeSignalPath(cwd, assignmentId, 'heartbeat');
    // cmd.exe has no printf/date, so Windows writes only the marker + id;
    // elsewhere the payload also carries an epoch timestamp.
    let heartbeatCommand;
    if (process.platform === 'win32') {
        heartbeatCommand = `echo work_loop_reached ${assignmentId} > "${heartbeatFile}"`;
    }
    else {
        heartbeatCommand = `printf 'work_loop_reached ${assignmentId} %s' "$(date +%s)" > "${heartbeatFile}"`;
    }
    const lines = [];
    lines.push('## Liveness — DO THIS FIRST (step 0)');
    lines.push('Before ANY other action, prove you reached your work loop by writing a heartbeat,');
    lines.push('then refresh it every few minutes while you work. brainclaw uses this to tell');
    lines.push('"alive and working" from "spawned but dead" — a missing/stale heartbeat marks the');
    lines.push('run stalled. Completion is recorded automatically by the spawn wrapper; you do NOT');
    lines.push('need to write a completed/failed signal.');
    lines.push('');
    lines.push('```sh');
    lines.push(heartbeatCommand);
    lines.push('```');
    lines.push(`Heartbeat file (absolute, writable): ${heartbeatFile}`);
    // Trailing empty entry so the joined section ends with a newline.
    lines.push('');
    return lines.join('\n');
}
191
230
  export function buildProtocolSection(options) {
192
231
  const parts = [];
193
232
  parts.push('## Protocol');
@@ -305,6 +344,12 @@ export function generateBrief(plan, item, cwd, briefMode, options) {
305
344
  if (plan.estimated_effort)
306
345
  parts.push(`Estimated effort: ${plan.estimated_effort} minutes`);
307
346
  parts.push('');
347
+ // pln#520 step 5 — liveness heartbeat instruction, first actionable block so
348
+ // the worker writes work_loop_reached before anything else. Only when an
349
+ // assignment id is known (the heartbeat is keyed by it).
350
+ if (options?.assignmentId) {
351
+ parts.push(buildLivenessSection(cwd, options.assignmentId));
352
+ }
308
353
  // Steps if any
309
354
  if (plan.steps?.length) {
310
355
  parts.push('## Steps');
@@ -421,14 +466,18 @@ export function scoreAgents(agentPool, plan, activeClaims, cycleAssignments) {
421
466
  const canExecute = profile?.role_capabilities.includes('execute') ?? false;
422
467
  const canSpawn = profile?.runtime.canBeSpawnedCli ?? false;
423
468
  const capability = canExecute ? (canSpawn ? 1.0 : 0.5) : 0.1;
424
- // Factor 3: Availability graduated by utilization (claims / max_concurrent_tasks)
425
- // Include in-cycle assignments so load-balance works within a single dispatch call
469
+ // Factor 3 & 4: Availability + load balance.
470
+ // pln#520 step 3: these are based on the agent's RAW load (active claims +
471
+ // in-cycle assignments), decoupled from any concurrency cap. Dividing by the
472
+ // cap (as before) made every agent look identically idle once concurrency
473
+ // went unlimited, collapsing load-balancing — work piled onto the single
474
+ // top-scored agent. A cap-independent load fraction keeps spreading work to
475
+ // the least-busy agent whether or not a cap is set. The hard cap is enforced
476
+ // separately by the capacity guard in the dispatch loop.
426
477
  const agentClaims = (claimCounts.get(agent) ?? 0) + (cycleAssignments?.get(agent) ?? 0);
427
- const maxTasks = profile?.max_concurrent_tasks ?? 1;
428
- const utilization = Math.min(1.0, agentClaims / maxTasks);
429
- const availability = 1.0 - (utilization * 0.5); // range [0.5, 1.0]
430
- // Factor 4: Load balance — normalized by agent's capacity, not raw claim count
431
- const load_balance = 1.0 - utilization;
478
+ const loadFraction = agentClaims / (agentClaims + 1); // 0 when idle, →1 as load grows
479
+ const availability = 1.0 - loadFraction * 0.5; // range (0.5, 1.0]
480
+ const load_balance = 1.0 - loadFraction; // range (0, 1]
432
481
  const score = preference * W_PREFERENCE +
433
482
  capability * W_CAPABILITY +
434
483
  availability * W_AVAILABILITY +
@@ -438,6 +487,20 @@ export function scoreAgents(agentPool, plan, activeClaims, cycleAssignments) {
438
487
  }
439
488
  // Re-export checkActiveInstance for consumers who import from dispatcher
440
489
  export { checkActiveInstance } from './execution.js';
490
/**
 * pln#520 step 3 — sums this cycle's assignment counts over every agent
 * identity that resolves to the same resource key as `resourceKey`
 * (e.g. claude-code + claude-sonnet both map to `claude`), so a concurrency
 * cap can be applied per resource rather than per agent name.
 *
 * @param {Map<string, number> | undefined | null} cycleAssignments - Agent
 *   name → number of assignments made so far in the current dispatch cycle.
 *   A missing map counts as "nothing assigned yet", matching how
 *   `scoreAgents` treats the same argument as optional.
 * @param {string} resourceKey - Resource key to match, as produced by
 *   `resolveResourceKey`.
 * @returns {number} Total in-cycle assignments charged to that resource.
 */
function countCycleByResource(cycleAssignments, resourceKey) {
    // Tolerate an absent map instead of throwing on iteration.
    if (!cycleAssignments) {
        return 0;
    }
    let total = 0;
    for (const [agent, count] of cycleAssignments) {
        if (resolveResourceKey(agent) === resourceKey) {
            total += count;
        }
    }
    return total;
}
441
504
  export function selectWorktreeBaseForReadyLane(item, analysis) {
442
505
  const hardAfter = item.hard_after ?? [];
443
506
  if (hardAfter.length === 0)
@@ -503,13 +566,17 @@ export async function dispatch(options, cwd) {
503
566
  continue; // truly active — skip
504
567
  // Claim released but message not archived: stale assignment, allow re-dispatch
505
568
  }
506
- // Claim-based capacity guard: check claims (existing + this cycle) against max_concurrent_tasks.
507
- // This is the authoritative capacity check covers both options.agents and analysis.available_agents paths.
508
- const existingClaims = allActiveClaims.filter(c => c.agent === candidate.agent).length;
509
- const inCycleCount = cycleAssignments.get(candidate.agent) ?? 0;
510
- const maxTasks = getCapabilityProfile(candidate.agent)?.max_concurrent_tasks ?? 1;
511
- if (existingClaims + inCycleCount >= maxTasks) {
512
- result.warnings.push(`${candidate.agent}: at capacity (${existingClaims + inCycleCount}/${maxTasks} claims)`);
569
+ // Claim-based capacity guard (pln#520 step 3): count usage per host-binary
570
+ // resource (claude-code + claude-sonnet share `claude`), compare against the
571
+ // resolved limit (default unlimited — no arbitrary per-identity throttle).
572
+ // This is the authoritative capacity check — covers both options.agents and
573
+ // analysis.available_agents paths.
574
+ const resourceKey = resolveResourceKey(candidate.agent);
575
+ const existingClaims = allActiveClaims.filter(c => resolveResourceKey(c.agent) === resourceKey).length;
576
+ const inCycleCount = countCycleByResource(cycleAssignments, resourceKey);
577
+ const limit = resolveConcurrencyLimit(candidate.agent, { override: options.maxConcurrency });
578
+ if (existingClaims + inCycleCount >= limit) {
579
+ result.warnings.push(`${candidate.agent}: at capacity (${existingClaims + inCycleCount}/${limit} ${resourceKey} slots)`);
513
580
  continue; // try next agent
514
581
  }
515
582
  targetAgent = candidate.agent;
@@ -561,7 +628,7 @@ export async function dispatch(options, cwd) {
561
628
  if (options.dryRun) {
562
629
  const briefMode = resolveBriefMode(targetAgent);
563
630
  const brief = generateBrief(readyItem.plan, readyItem.item, cwd, briefMode, { claimId, worktreePath });
564
- const invokeCmd = buildInvokeCommand(targetAgent, brief);
631
+ const invokeCmd = buildInvokeCommand(targetAgent, brief, { model: resolveModel(targetAgent, { override: options.model }) });
565
632
  if (invokeCmd) {
566
633
  const cmdPrefix = buildEnvPrefix(claimId);
567
634
  result.commands.push({ agent: targetAgent, lane: readyItem.lane, command: `${cmdPrefix}${invokeCmd.bashCommand}`, shell: process.platform === 'win32' ? 'cmd' : (invokeCmd.shell ? 'bash' : 'sh') });
@@ -571,9 +638,10 @@ export async function dispatch(options, cwd) {
571
638
  result.messages_sent.push(deliveryEntry);
572
639
  assigned++;
573
640
  cycleAssignments.set(targetAgent, (cycleAssignments.get(targetAgent) ?? 0) + 1);
574
- const dryExisting = allActiveClaims.filter(c => c.agent === targetAgent).length;
575
- const dryCycle = cycleAssignments.get(targetAgent) ?? 0;
576
- const dryMax = getCapabilityProfile(targetAgent)?.max_concurrent_tasks ?? 1;
641
+ const dryResourceKey = resolveResourceKey(targetAgent);
642
+ const dryExisting = allActiveClaims.filter(c => resolveResourceKey(c.agent) === dryResourceKey).length;
643
+ const dryCycle = countCycleByResource(cycleAssignments, dryResourceKey);
644
+ const dryMax = resolveConcurrencyLimit(targetAgent, { override: options.maxConcurrency });
577
645
  if (dryExisting + dryCycle >= dryMax) {
578
646
  const idx = agentPool.indexOf(targetAgent);
579
647
  if (idx >= 0)
@@ -616,7 +684,7 @@ export async function dispatch(options, cwd) {
616
684
  agent: targetAgent,
617
685
  });
618
686
  // Step 3: Build invoke command
619
- const invokeCmd = buildInvokeCommand(targetAgent, brief);
687
+ const invokeCmd = buildInvokeCommand(targetAgent, brief, { model: resolveModel(targetAgent, { override: options.model }) });
620
688
  if (invokeCmd) {
621
689
  const cmdPrefix = buildEnvPrefix(claimId);
622
690
  result.commands.push({
@@ -707,10 +775,12 @@ export async function dispatch(options, cwd) {
707
775
  assigned++;
708
776
  // Track assignments this cycle for multi-slot capacity
709
777
  cycleAssignments.set(targetAgent, (cycleAssignments.get(targetAgent) ?? 0) + 1);
710
- // Remove agent from pool only when at capacity (existing claims + this cycle's assignments)
711
- const existingClaims = allActiveClaims.filter(c => c.agent === targetAgent).length;
712
- const cycleCount = cycleAssignments.get(targetAgent) ?? 0;
713
- const maxTasks = getCapabilityProfile(targetAgent)?.max_concurrent_tasks ?? 1;
778
+ // Remove agent from pool only when at capacity, counted per host-binary
779
+ // resource against the resolved limit (pln#520 step 3).
780
+ const liveResourceKey = resolveResourceKey(targetAgent);
781
+ const existingClaims = allActiveClaims.filter(c => resolveResourceKey(c.agent) === liveResourceKey).length;
782
+ const cycleCount = countCycleByResource(cycleAssignments, liveResourceKey);
783
+ const maxTasks = resolveConcurrencyLimit(targetAgent, { override: options.maxConcurrency });
714
784
  if (existingClaims + cycleCount >= maxTasks) {
715
785
  const idx = agentPool.indexOf(targetAgent);
716
786
  if (idx >= 0)
@@ -21,12 +21,13 @@ import { deleteAssignment, listAssignments, loadAssignment, saveAssignment, tran
21
21
  import { listAgentRuns } from './agentruns.js';
22
22
  import { reconcileAgentRun, reconcileDeadPidRunningAgentRunAtRead, TERMINAL_STATUSES } from './agentrun-reconciler.js';
23
23
  import { deleteRuntimeNote, listRuntimeNotes, saveRuntimeNote, } from './runtime.js';
24
+ import { createSequence, deleteSequence, listSequences, updateSequence, } from './sequence.js';
24
25
  import { createConstraint, createDecision, createTrap, } from './operations/memory-write.js';
25
26
  import { deleteMemoryItem, findMemoryItemInChain, updateMemoryItem, } from './operations/memory-mutation.js';
26
27
  import { createPlan, deletePlan, updatePlan, } from './operations/plan.js';
27
28
  import { ENTITY_REGISTRY, isValidTransition, } from './entity-registry.js';
28
29
  import { generateId } from './ids.js';
29
- import { CandidateTypeSchema, ConstraintCategorySchema, DecisionOutcomeSchema, MemoryVisibilitySchema, PlanTypeEnumSchema, PrioritySchema, RuntimeNoteTypeSchema, SeveritySchema, } from './schema.js';
30
+ import { CandidateTypeSchema, ConstraintCategorySchema, DecisionOutcomeSchema, MemoryVisibilitySchema, PlanTypeEnumSchema, PrioritySchema, RuntimeNoteTypeSchema, SequenceStatusSchema, SeveritySchema, } from './schema.js';
30
31
  /**
31
32
  * Default provenance stamp applied on create when the caller does not
32
33
  * supply one. `user` kind with whatever author is in the payload; the
@@ -134,6 +135,7 @@ function loadAll(name, cwd) {
134
135
  case 'handoff': return loadState(cwd).open_handoffs;
135
136
  case 'candidate': return listCandidates(undefined, cwd);
136
137
  case 'runtime_note': return listRuntimeNotes(undefined, cwd);
138
+ case 'sequence': return listSequences(cwd);
137
139
  case 'claim': return listClaims(cwd);
138
140
  case 'action': return listActionRequired(cwd);
139
141
  case 'assignment': return listAssignments(cwd);
@@ -310,6 +312,19 @@ export function createEntity(name, data, cwd) {
310
312
  saveCandidate(candidate, cwd);
311
313
  return { entity: name, id };
312
314
  }
315
+ case 'sequence': {
316
+ const res = createSequence({
317
+ name: requireString(data, 'name'),
318
+ description: data.description,
319
+ status: requireEnum(data, 'status', SequenceStatusSchema.options, { optional: true }),
320
+ items: optionalSequenceItems(data),
321
+ owner: data.owner,
322
+ author: requireString(data, 'author'),
323
+ authorId: data.agent_id,
324
+ tags: data.tags,
325
+ }, cwd);
326
+ return { entity: name, id: res.id, short_label: res.shortLabel };
327
+ }
313
328
  case 'cross_project_link': {
314
329
  const link = addCrossProjectLink({
315
330
  path: requireString(data, 'path'),
@@ -398,6 +413,20 @@ export function updateEntity(name, id, patch, cwd) {
398
413
  saveCandidate(patched, cwd);
399
414
  return { entity: name, id };
400
415
  }
416
+ case 'sequence': {
417
+ // `status` is intentionally NOT in sequence.updatable — lifecycle moves
418
+ // go through bclaw_transition. The invalidFields guard above already
419
+ // rejects it, so only name/description/tags/items/owner reach here.
420
+ const result = updateSequence({
421
+ id,
422
+ name: patch.name,
423
+ description: patch.description,
424
+ items: optionalSequenceItems(patch),
425
+ owner: patch.owner,
426
+ tags: patch.tags,
427
+ }, cwd);
428
+ return { entity: name, id: result.id };
429
+ }
401
430
  case 'cross_project_link': {
402
431
  // In-place patch: find by id (= name/path), remove, re-add with merged
403
432
  // fields. Same path semantics as resolveCrossProjectTarget so callers can
@@ -450,6 +479,16 @@ export function removeEntity(name, id, cwd, purge = false) {
450
479
  archiveCandidate(candidate, 'rejected', cwd);
451
480
  return { entity: name, id, archived: true, purged: false };
452
481
  }
482
+ case 'sequence': {
483
+ // purge → hard-delete the file; default → soft-archive (status='archived',
484
+ // the sequence terminal state) so the lane history stays auditable.
485
+ if (purge) {
486
+ const deleted = deleteSequence(id, cwd);
487
+ return { entity: name, id: deleted.id, archived: false, purged: true };
488
+ }
489
+ const archived = updateSequence({ id, status: 'archived' }, cwd);
490
+ return { entity: name, id: archived.id, archived: true, purged: false };
491
+ }
453
492
  case 'cross_project_link': {
454
493
  const removed = removeCrossProjectLink(id, cwd);
455
494
  return { entity: name, id: removed.name ?? removed.path, archived: false, purged: true };
@@ -530,6 +569,12 @@ export function transitionEntity(name, id, to, cwd, _reason) {
530
569
  }, cwd);
531
570
  return { entity: name, id, from, to, side_effects: sideEffects };
532
571
  }
572
+ case 'sequence': {
573
+ // isValidTransition above already enforced the registry matrix
574
+ // (draft→active|archived, active→archived); updateSequence persists it.
575
+ updateSequence({ id, status: to }, cwd);
576
+ return { entity: name, id, from, to, side_effects: sideEffects };
577
+ }
533
578
  default:
534
579
  throw new EntityOperationUnsupportedError(name, 'transition', `Lifecycle transitions for ${name} not yet wired.`);
535
580
  }
@@ -559,6 +604,14 @@ function requireString(data, field) {
559
604
  }
560
605
  return value;
561
606
  }
607
/**
 * Reads the optional `items` field of a sequence create/update payload.
 *
 * @param {object} data - Payload (or patch) that may carry an `items` array.
 * @returns {object[] | undefined} The same `items` array when present, or
 *   `undefined` when the field is absent, `undefined`, or `null`.
 * @throws {Error} When `items` is present but is not an array, or when any
 *   entry is not a plain (non-null, non-array) object — the error text
 *   promises "sequence item objects", so entries are checked here too.
 */
function optionalSequenceItems(data) {
    const items = data?.items;
    // `== null` deliberately matches both undefined and null.
    if (items == null) {
        return undefined;
    }
    if (!Array.isArray(items)) {
        throw new Error(`Invalid value for 'items': expected an array of sequence item objects`);
    }
    items.forEach((item, index) => {
        const isObject = typeof item === 'object' && item !== null && !Array.isArray(item);
        if (!isObject) {
            throw new Error(`Invalid value for 'items[${index}]': expected a sequence item object`);
        }
    });
    return items;
}
562
615
  /**
563
616
  * Validates that data[field] is one of `validValues`, throwing a clear
564
617
  * error message when the value is invalid. Fixes the silent-data-loss bug
@@ -4,12 +4,21 @@ import path from 'node:path';
4
4
  import { buildClaimEnvPrefix } from './execution-profile.js';
5
5
  import { getCapabilityProfile } from './agent-capability.js';
6
6
  import { nowISO } from './ids.js';
7
+ import { ensureRuntimeDirs, getRuntimeLogPath, getRuntimeSignalPath, } from './runtime-signals.js';
8
/**
 * Builds the shell command line that wraps an agent invocation with
 * file-based signals:
 *
 *   1. touch the `ack` sentinel before the agent command runs;
 *   2. run the agent command with stdout/stderr redirected to log files;
 *   3. touch `completed` when the command exits 0, otherwise touch `failed`.
 *
 * Logs are opened in APPEND mode (`>>` / `2>>`), matching the previous
 * implementation that opened the same files with flag `'a'`, so a re-spawn
 * for the same assignment does not wipe the earlier attempt's output.
 *
 * Note: because the shape is `cmd && completed || failed`, `failed` is also
 * touched if writing the `completed` sentinel itself fails.
 *
 * @param {string} bashCommand - The agent command line to wrap (inserted
 *   verbatim; the caller is responsible for its quoting).
 * @param {{ackPath: string, completedPath: string, failedPath: string,
 *   stdoutLog: string, stderrLog: string}} paths - Sentinel and log file
 *   paths; each is wrapped in double quotes in the generated command.
 * @param {boolean} isWin32 - When true, sentinels are created with the
 *   cmd.exe idiom `type nul > "file"` instead of `touch "file"`.
 * @returns {string} The complete wrapped command line.
 */
export function buildAckWrapCommand(bashCommand, paths, isWin32) {
    const touch = isWin32
        ? (p) => `type nul > "${p}"`
        : (p) => `touch "${p}"`;
    // Append rather than truncate so earlier attempts stay diagnosable.
    const redirected = `${bashCommand} >> "${paths.stdoutLog}" 2>> "${paths.stderrLog}"`;
    const onSuccess = touch(paths.completedPath);
    const onFailure = touch(paths.failedPath);
    return `${touch(paths.ackPath)} && ( ${redirected} && ${onSuccess} || ${onFailure} )`;
}
7
16
  /**
8
17
  * Check if a binary is resolvable on the system PATH.
9
18
  * On Windows, `spawn({shell:true})` always succeeds (launches cmd.exe),
10
19
  * masking ENOENT for missing binaries. This pre-check catches that.
11
20
  */
12
- function resolveBinaryOnPath(binary) {
21
+ export function resolveBinaryOnPath(binary) {
13
22
  // Absolute or relative path — check directly
14
23
  if (binary.includes('/') || binary.includes('\\')) {
15
24
  return fs.existsSync(binary) ? binary : undefined;
@@ -90,48 +99,32 @@ export class CliExecutionAdapter {
90
99
  const spawnExecutable = resolvedExecutable ?? invoke.executable;
91
100
  const useShell = isWin32 && /\.(cmd|bat)$/i.test(spawnExecutable);
92
101
  const needsStdin = invoke.promptDelivery === 'stdin_pipe' && invoke.promptText;
93
- // pln#504: open per-assignment log files for stdout/stderr capture so silent
94
- // worker deaths (trp#292) become diagnosable. Previously stdio used 'ignore'
95
- // for stdout+stderr anything the worker said vanished. Best-effort: on
96
- // failure to open log files we fall back to the legacy 'ignore' behaviour
97
- // rather than abort the spawn.
102
+ // pln#520 step 4: when we ack-wrap, the SHELL redirects stdout/stderr to the
103
+ // per-assignment log files (fds passed via stdio are NOT inherited through
104
+ // the cmd.exe .cmd node shim — the empty-logs bug of can_f792cacd), and
105
+ // the wrapper emits completed/failed sentinels mechanically. So the spawned
106
+ // process just ignores stdout/stderr here. stdin stays a pipe when the
107
+ // prompt is delivered that way (the grouped agent command inherits it).
98
108
  const useAckWrap = !!(options.assignmentId && (options.ackRoot ?? options.worktreePath));
99
- let logFds;
100
- if (useAckWrap) {
101
- try {
102
- const logRoot = options.ackRoot ?? options.worktreePath;
103
- const logDir = path.join(logRoot, '.brainclaw', 'coordination', 'runtime', 'log');
104
- fs.mkdirSync(logDir, { recursive: true });
105
- logFds = {
106
- stdout: fs.openSync(path.join(logDir, `${options.assignmentId}.stdout.log`), 'a'),
107
- stderr: fs.openSync(path.join(logDir, `${options.assignmentId}.stderr.log`), 'a'),
108
- };
109
- }
110
- catch {
111
- // Log capture is best-effort — never block the spawn on logging issues.
112
- logFds = undefined;
113
- }
114
- }
115
109
  const stdinTarget = needsStdin ? 'pipe' : 'ignore';
116
- const stdoutTarget = logFds ? logFds.stdout : 'ignore';
117
- const stderrTarget = logFds ? logFds.stderr : 'ignore';
118
- const stdio = [stdinTarget, stdoutTarget, stderrTarget];
119
- // pln#476: wrap the spawn command with a brief-ack step so the worker
120
- // shell touches a sentinel file BEFORE the agent binary runs.
121
- // waitForAssignmentHandshake checks that file as evidence the spawn
122
- // executed needed for codex (which lacks the brainclaw MCP context
123
- // to call bclaw_assignment_update). When ackRoot/assignmentId are
124
- // omitted, we keep the original direct-binary spawn.
110
+ const stdio = [stdinTarget, 'ignore', 'ignore'];
111
+ // pln#476 + pln#520 step 4: wrap the spawn so the worker shell touches the
112
+ // pre-exec `ack` sentinel, redirects logs at the shell level, and emits a
113
+ // completed/failed sentinel from the agent's exit code. waitForAssignmentHandshake
114
+ // checks the ack file; the reconciler trusts the completed/failed/heartbeat
115
+ // sentinels rather than the (untrustworthy) wrapper pid. When ackRoot/
116
+ // assignmentId are omitted, we keep the original direct-binary spawn.
125
117
  let child;
126
118
  if (useAckWrap) {
127
- const ackRoot = options.ackRoot ?? options.worktreePath;
128
- const ackDir = path.join(ackRoot, '.brainclaw', 'coordination', 'runtime', 'ack');
129
- const ackPath = path.join(ackDir, `${options.assignmentId}.ack`);
130
- fs.mkdirSync(ackDir, { recursive: true });
131
- const ackStep = isWin32
132
- ? `type nul > "${ackPath}"`
133
- : `touch "${ackPath}"`;
134
- const wrappedCmd = `${ackStep} && ${invoke.bashCommand}`;
119
+ const signalRoot = options.ackRoot ?? options.worktreePath;
120
+ ensureRuntimeDirs(signalRoot);
121
+ const wrappedCmd = buildAckWrapCommand(invoke.bashCommand, {
122
+ ackPath: getRuntimeSignalPath(signalRoot, options.assignmentId, 'ack'),
123
+ completedPath: getRuntimeSignalPath(signalRoot, options.assignmentId, 'completed'),
124
+ failedPath: getRuntimeSignalPath(signalRoot, options.assignmentId, 'failed'),
125
+ stdoutLog: getRuntimeLogPath(signalRoot, options.assignmentId, 'stdout'),
126
+ stderrLog: getRuntimeLogPath(signalRoot, options.assignmentId, 'stderr'),
127
+ }, isWin32);
135
128
  child = spawn(wrappedCmd, [], {
136
129
  detached: !isWin32,
137
130
  shell: true,
@@ -163,18 +156,6 @@ export class CliExecutionAdapter {
163
156
  child.stdin.end();
164
157
  }
165
158
  child.unref();
166
- // Close the parent's copies of the log file descriptors. The child has its
167
- // own dup'd copies and will keep writing to them after we return.
168
- if (logFds) {
169
- try {
170
- fs.closeSync(logFds.stdout);
171
- }
172
- catch { /* best-effort */ }
173
- try {
174
- fs.closeSync(logFds.stderr);
175
- }
176
- catch { /* best-effort */ }
177
- }
178
159
  const pid = child.pid;
179
160
  if (!pid) {
180
161
  throw new Error(`Failed to spawn agent ${options.agent}: no PID returned`);
@@ -8,8 +8,8 @@
8
8
  * @module
9
9
  */
10
10
  import fs from 'node:fs';
11
- import path from 'node:path';
12
- import { getCapabilityProfile } from './agent-capability.js';
11
+ import { resolveConcurrencyLimit, resolveResourceKey } from './agent-capability.js';
12
+ import { getRuntimeSignalPath } from './runtime-signals.js';
13
13
  import { appendAuditEntry } from './audit.js';
14
14
  import { loadAllSessions } from './identity.js';
15
15
  import { loadConfig } from './config.js';
@@ -30,7 +30,7 @@ function sleep(ms) {
30
30
  * spawn anyway).
31
31
  */
32
32
  export function getAssignmentAckPath(cwd, assignmentId) {
33
- return path.join(cwd, '.brainclaw', 'coordination', 'runtime', 'ack', `${assignmentId}.ack`);
33
+ return getRuntimeSignalPath(cwd, assignmentId, 'ack');
34
34
  }
35
35
  function isAssignmentAcked(assignmentId, cwd) {
36
36
  // Fast path: the brief-ack sentinel was written by the worker shell.
@@ -73,9 +73,13 @@ export function checkActiveInstance(agentName, cwd) {
73
73
  catch { /* use default */ }
74
74
  const SESSION_STALE_MS = parseDurationMs(ttlStr);
75
75
  const now = Date.now();
76
+ // pln#520 step 3: pool active sessions by host-binary resource so all
77
+ // identities of one binary (e.g. claude-code + claude-sonnet → `claude`)
78
+ // count together against a shared cap.
79
+ const targetResource = resolveResourceKey(agentName);
76
80
  const activeSessions = [];
77
81
  for (const session of sessions) {
78
- if (session.agent !== agentName)
82
+ if (resolveResourceKey(session.agent) !== targetResource)
79
83
  continue;
80
84
  const lastSeen = new Date(session.last_seen_at).getTime();
81
85
  if (isNaN(lastSeen))
@@ -84,18 +88,20 @@ export function checkActiveInstance(agentName, cwd) {
84
88
  activeSessions.push(session.session_id);
85
89
  }
86
90
  }
87
- const profile = getCapabilityProfile(agentName);
88
- const maxAllowed = profile?.max_concurrent_tasks ?? 1;
91
+ // Limit resolved from the chain (default unlimited for parallelizable CLI
92
+ // agents; structural floor for non-spawnable IDE agents). Infinity → no cap.
93
+ const maxAllowed = resolveConcurrencyLimit(agentName);
89
94
  const activeCount = activeSessions.length;
90
95
  const canSpawnMore = activeCount < maxAllowed;
96
+ const capLabel = Number.isFinite(maxAllowed) ? String(maxAllowed) : '∞';
91
97
  return {
92
98
  active: !canSpawnMore, // backward compat: active=true means "cannot spawn more"
93
99
  canSpawnMore,
94
100
  activeCount,
95
101
  maxAllowed,
96
102
  reason: canSpawnMore
97
- ? `Agent ${agentName} has capacity (${activeCount}/${maxAllowed} slots used)`
98
- : `Agent ${agentName} at capacity (${activeCount}/${maxAllowed} slots used)`,
103
+ ? `Agent ${agentName} has capacity (${activeCount}/${capLabel} slots used)`
104
+ : `Agent ${agentName} at capacity (${activeCount}/${capLabel} slots used)`,
99
105
  activeSessions,
100
106
  };
101
107
  }
@@ -382,21 +382,22 @@ function renderAvailableTools() {
382
382
  '- `bclaw_remove(entity, id, purge?)` — soft-delete (or purge)',
383
383
  '- `bclaw_transition(entity, id, to)` — change status (e.g. plan todo→in_progress→done)',
384
384
  '',
385
- 'Entities supported by the grammar: plan, decision, constraint, trap, handoff, runtime_note, candidate, claim, action, assignment, agent_run.',
385
+ 'Entities supported by the grammar: plan, decision, constraint, trap, handoff, runtime_note, candidate, sequence, claim, action, assignment, agent_run.',
386
386
  '',
387
387
  '**Cross-project access (pln#359):** every canonical-grammar call, `bclaw_context`, and `bclaw_coordinate` accept an optional `project: <name>` argument that routes the operation to a linked project (cross_project_links from `brainclaw link list` OR a workspace store-chain child). Identity is sourced from the caller; writes + audit land in the target. Unknown project names throw — no silent fallback. The CLI exposes the same as `--project <name>` (mutually exclusive with `--cwd`). Example: `bclaw_get(entity="trap", id="trp#36", project="brainclaw-site")`. Cross-project `bclaw_coordinate` is inbox-only — auto-spawn is force-disabled because the spawn cwd / worktree are tied to the target repo; the target agent picks the brief up async via its own `bclaw_work`.',
388
388
  '',
389
389
  '**Session + claims:** `bclaw_session_start`, `bclaw_session_end`, `bclaw_claim`, `bclaw_release_claim`',
390
390
  '**Plan steps:** `bclaw_add_step`, `bclaw_complete_step`, `bclaw_update_step`, `bclaw_delete_step`',
391
+ '**Sequences:** `bclaw_list_sequences`, `bclaw_create_sequence`, `bclaw_update_sequence`, `bclaw_delete_sequence` — create/activate ordered lanes for parallel dispatch. Item shape: `{ planId, stepId?, rank, hard_after?, soft_after?, lane?, scope_hint?, rationale? }`.',
391
392
  '**Inbox + handoffs:** `bclaw_read_inbox`, `bclaw_ack_message`, `bclaw_send_message`, `bclaw_correct_handoff`',
392
393
  '**Notes + search:** `bclaw_write_note`, `bclaw_quick_capture`, `bclaw_search`',
393
394
  '**Escalation (orchestrator path):**',
394
395
  '- Review / consult / assign another agent → `bclaw_coordinate(intent=review|consult|assign)` (use `open_loop=true` on review to also dispatch the reviewer turn)',
395
- '- Parallel execute across a sequence\'s lanes → `bclaw_dispatch(intent=execute)`',
396
+ '- Parallel execute across a sequence\'s lanes → create/update an active sequence, then `bclaw_dispatch(intent=analysis)` and `bclaw_dispatch(intent=execute)`',
396
397
  '- Drive your turn in an already-opened loop → `bclaw_loop(intent=turn|complete_turn|advance|close)`',
397
398
  '**Setup + navigation:** `bclaw_setup`, `bclaw_bootstrap`, `bclaw_switch`, `bclaw_release_notes`',
398
399
  '',
399
- 'Legacy per-entity tools (`bclaw_list_plans`, `bclaw_accept`, `bclaw_get_context`, `bclaw_dispatch_review`, …) were removed from the catalog at v1.0 — direct calls still succeed as a migration escape hatch but emit a redirect warning. See `docs/integrations/mcp.md` + `docs/concepts/mcp-governance.md` for the full catalog and stability contract; raw MCP clients can request advanced tools with `tools/list` params `{ catalog: "all" }`.',
400
+ 'Legacy per-entity tools (`bclaw_list_plans`, `bclaw_accept`, `bclaw_get_context`, `bclaw_dispatch_review`, …) were removed from the catalog at v1.0 — direct calls still succeed as a migration escape hatch but emit a redirect warning. See `docs/integrations/mcp.md` + `docs/concepts/mcp-governance.md` for the full catalog and stability contract.',
400
401
  ].join('\n');
401
402
  }
402
403
  // ─── Live section renderers ─────────────────────────────────────────────────
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Runtime spawn signals (pln#520 steps 1 + 4) — the file-based, zero-MCP
3
+ * liveness channel between a dispatched worker and brainclaw.
4
+ *
5
+ * Why files, not the tracked pid: on Windows the ack-wrap spawn runs under
6
+ * `shell:true`, so `child.pid` is the cmd.exe wrapper (which dies early),
7
+ * NOT the real worker (cmd.exe → claude.cmd → node.exe). Reading that pid as
8
+ * dead produced false-negative `pid_dead_at_read` cancellations while the
9
+ * worker was alive and committing (can_f792cacd: 6 workers cancelled, then
10
+ * committed 4-7 min later). The fix is to stop trusting the wrapper pid and
11
+ * trust sentinels the worker / wrapper actually write:
12
+ *
13
+ * - `ack` — pre-exec; the spawn shell touched it BEFORE the agent ran
14
+ * (pln#476). Proves delivery, NOT that work started.
15
+ * - `heartbeat` — the worker writes `work_loop_reached{run_id,nonce}` as its
16
+ * FIRST action (step 0 of the generated brief) and refreshes
17
+ * it periodically. Distinct from `ack`: this is what flips
18
+ * execution_status to `started`.
19
+ * - `completed` / `failed` — emitted MECHANICALLY by the spawn wrapper
20
+ * (`agentcmd && completed || failed`) so a dead wrapper pid
21
+ * is never misread as a silent failure.
22
+ *
23
+ * All paths are absolute under the project coordination dir so a worker in a
24
+ * worktree (or a sandboxed agent without MCP) can write them with a plain
25
+ * shell redirect.
26
+ *
27
+ * @module
28
+ */
29
+ import fs from 'node:fs';
30
+ import path from 'node:path';
31
/**
 * Base directory for every runtime sentinel and captured log of a project:
 * `<root>/.brainclaw/coordination/runtime`.
 *
 * @param {string} root - Project root that contains the `.brainclaw` folder.
 * @returns {string} The joined path (absolute whenever `root` is absolute).
 */
function runtimeDir(root) {
    const segments = ['.brainclaw', 'coordination', 'runtime'];
    return path.join(root, ...segments);
}
34
/**
 * Locate the sentinel file for one runtime signal of an assignment.
 *
 * `ack` stays at its historical location `runtime/ack/<id>.ack` (pln#476);
 * every other signal is stored as `runtime/signal/<id>.<signal>`.
 *
 * @param {string} root - Project root passed through to `runtimeDir`.
 * @param {string} assignmentId - Identifier used as the file's base name.
 * @param {string} signal - Signal name; it becomes the file extension.
 * @returns {string} Path of the sentinel file (the file itself may not exist).
 */
export function getRuntimeSignalPath(root, assignmentId, signal) {
    // Only the sub-folder differs: for `ack` the `<id>.<signal>` file name is
    // already `<id>.ack`.
    const folder = signal === 'ack' ? 'ack' : 'signal';
    const fileName = `${assignmentId}.${signal}`;
    return path.join(runtimeDir(root), folder, fileName);
}
45
/**
 * Locate the captured log of one output stream of an assignment:
 * `runtime/log/<id>.<stream>.log`.
 *
 * @param {string} root - Project root passed through to `runtimeDir`.
 * @param {string} assignmentId - Identifier used as the file's base name.
 * @param {string} stream - Stream label placed before the `.log` suffix
 *   (the original comment names `stdout` and `stderr`).
 * @returns {string} Path of the log file (the file itself may not exist).
 */
export function getRuntimeLogPath(root, assignmentId, stream) {
    const logsDir = path.join(runtimeDir(root), 'log');
    const fileName = `${assignmentId}.${stream}.log`;
    return path.join(logsDir, fileName);
}
49
/**
 * Create the `ack`, `signal` and `log` sub-directories of the runtime dir.
 *
 * Creation is recursive, so missing parents are created and directories that
 * already exist are left alone. Nothing is caught here: a failing
 * `fs.mkdirSync` propagates to the caller.
 *
 * @param {string} root - Project root passed through to `runtimeDir`.
 * @returns {void}
 */
export function ensureRuntimeDirs(root) {
    const base = runtimeDir(root);
    ['ack', 'signal', 'log']
        .map((sub) => path.join(base, sub))
        .forEach((dir) => {
            fs.mkdirSync(dir, { recursive: true });
        });
}
56
/**
 * Report whether the sentinel file for a signal is present on disk.
 *
 * @param {string} root - Project root used to resolve the sentinel path.
 * @param {string} assignmentId - Assignment the signal belongs to.
 * @param {string} signal - Signal name.
 * @returns {boolean} `true` when the sentinel path exists; `false` when it
 *   does not, or when resolving/checking the path throws.
 */
export function signalExists(root, assignmentId, signal) {
    let present = false;
    try {
        const sentinel = getRuntimeSignalPath(root, assignmentId, signal);
        present = fs.existsSync(sentinel);
    }
    catch {
        // Any failure while resolving or probing the path counts as "absent".
        present = false;
    }
    return present;
}
64
/**
 * Inspect the `heartbeat` sentinel of an assignment.
 *
 * The body is expected to be `work_loop_reached{run_id,nonce}` JSON, but the
 * file's mere presence is already a life-sign: an empty or non-JSON body (for
 * example after a bare `touch`) still yields `exists: true` with the mtime.
 *
 * @param {string} root - Project root used to resolve the sentinel path.
 * @param {string} assignmentId - Assignment whose heartbeat is read.
 * @returns {{exists: boolean, mtimeMs?: number, runId?: string, nonce?: string}}
 *   `{ exists: false }` when the sentinel cannot be stat'ed; otherwise
 *   `exists: true`, the file's `mtimeMs`, plus `runId` / `nonce` when the
 *   body parses as JSON and the corresponding field is a string.
 */
export function readHeartbeat(root, assignmentId) {
    const sentinel = getRuntimeSignalPath(root, assignmentId, 'heartbeat');
    let mtimeMs;
    try {
        ({ mtimeMs } = fs.statSync(sentinel));
    }
    catch {
        return { exists: false };
    }
    const heartbeat = { exists: true, mtimeMs };
    try {
        const body = fs.readFileSync(sentinel, 'utf-8').trim();
        if (body.length > 0) {
            const { run_id: runId, nonce } = JSON.parse(body);
            if (typeof runId === 'string') {
                heartbeat.runId = runId;
            }
            if (typeof nonce === 'string') {
                heartbeat.nonce = nonce;
            }
        }
    }
    catch {
        // Unreadable, empty or non-JSON body: the mtime alone still counts.
    }
    return heartbeat;
}
91
/**
 * Read the tail of a captured stream log (for failed_silent diagnostics).
 *
 * Only the last `maxBytes` bytes of the file are read from disk, so a large
 * log is never loaded in full. When the tail starts in the middle of a
 * multi-byte UTF-8 character, the orphaned continuation bytes are dropped so
 * the result never begins with a replacement character.
 *
 * @param {string} root - Project root used to resolve the log path.
 * @param {string} assignmentId - Assignment whose log is read.
 * @param {string} stream - Stream label of the log file.
 * @param {number} [maxBytes=2000] - Upper bound, in bytes, on how much of the
 *   end of the file is returned. `Infinity` returns the whole file; zero,
 *   negative or non-numeric values return `''`.
 * @returns {string} The decoded tail, or `''` when the log is missing, empty
 *   or unreadable.
 */
export function readLogTail(root, assignmentId, stream, maxBytes = 2000) {
    let fd;
    try {
        fd = fs.openSync(getRuntimeLogPath(root, assignmentId, stream), 'r');
        const { size } = fs.fstatSync(fd);
        const want = Math.min(size, Math.floor(maxBytes));
        // Also rejects NaN, which compares false against everything.
        if (!(want > 0)) {
            return '';
        }
        const start = size - want;
        const buf = Buffer.alloc(want);
        let filled = 0;
        // readSync may return fewer bytes than requested; stop on EOF (the
        // file can shrink between fstat and read).
        while (filled < want) {
            const got = fs.readSync(fd, buf, filled, want - filled, start + filled);
            if (got === 0) {
                break;
            }
            filled += got;
        }
        let offset = 0;
        if (start > 0) {
            // A UTF-8 sequence has at most three continuation bytes
            // (0b10xxxxxx); skip those cut off from their lead byte.
            while (offset < filled && offset < 3 && (buf[offset] & 0xc0) === 0x80) {
                offset += 1;
            }
        }
        return buf.toString('utf-8', offset, filled);
    }
    catch {
        // Diagnostics helper: a missing or unreadable log is reported as empty.
        return '';
    }
    finally {
        if (fd !== undefined) {
            try {
                fs.closeSync(fd);
            }
            catch {
                // Nothing useful can be done if closing fails.
            }
        }
    }
}
102
+ //# sourceMappingURL=runtime-signals.js.map