brainclaw 1.7.1 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -0
- package/dist/brainclaw-vscode.vsix +0 -0
- package/dist/cli.js +12 -2
- package/dist/commands/dispatch.js +2 -0
- package/dist/commands/doctor.js +17 -0
- package/dist/commands/mcp.js +31 -7
- package/dist/core/agent-capability.js +67 -0
- package/dist/core/agentrun-reconciler.js +126 -52
- package/dist/core/coordination.js +10 -9
- package/dist/core/dispatcher.js +99 -29
- package/dist/core/entity-operations.js +54 -1
- package/dist/core/execution-adapters.js +32 -51
- package/dist/core/execution.js +14 -8
- package/dist/core/instruction-templates.js +4 -3
- package/dist/core/runtime-signals.js +102 -0
- package/dist/core/spawn-check.js +125 -0
- package/dist/facts.js +3 -3
- package/dist/facts.json +2 -2
- package/docs/cli.md +8 -4
- package/docs/integrations/mcp.md +48 -15
- package/docs/mcp-schema-changelog.md +16 -5
- package/docs/playbooks/team/index.md +7 -5
- package/package.json +1 -1
package/dist/core/dispatcher.js
CHANGED
|
@@ -43,7 +43,8 @@ import { memoryDir } from './io.js';
|
|
|
43
43
|
import { loadVersionedJsonFile } from './migration.js';
|
|
44
44
|
import fs from 'node:fs';
|
|
45
45
|
import path from 'node:path';
|
|
46
|
-
import { buildInvokeCommand, resolveBriefMode, getCapabilityProfile } from './agent-capability.js';
|
|
46
|
+
import { buildInvokeCommand, resolveBriefMode, getCapabilityProfile, resolveConcurrencyLimit, resolveResourceKey, resolveModel, serializeConcurrencyLimit } from './agent-capability.js';
|
|
47
|
+
import { getRuntimeSignalPath } from './runtime-signals.js';
|
|
47
48
|
import { attemptExecution } from './execution.js';
|
|
48
49
|
import { createAssignment, transitionAssignment, generateAssignmentId, patchAssignmentMessageId } from './assignments.js';
|
|
49
50
|
import { createAgentRun, transitionAgentRun } from './agentruns.js';
|
|
@@ -163,13 +164,20 @@ export function analyzeSequence(cwd) {
|
|
|
163
164
|
.map(a => a.agent_name);
|
|
164
165
|
const agent_capacity = allAgentNames.map(agent => {
|
|
165
166
|
const active_claims = agentClaimCounts.get(agent) ?? 0;
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
167
|
+
// pln#520 step 3: limit is resolved (default unlimited for parallelizable
|
|
168
|
+
// CLI agents), not the per-name structural constant.
|
|
169
|
+
const limit = resolveConcurrencyLimit(agent);
|
|
170
|
+
const slots = Number.isFinite(limit) ? Math.max(0, limit - active_claims) : Infinity;
|
|
171
|
+
return {
|
|
172
|
+
agent,
|
|
173
|
+
active_claims,
|
|
174
|
+
max_tasks: serializeConcurrencyLimit(limit),
|
|
175
|
+
slots_remaining: serializeConcurrencyLimit(slots),
|
|
176
|
+
};
|
|
169
177
|
});
|
|
170
|
-
// Available agents:
|
|
178
|
+
// Available agents: unlimited (null) or with remaining capacity (> 0).
|
|
171
179
|
const available_agents = agent_capacity
|
|
172
|
-
.filter(a => a.slots_remaining > 0)
|
|
180
|
+
.filter(a => a.slots_remaining === null || a.slots_remaining > 0)
|
|
173
181
|
.map(a => a.agent);
|
|
174
182
|
return { sequence, ready, active, blocked, done, available_agents, agent_capacity };
|
|
175
183
|
}
|
|
@@ -188,6 +196,37 @@ export function analyzeSequence(cwd) {
|
|
|
188
196
|
* Protocol section IS useful to them — `resolveBriefMode` was updated to
|
|
189
197
|
* return 'full' for that combination.
|
|
190
198
|
*/
|
|
199
|
+
/**
 * Render the "Liveness" section of a generated brief (pln#520 step 5).
 *
 * The section is plain markdown text: it tells the worker to write a
 * `work_loop_reached` heartbeat to the assignment's heartbeat file as its very
 * first action and to refresh it while working. The instruction is a bare shell
 * redirect, so it needs no MCP access. This function only builds the text; it
 * does not touch the heartbeat file itself.
 *
 * @param {string} cwd - Root handed to `getRuntimeSignalPath` to locate the heartbeat file.
 * @param {string} assignmentId - Assignment the heartbeat is keyed by; embedded verbatim in the command.
 * @returns {string} Markdown block (newline-joined, ending with a trailing newline).
 */
export function buildLivenessSection(cwd, assignmentId) {
    const heartbeatFile = getRuntimeSignalPath(cwd, assignmentId, 'heartbeat');
    // Windows gets a plain `echo`; elsewhere `printf` also stamps the epoch seconds.
    let heartbeatCommand;
    if (process.platform === 'win32') {
        heartbeatCommand = `echo work_loop_reached ${assignmentId} > "${heartbeatFile}"`;
    }
    else {
        heartbeatCommand = `printf 'work_loop_reached ${assignmentId} %s' "$(date +%s)" > "${heartbeatFile}"`;
    }
    const explanation = [
        '## Liveness — DO THIS FIRST (step 0)',
        'Before ANY other action, prove you reached your work loop by writing a heartbeat,',
        'then refresh it every few minutes while you work. brainclaw uses this to tell',
        '"alive and working" from "spawned but dead" — a missing/stale heartbeat marks the',
        'run stalled. Completion is recorded automatically by the spawn wrapper; you do NOT',
        'need to write a completed/failed signal.',
    ];
    const fencedCommand = ['```sh', heartbeatCommand, '```'];
    const footer = [`Heartbeat file (absolute, writable): ${heartbeatFile}`, ''];
    // Blank line between the prose and the fenced command, as in the brief layout.
    return [...explanation, '', ...fencedCommand, ...footer].join('\n');
}
|
|
191
230
|
export function buildProtocolSection(options) {
|
|
192
231
|
const parts = [];
|
|
193
232
|
parts.push('## Protocol');
|
|
@@ -305,6 +344,12 @@ export function generateBrief(plan, item, cwd, briefMode, options) {
|
|
|
305
344
|
if (plan.estimated_effort)
|
|
306
345
|
parts.push(`Estimated effort: ${plan.estimated_effort} minutes`);
|
|
307
346
|
parts.push('');
|
|
347
|
+
// pln#520 step 5 — liveness heartbeat instruction, first actionable block so
|
|
348
|
+
// the worker writes work_loop_reached before anything else. Only when an
|
|
349
|
+
// assignment id is known (the heartbeat is keyed by it).
|
|
350
|
+
if (options?.assignmentId) {
|
|
351
|
+
parts.push(buildLivenessSection(cwd, options.assignmentId));
|
|
352
|
+
}
|
|
308
353
|
// Steps if any
|
|
309
354
|
if (plan.steps?.length) {
|
|
310
355
|
parts.push('## Steps');
|
|
@@ -421,14 +466,18 @@ export function scoreAgents(agentPool, plan, activeClaims, cycleAssignments) {
|
|
|
421
466
|
const canExecute = profile?.role_capabilities.includes('execute') ?? false;
|
|
422
467
|
const canSpawn = profile?.runtime.canBeSpawnedCli ?? false;
|
|
423
468
|
const capability = canExecute ? (canSpawn ? 1.0 : 0.5) : 0.1;
|
|
424
|
-
// Factor 3: Availability
|
|
425
|
-
//
|
|
469
|
+
// Factor 3 & 4: Availability + load balance.
|
|
470
|
+
// pln#520 step 3: these are based on the agent's RAW load (active claims +
|
|
471
|
+
// in-cycle assignments), decoupled from any concurrency cap. Dividing by the
|
|
472
|
+
// cap (as before) made every agent look identically idle once concurrency
|
|
473
|
+
// went unlimited, collapsing load-balancing — work piled onto the single
|
|
474
|
+
// top-scored agent. A cap-independent load fraction keeps spreading work to
|
|
475
|
+
// the least-busy agent whether or not a cap is set. The hard cap is enforced
|
|
476
|
+
// separately by the capacity guard in the dispatch loop.
|
|
426
477
|
const agentClaims = (claimCounts.get(agent) ?? 0) + (cycleAssignments?.get(agent) ?? 0);
|
|
427
|
-
const
|
|
428
|
-
const
|
|
429
|
-
const
|
|
430
|
-
// Factor 4: Load balance — normalized by agent's capacity, not raw claim count
|
|
431
|
-
const load_balance = 1.0 - utilization;
|
|
478
|
+
const loadFraction = agentClaims / (agentClaims + 1); // 0 when idle, →1 as load grows
|
|
479
|
+
const availability = 1.0 - loadFraction * 0.5; // range (0.5, 1.0]
|
|
480
|
+
const load_balance = 1.0 - loadFraction; // range (0, 1]
|
|
432
481
|
const score = preference * W_PREFERENCE +
|
|
433
482
|
capability * W_CAPABILITY +
|
|
434
483
|
availability * W_AVAILABILITY +
|
|
@@ -438,6 +487,20 @@ export function scoreAgents(agentPool, plan, activeClaims, cycleAssignments) {
|
|
|
438
487
|
}
|
|
439
488
|
// Re-export checkActiveInstance for consumers who import from dispatcher
|
|
440
489
|
export { checkActiveInstance } from './execution.js';
|
|
490
|
+
/**
 * Total the in-cycle assignment counts of every agent whose resource key
 * (per `resolveResourceKey`) equals `resourceKey` (pln#520 step 3).
 *
 * Several agent identities can resolve to the same host binary, so a
 * concurrency cap has to be compared against the pooled count rather than
 * the count of a single agent name.
 *
 * @param {Iterable<[string, number]>} cycleAssignments - `[agent, count]` pairs for the current dispatch cycle.
 * @param {string} resourceKey - Resource key to pool by.
 * @returns {number} Sum of counts for matching agents (0 when none match).
 */
function countCycleByResource(cycleAssignments, resourceKey) {
    return [...cycleAssignments].reduce((pooled, [agentName, assigned]) => (resolveResourceKey(agentName) === resourceKey ? pooled + assigned : pooled), 0);
}
|
|
441
504
|
export function selectWorktreeBaseForReadyLane(item, analysis) {
|
|
442
505
|
const hardAfter = item.hard_after ?? [];
|
|
443
506
|
if (hardAfter.length === 0)
|
|
@@ -503,13 +566,17 @@ export async function dispatch(options, cwd) {
|
|
|
503
566
|
continue; // truly active — skip
|
|
504
567
|
// Claim released but message not archived: stale assignment, allow re-dispatch
|
|
505
568
|
}
|
|
506
|
-
// Claim-based capacity guard
|
|
507
|
-
//
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
569
|
+
// Claim-based capacity guard (pln#520 step 3): count usage per host-binary
|
|
570
|
+
// resource (claude-code + claude-sonnet share `claude`), compare against the
|
|
571
|
+
// resolved limit (default unlimited — no arbitrary per-identity throttle).
|
|
572
|
+
// This is the authoritative capacity check — covers both options.agents and
|
|
573
|
+
// analysis.available_agents paths.
|
|
574
|
+
const resourceKey = resolveResourceKey(candidate.agent);
|
|
575
|
+
const existingClaims = allActiveClaims.filter(c => resolveResourceKey(c.agent) === resourceKey).length;
|
|
576
|
+
const inCycleCount = countCycleByResource(cycleAssignments, resourceKey);
|
|
577
|
+
const limit = resolveConcurrencyLimit(candidate.agent, { override: options.maxConcurrency });
|
|
578
|
+
if (existingClaims + inCycleCount >= limit) {
|
|
579
|
+
result.warnings.push(`${candidate.agent}: at capacity (${existingClaims + inCycleCount}/${limit} ${resourceKey} slots)`);
|
|
513
580
|
continue; // try next agent
|
|
514
581
|
}
|
|
515
582
|
targetAgent = candidate.agent;
|
|
@@ -561,7 +628,7 @@ export async function dispatch(options, cwd) {
|
|
|
561
628
|
if (options.dryRun) {
|
|
562
629
|
const briefMode = resolveBriefMode(targetAgent);
|
|
563
630
|
const brief = generateBrief(readyItem.plan, readyItem.item, cwd, briefMode, { claimId, worktreePath });
|
|
564
|
-
const invokeCmd = buildInvokeCommand(targetAgent, brief);
|
|
631
|
+
const invokeCmd = buildInvokeCommand(targetAgent, brief, { model: resolveModel(targetAgent, { override: options.model }) });
|
|
565
632
|
if (invokeCmd) {
|
|
566
633
|
const cmdPrefix = buildEnvPrefix(claimId);
|
|
567
634
|
result.commands.push({ agent: targetAgent, lane: readyItem.lane, command: `${cmdPrefix}${invokeCmd.bashCommand}`, shell: process.platform === 'win32' ? 'cmd' : (invokeCmd.shell ? 'bash' : 'sh') });
|
|
@@ -571,9 +638,10 @@ export async function dispatch(options, cwd) {
|
|
|
571
638
|
result.messages_sent.push(deliveryEntry);
|
|
572
639
|
assigned++;
|
|
573
640
|
cycleAssignments.set(targetAgent, (cycleAssignments.get(targetAgent) ?? 0) + 1);
|
|
574
|
-
const
|
|
575
|
-
const
|
|
576
|
-
const
|
|
641
|
+
const dryResourceKey = resolveResourceKey(targetAgent);
|
|
642
|
+
const dryExisting = allActiveClaims.filter(c => resolveResourceKey(c.agent) === dryResourceKey).length;
|
|
643
|
+
const dryCycle = countCycleByResource(cycleAssignments, dryResourceKey);
|
|
644
|
+
const dryMax = resolveConcurrencyLimit(targetAgent, { override: options.maxConcurrency });
|
|
577
645
|
if (dryExisting + dryCycle >= dryMax) {
|
|
578
646
|
const idx = agentPool.indexOf(targetAgent);
|
|
579
647
|
if (idx >= 0)
|
|
@@ -616,7 +684,7 @@ export async function dispatch(options, cwd) {
|
|
|
616
684
|
agent: targetAgent,
|
|
617
685
|
});
|
|
618
686
|
// Step 3: Build invoke command
|
|
619
|
-
const invokeCmd = buildInvokeCommand(targetAgent, brief);
|
|
687
|
+
const invokeCmd = buildInvokeCommand(targetAgent, brief, { model: resolveModel(targetAgent, { override: options.model }) });
|
|
620
688
|
if (invokeCmd) {
|
|
621
689
|
const cmdPrefix = buildEnvPrefix(claimId);
|
|
622
690
|
result.commands.push({
|
|
@@ -707,10 +775,12 @@ export async function dispatch(options, cwd) {
|
|
|
707
775
|
assigned++;
|
|
708
776
|
// Track assignments this cycle for multi-slot capacity
|
|
709
777
|
cycleAssignments.set(targetAgent, (cycleAssignments.get(targetAgent) ?? 0) + 1);
|
|
710
|
-
// Remove agent from pool only when at capacity
|
|
711
|
-
|
|
712
|
-
const
|
|
713
|
-
const
|
|
778
|
+
// Remove agent from pool only when at capacity, counted per host-binary
|
|
779
|
+
// resource against the resolved limit (pln#520 step 3).
|
|
780
|
+
const liveResourceKey = resolveResourceKey(targetAgent);
|
|
781
|
+
const existingClaims = allActiveClaims.filter(c => resolveResourceKey(c.agent) === liveResourceKey).length;
|
|
782
|
+
const cycleCount = countCycleByResource(cycleAssignments, liveResourceKey);
|
|
783
|
+
const maxTasks = resolveConcurrencyLimit(targetAgent, { override: options.maxConcurrency });
|
|
714
784
|
if (existingClaims + cycleCount >= maxTasks) {
|
|
715
785
|
const idx = agentPool.indexOf(targetAgent);
|
|
716
786
|
if (idx >= 0)
|
|
@@ -21,12 +21,13 @@ import { deleteAssignment, listAssignments, loadAssignment, saveAssignment, tran
|
|
|
21
21
|
import { listAgentRuns } from './agentruns.js';
|
|
22
22
|
import { reconcileAgentRun, reconcileDeadPidRunningAgentRunAtRead, TERMINAL_STATUSES } from './agentrun-reconciler.js';
|
|
23
23
|
import { deleteRuntimeNote, listRuntimeNotes, saveRuntimeNote, } from './runtime.js';
|
|
24
|
+
import { createSequence, deleteSequence, listSequences, updateSequence, } from './sequence.js';
|
|
24
25
|
import { createConstraint, createDecision, createTrap, } from './operations/memory-write.js';
|
|
25
26
|
import { deleteMemoryItem, findMemoryItemInChain, updateMemoryItem, } from './operations/memory-mutation.js';
|
|
26
27
|
import { createPlan, deletePlan, updatePlan, } from './operations/plan.js';
|
|
27
28
|
import { ENTITY_REGISTRY, isValidTransition, } from './entity-registry.js';
|
|
28
29
|
import { generateId } from './ids.js';
|
|
29
|
-
import { CandidateTypeSchema, ConstraintCategorySchema, DecisionOutcomeSchema, MemoryVisibilitySchema, PlanTypeEnumSchema, PrioritySchema, RuntimeNoteTypeSchema, SeveritySchema, } from './schema.js';
|
|
30
|
+
import { CandidateTypeSchema, ConstraintCategorySchema, DecisionOutcomeSchema, MemoryVisibilitySchema, PlanTypeEnumSchema, PrioritySchema, RuntimeNoteTypeSchema, SequenceStatusSchema, SeveritySchema, } from './schema.js';
|
|
30
31
|
/**
|
|
31
32
|
* Default provenance stamp applied on create when the caller does not
|
|
32
33
|
* supply one. `user` kind with whatever author is in the payload; the
|
|
@@ -134,6 +135,7 @@ function loadAll(name, cwd) {
|
|
|
134
135
|
case 'handoff': return loadState(cwd).open_handoffs;
|
|
135
136
|
case 'candidate': return listCandidates(undefined, cwd);
|
|
136
137
|
case 'runtime_note': return listRuntimeNotes(undefined, cwd);
|
|
138
|
+
case 'sequence': return listSequences(cwd);
|
|
137
139
|
case 'claim': return listClaims(cwd);
|
|
138
140
|
case 'action': return listActionRequired(cwd);
|
|
139
141
|
case 'assignment': return listAssignments(cwd);
|
|
@@ -310,6 +312,19 @@ export function createEntity(name, data, cwd) {
|
|
|
310
312
|
saveCandidate(candidate, cwd);
|
|
311
313
|
return { entity: name, id };
|
|
312
314
|
}
|
|
315
|
+
case 'sequence': {
|
|
316
|
+
const res = createSequence({
|
|
317
|
+
name: requireString(data, 'name'),
|
|
318
|
+
description: data.description,
|
|
319
|
+
status: requireEnum(data, 'status', SequenceStatusSchema.options, { optional: true }),
|
|
320
|
+
items: optionalSequenceItems(data),
|
|
321
|
+
owner: data.owner,
|
|
322
|
+
author: requireString(data, 'author'),
|
|
323
|
+
authorId: data.agent_id,
|
|
324
|
+
tags: data.tags,
|
|
325
|
+
}, cwd);
|
|
326
|
+
return { entity: name, id: res.id, short_label: res.shortLabel };
|
|
327
|
+
}
|
|
313
328
|
case 'cross_project_link': {
|
|
314
329
|
const link = addCrossProjectLink({
|
|
315
330
|
path: requireString(data, 'path'),
|
|
@@ -398,6 +413,20 @@ export function updateEntity(name, id, patch, cwd) {
|
|
|
398
413
|
saveCandidate(patched, cwd);
|
|
399
414
|
return { entity: name, id };
|
|
400
415
|
}
|
|
416
|
+
case 'sequence': {
|
|
417
|
+
// `status` is intentionally NOT in sequence.updatable — lifecycle moves
|
|
418
|
+
// go through bclaw_transition. The invalidFields guard above already
|
|
419
|
+
// rejects it, so only name/description/tags/items/owner reach here.
|
|
420
|
+
const result = updateSequence({
|
|
421
|
+
id,
|
|
422
|
+
name: patch.name,
|
|
423
|
+
description: patch.description,
|
|
424
|
+
items: optionalSequenceItems(patch),
|
|
425
|
+
owner: patch.owner,
|
|
426
|
+
tags: patch.tags,
|
|
427
|
+
}, cwd);
|
|
428
|
+
return { entity: name, id: result.id };
|
|
429
|
+
}
|
|
401
430
|
case 'cross_project_link': {
|
|
402
431
|
// In-place patch: find by id (= name/path), remove, re-add with merged
|
|
403
432
|
// fields. Same path semantics as resolveCrossProjectTarget so callers can
|
|
@@ -450,6 +479,16 @@ export function removeEntity(name, id, cwd, purge = false) {
|
|
|
450
479
|
archiveCandidate(candidate, 'rejected', cwd);
|
|
451
480
|
return { entity: name, id, archived: true, purged: false };
|
|
452
481
|
}
|
|
482
|
+
case 'sequence': {
|
|
483
|
+
// purge → hard-delete the file; default → soft-archive (status='archived',
|
|
484
|
+
// the sequence terminal state) so the lane history stays auditable.
|
|
485
|
+
if (purge) {
|
|
486
|
+
const deleted = deleteSequence(id, cwd);
|
|
487
|
+
return { entity: name, id: deleted.id, archived: false, purged: true };
|
|
488
|
+
}
|
|
489
|
+
const archived = updateSequence({ id, status: 'archived' }, cwd);
|
|
490
|
+
return { entity: name, id: archived.id, archived: true, purged: false };
|
|
491
|
+
}
|
|
453
492
|
case 'cross_project_link': {
|
|
454
493
|
const removed = removeCrossProjectLink(id, cwd);
|
|
455
494
|
return { entity: name, id: removed.name ?? removed.path, archived: false, purged: true };
|
|
@@ -530,6 +569,12 @@ export function transitionEntity(name, id, to, cwd, _reason) {
|
|
|
530
569
|
}, cwd);
|
|
531
570
|
return { entity: name, id, from, to, side_effects: sideEffects };
|
|
532
571
|
}
|
|
572
|
+
case 'sequence': {
|
|
573
|
+
// isValidTransition above already enforced the registry matrix
|
|
574
|
+
// (draft→active|archived, active→archived); updateSequence persists it.
|
|
575
|
+
updateSequence({ id, status: to }, cwd);
|
|
576
|
+
return { entity: name, id, from, to, side_effects: sideEffects };
|
|
577
|
+
}
|
|
533
578
|
default:
|
|
534
579
|
throw new EntityOperationUnsupportedError(name, 'transition', `Lifecycle transitions for ${name} not yet wired.`);
|
|
535
580
|
}
|
|
@@ -559,6 +604,14 @@ function requireString(data, field) {
|
|
|
559
604
|
}
|
|
560
605
|
return value;
|
|
561
606
|
}
|
|
607
|
+
/**
 * Read the optional `items` field of a sequence payload.
 *
 * @param {object} data - Create payload or update patch.
 * @returns {Array|undefined} The `items` array as supplied, or `undefined`
 *   when the field is absent, `undefined`, or `null`.
 * @throws {Error} When `items` is present but is not an array.
 */
function optionalSequenceItems(data) {
    const items = 'items' in data ? data.items : undefined;
    // Absent and explicitly-empty (null/undefined) are both treated as "not provided".
    if (items === undefined || items === null) {
        return undefined;
    }
    if (Array.isArray(items)) {
        return items;
    }
    throw new Error(`Invalid value for 'items': expected an array of sequence item objects`);
}
|
|
562
615
|
/**
|
|
563
616
|
* Validates that data[field] is one of `validValues`, throwing a clear
|
|
564
617
|
* error message when the value is invalid. Fixes the silent-data-loss bug
|
|
@@ -4,12 +4,21 @@ import path from 'node:path';
|
|
|
4
4
|
import { buildClaimEnvPrefix } from './execution-profile.js';
|
|
5
5
|
import { getCapabilityProfile } from './agent-capability.js';
|
|
6
6
|
import { nowISO } from './ids.js';
|
|
7
|
+
import { ensureRuntimeDirs, getRuntimeLogPath, getRuntimeSignalPath, } from './runtime-signals.js';
|
|
8
|
+
/**
 * Wrap an agent invocation in a shell command line that records runtime
 * sentinels around it.
 *
 * The resulting command:
 *   1. touches `paths.ackPath` before the agent runs,
 *   2. runs `bashCommand` with stdout/stderr redirected by the shell to
 *      `paths.stdoutLog` / `paths.stderrLog`,
 *   3. touches `paths.completedPath` when the agent exits successfully,
 *      otherwise `paths.failedPath`.
 *
 * Note the `A && B || C` shape: if the agent succeeds but creating the
 * `completed` sentinel itself fails, the `failed` sentinel is written.
 *
 * @param {string} bashCommand - Agent command line, inserted verbatim (the caller owns its quoting).
 * @param {{ackPath: string, completedPath: string, failedPath: string, stdoutLog: string, stderrLog: string}} paths
 *   Sentinel and log file paths.
 * @param {boolean} isWin32 - Emit cmd.exe syntax (`type nul >`) instead of POSIX `touch`.
 * @returns {string} The wrapped command line, intended to be run through a shell.
 */
export function buildAckWrapCommand(bashCommand, paths, isWin32) {
    // Inside POSIX double quotes `\`, `"`, `$` and backticks are still special:
    // an unescaped `$` or backtick in a path would be expanded/executed by the
    // shell. Escape them so the path is taken literally. Ordinary paths are
    // unaffected. Windows paths are left as-is: `\` is the path separator there
    // and must not be doubled.
    const posixSpecials = /[\\"$`]/g;
    const quote = isWin32
        ? (p) => `"${p}"`
        : (p) => `"${String(p).replace(posixSpecials, '\\$&')}"`;
    const touch = isWin32
        ? (p) => `type nul > ${quote(p)}`
        : (p) => `touch ${quote(p)}`;
    const redirected = `${bashCommand} > ${quote(paths.stdoutLog)} 2> ${quote(paths.stderrLog)}`;
    return (`${touch(paths.ackPath)} && ` +
        `( ${redirected} && ${touch(paths.completedPath)} || ${touch(paths.failedPath)} )`);
}
|
|
7
16
|
/**
|
|
8
17
|
* Check if a binary is resolvable on the system PATH.
|
|
9
18
|
* On Windows, `spawn({shell:true})` always succeeds (launches cmd.exe),
|
|
10
19
|
* masking ENOENT for missing binaries. This pre-check catches that.
|
|
11
20
|
*/
|
|
12
|
-
function resolveBinaryOnPath(binary) {
|
|
21
|
+
export function resolveBinaryOnPath(binary) {
|
|
13
22
|
// Absolute or relative path — check directly
|
|
14
23
|
if (binary.includes('/') || binary.includes('\\')) {
|
|
15
24
|
return fs.existsSync(binary) ? binary : undefined;
|
|
@@ -90,48 +99,32 @@ export class CliExecutionAdapter {
|
|
|
90
99
|
const spawnExecutable = resolvedExecutable ?? invoke.executable;
|
|
91
100
|
const useShell = isWin32 && /\.(cmd|bat)$/i.test(spawnExecutable);
|
|
92
101
|
const needsStdin = invoke.promptDelivery === 'stdin_pipe' && invoke.promptText;
|
|
93
|
-
// pln#
|
|
94
|
-
//
|
|
95
|
-
//
|
|
96
|
-
//
|
|
97
|
-
//
|
|
102
|
+
// pln#520 step 4: when we ack-wrap, the SHELL redirects stdout/stderr to the
|
|
103
|
+
// per-assignment log files (fds passed via stdio are NOT inherited through
|
|
104
|
+
// the cmd.exe → .cmd → node shim — the empty-logs bug of can_f792cacd), and
|
|
105
|
+
// the wrapper emits completed/failed sentinels mechanically. So the spawned
|
|
106
|
+
// process just ignores stdout/stderr here. stdin stays a pipe when the
|
|
107
|
+
// prompt is delivered that way (the grouped agent command inherits it).
|
|
98
108
|
const useAckWrap = !!(options.assignmentId && (options.ackRoot ?? options.worktreePath));
|
|
99
|
-
let logFds;
|
|
100
|
-
if (useAckWrap) {
|
|
101
|
-
try {
|
|
102
|
-
const logRoot = options.ackRoot ?? options.worktreePath;
|
|
103
|
-
const logDir = path.join(logRoot, '.brainclaw', 'coordination', 'runtime', 'log');
|
|
104
|
-
fs.mkdirSync(logDir, { recursive: true });
|
|
105
|
-
logFds = {
|
|
106
|
-
stdout: fs.openSync(path.join(logDir, `${options.assignmentId}.stdout.log`), 'a'),
|
|
107
|
-
stderr: fs.openSync(path.join(logDir, `${options.assignmentId}.stderr.log`), 'a'),
|
|
108
|
-
};
|
|
109
|
-
}
|
|
110
|
-
catch {
|
|
111
|
-
// Log capture is best-effort — never block the spawn on logging issues.
|
|
112
|
-
logFds = undefined;
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
109
|
const stdinTarget = needsStdin ? 'pipe' : 'ignore';
|
|
116
|
-
const
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
//
|
|
120
|
-
//
|
|
121
|
-
//
|
|
122
|
-
//
|
|
123
|
-
// to call bclaw_assignment_update). When ackRoot/assignmentId are
|
|
124
|
-
// omitted, we keep the original direct-binary spawn.
|
|
110
|
+
const stdio = [stdinTarget, 'ignore', 'ignore'];
|
|
111
|
+
// pln#476 + pln#520 step 4: wrap the spawn so the worker shell touches the
|
|
112
|
+
// pre-exec `ack` sentinel, redirects logs at the shell level, and emits a
|
|
113
|
+
// completed/failed sentinel from the agent's exit code. waitForAssignmentHandshake
|
|
114
|
+
// checks the ack file; the reconciler trusts the completed/failed/heartbeat
|
|
115
|
+
// sentinels rather than the (untrustworthy) wrapper pid. When ackRoot/
|
|
116
|
+
// assignmentId are omitted, we keep the original direct-binary spawn.
|
|
125
117
|
let child;
|
|
126
118
|
if (useAckWrap) {
|
|
127
|
-
const
|
|
128
|
-
|
|
129
|
-
const
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
:
|
|
134
|
-
|
|
119
|
+
const signalRoot = options.ackRoot ?? options.worktreePath;
|
|
120
|
+
ensureRuntimeDirs(signalRoot);
|
|
121
|
+
const wrappedCmd = buildAckWrapCommand(invoke.bashCommand, {
|
|
122
|
+
ackPath: getRuntimeSignalPath(signalRoot, options.assignmentId, 'ack'),
|
|
123
|
+
completedPath: getRuntimeSignalPath(signalRoot, options.assignmentId, 'completed'),
|
|
124
|
+
failedPath: getRuntimeSignalPath(signalRoot, options.assignmentId, 'failed'),
|
|
125
|
+
stdoutLog: getRuntimeLogPath(signalRoot, options.assignmentId, 'stdout'),
|
|
126
|
+
stderrLog: getRuntimeLogPath(signalRoot, options.assignmentId, 'stderr'),
|
|
127
|
+
}, isWin32);
|
|
135
128
|
child = spawn(wrappedCmd, [], {
|
|
136
129
|
detached: !isWin32,
|
|
137
130
|
shell: true,
|
|
@@ -163,18 +156,6 @@ export class CliExecutionAdapter {
|
|
|
163
156
|
child.stdin.end();
|
|
164
157
|
}
|
|
165
158
|
child.unref();
|
|
166
|
-
// Close the parent's copies of the log file descriptors. The child has its
|
|
167
|
-
// own dup'd copies and will keep writing to them after we return.
|
|
168
|
-
if (logFds) {
|
|
169
|
-
try {
|
|
170
|
-
fs.closeSync(logFds.stdout);
|
|
171
|
-
}
|
|
172
|
-
catch { /* best-effort */ }
|
|
173
|
-
try {
|
|
174
|
-
fs.closeSync(logFds.stderr);
|
|
175
|
-
}
|
|
176
|
-
catch { /* best-effort */ }
|
|
177
|
-
}
|
|
178
159
|
const pid = child.pid;
|
|
179
160
|
if (!pid) {
|
|
180
161
|
throw new Error(`Failed to spawn agent ${options.agent}: no PID returned`);
|
package/dist/core/execution.js
CHANGED
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
* @module
|
|
9
9
|
*/
|
|
10
10
|
import fs from 'node:fs';
|
|
11
|
-
import
|
|
12
|
-
import {
|
|
11
|
+
import { resolveConcurrencyLimit, resolveResourceKey } from './agent-capability.js';
|
|
12
|
+
import { getRuntimeSignalPath } from './runtime-signals.js';
|
|
13
13
|
import { appendAuditEntry } from './audit.js';
|
|
14
14
|
import { loadAllSessions } from './identity.js';
|
|
15
15
|
import { loadConfig } from './config.js';
|
|
@@ -30,7 +30,7 @@ function sleep(ms) {
|
|
|
30
30
|
* spawn anyway).
|
|
31
31
|
*/
|
|
32
32
|
export function getAssignmentAckPath(cwd, assignmentId) {
|
|
33
|
-
return
|
|
33
|
+
return getRuntimeSignalPath(cwd, assignmentId, 'ack');
|
|
34
34
|
}
|
|
35
35
|
function isAssignmentAcked(assignmentId, cwd) {
|
|
36
36
|
// Fast path: the brief-ack sentinel was written by the worker shell.
|
|
@@ -73,9 +73,13 @@ export function checkActiveInstance(agentName, cwd) {
|
|
|
73
73
|
catch { /* use default */ }
|
|
74
74
|
const SESSION_STALE_MS = parseDurationMs(ttlStr);
|
|
75
75
|
const now = Date.now();
|
|
76
|
+
// pln#520 step 3: pool active sessions by host-binary resource so all
|
|
77
|
+
// identities of one binary (e.g. claude-code + claude-sonnet → `claude`)
|
|
78
|
+
// count together against a shared cap.
|
|
79
|
+
const targetResource = resolveResourceKey(agentName);
|
|
76
80
|
const activeSessions = [];
|
|
77
81
|
for (const session of sessions) {
|
|
78
|
-
if (session.agent !==
|
|
82
|
+
if (resolveResourceKey(session.agent) !== targetResource)
|
|
79
83
|
continue;
|
|
80
84
|
const lastSeen = new Date(session.last_seen_at).getTime();
|
|
81
85
|
if (isNaN(lastSeen))
|
|
@@ -84,18 +88,20 @@ export function checkActiveInstance(agentName, cwd) {
|
|
|
84
88
|
activeSessions.push(session.session_id);
|
|
85
89
|
}
|
|
86
90
|
}
|
|
87
|
-
|
|
88
|
-
|
|
91
|
+
// Limit resolved from the chain (default unlimited for parallelizable CLI
|
|
92
|
+
// agents; structural floor for non-spawnable IDE agents). Infinity → no cap.
|
|
93
|
+
const maxAllowed = resolveConcurrencyLimit(agentName);
|
|
89
94
|
const activeCount = activeSessions.length;
|
|
90
95
|
const canSpawnMore = activeCount < maxAllowed;
|
|
96
|
+
const capLabel = Number.isFinite(maxAllowed) ? String(maxAllowed) : '∞';
|
|
91
97
|
return {
|
|
92
98
|
active: !canSpawnMore, // backward compat: active=true means "cannot spawn more"
|
|
93
99
|
canSpawnMore,
|
|
94
100
|
activeCount,
|
|
95
101
|
maxAllowed,
|
|
96
102
|
reason: canSpawnMore
|
|
97
|
-
? `Agent ${agentName} has capacity (${activeCount}/${
|
|
98
|
-
: `Agent ${agentName} at capacity (${activeCount}/${
|
|
103
|
+
? `Agent ${agentName} has capacity (${activeCount}/${capLabel} slots used)`
|
|
104
|
+
: `Agent ${agentName} at capacity (${activeCount}/${capLabel} slots used)`,
|
|
99
105
|
activeSessions,
|
|
100
106
|
};
|
|
101
107
|
}
|
|
@@ -382,21 +382,22 @@ function renderAvailableTools() {
|
|
|
382
382
|
'- `bclaw_remove(entity, id, purge?)` — soft-delete (or purge)',
|
|
383
383
|
'- `bclaw_transition(entity, id, to)` — change status (e.g. plan todo→in_progress→done)',
|
|
384
384
|
'',
|
|
385
|
-
'Entities supported by the grammar: plan, decision, constraint, trap, handoff, runtime_note, candidate, claim, action, assignment, agent_run.',
|
|
385
|
+
'Entities supported by the grammar: plan, decision, constraint, trap, handoff, runtime_note, candidate, sequence, claim, action, assignment, agent_run.',
|
|
386
386
|
'',
|
|
387
387
|
'**Cross-project access (pln#359):** every canonical-grammar call, `bclaw_context`, and `bclaw_coordinate` accept an optional `project: <name>` argument that routes the operation to a linked project (cross_project_links from `brainclaw link list` OR a workspace store-chain child). Identity is sourced from the caller; writes + audit land in the target. Unknown project names throw — no silent fallback. The CLI exposes the same as `--project <name>` (mutually exclusive with `--cwd`). Example: `bclaw_get(entity="trap", id="trp#36", project="brainclaw-site")`. Cross-project `bclaw_coordinate` is inbox-only — auto-spawn is force-disabled because the spawn cwd / worktree are tied to the target repo; the target agent picks the brief up async via its own `bclaw_work`.',
|
|
388
388
|
'',
|
|
389
389
|
'**Session + claims:** `bclaw_session_start`, `bclaw_session_end`, `bclaw_claim`, `bclaw_release_claim`',
|
|
390
390
|
'**Plan steps:** `bclaw_add_step`, `bclaw_complete_step`, `bclaw_update_step`, `bclaw_delete_step`',
|
|
391
|
+
'**Sequences:** `bclaw_list_sequences`, `bclaw_create_sequence`, `bclaw_update_sequence`, `bclaw_delete_sequence` — create/activate ordered lanes for parallel dispatch. Item shape: `{ planId, stepId?, rank, hard_after?, soft_after?, lane?, scope_hint?, rationale? }`.',
|
|
391
392
|
'**Inbox + handoffs:** `bclaw_read_inbox`, `bclaw_ack_message`, `bclaw_send_message`, `bclaw_correct_handoff`',
|
|
392
393
|
'**Notes + search:** `bclaw_write_note`, `bclaw_quick_capture`, `bclaw_search`',
|
|
393
394
|
'**Escalation (orchestrator path):**',
|
|
394
395
|
'- Review / consult / assign another agent → `bclaw_coordinate(intent=review|consult|assign)` (use `open_loop=true` on review to also dispatch the reviewer turn)',
|
|
395
|
-
'- Parallel execute across a sequence\'s lanes → `bclaw_dispatch(intent=execute)`',
|
|
396
|
+
'- Parallel execute across a sequence\'s lanes → create/update an active sequence, then `bclaw_dispatch(intent=analysis)` and `bclaw_dispatch(intent=execute)`',
|
|
396
397
|
'- Drive your turn in an already-opened loop → `bclaw_loop(intent=turn|complete_turn|advance|close)`',
|
|
397
398
|
'**Setup + navigation:** `bclaw_setup`, `bclaw_bootstrap`, `bclaw_switch`, `bclaw_release_notes`',
|
|
398
399
|
'',
|
|
399
|
-
'Legacy per-entity tools (`bclaw_list_plans`, `bclaw_accept`, `bclaw_get_context`, `bclaw_dispatch_review`, …) were removed from the catalog at v1.0 — direct calls still succeed as a migration escape hatch but emit a redirect warning. See `docs/integrations/mcp.md` + `docs/concepts/mcp-governance.md` for the full catalog and stability contract
|
|
400
|
+
'Legacy per-entity tools (`bclaw_list_plans`, `bclaw_accept`, `bclaw_get_context`, `bclaw_dispatch_review`, …) were removed from the catalog at v1.0 — direct calls still succeed as a migration escape hatch but emit a redirect warning. See `docs/integrations/mcp.md` + `docs/concepts/mcp-governance.md` for the full catalog and stability contract.',
|
|
400
401
|
].join('\n');
|
|
401
402
|
}
|
|
402
403
|
// ─── Live section renderers ─────────────────────────────────────────────────
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Runtime spawn signals (pln#520 steps 1 + 4) — the file-based, zero-MCP
|
|
3
|
+
* liveness channel between a dispatched worker and brainclaw.
|
|
4
|
+
*
|
|
5
|
+
* Why files, not the tracked pid: on Windows the ack-wrap spawn runs under
|
|
6
|
+
* `shell:true`, so `child.pid` is the cmd.exe wrapper (which dies early),
|
|
7
|
+
* NOT the real worker (cmd.exe → claude.cmd → node.exe). Reading that pid as
|
|
8
|
+
* dead produced false-negative `pid_dead_at_read` cancellations while the
|
|
9
|
+
* worker was alive and committing (can_f792cacd: 6 workers cancelled, then
|
|
10
|
+
* committed 4-7 min later). The fix is to stop trusting the wrapper pid and
|
|
11
|
+
* trust sentinels the worker / wrapper actually write:
|
|
12
|
+
*
|
|
13
|
+
* - `ack` — pre-exec; the spawn shell touched it BEFORE the agent ran
|
|
14
|
+
* (pln#476). Proves delivery, NOT that work started.
|
|
15
|
+
* - `heartbeat` — the worker writes `work_loop_reached{run_id,nonce}` as its
|
|
16
|
+
* FIRST action (step 0 of the generated brief) and refreshes
|
|
17
|
+
* it periodically. Distinct from `ack`: this is what flips
|
|
18
|
+
* execution_status to `started`.
|
|
19
|
+
* - `completed` / `failed` — emitted MECHANICALLY by the spawn wrapper
|
|
20
|
+
* (`agentcmd && completed || failed`) so a dead wrapper pid
|
|
21
|
+
* is never misread as a silent failure.
|
|
22
|
+
*
|
|
23
|
+
* All paths are absolute under the project coordination dir so a worker in a
|
|
24
|
+
* worktree (or a sandboxed agent without MCP) can write them with a plain
|
|
25
|
+
* shell redirect.
|
|
26
|
+
*
|
|
27
|
+
* @module
|
|
28
|
+
*/
|
|
29
|
+
import fs from 'node:fs';
|
|
30
|
+
import path from 'node:path';
|
|
31
|
+
/**
 * Resolve the directory that holds every runtime sentinel for a project.
 *
 * @param {string} root - Project root directory.
 * @returns {string} `<root>/.brainclaw/coordination/runtime`, normalized by `path.join`.
 */
function runtimeDir(root) {
    const segments = ['.brainclaw', 'coordination', 'runtime'];
    return path.join(root, ...segments);
}
|
|
34
|
+
/**
 * Build the path of a runtime signal sentinel under the runtime directory.
 *
 * The `ack` signal is stored as `runtime/ack/<id>.ack`; every other signal
 * is stored as `runtime/signal/<id>.<signal>`. The result is absolute when
 * `root` is absolute.
 *
 * @param {string} root - Project root directory.
 * @param {string} assignmentId - Assignment the sentinel belongs to.
 * @param {string} signal - Signal name (`ack`, `heartbeat`, `completed`, `failed`, …).
 * @returns {string} Path of the sentinel file.
 */
export function getRuntimeSignalPath(root, assignmentId, signal) {
    const base = runtimeDir(root);
    return signal === 'ack'
        ? path.join(base, 'ack', `${assignmentId}.ack`)
        : path.join(base, 'signal', `${assignmentId}.${signal}`);
}
|
|
45
|
+
/**
 * Build the path of a captured stream log: `runtime/log/<id>.<stream>.log`.
 * The result is absolute when `root` is absolute.
 *
 * @param {string} root - Project root directory.
 * @param {string} assignmentId - Assignment the log belongs to.
 * @param {string} stream - Stream name (e.g. `stdout` or `stderr`).
 * @returns {string} Path of the log file.
 */
export function getRuntimeLogPath(root, assignmentId, stream) {
    const logDir = path.join(runtimeDir(root), 'log');
    return path.join(logDir, `${assignmentId}.${stream}.log`);
}
|
|
49
|
+
/**
 * Create the `ack`, `signal` and `log` runtime directories if they are missing.
 *
 * Directories are created recursively, so calling this on an existing tree is
 * a no-op. Errors raised by `fs.mkdirSync` are not caught here and propagate
 * to the caller.
 *
 * @param {string} root - Project root directory.
 * @returns {void}
 */
export function ensureRuntimeDirs(root) {
    const base = runtimeDir(root);
    ['ack', 'signal', 'log']
        .map((sub) => path.join(base, sub))
        .forEach((dir) => {
            fs.mkdirSync(dir, { recursive: true });
        });
}
|
|
56
|
+
/**
 * Report whether the sentinel file for a runtime signal is present.
 *
 * Any error raised while resolving or probing the path is treated as
 * "not present".
 *
 * @param {string} root - Project root directory.
 * @param {string} assignmentId - Assignment the sentinel belongs to.
 * @param {string} signal - Signal name (`ack`, `heartbeat`, `completed`, `failed`, …).
 * @returns {boolean} `true` when the sentinel exists, otherwise `false`.
 */
export function signalExists(root, assignmentId, signal) {
    let found = false;
    try {
        const sentinel = getRuntimeSignalPath(root, assignmentId, signal);
        found = fs.existsSync(sentinel);
    }
    catch {
        found = false;
    }
    return found;
}
|
|
64
|
+
/**
 * Inspect the heartbeat sentinel of an assignment.
 *
 * The file body is expected to be JSON carrying `run_id` and `nonce`, but an
 * empty or non-JSON file (e.g. a bare `touch`) is still a valid heartbeat:
 * the modification time alone is reported in that case.
 *
 * @param {string} root - Project root directory.
 * @param {string} assignmentId - Assignment whose heartbeat is read.
 * @returns {{exists: false} | {exists: true, mtimeMs: number, runId?: string, nonce?: string}}
 *   `{ exists: false }` when the file cannot be stat'ed; otherwise the mtime
 *   plus any string `run_id` / `nonce` found in the body.
 */
export function readHeartbeat(root, assignmentId) {
    const heartbeatPath = getRuntimeSignalPath(root, assignmentId, 'heartbeat');
    let mtimeMs;
    try {
        ({ mtimeMs } = fs.statSync(heartbeatPath));
    }
    catch {
        return { exists: false };
    }
    const info = { exists: true, mtimeMs };
    try {
        const body = fs.readFileSync(heartbeatPath, 'utf-8').trim();
        if (body) {
            const { run_id: runId, nonce } = JSON.parse(body);
            if (typeof runId === 'string') {
                info.runId = runId;
            }
            if (typeof nonce === 'string') {
                info.nonce = nonce;
            }
        }
    }
    catch {
        // Unreadable, empty or non-JSON body: the mtime is still a life-sign.
    }
    return info;
}
|
|
91
|
+
/**
 * Read the tail of a captured stream log (for failed_silent diagnostics).
 *
 * Only the last `maxBytes` bytes are read from disk, so a very large log is
 * never loaded into memory in full. When the tail starts in the middle of a
 * multi-byte UTF-8 character, the partial character is dropped so the result
 * never begins with a decoding artifact.
 *
 * @param {string} root - Project root directory.
 * @param {string} assignmentId - Assignment whose log is read.
 * @param {string} stream - Stream name (e.g. `stdout` or `stderr`).
 * @param {number} [maxBytes=2000] - Maximum number of bytes to return from the
 *   end of the file; values <= 0 yield an empty string.
 * @returns {string} The decoded tail, the whole file when it is shorter than
 *   `maxBytes`, or `''` when the log is missing or unreadable.
 */
export function readLogTail(root, assignmentId, stream, maxBytes = 2000) {
    let fd;
    try {
        const logPath = getRuntimeLogPath(root, assignmentId, stream);
        fd = fs.openSync(logPath, 'r');
        const { size } = fs.fstatSync(fd);
        const limit = Math.max(0, Math.floor(maxBytes) || 0);
        const want = Math.min(size, limit);
        if (want === 0) {
            return '';
        }
        const buf = Buffer.alloc(want);
        const offset = size - want;
        let filled = 0;
        // readSync may return fewer bytes than requested; loop until done or EOF.
        while (filled < want) {
            const n = fs.readSync(fd, buf, filled, want - filled, offset + filled);
            if (n === 0) {
                break;
            }
            filled += n;
        }
        let start = 0;
        if (offset > 0) {
            // Skip UTF-8 continuation bytes (10xxxxxx) left over from a
            // character that was cut by the tail boundary.
            while (start < filled && (buf[start] & 0xc0) === 0x80) {
                start += 1;
            }
        }
        return buf.toString('utf-8', start, filled);
    }
    catch {
        // Diagnostics only: a missing or unreadable log is reported as empty.
        return '';
    }
    finally {
        if (fd !== undefined) {
            try {
                fs.closeSync(fd);
            }
            catch {
                // Nothing useful to do if closing a read-only descriptor fails.
            }
        }
    }
}
|
|
102
|
+
//# sourceMappingURL=runtime-signals.js.map
|