brainclaw 1.5.5 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brainclaw-vscode.vsix +0 -0
- package/dist/cli.js +124 -7
- package/dist/commands/bootstrap-loop.js +206 -0
- package/dist/commands/loop.js +156 -0
- package/dist/commands/loops-handlers.js +110 -55
- package/dist/commands/mcp-read-handlers.js +37 -0
- package/dist/commands/mcp.js +621 -202
- package/dist/commands/questions.js +180 -0
- package/dist/commands/reply.js +190 -0
- package/dist/commands/session-end.js +105 -3
- package/dist/commands/session-start.js +32 -53
- package/dist/commands/switch.js +17 -1
- package/dist/core/agentrun-reconciler.js +65 -0
- package/dist/core/claims.js +29 -0
- package/dist/core/dispatch-status.js +219 -0
- package/dist/core/entity-operations.js +128 -9
- package/dist/core/execution-adapters.js +38 -2
- package/dist/core/facade-schema.js +55 -0
- package/dist/core/federation-cloud.js +27 -12
- package/dist/core/federation-materialize.js +57 -0
- package/dist/core/instruction-templates.js +2 -0
- package/dist/core/loops/bootstrap-acquire.js +195 -0
- package/dist/core/loops/facade-schema.js +68 -1
- package/dist/core/loops/hooks/bootstrap-write.js +144 -0
- package/dist/core/loops/hooks/notify-operator.js +148 -0
- package/dist/core/loops/hooks/survey-source-reader.js +256 -0
- package/dist/core/loops/index.js +8 -2
- package/dist/core/loops/next-expected.js +63 -0
- package/dist/core/loops/presets/bootstrap.js +75 -0
- package/dist/core/loops/presets/index.js +16 -0
- package/dist/core/loops/store.js +224 -4
- package/dist/core/loops/types.js +346 -1
- package/dist/core/loops/verbs.js +739 -6
- package/dist/core/schema.js +28 -2
- package/dist/core/state.js +62 -0
- package/dist/facts.js +7 -5
- package/dist/facts.json +6 -4
- package/docs/concepts/dispatch-lifecycle.md +228 -0
- package/docs/concepts/loop-engine.md +55 -0
- package/docs/concepts/multi-agent-workflows.md +167 -166
- package/docs/concepts/troubleshooting.md +10 -2
- package/docs/integrations/overview.md +14 -12
- package/package.json +1 -1
|
@@ -50,6 +50,8 @@ export const DEFAULT_HEALTH_CHECK_GRACE_MS = 60_000;
|
|
|
50
50
|
* declared `failed` with `silent_termination_no_evidence`. Default 30 min.
|
|
51
51
|
*/
|
|
52
52
|
export const DEFAULT_STALE_AFTER_MS = 30 * 60_000;
|
|
53
|
+
export const DEFAULT_DEAD_PID_READ_SWEEP_AGE_MS = 5 * 60_000;
|
|
54
|
+
export const DEFAULT_DEAD_PID_READ_SWEEP_LIMIT = 50;
|
|
53
55
|
const TERMINAL_STATUSES = new Set([
|
|
54
56
|
'completed', 'failed', 'cancelled', 'timed_out', 'interrupted',
|
|
55
57
|
]);
|
|
@@ -309,6 +311,69 @@ export function reconcileAgentRun(runId, cwd, options = {}) {
|
|
|
309
311
|
evidence, previous_status, current_status: run.status,
|
|
310
312
|
};
|
|
311
313
|
}
|
|
314
|
+
/**
 * Read-time check for a single agent_run: if the run is still recorded as
 * `running` but the collected evidence reports `process_alive === false`,
 * transition it to `cancelled` with status_reason `pid_dead_at_read`.
 *
 * Every other situation (run missing, run not `running`, pid alive, pid
 * liveness unknown, transition rejected) yields a `no_op` result whose
 * `reason` explains why nothing changed.
 *
 * @param {string} runId - id of the agent_run to inspect.
 * @param {string} cwd - forwarded to the run store / evidence helpers.
 * @param {{ nowMs?: number, actor?: string }} [options] - `nowMs` is passed
 *   to evidence collection; `actor` defaults to `'reconciler'`.
 * @returns {{ run_id: string, action: string, reason: string, evidence: object,
 *   previous_status: string, current_status: string }}
 */
export function reconcileDeadPidRunningAgentRunAtRead(runId, cwd, options = {}) {
    const run = loadAgentRun(runId, cwd);
    if (!run) {
        // No record to inspect: report a placeholder evidence set and the
        // 'created' status on both sides.
        return {
            run_id: runId,
            action: 'no_op',
            reason: 'run not found',
            evidence: {
                age_ms: 0,
                has_post_start_commit: false,
                claim_released: false,
                assignment_completed: false,
                process_alive: undefined,
            },
            previous_status: 'created',
            current_status: 'created',
        };
    }
    const evidence = collectEvidence(run, cwd, { nowMs: options.nowMs });
    // Shared shape for every "nothing changed" outcome below.
    const unchanged = (reason) => ({
        run_id: run.id,
        action: 'no_op',
        reason,
        evidence,
        previous_status: run.status,
        current_status: run.status,
    });
    if (run.status !== 'running') {
        return unchanged(`run status is ${run.status}, not running`);
    }
    // Only a definite `false` counts as dead; `undefined` means "unknown".
    if (evidence.process_alive === true) {
        return unchanged('process alive');
    }
    if (evidence.process_alive !== false) {
        return unchanged('pid liveness unknown');
    }
    try {
        const transitionMeta = {
            actor: options.actor ?? 'reconciler',
            status_reason: 'pid_dead_at_read',
        };
        transitionAgentRun(run.id, 'cancelled', transitionMeta, cwd);
        return {
            run_id: run.id,
            action: 'cancelled_dead_pid',
            reason: 'pid_dead_at_read',
            evidence,
            previous_status: run.status,
            current_status: 'cancelled',
        };
    }
    catch (err) {
        // A rejected transition is reported, not thrown, so read paths keep working.
        const detail = err instanceof Error ? err.message : String(err);
        return unchanged(`cancel transition rejected: ${detail}`);
    }
}
|
|
358
|
+
/**
 * Batch variant of the read-time dead-pid check.
 *
 * Lists runs with status `running`, keeps those whose most recent timestamp
 * (`last_event_at`, else `started_at`, else `created_at`) parses to a finite
 * time at or before `now - staleAfterMs`, orders them most-recent-first,
 * truncates to `limit`, and runs `reconcileDeadPidRunningAgentRunAtRead` on
 * each survivor.
 *
 * @param {string} cwd - forwarded to the run store and the per-run check.
 * @param {{ nowMs?: number, staleAfterMs?: number, limit?: number, actor?: string }} [options]
 *   `staleAfterMs` defaults to DEFAULT_DEAD_PID_READ_SWEEP_AGE_MS and `limit`
 *   to DEFAULT_DEAD_PID_READ_SWEEP_LIMIT; the whole object is forwarded to the
 *   per-run check.
 * @returns {object[]} one reconcile result per examined run.
 */
export function sweepDeadPidRunningAgentRunsAtRead(cwd, options = {}) {
    const now = options.nowMs ?? Date.now();
    const cutoff = now - (options.staleAfterMs ?? DEFAULT_DEAD_PID_READ_SWEEP_AGE_MS);
    const limit = options.limit ?? DEFAULT_DEAD_PID_READ_SWEEP_LIMIT;
    const lastActivityMs = (run) => new Date(run.last_event_at ?? run.started_at ?? run.created_at).getTime();
    // Pair each run with its timestamp once, so filtering and sorting share it.
    const stale = [];
    for (const run of listAgentRuns(cwd, { status: 'running' })) {
        const ts = lastActivityMs(run);
        if (Number.isFinite(ts) && ts <= cutoff) {
            stale.push({ run, ts });
        }
    }
    stale.sort((left, right) => right.ts - left.ts);
    return stale
        .slice(0, limit)
        .map(({ run }) => reconcileDeadPidRunningAgentRunAtRead(run.id, cwd, options));
}
|
|
312
377
|
/**
|
|
313
378
|
* Reconcile every non-terminal agent_run matching `filter`. Useful for
|
|
314
379
|
* batch sweeps from `bclaw_assignment_events` or `brainclaw doctor --dispatch`.
|
package/dist/core/claims.js
CHANGED
|
@@ -59,6 +59,35 @@ export function saveClaim(claim, cwd) {
|
|
|
59
59
|
catch { /* best-effort */ }
|
|
60
60
|
});
|
|
61
61
|
}
|
|
62
|
+
/**
 * Check-then-create for a claim on `input.scope`.
 *
 * Inside a single `mutate({ cwd }, …)` callback this looks for an existing
 * claim with `status === 'active'` and the same `scope`; if one exists the
 * call reports it, otherwise a new active claim is built from `input` and
 * saved. The original author's note states that the mutation pipeline's
 * mutex serializes claim-store writes, which is what makes the pair atomic;
 * this block relies on that.
 *
 * @param {object} input - reads `agent`, `agent_id`, `user`, `session_id`,
 *   `scope`, `description`, `plan_id`, `model`.
 * @param {string} cwd - forwarded to the claim store.
 * @returns whatever `mutate` returns for a callback yielding either
 *   `{ acquired: false, conflicting_claim }` or `{ acquired: true, claim }`.
 */
export function acquireClaimScope(input, cwd) {
    const holdsScope = (claim) => claim.status === 'active' && claim.scope === input.scope;
    return mutate({ cwd }, () => {
        const existing = listClaims(cwd).find(holdsScope);
        if (existing) {
            return { acquired: false, conflicting_claim: existing };
        }
        const claim = {
            id: generateClaimId(),
            agent: input.agent,
            agent_id: input.agent_id,
            user: input.user,
            session_id: input.session_id,
            scope: input.scope,
            description: input.description,
            created_at: nowISO(),
            status: 'active',
            plan_id: input.plan_id,
            model: input.model,
        };
        saveClaim(claim, cwd);
        return { acquired: true, claim };
    });
}
|
|
62
91
|
export function loadClaim(id, cwd) {
|
|
63
92
|
return claimStore(cwd).load(id);
|
|
64
93
|
}
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Consolidated dispatch status (pln#503 phase 3.1).
|
|
3
|
+
*
|
|
4
|
+
* Resolves a single dispatch reference (`asgn_…`, `clm_…`, `lop_…`, `run_…`)
|
|
5
|
+
* into the full set of linked entities — assignment, claim, loop, agent_run —
|
|
6
|
+
* plus on-disk artefacts (brief-ack sentinel, stdout/stderr logs) and a
|
|
7
|
+
* pid-liveness check, then computes a health verdict + a recommended next
|
|
8
|
+
* action for the caller.
|
|
9
|
+
*
|
|
10
|
+
* The motivating use case: an agent who just called `bclaw_coordinate` and
|
|
11
|
+
* got `execution_status: "delivered_and_started"` should be able to verify
|
|
12
|
+
* the dispatch is alive without running five separate `bclaw_find` calls.
|
|
13
|
+
* The `verify_with` hint added in phase 3.3 already points callers at this
|
|
14
|
+
* tool by name.
|
|
15
|
+
*
|
|
16
|
+
* See docs/concepts/dispatch-lifecycle.md for the full entity-relationship
|
|
17
|
+
* and FSM model.
|
|
18
|
+
*
|
|
19
|
+
* @module
|
|
20
|
+
*/
|
|
21
|
+
import fs from 'node:fs';
|
|
22
|
+
import path from 'node:path';
|
|
23
|
+
import { loadAssignment, listAssignments } from './assignments.js';
|
|
24
|
+
import { loadAgentRun, listAgentRuns } from './agentruns.js';
|
|
25
|
+
import { loadClaim } from './claims.js';
|
|
26
|
+
import { getLoop, listLoops } from './loops/store.js';
|
|
27
|
+
import { isProcessAlive } from './agentrun-reconciler.js';
|
|
28
|
+
const DEFAULT_TAIL = 20;
|
|
29
|
+
const DEFAULT_STALL_MS = 5 * 60_000;
|
|
30
|
+
// ── Internal helpers ──────────────────────────────────────────────────────
|
|
31
|
+
/**
 * Describe a log file and, when `lines > 0`, return its last `lines` lines.
 *
 * Only the end of the file is read: fixed-size chunks are pulled backwards
 * from EOF until more than `lines` newline bytes have been seen (or the start
 * of the file is reached). Status calls therefore do not load an arbitrarily
 * large log into memory just to show a short tail.
 *
 * @param {string} filePath - path of the log file.
 * @param {number} lines - number of trailing lines wanted; `<= 0` skips the
 *   read and returns only existence and size.
 * @returns {{ path: string, exists: boolean, size_bytes: number, tail?: string[] }}
 *   `exists: false, size_bytes: 0` when the file cannot be opened or read.
 */
function readLogTail(filePath, lines) {
    const CHUNK_BYTES = 64 * 1024;
    const NEWLINE = 0x0a;
    let fd;
    try {
        fd = fs.openSync(filePath, 'r');
        const size = fs.fstatSync(fd).size;
        if (lines <= 0) {
            return { path: filePath, exists: true, size_bytes: size };
        }
        const chunks = [];
        let position = size;
        let newlineCount = 0;
        // `!(a > b)` rather than `a <= b` so a NaN `lines` reads the whole
        // file, matching what `slice(-NaN)` returns below.
        while (position > 0 && !(newlineCount > lines)) {
            const length = Math.min(CHUNK_BYTES, position);
            position -= length;
            const chunk = Buffer.alloc(length);
            const bytesRead = fs.readSync(fd, chunk, 0, length, position);
            const filled = chunk.subarray(0, bytesRead);
            for (const byte of filled) {
                if (byte === NEWLINE)
                    newlineCount += 1;
            }
            chunks.unshift(filled);
        }
        let data = Buffer.concat(chunks);
        if (position > 0) {
            // We stopped mid-file, so the first line in the buffer is partial.
            // Cut at the byte level: 0x0A never occurs inside a multi-byte
            // UTF-8 sequence, so decoding starts on a character boundary.
            data = data.subarray(data.indexOf(NEWLINE) + 1);
        }
        const all = data.toString('utf-8').split(/\r?\n/);
        // Strip trailing empty line from final \n
        if (all.length > 0 && all[all.length - 1] === '')
            all.pop();
        return {
            path: filePath,
            exists: true,
            size_bytes: size,
            tail: all.slice(-lines),
        };
    }
    catch {
        return { path: filePath, exists: false, size_bytes: 0 };
    }
    finally {
        if (fd !== undefined) {
            try {
                fs.closeSync(fd);
            }
            catch { /* best-effort: a failed close must not mask the result */ }
        }
    }
}
|
|
53
|
+
/**
 * Return the first loop (in `listLoops({}, cwd)` order) that has a slot whose
 * `assignment_id` equals `assignmentId`, or `undefined` when none does.
 */
function findLoopByAssignmentId(assignmentId, cwd) {
    const ownsAssignment = (slot) => slot.assignment_id === assignmentId;
    for (const candidate of listLoops({}, cwd)) {
        if (!candidate.slots.some(ownsAssignment)) {
            continue;
        }
        return candidate;
    }
    return undefined;
}
|
|
60
|
+
/**
 * Map a dispatch reference to the assignment it points at, based on its
 * id prefix.
 *
 * - `asgn_…`: taken as the assignment id directly (no lookup).
 * - `run_…`:  loads the agent_run; returns its `assignment_id` and the run
 *             itself, or `unresolved` if the run does not load.
 * - `clm_…`:  picks the assignment with the greatest `created_at` among those
 *             listed for the claim; `resolved_from: 'claim_id'` with no
 *             assignment when none are listed.
 * - `lop_…`:  prefers a slot in the loop's `current_phase` that carries an
 *             `assignment_id`, else any slot that carries one; `unresolved`
 *             if the loop does not load.
 * - anything else: `unresolved`.
 *
 * @param {string} targetId
 * @param {string} cwd - forwarded to the entity stores.
 * @returns {{ resolved_from: string, assignment_id?: string, agent_run?: object }}
 */
function resolveTarget(targetId, cwd) {
    const knownPrefixes = ['asgn_', 'run_', 'clm_', 'lop_'];
    const prefix = knownPrefixes.find((candidate) => targetId.startsWith(candidate));
    switch (prefix) {
        case 'asgn_':
            return { resolved_from: 'assignment_id', assignment_id: targetId };
        case 'run_': {
            const run = loadAgentRun(targetId, cwd);
            return run
                ? { resolved_from: 'run_id', assignment_id: run.assignment_id, agent_run: run }
                : { resolved_from: 'unresolved' };
        }
        case 'clm_': {
            const linked = listAssignments(cwd, { claim_id: targetId });
            if (linked.length > 0) {
                // Newest first by created_at; the head is the most recent assignment.
                const newestFirst = (a, b) => b.created_at.localeCompare(a.created_at);
                const [latest] = [...linked].sort(newestFirst);
                return { resolved_from: 'claim_id', assignment_id: latest.id };
            }
            return { resolved_from: 'claim_id' };
        }
        case 'lop_': {
            const loop = getLoop(targetId, cwd);
            if (!loop) {
                return { resolved_from: 'unresolved' };
            }
            const slot = loop.slots.find((s) => s.phase === loop.current_phase && s.assignment_id)
                ?? loop.slots.find((s) => s.assignment_id);
            if (slot?.assignment_id) {
                return { resolved_from: 'loop_id', assignment_id: slot.assignment_id };
            }
            return { resolved_from: 'loop_id' };
        }
        default:
            return { resolved_from: 'unresolved' };
    }
}
|
|
96
|
+
// agent_run statuses after which no liveness check is performed.
const TERMINAL_RUN_STATUSES = new Set([
    'completed', 'failed', 'cancelled', 'timed_out', 'interrupted',
]);
/**
 * Turn the resolved entities plus runtime observations into a health verdict.
 *
 * Decision order:
 *   1. neither assignment nor agent_run        → `unknown`
 *   2. no agent_run                            → `not_dispatched`
 *   3. agent_run in a terminal status          → `terminal`
 *   4. `runtime.pid_alive === false`           → `silent_death`
 *   5. `runtime.pid_alive === true`            → `stalled` when the time since
 *      the run's last timestamp exceeds `options.stallMs`, else `healthy`
 *   6. pid liveness not a boolean              → `unknown`
 *
 * @param {object|undefined} assignment - reads `status`.
 * @param {object|undefined} agentRun - reads `status`, `status_reason`, `pid`,
 *   `last_event_at`, `started_at`, `created_at`.
 * @param {{ pid?: number, pid_alive?: boolean }} runtime
 * @param {{ stallMs: number, nowMs: number }} options
 * @returns {{ health: string, summary: string, recommended_next_action: string }}
 */
function computeDiagnosis(assignment, agentRun, runtime, options) {
    const verdict = (health, summary, recommended_next_action) => ({
        health, summary, recommended_next_action,
    });
    if (!agentRun) {
        if (!assignment) {
            return verdict('unknown', 'target_id did not resolve to any assignment or agent_run', 'Verify the target_id is correct (asgn_/clm_/lop_/run_). Use bclaw_find(entity="assignment") to list available assignments.');
        }
        const nextStep = assignment?.status === 'offered'
            ? 'Wait for the target agent to accept, or reroute via bclaw_coordinate(intent="reroute", scope=…).'
            : 'Re-dispatch with bclaw_coordinate or check for an earlier spawn error.';
        return verdict('not_dispatched', `assignment exists (status=${assignment?.status}) but no agent_run record — the spawn never produced a process, or the assignment is still waiting to be picked up`, nextStep);
    }
    if (TERMINAL_RUN_STATUSES.has(agentRun.status)) {
        const reasonSuffix = agentRun.status_reason ? `: ${agentRun.status_reason}` : '';
        const nextStep = agentRun.status === 'completed'
            ? 'Harvest artifacts and move on; if the assignment is still open, transition it to completed.'
            : 'Read stderr log (path in runtime.log_files.stderr) for the failure detail; reroute or retry if appropriate.';
        return verdict('terminal', `agent_run already terminal (status=${agentRun.status})${reasonSuffix}`, nextStep);
    }
    // status is running / launching / waiting_input / blocked → check liveness
    const lastSeenAt = agentRun.last_event_at ?? agentRun.started_at ?? agentRun.created_at;
    const stallAge = options.nowMs - new Date(lastSeenAt).getTime();
    const idleSeconds = Math.round(stallAge / 1000);
    switch (runtime.pid_alive) {
        case false:
            return verdict('silent_death', `agent_run.status="${agentRun.status}" but pid ${runtime.pid} is dead — worker exited without self-reporting; lazy reconciler will mark it failed after the stale window (default 30min)`, 'Read .stderr.log for the exit reason; then trigger reconciliation by calling bclaw_find(entity="agent_run") again, or cancel + reroute.');
        case true:
            if (stallAge > options.stallMs) {
                return verdict('stalled', `agent_run alive (pid=${runtime.pid}) but no activity for ${idleSeconds}s; last_event_at=${agentRun.last_event_at ?? '(never)'}`, 'Tail the stdout/stderr log to see whether the worker is doing useful work; if truly hung, kill the pid and reroute.');
            }
            return verdict('healthy', `agent_run.status="${agentRun.status}", pid ${runtime.pid} alive, last activity ${idleSeconds}s ago`, 'No action — the dispatch is alive and recent. Re-check periodically until terminal.');
        default:
            // pid_alive undefined → cannot determine (no pid tracked, or signal failed)
            return verdict('unknown', `agent_run.status="${agentRun.status}" but pid liveness could not be determined (pid=${agentRun.pid ?? '(none)'})`, 'Read the stdout/stderr log for life signs; or wait for the lazy reconciler to converge based on commit / claim / assignment evidence.');
    }
}
|
|
157
|
+
// ── Public API ─────────────────────────────────────────────────────────────
|
|
158
|
+
/**
 * Resolve a dispatch reference into its linked entities, runtime artefacts
 * and a health diagnosis.
 *
 * @param {object} options
 * @param {string} options.target_id - `asgn_…`, `clm_…`, `lop_…` or `run_…`.
 * @param {string} [options.cwd] - project root; falls back to `process.cwd()`
 *   when locating runtime artefacts.
 * @param {number} [options.tail_log_lines] - log lines to return (default DEFAULT_TAIL).
 * @param {number} [options.stall_threshold_ms] - inactivity threshold for the
 *   `stalled` verdict (default DEFAULT_STALL_MS).
 * @param {number} [options.nowMs] - clock override; defaults to `Date.now()`.
 * @returns {object} `{ target_id, resolved_from, entities, assignment, claim,
 *   loop, agent_run, runtime, diagnosis }`.
 */
export function getDispatchStatus(options) {
    const cwd = options.cwd;
    const tailLines = options.tail_log_lines ?? DEFAULT_TAIL;
    const stallMs = options.stall_threshold_ms ?? DEFAULT_STALL_MS;
    const nowMs = options.nowMs ?? Date.now();
    const resolved = resolveTarget(options.target_id, cwd);
    const assignmentId = resolved.assignment_id;
    const assignment = assignmentId ? loadAssignment(assignmentId, cwd) : undefined;
    // A `clm_…` target names the claim itself. When no assignment supplies a
    // claim_id, still load the claim from the target so the caller sees it,
    // mirroring how a `lop_…` target loads its loop even without a
    // slot assignment.
    const directClaimId = resolved.resolved_from === 'claim_id' && !assignment?.claim_id
        ? options.target_id
        : undefined;
    const claimId = assignment?.claim_id || directClaimId;
    const claim = claimId ? loadClaim(claimId, cwd) : undefined;
    // Prefer the pre-resolved agent_run (when target_id was a run_…); otherwise
    // look up by assignment_id and pick the most recent attempt.
    let agentRun = resolved.agent_run;
    if (!agentRun && assignmentId) {
        const runs = listAgentRuns(cwd, { assignment_id: assignmentId });
        agentRun = [...runs].sort((a, b) => b.created_at.localeCompare(a.created_at))[0];
    }
    let loop;
    if (resolved.resolved_from === 'loop_id') {
        loop = getLoop(options.target_id, cwd);
    }
    else if (assignmentId) {
        loop = findLoopByAssignmentId(assignmentId, cwd);
    }
    // Runtime artefacts (ack file + log files) are looked up under
    // <projectRoot>/.brainclaw/coordination/runtime/{ack,log}/, keyed by
    // assignment id. Without an assignment id none of them can be located.
    const projectRoot = cwd ?? process.cwd();
    const runtimeRoot = path.join(projectRoot, '.brainclaw', 'coordination', 'runtime');
    const ackPath = assignmentId ? path.join(runtimeRoot, 'ack', `${assignmentId}.ack`) : undefined;
    const stdoutPath = assignmentId ? path.join(runtimeRoot, 'log', `${assignmentId}.stdout.log`) : undefined;
    const stderrPath = assignmentId ? path.join(runtimeRoot, 'log', `${assignmentId}.stderr.log`) : undefined;
    const runtime = {
        pid: agentRun?.pid,
        pid_alive: isProcessAlive(agentRun?.pid),
        ack_file: {
            exists: ackPath ? fs.existsSync(ackPath) : false,
            path: ackPath,
        },
        log_files: {
            stdout: stdoutPath ? readLogTail(stdoutPath, tailLines) : undefined,
            stderr: stderrPath ? readLogTail(stderrPath, tailLines) : undefined,
        },
    };
    const diagnosis = computeDiagnosis(assignment, agentRun, runtime, { stallMs, nowMs });
    return {
        target_id: options.target_id,
        resolved_from: resolved.resolved_from,
        entities: {
            assignment_id: assignmentId,
            claim_id: directClaimId ?? assignment?.claim_id,
            loop_id: loop?.id,
            run_id: agentRun?.id,
        },
        assignment,
        claim,
        loop,
        agent_run: agentRun,
        runtime,
        diagnosis,
    };
}
|
|
219
|
+
//# sourceMappingURL=dispatch-status.js.map
|
|
@@ -19,12 +19,14 @@ import { listClaims } from './claims.js';
|
|
|
19
19
|
import { listActionRequired } from './actions.js';
|
|
20
20
|
import { deleteAssignment, listAssignments, loadAssignment, saveAssignment, transitionAssignment } from './assignments.js';
|
|
21
21
|
import { listAgentRuns } from './agentruns.js';
|
|
22
|
+
import { reconcileAgentRun, reconcileDeadPidRunningAgentRunAtRead, TERMINAL_STATUSES } from './agentrun-reconciler.js';
|
|
22
23
|
import { deleteRuntimeNote, listRuntimeNotes, saveRuntimeNote, } from './runtime.js';
|
|
23
24
|
import { createConstraint, createDecision, createTrap, } from './operations/memory-write.js';
|
|
24
25
|
import { deleteMemoryItem, findMemoryItemInChain, updateMemoryItem, } from './operations/memory-mutation.js';
|
|
25
26
|
import { createPlan, deletePlan, updatePlan, } from './operations/plan.js';
|
|
26
27
|
import { ENTITY_REGISTRY, isValidTransition, } from './entity-registry.js';
|
|
27
28
|
import { generateId } from './ids.js';
|
|
29
|
+
import { CandidateTypeSchema, ConstraintCategorySchema, DecisionOutcomeSchema, MemoryVisibilitySchema, PlanTypeEnumSchema, PrioritySchema, RuntimeNoteTypeSchema, SeveritySchema, } from './schema.js';
|
|
28
30
|
/**
|
|
29
31
|
* Default provenance stamp applied on create when the caller does not
|
|
30
32
|
* supply one. `user` kind with whatever author is in the payload; the
|
|
@@ -79,6 +81,44 @@ export class InvalidTransitionError extends Error {
|
|
|
79
81
|
}
|
|
80
82
|
}
|
|
81
83
|
// ─── FIND ─────────────────────────────────────────────────────────────
|
|
84
|
+
/**
 * Lazy reconciliation pass on agent_run reads (pln#503 phase 3.2).
 *
 * Lists every agent_run and, before handing records back to the read path,
 * gives each non-terminal one a chance to converge:
 *   - status `running`: only the dead-pid check
 *     (`reconcileDeadPidRunningAgentRunAtRead`) is applied; such runs are
 *     NOT additionally passed to `reconcileAgentRun` in this pass.
 *   - any other status not in TERMINAL_STATUSES: `reconcileAgentRun`.
 *   - terminal statuses: left untouched.
 *
 * Reconciliation failures are swallowed on purpose so a read never fails
 * because of them. The list is then fetched a second time so the returned
 * records reflect any transitions made above.
 *
 * Motivation recorded by the original author: a worker that crashed before
 * its first output would otherwise keep `status="running"` indefinitely
 * (trp#292).
 *
 * @param {string} cwd - forwarded to the run store and reconcilers.
 * @returns the result of a fresh `listAgentRuns(cwd)` call.
 */
function loadAgentRunsWithReconciliation(cwd) {
    const attempt = (reconcile, runId) => {
        try {
            reconcile(runId, cwd);
        }
        catch { /* best-effort: never block reads on reconciliation errors */ }
    };
    for (const { id, status } of listAgentRuns(cwd)) {
        if (status === 'running') {
            attempt(reconcileDeadPidRunningAgentRunAtRead, id);
        }
        else if (!TERMINAL_STATUSES.has(status)) {
            attempt(reconcileAgentRun, id);
        }
    }
    // Re-list to capture any transitions made above.
    return listAgentRuns(cwd);
}
|
|
82
122
|
export function listEntities(name, cwd, filter = {}) {
|
|
83
123
|
const all = loadAll(name, cwd);
|
|
84
124
|
const filtered = applyFilter(all, filter);
|
|
@@ -97,7 +137,7 @@ function loadAll(name, cwd) {
|
|
|
97
137
|
case 'claim': return listClaims(cwd);
|
|
98
138
|
case 'action': return listActionRequired(cwd);
|
|
99
139
|
case 'assignment': return listAssignments(cwd);
|
|
100
|
-
case 'agent_run': return
|
|
140
|
+
case 'agent_run': return loadAgentRunsWithReconciliation(cwd);
|
|
101
141
|
case 'cross_project_link': return resolveCrossProjectLinks(cwd);
|
|
102
142
|
default:
|
|
103
143
|
throw new EntityOperationUnsupportedError(name, 'find');
|
|
@@ -112,12 +152,24 @@ function applyFilter(items, filter) {
|
|
|
112
152
|
if (filter.tag) {
|
|
113
153
|
result = result.filter((item) => Array.isArray(item.tags) && item.tags.includes(filter.tag));
|
|
114
154
|
}
|
|
155
|
+
if (Array.isArray(filter.tags) && filter.tags.length > 0) {
|
|
156
|
+
result = result.filter((item) => Array.isArray(item.tags) && filter.tags.some((tag) => item.tags.includes(tag)));
|
|
157
|
+
}
|
|
115
158
|
if (filter.author) {
|
|
116
159
|
result = result.filter((item) => item.author === filter.author);
|
|
117
160
|
}
|
|
118
161
|
if (filter.plan_id) {
|
|
119
162
|
result = result.filter((item) => item.plan_id === filter.plan_id);
|
|
120
163
|
}
|
|
164
|
+
if (filter.assignment_id) {
|
|
165
|
+
result = result.filter((item) => item.assignment_id === filter.assignment_id);
|
|
166
|
+
}
|
|
167
|
+
if (filter.claim_id) {
|
|
168
|
+
result = result.filter((item) => item.claim_id === filter.claim_id);
|
|
169
|
+
}
|
|
170
|
+
if (filter.message_id) {
|
|
171
|
+
result = result.filter((item) => item.message_id === filter.message_id);
|
|
172
|
+
}
|
|
121
173
|
if (filter.source) {
|
|
122
174
|
result = result.filter((item) => item.source === filter.source);
|
|
123
175
|
}
|
|
@@ -166,8 +218,8 @@ export function createEntity(name, data, cwd) {
|
|
|
166
218
|
const res = createPlan({
|
|
167
219
|
text: requireString(data, 'text'),
|
|
168
220
|
author: requireString(data, 'author'),
|
|
169
|
-
type: data
|
|
170
|
-
priority: data
|
|
221
|
+
type: requireEnum(data, 'type', PlanTypeEnumSchema.options, { optional: true }),
|
|
222
|
+
priority: requireEnum(data, 'priority', PrioritySchema.options, { optional: true }),
|
|
171
223
|
assignee: data.assignee,
|
|
172
224
|
project: data.project,
|
|
173
225
|
tags: data.tags,
|
|
@@ -182,7 +234,7 @@ export function createEntity(name, data, cwd) {
|
|
|
182
234
|
const res = createDecision({
|
|
183
235
|
text: requireString(data, 'text'),
|
|
184
236
|
author: requireString(data, 'author'),
|
|
185
|
-
outcome: data
|
|
237
|
+
outcome: requireEnum(data, 'outcome', DecisionOutcomeSchema.options, { optional: true }),
|
|
186
238
|
tags: data.tags,
|
|
187
239
|
relatedPaths: data.related_paths,
|
|
188
240
|
planId: data.plan_id,
|
|
@@ -194,7 +246,7 @@ export function createEntity(name, data, cwd) {
|
|
|
194
246
|
const res = createConstraint({
|
|
195
247
|
text: requireString(data, 'text'),
|
|
196
248
|
author: requireString(data, 'author'),
|
|
197
|
-
category: data
|
|
249
|
+
category: requireEnum(data, 'category', ConstraintCategorySchema.options, { optional: true }),
|
|
198
250
|
tags: data.tags,
|
|
199
251
|
relatedPaths: data.related_paths,
|
|
200
252
|
}, cwd);
|
|
@@ -205,7 +257,7 @@ export function createEntity(name, data, cwd) {
|
|
|
205
257
|
const res = createTrap({
|
|
206
258
|
text: requireString(data, 'text'),
|
|
207
259
|
author: requireString(data, 'author'),
|
|
208
|
-
severity: (data
|
|
260
|
+
severity: requireEnum(data, 'severity', SeveritySchema.options, { optional: true }) ?? 'medium',
|
|
209
261
|
tags: data.tags,
|
|
210
262
|
relatedPaths: data.related_paths,
|
|
211
263
|
}, cwd);
|
|
@@ -220,8 +272,8 @@ export function createEntity(name, data, cwd) {
|
|
|
220
272
|
text: requireString(data, 'text'),
|
|
221
273
|
created_at: new Date().toISOString(),
|
|
222
274
|
tags: data.tags ?? [],
|
|
223
|
-
visibility: data
|
|
224
|
-
note_type: data
|
|
275
|
+
visibility: requireEnum(data, 'visibility', MemoryVisibilitySchema.options, { optional: true }) ?? 'shared',
|
|
276
|
+
note_type: requireEnum(data, 'note_type', RuntimeNoteTypeSchema.options, { optional: true }) ?? 'observation',
|
|
225
277
|
provenance: defaultProvenance(data),
|
|
226
278
|
...(data.agent_id ? { agent_id: data.agent_id } : {}),
|
|
227
279
|
...(data.project_id ? { project_id: data.project_id } : {}),
|
|
@@ -233,9 +285,15 @@ export function createEntity(name, data, cwd) {
|
|
|
233
285
|
}
|
|
234
286
|
case 'candidate': {
|
|
235
287
|
const id = generateId('candidate');
|
|
288
|
+
const validatedType = requireEnum(data, 'type', CandidateTypeSchema.options);
|
|
289
|
+
if (!validatedType) {
|
|
290
|
+
// requireEnum without `optional` throws on missing/invalid, but
|
|
291
|
+
// narrow the type for the assignment below.
|
|
292
|
+
throw new Error(`Missing required field: type`);
|
|
293
|
+
}
|
|
236
294
|
const candidate = {
|
|
237
295
|
id,
|
|
238
|
-
type:
|
|
296
|
+
type: validatedType,
|
|
239
297
|
text: requireString(data, 'text'),
|
|
240
298
|
created_at: new Date().toISOString(),
|
|
241
299
|
author: requireString(data, 'author'),
|
|
@@ -281,6 +339,9 @@ export function updateEntity(name, id, patch, cwd) {
|
|
|
281
339
|
// declared updatable fields (text, tags, estimated_effort, depends_on)
|
|
282
340
|
// actually land. The typed surface still covers status/assignee/priority/
|
|
283
341
|
// actualEffort for legacy CLI callers — see UpdatePlanInput.
|
|
342
|
+
// Note: `plan.type` is intentionally create-only (not in plan.updatable
|
|
343
|
+
// at entity-registry.ts) — no validation needed here.
|
|
344
|
+
validatePatchEnum(patch, 'priority', PrioritySchema.options);
|
|
284
345
|
updatePlan({
|
|
285
346
|
id,
|
|
286
347
|
patch: patch,
|
|
@@ -293,6 +354,15 @@ export function updateEntity(name, id, patch, cwd) {
|
|
|
293
354
|
// Same generic-patch escape-hatch for memory items. Registry declares
|
|
294
355
|
// severity, scope, related_paths, expires_at, etc. as updatable; the
|
|
295
356
|
// legacy explicit text/tags whitelist silently dropped them.
|
|
357
|
+
if (name === 'decision') {
|
|
358
|
+
validatePatchEnum(patch, 'outcome', DecisionOutcomeSchema.options);
|
|
359
|
+
}
|
|
360
|
+
else if (name === 'constraint') {
|
|
361
|
+
validatePatchEnum(patch, 'category', ConstraintCategorySchema.options);
|
|
362
|
+
}
|
|
363
|
+
else {
|
|
364
|
+
validatePatchEnum(patch, 'severity', SeveritySchema.options);
|
|
365
|
+
}
|
|
296
366
|
updateMemoryItem({
|
|
297
367
|
id,
|
|
298
368
|
type: name,
|
|
@@ -301,6 +371,9 @@ export function updateEntity(name, id, patch, cwd) {
|
|
|
301
371
|
return { entity: name, id };
|
|
302
372
|
}
|
|
303
373
|
case 'runtime_note': {
|
|
374
|
+
// Note: `note_type` is intentionally create-only (not in
|
|
375
|
+
// runtime_note.updatable at entity-registry.ts) — no validation needed.
|
|
376
|
+
validatePatchEnum(patch, 'visibility', MemoryVisibilitySchema.options);
|
|
304
377
|
const notes = listRuntimeNotes(undefined, cwd);
|
|
305
378
|
const note = notes.find((n) => n.id === id);
|
|
306
379
|
if (!note)
|
|
@@ -318,6 +391,8 @@ export function updateEntity(name, id, patch, cwd) {
|
|
|
318
391
|
return { entity: name, id };
|
|
319
392
|
}
|
|
320
393
|
case 'candidate': {
|
|
394
|
+
// Note: `candidate.type` is intentionally create-only (not in
|
|
395
|
+
// candidate.updatable at entity-registry.ts) — no validation needed.
|
|
321
396
|
const candidate = loadCandidate(id, cwd);
|
|
322
397
|
const patched = { ...candidate, ...patch };
|
|
323
398
|
saveCandidate(patched, cwd);
|
|
@@ -484,4 +559,48 @@ function requireString(data, field) {
|
|
|
484
559
|
}
|
|
485
560
|
return value;
|
|
486
561
|
}
|
|
562
|
+
/**
 * Read `data[field]` and insist it is one of `validValues`.
 *
 * Background (candidate can_a3458961, pln#509 step 1): per the original
 * author's note, the create path used to accept enum fields unchecked, so a
 * bad value (e.g. outcome:'accepted' instead of 'approved') reached disk and
 * was later skipped at load time. Checking at write time against the same
 * value lists surfaces the mistake immediately.
 *
 * Callers pass `XxxSchema.options` (the list of allowed strings) rather than
 * the schema object itself.
 *
 * @param {object} data - payload to read from.
 * @param {string} field - key to validate.
 * @param {readonly string[]} validValues - allowed values.
 * @param {{ optional?: boolean }} [opts] - with `optional`, a missing
 *   (undefined/null) value yields `undefined` instead of throwing.
 * @returns {string|undefined} the validated value.
 * @throws {Error} when the value is missing and not optional, or is not a
 *   string contained in `validValues`.
 */
function requireEnum(data, field, validValues, opts = {}) {
    const candidate = data[field];
    if (candidate == null) {
        if (!opts.optional) {
            throw new Error(`Missing required field: ${field}`);
        }
        return undefined;
    }
    const isAllowed = typeof candidate === 'string' && validValues.includes(candidate);
    if (isAllowed) {
        return candidate;
    }
    throw new Error(`Invalid value for '${field}': got ${JSON.stringify(candidate)}. Expected one of: ${validValues.join(' | ')}`);
}
|
|
588
|
+
/**
 * Validates that, if `patch[field]` is present (and not null/undefined), it
 * matches one of `validValues`. Used by updateEntity for enum-shaped patch
 * fields, to extend the same validation parity used at create time. Codex
 * round 1 (pln#509 step 1 review) correctly flagged that updateEntity was
 * still vulnerable to the same silent persistence bug when patching enum
 * fields with invalid values. Fields not present in `patch` are ignored.
 *
 * @param {object | null | undefined} patch - Partial update to inspect. A
 *   nullish patch is treated as having no fields.
 * @param {string} field - Name of the enum-shaped field to check.
 * @param {readonly string[]} validValues - Accepted string values.
 * @returns {void}
 * @throws {Error} When the field is present with a value that is not a
 *   string contained in `validValues`.
 */
function validatePatchEnum(patch, field, validValues) {
    // Optional chaining instead of `field in patch`: the `in` operator throws
    // a TypeError on null/primitive operands, and an absent key reads back as
    // `undefined` anyway, so a single nullish check covers both cases.
    const value = patch?.[field];
    if (value === undefined || value === null) {
        return;
    }
    if (typeof value !== 'string' || !validValues.includes(value)) {
        throw new Error(`Invalid value for '${field}' in patch: got ${JSON.stringify(value)}. Expected one of: ${validValues.join(' | ')}`);
    }
}
|
|
487
606
|
//# sourceMappingURL=entity-operations.js.map
|
|
@@ -90,14 +90,38 @@ export class CliExecutionAdapter {
|
|
|
90
90
|
const spawnExecutable = resolvedExecutable ?? invoke.executable;
|
|
91
91
|
const useShell = isWin32 && /\.(cmd|bat)$/i.test(spawnExecutable);
|
|
92
92
|
const needsStdin = invoke.promptDelivery === 'stdin_pipe' && invoke.promptText;
|
|
93
|
-
|
|
93
|
+
// pln#504: open per-assignment log files for stdout/stderr capture so silent
|
|
94
|
+
// worker deaths (trp#292) become diagnosable. Previously stdio used 'ignore'
|
|
95
|
+
// for stdout+stderr — anything the worker said vanished. Best-effort: on
|
|
96
|
+
// failure to open log files we fall back to the legacy 'ignore' behaviour
|
|
97
|
+
// rather than abort the spawn.
|
|
98
|
+
const useAckWrap = !!(options.assignmentId && (options.ackRoot ?? options.worktreePath));
|
|
99
|
+
let logFds;
|
|
100
|
+
if (useAckWrap) {
|
|
101
|
+
try {
|
|
102
|
+
const logRoot = options.ackRoot ?? options.worktreePath;
|
|
103
|
+
const logDir = path.join(logRoot, '.brainclaw', 'coordination', 'runtime', 'log');
|
|
104
|
+
fs.mkdirSync(logDir, { recursive: true });
|
|
105
|
+
logFds = {
|
|
106
|
+
stdout: fs.openSync(path.join(logDir, `${options.assignmentId}.stdout.log`), 'a'),
|
|
107
|
+
stderr: fs.openSync(path.join(logDir, `${options.assignmentId}.stderr.log`), 'a'),
|
|
108
|
+
};
|
|
109
|
+
}
|
|
110
|
+
catch {
|
|
111
|
+
// Log capture is best-effort — never block the spawn on logging issues.
|
|
112
|
+
logFds = undefined;
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
const stdinTarget = needsStdin ? 'pipe' : 'ignore';
|
|
116
|
+
const stdoutTarget = logFds ? logFds.stdout : 'ignore';
|
|
117
|
+
const stderrTarget = logFds ? logFds.stderr : 'ignore';
|
|
118
|
+
const stdio = [stdinTarget, stdoutTarget, stderrTarget];
|
|
94
119
|
// pln#476: wrap the spawn command with a brief-ack step so the worker
|
|
95
120
|
// shell touches a sentinel file BEFORE the agent binary runs.
|
|
96
121
|
// waitForAssignmentHandshake checks that file as evidence the spawn
|
|
97
122
|
// executed — needed for codex (which lacks the brainclaw MCP context
|
|
98
123
|
// to call bclaw_assignment_update). When ackRoot/assignmentId are
|
|
99
124
|
// omitted, we keep the original direct-binary spawn.
|
|
100
|
-
const useAckWrap = !!(options.assignmentId && (options.ackRoot ?? options.worktreePath));
|
|
101
125
|
let child;
|
|
102
126
|
if (useAckWrap) {
|
|
103
127
|
const ackRoot = options.ackRoot ?? options.worktreePath;
|
|
@@ -139,6 +163,18 @@ export class CliExecutionAdapter {
|
|
|
139
163
|
child.stdin.end();
|
|
140
164
|
}
|
|
141
165
|
child.unref();
|
|
166
|
+
// Close the parent's copies of the log file descriptors. The child has its
|
|
167
|
+
// own dup'd copies and will keep writing to them after we return.
|
|
168
|
+
if (logFds) {
|
|
169
|
+
try {
|
|
170
|
+
fs.closeSync(logFds.stdout);
|
|
171
|
+
}
|
|
172
|
+
catch { /* best-effort */ }
|
|
173
|
+
try {
|
|
174
|
+
fs.closeSync(logFds.stderr);
|
|
175
|
+
}
|
|
176
|
+
catch { /* best-effort */ }
|
|
177
|
+
}
|
|
142
178
|
const pid = child.pid;
|
|
143
179
|
if (!pid) {
|
|
144
180
|
throw new Error(`Failed to spawn agent ${options.agent}: no PID returned`);
|