brainclaw 1.5.5 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brainclaw-vscode.vsix +0 -0
- package/dist/cli.js +124 -7
- package/dist/commands/bootstrap-loop.js +206 -0
- package/dist/commands/loop.js +156 -0
- package/dist/commands/loops-handlers.js +110 -55
- package/dist/commands/mcp-read-handlers.js +37 -0
- package/dist/commands/mcp.js +621 -202
- package/dist/commands/questions.js +180 -0
- package/dist/commands/reply.js +190 -0
- package/dist/commands/session-end.js +105 -3
- package/dist/commands/session-start.js +32 -53
- package/dist/commands/switch.js +17 -1
- package/dist/core/agentrun-reconciler.js +65 -0
- package/dist/core/claims.js +29 -0
- package/dist/core/dispatch-status.js +219 -0
- package/dist/core/entity-operations.js +128 -9
- package/dist/core/execution-adapters.js +38 -2
- package/dist/core/facade-schema.js +55 -0
- package/dist/core/federation-cloud.js +27 -12
- package/dist/core/federation-materialize.js +57 -0
- package/dist/core/instruction-templates.js +2 -0
- package/dist/core/loops/bootstrap-acquire.js +195 -0
- package/dist/core/loops/facade-schema.js +68 -1
- package/dist/core/loops/hooks/bootstrap-write.js +144 -0
- package/dist/core/loops/hooks/notify-operator.js +148 -0
- package/dist/core/loops/hooks/survey-source-reader.js +256 -0
- package/dist/core/loops/index.js +8 -2
- package/dist/core/loops/next-expected.js +63 -0
- package/dist/core/loops/presets/bootstrap.js +75 -0
- package/dist/core/loops/presets/index.js +16 -0
- package/dist/core/loops/store.js +224 -4
- package/dist/core/loops/types.js +346 -1
- package/dist/core/loops/verbs.js +739 -6
- package/dist/core/schema.js +28 -2
- package/dist/core/state.js +62 -0
- package/dist/facts.js +7 -5
- package/dist/facts.json +6 -4
- package/docs/concepts/dispatch-lifecycle.md +228 -0
- package/docs/concepts/loop-engine.md +55 -0
- package/docs/concepts/multi-agent-workflows.md +167 -166
- package/docs/concepts/troubleshooting.md +10 -2
- package/docs/integrations/overview.md +14 -12
- package/package.json +1 -1
package/dist/core/schema.js
CHANGED
|
@@ -232,6 +232,17 @@ export const HandoffSchema = z.object({
|
|
|
232
232
|
*/
|
|
233
233
|
superseded_by: z.string().optional(),
|
|
234
234
|
supersedes: z.string().optional(),
|
|
235
|
+
/**
|
|
236
|
+
* pln#365 finalization: opt-in cloud federation. Optional (no default) so
|
|
237
|
+
* existing on-disk handoffs without the field continue to parse, AND so
|
|
238
|
+
* an unset value means "stay local" rather than "implicitly shared". To
|
|
239
|
+
* opt a handoff into cloud push, set `visibility: 'shared'` explicitly
|
|
240
|
+
* when building it. Session-end push uses literal `visibility === 'shared'`
|
|
241
|
+
* (cf. session-end.ts:isExplicitlyShared). Risk it mitigates: a session
|
|
242
|
+
* handoff with `snapshot.diff` carrying secrets would otherwise leak the
|
|
243
|
+
* moment cloud_sync flips on, by virtue of being created at all.
|
|
244
|
+
*/
|
|
245
|
+
visibility: MemoryVisibilitySchema.optional(),
|
|
235
246
|
});
|
|
236
247
|
export const PlanStatusSchema = z.enum(['todo', 'in_progress', 'blocked', 'done', 'dropped']);
|
|
237
248
|
export const PlanStepStatusSchema = z.enum(['todo', 'in_progress', 'testing', 'done', 'blocked']);
|
|
@@ -243,7 +254,8 @@ export const PlanStepSchema = z.object({
|
|
|
243
254
|
created_at: z.string(),
|
|
244
255
|
updated_at: z.string(),
|
|
245
256
|
});
|
|
246
|
-
export const
|
|
257
|
+
export const PlanTypeEnumSchema = z.enum(['feat', 'fix', 'chore', 'spike', 'doc']);
|
|
258
|
+
export const PlanTypeSchema = PlanTypeEnumSchema.default('feat');
|
|
247
259
|
export const PlanItemSchema = z.object({
|
|
248
260
|
schema_version: z.number().int().positive().optional(),
|
|
249
261
|
id: z.string(),
|
|
@@ -532,6 +544,13 @@ export const CandidateSchema = z.preprocess(candidatePreprocess, z.object({
|
|
|
532
544
|
resolved_by: z.string().optional(),
|
|
533
545
|
resolution_reason: z.string().optional(),
|
|
534
546
|
provenance: ProvenancePassthroughSchema,
|
|
547
|
+
/**
|
|
548
|
+
* pln#365 finalization: opt-in cloud federation. Mirrors HandoffSchema —
|
|
549
|
+
* optional (no default) so unset means "stay local" and an agent must
|
|
550
|
+
* explicitly set `visibility: 'shared'` to opt a candidate into cloud
|
|
551
|
+
* push. Conservative because candidate.text can carry secrets.
|
|
552
|
+
*/
|
|
553
|
+
visibility: MemoryVisibilitySchema.optional(),
|
|
535
554
|
}));
|
|
536
555
|
export const ReflectiveMemoryConfigSchema = z.object({
|
|
537
556
|
enabled: z.boolean().default(true),
|
|
@@ -756,6 +775,7 @@ export const ActionRequiredSchema = z.object({
|
|
|
756
775
|
tags: TagsWithDefaultSchema,
|
|
757
776
|
});
|
|
758
777
|
// --- Runtime notes schemas ---
|
|
778
|
+
export const RuntimeNoteTypeSchema = z.enum(['observation', 'session_start', 'session_end']);
|
|
759
779
|
export const RuntimeNoteSchema = z.object({
|
|
760
780
|
schema_version: z.number().int().positive().optional(),
|
|
761
781
|
id: z.string(),
|
|
@@ -771,7 +791,7 @@ export const RuntimeNoteSchema = z.object({
|
|
|
771
791
|
visibility: MemoryVisibilitySchema.default('shared'),
|
|
772
792
|
host_id: z.string().optional(),
|
|
773
793
|
expires_at: z.string().optional(),
|
|
774
|
-
note_type:
|
|
794
|
+
note_type: RuntimeNoteTypeSchema.default('observation'),
|
|
775
795
|
model: z.string().optional(),
|
|
776
796
|
provenance: ProvenancePassthroughSchema,
|
|
777
797
|
});
|
|
@@ -937,6 +957,11 @@ export const RemoteSyncSchema = z.object({
|
|
|
937
957
|
ssh_key_path: z.string().optional(),
|
|
938
958
|
sync_strategy: z.enum(['pull-only', 'push-pull', 'pr-based']).default('push-pull'),
|
|
939
959
|
});
|
|
960
|
+
export const CloudSyncConfigSchema = z.object({
|
|
961
|
+
enabled: z.boolean().default(false),
|
|
962
|
+
endpoint: z.string().default('https://app.brainclaw.dev'),
|
|
963
|
+
api_key: z.string().optional(),
|
|
964
|
+
});
|
|
940
965
|
export const SessionSnapshotSchema = z.object({
|
|
941
966
|
schema_version: z.number().int().positive().optional(),
|
|
942
967
|
session_id: z.string(),
|
|
@@ -1232,6 +1257,7 @@ export const ConfigSchema = z.object({
|
|
|
1232
1257
|
target_audience: z.enum(['human', 'agent']).optional().default('human'),
|
|
1233
1258
|
openclaw_bridge: z.boolean().optional().default(false),
|
|
1234
1259
|
remote_sync: RemoteSyncSchema.optional(),
|
|
1260
|
+
cloud_sync: CloudSyncConfigSchema.optional(),
|
|
1235
1261
|
telemetry: z.literal(false),
|
|
1236
1262
|
allow_network: z.literal(false),
|
|
1237
1263
|
redaction: RedactionConfigSchema,
|
package/dist/core/state.js
CHANGED
|
@@ -37,6 +37,68 @@ function loadDirectoryItems(dirPath, schema, documentType) {
|
|
|
37
37
|
}
|
|
38
38
|
return items;
|
|
39
39
|
}
|
|
40
|
+
const ENTITY_LOAD_CONFIG = {
|
|
41
|
+
constraint: { subdir: 'constraints', documentType: 'constraint', recursive: false },
|
|
42
|
+
decision: { subdir: 'decisions', documentType: 'decision', recursive: false },
|
|
43
|
+
trap: { subdir: 'traps', documentType: 'trap', recursive: false },
|
|
44
|
+
handoff: { subdir: 'handoffs', documentType: 'handoff', recursive: false },
|
|
45
|
+
plan: { subdir: 'plans', documentType: 'plan', recursive: false },
|
|
46
|
+
candidate: { subdir: 'inbox', documentType: 'candidate', recursive: false },
|
|
47
|
+
claim: { subdir: 'claims', documentType: 'claim', recursive: false },
|
|
48
|
+
assignment: { subdir: 'assignments', documentType: 'assignment', recursive: false },
|
|
49
|
+
agent_run: { subdir: 'runs', documentType: 'agent_run', recursive: false },
|
|
50
|
+
action: { subdir: 'actions', documentType: 'action_required', recursive: false },
|
|
51
|
+
runtime_note: { subdir: 'runtime', documentType: 'runtime_note', recursive: true },
|
|
52
|
+
};
|
|
53
|
+
function listJsonFiles(dirPath, recursive) {
|
|
54
|
+
if (!fs.existsSync(dirPath))
|
|
55
|
+
return [];
|
|
56
|
+
const files = [];
|
|
57
|
+
for (const entry of fs.readdirSync(dirPath).sort()) {
|
|
58
|
+
const fullPath = path.join(dirPath, entry);
|
|
59
|
+
const stat = fs.statSync(fullPath);
|
|
60
|
+
if (stat.isDirectory()) {
|
|
61
|
+
if (recursive)
|
|
62
|
+
files.push(...listJsonFiles(fullPath, true));
|
|
63
|
+
continue;
|
|
64
|
+
}
|
|
65
|
+
if (entry.endsWith('.json'))
|
|
66
|
+
files.push(fullPath);
|
|
67
|
+
}
|
|
68
|
+
return files;
|
|
69
|
+
}
|
|
70
|
+
function validationErrorsFrom(error) {
|
|
71
|
+
if (error && typeof error === 'object' && 'issues' in error && Array.isArray(error.issues)) {
|
|
72
|
+
return (error.issues).map((issue) => {
|
|
73
|
+
const issuePath = Array.isArray(issue.path) && issue.path.length > 0 ? `${issue.path.join('.')}: ` : '';
|
|
74
|
+
return `${issuePath}${issue.message ?? 'validation failed'}`;
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
return [error instanceof Error ? error.message : String(error)];
|
|
78
|
+
}
|
|
79
|
+
export function collectLoadValidationWarnings(entity, cwd) {
|
|
80
|
+
const config = ENTITY_LOAD_CONFIG[entity];
|
|
81
|
+
if (!config)
|
|
82
|
+
return [];
|
|
83
|
+
const effectiveCwd = cwd ?? process.cwd();
|
|
84
|
+
const dirPath = resolveEntityDir(config.subdir, effectiveCwd, 'read');
|
|
85
|
+
return listJsonFiles(dirPath, config.recursive).flatMap((filepath) => {
|
|
86
|
+
try {
|
|
87
|
+
loadVersionedJsonFile(config.documentType, filepath);
|
|
88
|
+
return [];
|
|
89
|
+
}
|
|
90
|
+
catch (error) {
|
|
91
|
+
return [{
|
|
92
|
+
entity_id: path.basename(filepath, '.json'),
|
|
93
|
+
validation_errors: validationErrorsFrom(error),
|
|
94
|
+
path: filepath,
|
|
95
|
+
}];
|
|
96
|
+
}
|
|
97
|
+
});
|
|
98
|
+
}
|
|
99
|
+
export function findLoadValidationWarning(entity, id, cwd) {
|
|
100
|
+
return collectLoadValidationWarnings(entity, cwd).find((warning) => warning.entity_id === id);
|
|
101
|
+
}
|
|
40
102
|
export function loadState(cwd) {
|
|
41
103
|
// Load from entity-aligned directories (with legacy fallback)
|
|
42
104
|
const effectiveCwd = cwd ?? process.cwd();
|
package/dist/facts.js
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
// Generated by scripts/emit-site-facts.mjs at build time. Do not edit manually.
|
|
2
|
-
// Source: brainclaw v1.
|
|
2
|
+
// Source: brainclaw v1.6.0 on 2026-05-22T22:24:08.363Z
|
|
3
3
|
export const FACTS = {
|
|
4
|
-
"version": "1.
|
|
5
|
-
"generated_at": "2026-05-
|
|
4
|
+
"version": "1.6.0",
|
|
5
|
+
"generated_at": "2026-05-22T22:24:08.363Z",
|
|
6
6
|
"tools": {
|
|
7
|
-
"count":
|
|
8
|
-
"published_count":
|
|
7
|
+
"count": 62,
|
|
8
|
+
"published_count": 61,
|
|
9
9
|
"names": [
|
|
10
10
|
"bclaw_bootstrap",
|
|
11
11
|
"bclaw_release_notes",
|
|
@@ -30,10 +30,12 @@ export const FACTS = {
|
|
|
30
30
|
"bclaw_check_security",
|
|
31
31
|
"bclaw_read_inbox",
|
|
32
32
|
"bclaw_get_thread",
|
|
33
|
+
"bclaw_dispatch_status",
|
|
33
34
|
"bclaw_dispatch",
|
|
34
35
|
"bclaw_send_message",
|
|
35
36
|
"bclaw_ack_message",
|
|
36
37
|
"bclaw_setup",
|
|
38
|
+
"bclaw_init_project",
|
|
37
39
|
"bclaw_write_note",
|
|
38
40
|
"bclaw_quick_capture",
|
|
39
41
|
"bclaw_claim",
|
package/dist/facts.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "1.
|
|
3
|
-
"generated_at": "2026-05-
|
|
2
|
+
"version": "1.6.0",
|
|
3
|
+
"generated_at": "2026-05-22T22:24:08.363Z",
|
|
4
4
|
"tools": {
|
|
5
|
-
"count":
|
|
6
|
-
"published_count":
|
|
5
|
+
"count": 62,
|
|
6
|
+
"published_count": 61,
|
|
7
7
|
"names": [
|
|
8
8
|
"bclaw_bootstrap",
|
|
9
9
|
"bclaw_release_notes",
|
|
@@ -28,10 +28,12 @@
|
|
|
28
28
|
"bclaw_check_security",
|
|
29
29
|
"bclaw_read_inbox",
|
|
30
30
|
"bclaw_get_thread",
|
|
31
|
+
"bclaw_dispatch_status",
|
|
31
32
|
"bclaw_dispatch",
|
|
32
33
|
"bclaw_send_message",
|
|
33
34
|
"bclaw_ack_message",
|
|
34
35
|
"bclaw_setup",
|
|
36
|
+
"bclaw_init_project",
|
|
35
37
|
"bclaw_write_note",
|
|
36
38
|
"bclaw_quick_capture",
|
|
37
39
|
"bclaw_claim",
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# Dispatch lifecycle
|
|
2
|
+
|
|
3
|
+
When brainclaw routes work to another agent — `bclaw_coordinate(intent="assign"|"review"|"consult")`, `bclaw_dispatch(intent="execute")`, or a multi-turn `bclaw_loop` — it spins up **up to six related entities** plus an on-disk **brief-ack sentinel** and (since pln#504) **per-assignment stdout/stderr log files**. Knowing what each one means lets you tell at a glance whether a dispatch is alive, dead, or merely slow.
|
|
4
|
+
|
|
5
|
+
This doc is the consolidated reference. It complements:
|
|
6
|
+
- [multi-agent-workflows.md](multi-agent-workflows.md) — happy-path coordination patterns
|
|
7
|
+
- [troubleshooting.md](troubleshooting.md) — symptom-driven diagnostic playbooks
|
|
8
|
+
- [loop-engine.md](loop-engine.md) — multi-turn loop protocol details
|
|
9
|
+
- [../integrations/codex.md](../integrations/codex.md), [../integrations/claude-code.md](../integrations/claude-code.md), etc. — per-agent spawn semantics
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## The six entities
|
|
14
|
+
|
|
15
|
+
A single `bclaw_coordinate(intent="review", open_loop=true, targetAgents=[codex])` creates:
|
|
16
|
+
|
|
17
|
+
```
|
|
18
|
+
┌─────────────────┐
|
|
19
|
+
│ candidate │ cnd_… (review payload)
|
|
20
|
+
└────────┬────────┘
|
|
21
|
+
│ references
|
|
22
|
+
┌───────────────────┼──────────────────┐
|
|
23
|
+
▼ ▼ ▼
|
|
24
|
+
┌──────────┐ ┌─────────────┐ ┌──────────┐
|
|
25
|
+
│ loop │ ◄────►│ assignment │ │ message │
|
|
26
|
+
│ lop_… │ │ asgn_… │ │ msg_… │
|
|
27
|
+
└──────────┘ └──────┬──────┘ └──────────┘
|
|
28
|
+
│
|
|
29
|
+
│ owned-by
|
|
30
|
+
▼
|
|
31
|
+
┌──────────────┐
|
|
32
|
+
│ claim │ clm_… (worktree lock)
|
|
33
|
+
└──────┬───────┘
|
|
34
|
+
│ triggers
|
|
35
|
+
▼
|
|
36
|
+
┌──────────────┐
|
|
37
|
+
│ agent_run │ run_… (the OS-level spawn)
|
|
38
|
+
└──────┬───────┘
|
|
39
|
+
│
|
|
40
|
+
┌───────────────────┼─────────────────┐
|
|
41
|
+
▼ ▼ ▼
|
|
42
|
+
┌──────────┐ ┌─────────────┐ ┌────────────┐
|
|
43
|
+
│ ack file │ │ stdout log │ │ stderr log │
|
|
44
|
+
│ .ack │ │ .stdout.log │ │ .stderr.log│
|
|
45
|
+
└──────────┘ └─────────────┘ └────────────┘
|
|
46
|
+
(pln#476) (pln#504) (pln#504)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
| Entity | Prefix | Created by | Owner | Purpose |
|
|
50
|
+
|---|---|---|---|---|
|
|
51
|
+
| `candidate` | `cnd_` | the coordinate facade (review/ideate) | the dispatcher agent | Review payload that the loop references. Stays after the loop closes. |
|
|
52
|
+
| `loop` | `lop_` | `bclaw_coordinate(open_loop=true)` or `bclaw_loop(intent="open")` | the dispatcher | Multi-turn thread of structured work. Has its own FSM. |
|
|
53
|
+
| `assignment` | `asgn_` | dispatcher when targeting an agent | the **target** agent | Lifecycle event for that agent's turn. The only entity whose FSM tracks the WORKER's progress. |
|
|
54
|
+
| `message` | `msg_` | dispatcher | the dispatcher | The brief delivered to the target's inbox. |
|
|
55
|
+
| `claim` | `clm_` | dispatcher (or `bclaw_claim` directly) | the target agent | Worktree advisory lock. Released when the work is done or the agent gives up. |
|
|
56
|
+
| `agent_run` | `run_` | the CLI execution adapter, only when an OS-level spawn actually happens | the target agent | OS-level subprocess record. Status FSM tracks the LIFETIME of the process — but only the parts brainclaw can observe (see [§Liveness limits](#liveness-limits) below). |
|
|
57
|
+
|
|
58
|
+
Plus two filesystem-only artefacts created by the worker shell wrapper:
|
|
59
|
+
|
|
60
|
+
- **Brief-ack sentinel**: `.brainclaw/coordination/runtime/ack/<assignment_id>.ack` — touched by the spawn wrapper BEFORE the agent binary runs (pln#476). Proves the spawn shell got far enough to execute `touch`. Does NOT prove the agent binary itself succeeded.
|
|
61
|
+
- **stdout/stderr logs** (pln#504): `.brainclaw/coordination/runtime/log/<assignment_id>.{stdout,stderr}.log` — opened by the parent before the spawn, the child inherits dup'd fds and writes its streams there. This is the only window onto what a sandboxed worker actually said before dying.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## FSM cheatsheet
|
|
66
|
+
|
|
67
|
+
### `loop.status`
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
open ──▶ paused ──▶ open (pause / resume)
|
|
71
|
+
│
|
|
72
|
+
├──▶ completed (stop_condition met)
|
|
73
|
+
├──▶ cancelled (manual close — use when the loop dies abnormally)
|
|
74
|
+
└──▶ blocked (external blocker; intent to resume later)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
`bclaw_loop(intent="close")` accepts **only** `completed | cancelled | blocked` as `status`. **Not `failed`** — map crashed/dead loops to `cancelled` with a `reason`.
|
|
78
|
+
|
|
79
|
+
### `assignment.status`
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
created ──▶ offered ──▶ accepted ──▶ started ──▶ completed
|
|
83
|
+
│ │ │ │
|
|
84
|
+
│ │ │ └──▶ failed (worker self-reported)
|
|
85
|
+
│ │ │ └──▶ blocked (worker needs supervisor)
|
|
86
|
+
│ │ │ └──▶ cancelled (rerouted away)
|
|
87
|
+
│ │ └──▶ acceptance_ttl expired (default 15min) → cancelled
|
|
88
|
+
│ └──▶ heartbeat_ttl expired (default 30min while running) → cancelled
|
|
89
|
+
└──▶ removed by `bclaw_assignment_admin` (rare)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Transitions past `offered` require the assigned agent itself (or `bclaw_assignment_admin`). A coordinator that didn't create the assignment **cannot** update it — `Agent X cannot update assignment owned by Y` is the canonical rejection.
|
|
93
|
+
|
|
94
|
+
### `agent_run.status`
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
launching ──▶ running ──▶ completed
|
|
98
|
+
│ ──▶ failed (non-zero exit, worker reported)
|
|
99
|
+
│ ──▶ interrupted (TTL/heartbeat expiry, see below)
|
|
100
|
+
│
|
|
101
|
+
└──▶ failed (spawn returned no pid, brief-ack timeout)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
<a id="liveness-limits"></a>**Liveness limits**: `last_event_at` is bumped only when the worker writes a lifecycle event (via MCP or via the wrap shell). A worker that crashes before its first output keeps `status=running` and `last_event_at == launched_at` until reconciled. Since pln#503 phase 3.2, **any read of `agent_run` via `bclaw_find` / `bclaw_get` triggers a lazy reconciliation pass**: open runs past the 60s grace window get their pid checked, and dead workers transition to `failed` (`status_reason='silent_termination_no_evidence'`) once past the 30min stale threshold.
|
|
105
|
+
|
|
106
|
+
For a single consolidated check (run + assignment + claim + loop + pid + log tails + verdict in one response), use **`bclaw_dispatch_status(target_id)`** (pln#503 phase 3.1).
|
|
107
|
+
|
|
108
|
+
### `claim.status`
|
|
109
|
+
|
|
110
|
+
```
|
|
111
|
+
active ──▶ released
|
|
112
|
+
│
|
|
113
|
+
└──▶ adopted (another session inherited the claim, e.g. reconnect)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Releasing a claim does NOT cancel its assignment / agent_run / loop — those are independent entities. You generally need to clean up all of them together when aborting a dispatch.
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Observability decision tree
|
|
121
|
+
|
|
122
|
+
You called `bclaw_coordinate(intent="review", open_loop=true, …)` and got back `execution_status: "delivered_and_started"`. What does that actually mean?
|
|
123
|
+
|
|
124
|
+
**Fast path** (recommended since pln#503 phase 3.1): call `bclaw_dispatch_status(target_id="<asgn_…>")` and read its `diagnosis.health` + `diagnosis.recommended_next_action`. The tool consolidates the steps below into a single response — entity fan-out, pid liveness, log tails, verdict, recommended next action.
|
|
125
|
+
|
|
126
|
+
**Long path** (for understanding or when the tool isn't available):
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
1. execution_status = "delivered_and_started"
|
|
130
|
+
├──▶ Means: the spawn wrapper touched the brief-ack sentinel
|
|
131
|
+
└──▶ Does NOT mean: the worker is doing useful work
|
|
132
|
+
|
|
133
|
+
2. Verify the spawn is alive — check the agent_run record
|
|
134
|
+
bclaw_find(entity="agent_run", filter={assignment_id: "<asgn>"})
|
|
135
|
+
├──▶ status="running" AND pid alive on OS AND last_event_at < 5min ago → healthy
|
|
136
|
+
├──▶ status="running" AND pid alive AND last_event_at == launched_at → stalled (worker never produced output)
|
|
137
|
+
├──▶ status="running" AND pid dead → silently died (see logs)
|
|
138
|
+
└──▶ status="completed" / "failed" / "interrupted" → terminal, read status_reason
|
|
139
|
+
|
|
140
|
+
3. If silent, read the logs (pln#504)
|
|
141
|
+
cat .brainclaw/coordination/runtime/log/<asgn>.stderr.log
|
|
142
|
+
cat .brainclaw/coordination/runtime/log/<asgn>.stdout.log
|
|
143
|
+
├──▶ Contains an error → root cause found
|
|
144
|
+
└──▶ Empty → worker died before any write OR launched without log capture (legacy path)
|
|
145
|
+
|
|
146
|
+
4. If the worker is alive but doing nothing useful for 15+ min
|
|
147
|
+
→ most likely sandbox / MCP / capability mismatch with the brief
|
|
148
|
+
→ see ../integrations/<agent>.md "Caveats" for per-agent gotchas
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## Diagnostic playbook
|
|
154
|
+
|
|
155
|
+
When a dispatch hangs, work top-down through these checks. For the symptom-driven variant see [troubleshooting.md#inbox-messages-stuck--brief-ack-never-arrived](troubleshooting.md#inbox-messages-stuck--brief-ack-never-arrived).
|
|
156
|
+
|
|
157
|
+
### Quick triage (≤5s)
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
# Single call covers process liveness + ack + log tails + entity state + verdict
|
|
161
|
+
bclaw_dispatch_status(target_id="<asgn>") # or clm_/lop_/run_
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Read `diagnosis.health` (`healthy` | `stalled` | `silent_death` | `terminal` | `not_dispatched` | `unknown`) and `diagnosis.recommended_next_action` — usually that's all you need.
|
|
165
|
+
|
|
166
|
+
### Manual triage (≤30s — when `bclaw_dispatch_status` isn't available)
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
# 1. Is the OS-level process alive?
|
|
170
|
+
Get-Process -Id <pid> # Windows
|
|
171
|
+
ps -p <pid> # POSIX
|
|
172
|
+
|
|
173
|
+
# 2. Did the spawn wrapper actually run?
|
|
174
|
+
ls .brainclaw/coordination/runtime/ack/<asgn>.ack
|
|
175
|
+
|
|
176
|
+
# 3. What did the worker say? (pln#504)
|
|
177
|
+
cat .brainclaw/coordination/runtime/log/<asgn>.stderr.log
|
|
178
|
+
cat .brainclaw/coordination/runtime/log/<asgn>.stdout.log
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Deeper (1-5min)
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
# Full entity state — same fan-out bclaw_dispatch_status does for you
|
|
185
|
+
bclaw_get(entity="assignment", id="<asgn>") # owner, ttls, status_reason
|
|
186
|
+
bclaw_get(entity="agent_run", id="<run>") # pid, started_at, last_event_at
|
|
187
|
+
bclaw_get(entity="claim", id="<clm>") # worktree, agent
|
|
188
|
+
bclaw_get(entity="loop", id="<lop>") # current_phase, slot states
|
|
189
|
+
|
|
190
|
+
# Worktree activity
|
|
191
|
+
git -C <worktree> log --oneline -5 # any new commits?
|
|
192
|
+
git -C <worktree> status # uncommitted work?
|
|
193
|
+
ls <worktree>/REVIEW_FINDINGS.md # for review loops
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Abort a dispatch cleanly
|
|
197
|
+
|
|
198
|
+
A dead dispatch needs four cleanup steps (no single facade does all of them today):
|
|
199
|
+
|
|
200
|
+
```text
|
|
201
|
+
1. Stop-Process -Id <pid> # if pid still alive
|
|
202
|
+
2. bclaw_loop(intent="close", loop_id="<lop>", status="cancelled", reason="...")
|
|
203
|
+
3. bclaw_release_claim(id="<clm>")
|
|
204
|
+
4. (optional) bclaw_assignment_admin or leave assignment as `offered`
|
|
205
|
+
— only the owning agent can transition assignment.status, and a
|
|
206
|
+
released claim already makes it effectively orphaned
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
---
|
|
210
|
+
|
|
211
|
+
## Per-agent spawn semantics
|
|
212
|
+
|
|
213
|
+
Spawn behaviour varies by agent. The capability profile in `src/core/agent-capability.ts` describes each agent's prompt delivery, sandbox model, and MCP availability. Per-agent caveats:
|
|
214
|
+
|
|
215
|
+
- [codex.md](../integrations/codex.md#caveats) — `--sandbox workspace-write` required; spawned codex may not have brainclaw MCP wired; stdin_pipe prompt delivery; brief-ack required for headless dispatch detection.
|
|
216
|
+
- [claude-code.md](../integrations/claude-code.md) — interactive vs `-p` headless modes; tools whitelist.
|
|
217
|
+
- [copilot.md](../integrations/copilot.md), [windsurf.md](../integrations/windsurf.md), [cline.md](../integrations/cline.md), [opencode.md](../integrations/opencode.md), [roo.md](../integrations/roo.md), [kilocode.md](../integrations/kilocode.md), [continue.md](../integrations/continue.md) — per-agent specifics.
|
|
218
|
+
- [mistral-vibe.md](../integrations/mistral-vibe.md) — EU/GDPR self-hosted option.
|
|
219
|
+
|
|
220
|
+
---
|
|
221
|
+
|
|
222
|
+
## See also
|
|
223
|
+
|
|
224
|
+
- [troubleshooting.md](troubleshooting.md) — symptom-driven diagnostic playbooks
|
|
225
|
+
- [loop-engine.md](loop-engine.md) — multi-turn loop protocol, locks, advance gates
|
|
226
|
+
- [multi-agent-workflows.md](multi-agent-workflows.md) — high-level coordination scenarios
|
|
227
|
+
- [../integrations/overview.md](../integrations/overview.md) — index of supported agents
|
|
228
|
+
- [../integrations/mcp.md](../integrations/mcp.md) — full MCP tool catalog
|
|
@@ -163,6 +163,60 @@ interface LoopConflictRecord {
|
|
|
163
163
|
}
|
|
164
164
|
```
|
|
165
165
|
|
|
166
|
+
## Artifact body shapes
|
|
167
|
+
|
|
168
|
+
`LoopArtifact.body` has two known shape categories. Ref-based bodies keep large
|
|
169
|
+
content out of the loop thread JSON and store only file metadata in `body`.
|
|
170
|
+
Inline bodies keep the whole structured payload in `body` for small artifacts
|
|
171
|
+
such as operator questions and answers.
|
|
172
|
+
|
|
173
|
+
Ref-based bodies are JSON encoded as `RefBasedArtifactBody`:
|
|
174
|
+
|
|
175
|
+
- `ref`: string filename within the loop's `artifacts/` directory.
|
|
176
|
+
- `byte_count`: exact byte length of the referenced file at attach time.
|
|
177
|
+
- `sha256`: lowercase hex SHA-256 digest of the referenced file content.
|
|
178
|
+
|
|
179
|
+
The referenced file lives at
|
|
180
|
+
`.brainclaw/loops/threads/<loop_id>/artifacts/<ref>`. The champion or driver
|
|
181
|
+
code that calls `complete_turn` / `add_artifact` is responsible for writing the
|
|
182
|
+
file before or during the attach call, then attaching only
|
|
183
|
+
`JSON.stringify({ ref, byte_count, sha256 })` as the artifact body.
|
|
184
|
+
|
|
185
|
+
These artifact types use the ref-based shape:
|
|
186
|
+
|
|
187
|
+
- `signals_report`: structured discovery or bootstrap signals, often larger
|
|
188
|
+
than the inline body cap.
|
|
189
|
+
- `project_md_draft`: draft `PROJECT.md` content prepared by a loop slot.
|
|
190
|
+
- `project_md_final`: final `PROJECT.md` content accepted by the loop.
|
|
191
|
+
- `file_diff`: unified diff or patch content produced for review or apply.
|
|
192
|
+
|
|
193
|
+
Typical attach flow:
|
|
194
|
+
|
|
195
|
+
```ts
|
|
196
|
+
const body = '<content>';
|
|
197
|
+
const ref = `<artifact-id>.<ext>`;
|
|
198
|
+
const artifactsDir = path.join(memoryDir(cwd), 'loops', 'threads', loopId, 'artifacts');
|
|
199
|
+
fs.mkdirSync(artifactsDir, { recursive: true });
|
|
200
|
+
fs.writeFileSync(path.join(artifactsDir, ref), body, 'utf8');
|
|
201
|
+
const byte_count = Buffer.byteLength(body, 'utf8');
|
|
202
|
+
const sha256 = crypto.createHash('sha256').update(body, 'utf8').digest('hex');
|
|
203
|
+
complete_turn(
|
|
204
|
+
{
|
|
205
|
+
...,
|
|
206
|
+
artifact: {
|
|
207
|
+
phase,
|
|
208
|
+
type,
|
|
209
|
+
body: JSON.stringify({ ref, byte_count, sha256 }),
|
|
210
|
+
},
|
|
211
|
+
},
|
|
212
|
+
cwd,
|
|
213
|
+
);
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
`RefBasedArtifactBodySchema` in `src/core/loops/types.ts` is the authoritative
|
|
217
|
+
validator for this metadata shape. `KNOWN_ARTIFACT_BODY_SCHEMAS` in the same
|
|
218
|
+
file lists which artifact types are ref-based and which use inline JSON bodies.
|
|
219
|
+
|
|
166
220
|
## Lifecycle verbs
|
|
167
221
|
|
|
168
222
|
The engine exposes four active verbs. Each one mutates state, appends an event, and returns the updated `LoopThread`. **All verbs are strictly synchronous-on-state and asynchronous-on-work**: any downstream dispatch (spawning a CLI, calling another MCP tool) is fire-and-forget from the commit window, so the per-loop lock is always released quickly.
|
|
@@ -450,6 +504,7 @@ Status after Codex schema review (cnd#574 / `dec_be66ccbf`, verdict `needs_revis
|
|
|
450
504
|
|
|
451
505
|
- [plans-and-claims.md](plans-and-claims.md)
|
|
452
506
|
- [coordination.md](coordination.md)
|
|
507
|
+
- [dispatch-lifecycle.md](dispatch-lifecycle.md) — entity FSMs (loop / assignment / agent_run / claim), brief-ack semantics, log-file diagnostic playbook
|
|
453
508
|
- [runtime-notes.md](runtime-notes.md)
|
|
454
509
|
- pln#394 `feat/loop-engine-mvp`
|
|
455
510
|
- pln#395 `feat/review-loop-protocol`
|