@mclawnet/swarm 0.1.11 → 0.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/dynamic-add-integration.test.d.ts +2 -0
- package/dist/__tests__/dynamic-add-integration.test.d.ts.map +1 -0
- package/dist/__tests__/dynamic-add-integration.test.js +108 -0
- package/dist/__tests__/dynamic-add-integration.test.js.map +1 -0
- package/dist/__tests__/inbox-store-notbefore.test.d.ts +2 -0
- package/dist/__tests__/inbox-store-notbefore.test.d.ts.map +1 -0
- package/dist/__tests__/inbox-store-notbefore.test.js +47 -0
- package/dist/__tests__/inbox-store-notbefore.test.js.map +1 -0
- package/dist/__tests__/inbox-store-swarmid-validation.test.d.ts +2 -0
- package/dist/__tests__/inbox-store-swarmid-validation.test.d.ts.map +1 -0
- package/dist/__tests__/inbox-store-swarmid-validation.test.js +32 -0
- package/dist/__tests__/inbox-store-swarmid-validation.test.js.map +1 -0
- package/dist/__tests__/membership-change.test.d.ts +2 -0
- package/dist/__tests__/membership-change.test.d.ts.map +1 -0
- package/dist/__tests__/membership-change.test.js +132 -0
- package/dist/__tests__/membership-change.test.js.map +1 -0
- package/dist/__tests__/persistence.test.js +93 -4
- package/dist/__tests__/persistence.test.js.map +1 -1
- package/dist/__tests__/phase4-5-e2e.test.js +7 -7
- package/dist/__tests__/phase4-5-e2e.test.js.map +1 -1
- package/dist/__tests__/phase6-7-e2e.test.js +10 -7
- package/dist/__tests__/phase6-7-e2e.test.js.map +1 -1
- package/dist/__tests__/plan-sync.test.d.ts +2 -0
- package/dist/__tests__/plan-sync.test.d.ts.map +1 -0
- package/dist/__tests__/plan-sync.test.js +30 -0
- package/dist/__tests__/plan-sync.test.js.map +1 -0
- package/dist/__tests__/projects-fs.test.js +30 -0
- package/dist/__tests__/projects-fs.test.js.map +1 -1
- package/dist/__tests__/recovery-forwards-to-coordinator.test.js +1 -1
- package/dist/__tests__/recovery-forwards-to-coordinator.test.js.map +1 -1
- package/dist/__tests__/recovery-resume.test.js +15 -5
- package/dist/__tests__/recovery-resume.test.js.map +1 -1
- package/dist/__tests__/role-loader-editor.test.d.ts +2 -0
- package/dist/__tests__/role-loader-editor.test.d.ts.map +1 -0
- package/dist/__tests__/role-loader-editor.test.js +168 -0
- package/dist/__tests__/role-loader-editor.test.js.map +1 -0
- package/dist/__tests__/role-loader.test.js +9 -0
- package/dist/__tests__/role-loader.test.js.map +1 -1
- package/dist/__tests__/spawn-role-injects-briefings.test.js +40 -28
- package/dist/__tests__/spawn-role-injects-briefings.test.js.map +1 -1
- package/dist/__tests__/spawn-role-mutex.test.d.ts +2 -0
- package/dist/__tests__/spawn-role-mutex.test.d.ts.map +1 -0
- package/dist/__tests__/spawn-role-mutex.test.js +158 -0
- package/dist/__tests__/spawn-role-mutex.test.js.map +1 -0
- package/dist/__tests__/spawn-role-rollback.test.d.ts +2 -0
- package/dist/__tests__/spawn-role-rollback.test.d.ts.map +1 -0
- package/dist/__tests__/spawn-role-rollback.test.js +180 -0
- package/dist/__tests__/spawn-role-rollback.test.js.map +1 -0
- package/dist/__tests__/swarm-coordinator-backend.test.d.ts +2 -0
- package/dist/__tests__/swarm-coordinator-backend.test.d.ts.map +1 -0
- package/dist/__tests__/swarm-coordinator-backend.test.js +334 -0
- package/dist/__tests__/swarm-coordinator-backend.test.js.map +1 -0
- package/dist/__tests__/swarm-coordinator-init.test.js +2 -2
- package/dist/__tests__/swarm-coordinator-init.test.js.map +1 -1
- package/dist/__tests__/swarm-coordinator-plan-sync.test.d.ts +2 -0
- package/dist/__tests__/swarm-coordinator-plan-sync.test.d.ts.map +1 -0
- package/dist/__tests__/swarm-coordinator-plan-sync.test.js +263 -0
- package/dist/__tests__/swarm-coordinator-plan-sync.test.js.map +1 -0
- package/dist/__tests__/swarm-coordinator-resume.test.js +27 -17
- package/dist/__tests__/swarm-coordinator-resume.test.js.map +1 -1
- package/dist/__tests__/swarm-coordinator-roleId.test.js +24 -9
- package/dist/__tests__/swarm-coordinator-roleId.test.js.map +1 -1
- package/dist/__tests__/sync-plan-status.test.d.ts +2 -0
- package/dist/__tests__/sync-plan-status.test.d.ts.map +1 -0
- package/dist/__tests__/sync-plan-status.test.js +198 -0
- package/dist/__tests__/sync-plan-status.test.js.map +1 -0
- package/dist/__tests__/template-loader-editor.test.d.ts +2 -0
- package/dist/__tests__/template-loader-editor.test.d.ts.map +1 -0
- package/dist/__tests__/template-loader-editor.test.js +156 -0
- package/dist/__tests__/template-loader-editor.test.js.map +1 -0
- package/dist/__tests__/wakeup-end-to-end.test.d.ts +2 -0
- package/dist/__tests__/wakeup-end-to-end.test.d.ts.map +1 -0
- package/dist/__tests__/wakeup-end-to-end.test.js +80 -0
- package/dist/__tests__/wakeup-end-to-end.test.js.map +1 -0
- package/dist/__tests__/wakeup-scheduler-restore-fires.test.d.ts +2 -0
- package/dist/__tests__/wakeup-scheduler-restore-fires.test.d.ts.map +1 -0
- package/dist/__tests__/wakeup-scheduler-restore-fires.test.js +33 -0
- package/dist/__tests__/wakeup-scheduler-restore-fires.test.js.map +1 -0
- package/dist/__tests__/wakeup-scheduler-restore.test.d.ts +2 -0
- package/dist/__tests__/wakeup-scheduler-restore.test.d.ts.map +1 -0
- package/dist/__tests__/wakeup-scheduler-restore.test.js +62 -0
- package/dist/__tests__/wakeup-scheduler-restore.test.js.map +1 -0
- package/dist/__tests__/wakeup-scheduler.test.d.ts +2 -0
- package/dist/__tests__/wakeup-scheduler.test.d.ts.map +1 -0
- package/dist/__tests__/wakeup-scheduler.test.js +65 -0
- package/dist/__tests__/wakeup-scheduler.test.js.map +1 -0
- package/dist/__tests__/watch-manager.test.d.ts +2 -0
- package/dist/__tests__/watch-manager.test.d.ts.map +1 -0
- package/dist/__tests__/watch-manager.test.js +203 -0
- package/dist/__tests__/watch-manager.test.js.map +1 -0
- package/dist/inbox-store.d.ts +7 -0
- package/dist/inbox-store.d.ts.map +1 -1
- package/dist/inbox-store.js +8 -1
- package/dist/inbox-store.js.map +1 -1
- package/dist/index.d.ts +7 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -2
- package/dist/index.js.map +1 -1
- package/dist/persistence.d.ts +5 -1
- package/dist/persistence.d.ts.map +1 -1
- package/dist/persistence.js +4 -1
- package/dist/persistence.js.map +1 -1
- package/dist/plan-sync.d.ts +10 -0
- package/dist/plan-sync.d.ts.map +1 -0
- package/dist/plan-sync.js +37 -0
- package/dist/plan-sync.js.map +1 -0
- package/dist/projects-fs.d.ts +2 -0
- package/dist/projects-fs.d.ts.map +1 -1
- package/dist/projects-fs.js +1 -0
- package/dist/projects-fs.js.map +1 -1
- package/dist/recovery.d.ts +1 -1
- package/dist/recovery.js +1 -1
- package/dist/roles/role-loader.d.ts +46 -2
- package/dist/roles/role-loader.d.ts.map +1 -1
- package/dist/roles/role-loader.js +139 -20
- package/dist/roles/role-loader.js.map +1 -1
- package/dist/roles/types.d.ts +13 -0
- package/dist/roles/types.d.ts.map +1 -1
- package/dist/swarm-coordinator.d.ts +87 -9
- package/dist/swarm-coordinator.d.ts.map +1 -1
- package/dist/swarm-coordinator.js +392 -60
- package/dist/swarm-coordinator.js.map +1 -1
- package/dist/templates/template-loader.d.ts +42 -2
- package/dist/templates/template-loader.d.ts.map +1 -1
- package/dist/templates/template-loader.js +138 -17
- package/dist/templates/template-loader.js.map +1 -1
- package/dist/types.d.ts +25 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/wakeup-scheduler.d.ts +36 -0
- package/dist/wakeup-scheduler.d.ts.map +1 -0
- package/dist/wakeup-scheduler.js +107 -0
- package/dist/wakeup-scheduler.js.map +1 -0
- package/dist/watch-manager.d.ts +32 -0
- package/dist/watch-manager.d.ts.map +1 -0
- package/dist/watch-manager.js +153 -0
- package/dist/watch-manager.js.map +1 -0
- package/package.json +6 -6
- package/roles/queen.md +26 -0
- package/dist/message-router.d.ts +0 -37
- package/dist/message-router.d.ts.map +0 -1
- package/dist/message-router.js +0 -60
- package/dist/message-router.js.map +0 -1
|
@@ -11,10 +11,12 @@ import { InboxWatcher } from "./inbox-watcher.js";
|
|
|
11
11
|
import { randomUUID } from "node:crypto";
|
|
12
12
|
import { EvolutionPipeline } from "@mclawnet/skill-manager";
|
|
13
13
|
import { TaskStore, computeLeadBriefing, computeMemberBriefing, computeTaskBriefing, formatLeadBriefing, formatMemberBriefing, formatTaskBriefing, projectRoot } from "@mclawnet/task";
|
|
14
|
+
import { pickStrongestStatus } from "./plan-sync.js";
|
|
14
15
|
import { existsSync } from "node:fs";
|
|
15
16
|
import { homedir } from "node:os";
|
|
16
17
|
import { join } from "node:path";
|
|
17
18
|
import { createLogger } from "@mclawnet/logger";
|
|
19
|
+
import { DEFAULT_SANDBOX } from "@mclawnet/shared";
|
|
18
20
|
const log = createLogger({ module: "swarm" });
|
|
19
21
|
// Queen periodic patrol: pure safety-net heartbeat now that crash detection
|
|
20
22
|
// is event-driven via handleRoleCrashed. Worker `task_set_status` /
|
|
@@ -49,6 +51,18 @@ export class SwarmCoordinator {
|
|
|
49
51
|
swarms = new Map();
|
|
50
52
|
inboxRelay;
|
|
51
53
|
inboxWatcher;
|
|
54
|
+
/**
|
|
55
|
+
* Per-swarm spawn mutex (PR#5). A simple Promise-chain serialises every
|
|
56
|
+
* `spawnRole` call against a given swarmId so the
|
|
57
|
+
* `nextInstanceSeq.get / set` read-modify-write pair is atomic from the
|
|
58
|
+
* caller's perspective. Without this, two concurrent dynamic adds (e.g.
|
|
59
|
+
* the UI clicking "+ 添加成员" twice quickly while a prior spawn awaits
|
|
60
|
+
* its backend createSession) both observe seq=N and collide on the
|
|
61
|
+
* resulting `dev-N` instanceId. The chain element is replaced on every
|
|
62
|
+
* call so completed entries garbage-collect naturally; we never delete
|
|
63
|
+
* the key (cheap one-entry-per-swarm and avoids races on cleanup).
|
|
64
|
+
*/
|
|
65
|
+
spawnLocks = new Map();
|
|
52
66
|
constructor(sessionAdapter, hub,
|
|
53
67
|
/**
|
|
54
68
|
* Optional factory that resolves a per-swarm TaskStore (workDir-scoped).
|
|
@@ -95,6 +109,85 @@ export class SwarmCoordinator {
|
|
|
95
109
|
}
|
|
96
110
|
return this.taskStoreFactory;
|
|
97
111
|
}
|
|
112
|
+
/**
|
|
113
|
+
* Persist swarm snapshot to recovery.json, first reconciling plan task
|
|
114
|
+
* statuses from the live TaskStore (Bug #2 方案 B). All save call sites
|
|
115
|
+
* MUST go through this wrapper instead of saveSwarmSnapshot directly so
|
|
116
|
+
* the on-disk plan never lies about task progress.
|
|
117
|
+
*
|
|
118
|
+
* Why this exists:
|
|
119
|
+
* `swarm.plan` is a snapshot of queen's initial output — queen sets
|
|
120
|
+
* each task.status to "pending" once and never updates the plan
|
|
121
|
+
* object. Without this sync, recovery.json would keep showing
|
|
122
|
+
* "pending" forever, misleading both the queen LLM (when re-reading
|
|
123
|
+
* its own plan after compaction) and human readers.
|
|
124
|
+
*/
|
|
125
|
+
persistSwarm(swarm) {
|
|
126
|
+
try {
|
|
127
|
+
this.syncPlanStatusFromTasks(swarm);
|
|
128
|
+
}
|
|
129
|
+
catch (err) {
|
|
130
|
+
// Sync failure must not block persistence — recovery.json is the
|
|
131
|
+
// critical artifact for kill+restart; a stale plan is far better
|
|
132
|
+
// than no snapshot at all.
|
|
133
|
+
log.warn({ err, swarmId: swarm.id }, "syncPlanStatusFromTasks failed, persisting anyway");
|
|
134
|
+
}
|
|
135
|
+
saveSwarmSnapshot(swarm);
|
|
136
|
+
}
|
|
137
|
+
/**
|
|
138
|
+
* Match plan tasks to live task store entries via the explicit
|
|
139
|
+
* `planTaskId` foreign key (PR#1). Live tasks without a planTaskId are
|
|
140
|
+
* ignored — the queen is now responsible for tagging plan-derived tasks
|
|
141
|
+
* when she calls `task_create` / `task_create_from_message`. When multiple
|
|
142
|
+
* live tasks share the same planTaskId (e.g. queen retried a failure),
|
|
143
|
+
* `pickStrongestStatus` folds them: completed > in_progress > pending >
|
|
144
|
+
* cancelled.
|
|
145
|
+
*
|
|
146
|
+
* Replaces the prior subject-prefix heuristic (PR #73 / Bug #2 方案 B),
|
|
147
|
+
* which was fragile to queens rephrasing subjects and t1↔t10 collisions.
|
|
148
|
+
*/
|
|
149
|
+
syncPlanStatusFromTasks(swarm) {
|
|
150
|
+
const plan = swarm.plan;
|
|
151
|
+
if (!plan || !plan.phases || !swarm.workDir)
|
|
152
|
+
return;
|
|
153
|
+
const taskStore = this.resolveTaskStore(swarm.workDir);
|
|
154
|
+
// No-op when no TaskStore is wired (e.g. unit tests with bare
|
|
155
|
+
// construction, or callers that intentionally opted out of DI).
|
|
156
|
+
// Plan stays with queen's original statuses — a stale display is
|
|
157
|
+
// strictly better than a runtime crash here.
|
|
158
|
+
if (!taskStore)
|
|
159
|
+
return;
|
|
160
|
+
const liveTasks = taskStore.listBySwarm(swarm.id);
|
|
161
|
+
if (liveTasks.length === 0)
|
|
162
|
+
return;
|
|
163
|
+
// Group live tasks by planTaskId. Tasks without a planTaskId are
|
|
164
|
+
// intentionally skipped — they are ad-hoc work that doesn't belong to
|
|
165
|
+
// any plan node, and silently subject-matching them would re-introduce
|
|
166
|
+
// the very ambiguity this PR removes.
|
|
167
|
+
const byPlanId = new Map();
|
|
168
|
+
for (const t of liveTasks) {
|
|
169
|
+
if (!t.planTaskId)
|
|
170
|
+
continue;
|
|
171
|
+
const arr = byPlanId.get(t.planTaskId) ?? [];
|
|
172
|
+
arr.push(t);
|
|
173
|
+
byPlanId.set(t.planTaskId, arr);
|
|
174
|
+
}
|
|
175
|
+
let synced = 0;
|
|
176
|
+
let total = 0;
|
|
177
|
+
for (const phase of plan.phases) {
|
|
178
|
+
if (!phase.tasks)
|
|
179
|
+
continue;
|
|
180
|
+
for (const task of phase.tasks) {
|
|
181
|
+
total++;
|
|
182
|
+
const matches = byPlanId.get(task.id);
|
|
183
|
+
if (!matches || matches.length === 0)
|
|
184
|
+
continue;
|
|
185
|
+
task.status = pickStrongestStatus(matches.map((t) => t.status));
|
|
186
|
+
synced++;
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
log.debug({ swarmId: swarm.id, planTasksTotal: total, planTasksSynced: synced }, "syncPlanStatusFromTasks");
|
|
190
|
+
}
|
|
98
191
|
// ── Public API ──────────────────────────────────────────────────────
|
|
99
192
|
/** Create a new swarm with two-phase initialization. */
|
|
100
193
|
async create(swarmSessionId, options) {
|
|
@@ -106,6 +199,38 @@ export class SwarmCoordinator {
|
|
|
106
199
|
if (options.templateName) {
|
|
107
200
|
const tpl = loadTemplate(options.templateName);
|
|
108
201
|
roleSpecs = tpl.roles.map((r) => ({ roleName: r.roleName, count: r.count, eager: r.eager }));
|
|
202
|
+
// Overlay per-role backend from options.roles (UI selector). To stay
|
|
203
|
+
// correct under duplicate roleNames in the template (e.g. two
|
|
204
|
+
// {roleName:"developer"} entries), walk options.roles in order and
|
|
205
|
+
// claim the FIRST template spec whose name matches AND whose backend
|
|
206
|
+
// hasn't been overlaid yet.
|
|
207
|
+
//
|
|
208
|
+
// Note on UI/coordinator asymmetry: SwarmConfigInline collapses duplicate
|
|
209
|
+
// roleNames in its selection list via `.find()`, so today the UI only
|
|
210
|
+
// ever sends one entry per roleName and at most one slot gets overlaid
|
|
211
|
+
// here. This positional-claim algorithm is intentionally more general so
|
|
212
|
+
// non-UI callers (tests, future bulk APIs) can pass multiple entries with
|
|
213
|
+
// the same roleName and get distinct overlays.
|
|
214
|
+
if (options.roles) {
|
|
215
|
+
const claimed = new Set();
|
|
216
|
+
for (const uiRole of options.roles) {
|
|
217
|
+
const idx = roleSpecs.findIndex((s, i) => !claimed.has(i) && s.roleName === uiRole.roleName);
|
|
218
|
+
if (idx >= 0) {
|
|
219
|
+
claimed.add(idx);
|
|
220
|
+
if (uiRole.backend) {
|
|
221
|
+
roleSpecs[idx] = { ...roleSpecs[idx], backend: uiRole.backend };
|
|
222
|
+
}
|
|
223
|
+
// Mirror backend overlay for sandbox so user's per-role choice in
|
|
224
|
+
// SwarmConfigInline reaches spawnRole → role.definition.sandbox →
|
|
225
|
+
// persistence.json. Without this, only dynamically-added roles
|
|
226
|
+
// ever carried `sandbox`; the 6 initial roles silently dropped to
|
|
227
|
+
// undefined and recovery.json had no record of the user's pick.
|
|
228
|
+
if (uiRole.sandbox) {
|
|
229
|
+
roleSpecs[idx] = { ...roleSpecs[idx], sandbox: uiRole.sandbox };
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
109
234
|
}
|
|
110
235
|
else {
|
|
111
236
|
roleSpecs = options.roles ?? [];
|
|
@@ -115,6 +240,7 @@ export class SwarmCoordinator {
|
|
|
115
240
|
hubSessionId: swarmSessionId,
|
|
116
241
|
workDir: options.workDir,
|
|
117
242
|
teamName: options.templateName,
|
|
243
|
+
displayName: options.displayName,
|
|
118
244
|
roles: new Map(),
|
|
119
245
|
plan: null,
|
|
120
246
|
nextInstanceSeq: new Map(),
|
|
@@ -125,7 +251,6 @@ export class SwarmCoordinator {
|
|
|
125
251
|
planStatus: "none",
|
|
126
252
|
};
|
|
127
253
|
// Store pending role specs for on-demand spawning
|
|
128
|
-
swarm._pendingRoleSpecs = roleSpecs;
|
|
129
254
|
this.swarms.set(swarmSessionId, swarm);
|
|
130
255
|
// Snapshot may already exist from a prior run (continuation, or
|
|
131
256
|
// restart-after-crash with the same swarmId). Record this BEFORE any
|
|
@@ -153,7 +278,12 @@ export class SwarmCoordinator {
|
|
|
153
278
|
for (const spec of eagerSpecs) {
|
|
154
279
|
const count = spec.count ?? 1;
|
|
155
280
|
for (let i = 0; i < count; i++) {
|
|
156
|
-
const role = await this.spawnRole(swarmSessionId, spec.roleName,
|
|
281
|
+
const role = await this.spawnRole(swarmSessionId, spec.roleName, {
|
|
282
|
+
customPrompt: spec.customPrompt,
|
|
283
|
+
customDefinition: spec.customDefinition,
|
|
284
|
+
backendOverride: spec.backend,
|
|
285
|
+
sandboxOverride: spec.sandbox,
|
|
286
|
+
});
|
|
157
287
|
trackOpen(role);
|
|
158
288
|
}
|
|
159
289
|
}
|
|
@@ -163,13 +293,12 @@ export class SwarmCoordinator {
|
|
|
163
293
|
swarm.status = "running";
|
|
164
294
|
const queen = this.findQueen(swarm);
|
|
165
295
|
if (queen) {
|
|
166
|
-
|
|
167
|
-
//
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
});
|
|
296
|
+
// Initial spawn delivers a "成员变更" envelope listing every just-spawned
|
|
297
|
+
// role as "+ 新增". Refactored from the prior bespoke "蜂群初始化完成"
|
|
298
|
+
// envelope so dynamic adds (PR#5) reuse exactly the same protocol;
|
|
299
|
+
// queen-side handling stays uniform across init vs. runtime adds.
|
|
300
|
+
const initiallyAdded = [...swarm.roles.values()].filter((r) => r.definition.type !== "queen");
|
|
301
|
+
await this.notifyMembershipChange(swarm, { added: initiallyAdded });
|
|
173
302
|
// Inject continuation context from previous run if applicable
|
|
174
303
|
if (options.isContinuation) {
|
|
175
304
|
const summary = this.buildContinuationSummary(swarmSessionId);
|
|
@@ -192,7 +321,7 @@ export class SwarmCoordinator {
|
|
|
192
321
|
}
|
|
193
322
|
log.info({ swarmId: swarmSessionId, roleCount: swarm.roles.size }, "swarm created");
|
|
194
323
|
// Persistence: save initial snapshot
|
|
195
|
-
|
|
324
|
+
this.persistSwarm(swarm);
|
|
196
325
|
// Start inbox watcher to react to inbox file changes (best-effort).
|
|
197
326
|
if (options.workDir) {
|
|
198
327
|
try {
|
|
@@ -262,8 +391,11 @@ export class SwarmCoordinator {
|
|
|
262
391
|
const { swarm, role } = this.findByRoleSessionId(roleSessionId);
|
|
263
392
|
if (!swarm || !role)
|
|
264
393
|
return false;
|
|
394
|
+
// Output is already normalized to Claude SDK shape by
|
|
395
|
+
// SessionManager's onOutput gateway (see normalize-backend-output.ts).
|
|
396
|
+
const event = data;
|
|
265
397
|
// Extract text content from the streaming event
|
|
266
|
-
const text = extractTextFromEvent(
|
|
398
|
+
const text = extractTextFromEvent(event);
|
|
267
399
|
// Parse swarm action blocks
|
|
268
400
|
if (text) {
|
|
269
401
|
// Legacy-format safety net: if a reviewer regresses to the old
|
|
@@ -289,7 +421,7 @@ export class SwarmCoordinator {
|
|
|
289
421
|
if (plan) {
|
|
290
422
|
swarm.plan = plan;
|
|
291
423
|
swarm.planStatus = "draft";
|
|
292
|
-
|
|
424
|
+
this.persistSwarm(swarm);
|
|
293
425
|
this.sendStatusUpdate(swarm);
|
|
294
426
|
log.info({ swarmId: swarm.id, instanceId: role.instanceId }, "plan updated (draft)");
|
|
295
427
|
this.requestPlanReview(swarm, plan).catch((err) => {
|
|
@@ -307,7 +439,7 @@ export class SwarmCoordinator {
|
|
|
307
439
|
sessionId: swarm.hubSessionId,
|
|
308
440
|
instanceId: role.instanceId,
|
|
309
441
|
roleName: role.roleName,
|
|
310
|
-
data,
|
|
442
|
+
data: event,
|
|
311
443
|
});
|
|
312
444
|
return true;
|
|
313
445
|
}
|
|
@@ -321,18 +453,18 @@ export class SwarmCoordinator {
|
|
|
321
453
|
return false;
|
|
322
454
|
// Update role status
|
|
323
455
|
role.status = "idle";
|
|
324
|
-
// Persist per-role
|
|
456
|
+
// Persist per-role backendSessionId — turn_complete frame carries the
|
|
325
457
|
// backend's real session UUID. We need it so a future restart can
|
|
326
458
|
// `--resume` this exact role's conversation (Task 4 / Phase 4-5).
|
|
327
|
-
if (info.
|
|
328
|
-
role.
|
|
459
|
+
if (info.backendSessionId && role.backendSessionId !== info.backendSessionId) {
|
|
460
|
+
role.backendSessionId = info.backendSessionId;
|
|
329
461
|
// saveSwarmSnapshot is sync-fire (proper-lockfile internally async);
|
|
330
462
|
// call directly — failures should not break turn completion.
|
|
331
463
|
try {
|
|
332
|
-
|
|
464
|
+
this.persistSwarm(swarm);
|
|
333
465
|
}
|
|
334
466
|
catch (err) {
|
|
335
|
-
log.warn({ err, swarmId: swarm.id, instanceId: role.instanceId }, "failed to persist
|
|
467
|
+
log.warn({ err, swarmId: swarm.id, instanceId: role.instanceId }, "failed to persist backendSessionId on turn complete");
|
|
336
468
|
}
|
|
337
469
|
}
|
|
338
470
|
// Settle inbox echoes for this turn (fire-and-forget).
|
|
@@ -349,37 +481,55 @@ export class SwarmCoordinator {
|
|
|
349
481
|
return true;
|
|
350
482
|
}
|
|
351
483
|
/**
|
|
352
|
-
* Persist the per-role
|
|
484
|
+
* Persist the per-role backendSessionId immediately (e.g. from `system/init`
|
|
353
485
|
* frame, before the first turn_complete). Returns true if a role was found
|
|
354
486
|
* and updated; false otherwise (no-op for non-swarm sessions).
|
|
355
487
|
*/
|
|
356
|
-
|
|
488
|
+
setRoleBackendSessionId(swarmId, instanceId, backendSessionId) {
|
|
357
489
|
const swarm = this.swarms.get(swarmId);
|
|
358
490
|
if (!swarm)
|
|
359
491
|
return false;
|
|
360
492
|
const role = swarm.roles.get(instanceId);
|
|
361
493
|
if (!role)
|
|
362
494
|
return false;
|
|
363
|
-
if (role.
|
|
495
|
+
if (role.backendSessionId === backendSessionId)
|
|
364
496
|
return true;
|
|
365
|
-
role.
|
|
497
|
+
role.backendSessionId = backendSessionId;
|
|
366
498
|
try {
|
|
367
|
-
|
|
499
|
+
this.persistSwarm(swarm);
|
|
368
500
|
}
|
|
369
501
|
catch (err) {
|
|
370
|
-
log.warn({ err, swarmId, instanceId }, "failed to persist
|
|
502
|
+
log.warn({ err, swarmId, instanceId }, "failed to persist backendSessionId");
|
|
371
503
|
}
|
|
372
504
|
return true;
|
|
373
505
|
}
|
|
374
|
-
/** Convenience: same as
|
|
375
|
-
|
|
506
|
+
/** Convenience: same as setRoleBackendSessionId but takes the `${swarmId}::${instanceId}` roleSessionId. */
|
|
507
|
+
setRoleBackendSessionIdBySession(roleSessionId, backendSessionId) {
|
|
376
508
|
const { swarm, role } = this.findByRoleSessionId(roleSessionId);
|
|
377
509
|
if (!swarm || !role)
|
|
378
510
|
return false;
|
|
379
|
-
return this.
|
|
511
|
+
return this.setRoleBackendSessionId(swarm.id, role.instanceId, backendSessionId);
|
|
380
512
|
}
|
|
381
|
-
/**
|
|
382
|
-
|
|
513
|
+
/**
|
|
514
|
+
* Public spawnRole entry point. Wraps the real body in a per-swarm
|
|
515
|
+
* Promise-chain mutex (see `spawnLocks`) so concurrent calls against the
|
|
516
|
+
* same swarmId serialise — preventing nextInstanceSeq collisions on
|
|
517
|
+
* parallel dynamic adds. Calls against different swarmIds run in
|
|
518
|
+
* parallel as before.
|
|
519
|
+
*/
|
|
520
|
+
async spawnRole(swarmId, roleName, opts = {}) {
|
|
521
|
+
const prev = this.spawnLocks.get(swarmId) ?? Promise.resolve();
|
|
522
|
+
// `.catch(() => {})` here ensures a previous spawn's rejection does NOT
|
|
523
|
+
// poison subsequent waiters — they only need ORDER guarantees, not
|
|
524
|
+
// shared success/failure semantics. Each call's own rejection still
|
|
525
|
+
// surfaces through `next` to its own awaiter.
|
|
526
|
+
const next = prev.catch(() => undefined).then(() => this.doSpawnRole(swarmId, roleName, opts));
|
|
527
|
+
this.spawnLocks.set(swarmId, next);
|
|
528
|
+
return next;
|
|
529
|
+
}
|
|
530
|
+
/** Spawn a new role instance in a swarm (real implementation, serialized by spawnRole). */
|
|
531
|
+
async doSpawnRole(swarmId, roleName, opts = {}) {
|
|
532
|
+
const { taskPrompt, customPrompt, customDefinition, additionalDirs, resumeId, presetInstanceId, backendOverride, sandboxOverride, isDynamicAdd, } = opts;
|
|
383
533
|
const swarm = this.swarms.get(swarmId);
|
|
384
534
|
if (!swarm)
|
|
385
535
|
throw new Error(`Swarm ${swarmId} not found`);
|
|
@@ -394,6 +544,7 @@ export class SwarmCoordinator {
|
|
|
394
544
|
capabilities: customDefinition.capabilities ?? [],
|
|
395
545
|
color: customDefinition.color,
|
|
396
546
|
promptBody: customDefinition.promptBody,
|
|
547
|
+
backend: customDefinition.backend,
|
|
397
548
|
};
|
|
398
549
|
}
|
|
399
550
|
else {
|
|
@@ -403,10 +554,36 @@ export class SwarmCoordinator {
|
|
|
403
554
|
if (customPrompt) {
|
|
404
555
|
definition.promptBody = customPrompt;
|
|
405
556
|
}
|
|
557
|
+
// Per-spawn backend override (UI selector) takes precedence over the
|
|
558
|
+
// role definition's static backend. Frozen here for the lifetime of the
|
|
559
|
+
// role instance.
|
|
560
|
+
if (backendOverride) {
|
|
561
|
+
definition = { ...definition, backend: backendOverride };
|
|
562
|
+
}
|
|
563
|
+
// Per-spawn sandbox override — same precedence rule. Each adapter
|
|
564
|
+
// maps the abstract level to its own permission model (see
|
|
565
|
+
// SpawnOptions.sandbox in @mclawnet/agent).
|
|
566
|
+
if (sandboxOverride) {
|
|
567
|
+
definition = { ...definition, sandbox: sandboxOverride };
|
|
568
|
+
}
|
|
569
|
+
// Final default: role files don't currently declare `sandbox`, and not
|
|
570
|
+
// every caller passes an override (e.g. legacy recovery, programmatic
|
|
571
|
+
// spawns). Pin to DEFAULT_SANDBOX so snapshots and the membership-change
|
|
572
|
+
// envelope always show a concrete level instead of `undefined`.
|
|
573
|
+
if (!definition.sandbox) {
|
|
574
|
+
definition = { ...definition, sandbox: DEFAULT_SANDBOX };
|
|
575
|
+
}
|
|
576
|
+
log.info({
|
|
577
|
+
swarmId,
|
|
578
|
+
roleName,
|
|
579
|
+
backendOverride,
|
|
580
|
+
definitionBackend: definition.backend,
|
|
581
|
+
hasCustomDefinition: !!customDefinition,
|
|
582
|
+
}, "spawnRole: backend resolved");
|
|
406
583
|
let instanceId;
|
|
407
584
|
if (presetInstanceId) {
|
|
408
585
|
// Recovery path: preserve the original instanceId so per-role state
|
|
409
|
-
// (logs, inbox,
|
|
586
|
+
// (logs, inbox, backendSessionId) lines up with prior snapshot.
|
|
410
587
|
instanceId = presetInstanceId;
|
|
411
588
|
}
|
|
412
589
|
else {
|
|
@@ -422,9 +599,12 @@ export class SwarmCoordinator {
|
|
|
422
599
|
roleSessionId,
|
|
423
600
|
status: "spawning",
|
|
424
601
|
currentTask: taskPrompt,
|
|
425
|
-
|
|
602
|
+
backendSessionId: resumeId,
|
|
426
603
|
};
|
|
427
604
|
swarm.roles.set(instanceId, roleInstance);
|
|
605
|
+
// Track whether we successfully opened the backend session so the
|
|
606
|
+
// dynamic-add rollback (below) knows whether to also kill the adapter.
|
|
607
|
+
let sessionOpened = false;
|
|
428
608
|
// Build role list for prompt
|
|
429
609
|
const roleList = this.buildRoleListString(swarm);
|
|
430
610
|
const systemPrompt = buildRolePrompt(definition, instanceId, roleList, {
|
|
@@ -451,29 +631,107 @@ export class SwarmCoordinator {
|
|
|
451
631
|
// Spawn Claude CLI process via SessionAdapter
|
|
452
632
|
// SessionManager handles memory injection (Pipeline A: memory prompt + roleId hint) via roleId
|
|
453
633
|
const tools = resolveRoleTools(definition);
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
634
|
+
try {
|
|
635
|
+
await this.sessionAdapter.createSession({
|
|
636
|
+
sessionId: roleSessionId,
|
|
637
|
+
workDir: swarm.workDir,
|
|
638
|
+
systemPrompt: finalPrompt,
|
|
639
|
+
roleId,
|
|
640
|
+
additionalDirs,
|
|
641
|
+
// Task 5: when recovering, the caller passes resumeId = role.backendSessionId
|
|
642
|
+
// so the Claude conversation continues with `--resume`. Fresh spawns
|
|
643
|
+
// (Task 3 default) leave it undefined for a new conversation.
|
|
644
|
+
resumeId,
|
|
645
|
+
allowedTools: tools.allowedTools,
|
|
646
|
+
disallowedTools: tools.disallowedTools,
|
|
647
|
+
backend: definition.backend,
|
|
648
|
+
sandbox: definition.sandbox,
|
|
649
|
+
});
|
|
650
|
+
sessionOpened = true;
|
|
651
|
+
}
|
|
652
|
+
catch (err) {
|
|
653
|
+
// Wrap the underlying backend error with role context so the user can
|
|
654
|
+
// pinpoint which role failed when (for example) codex CLI is missing.
|
|
655
|
+
// M3.S5 #5 acceptance: "未装 codex CLI 时配 codex role → 启动报错指明
|
|
656
|
+
// 哪个 role 缺 codex". Without this wrap, the surfaced message is
|
|
657
|
+
// "Failed to spawn codex CLI: ENOENT" with no role identifier.
|
|
658
|
+
const backendLabel = definition.backend ?? "claude";
|
|
659
|
+
const cause = err instanceof Error ? err.message : String(err);
|
|
660
|
+
const wrapped = new Error(`role ${roleName} (${instanceId}, backend=${backendLabel}) failed to spawn: ${cause}`);
|
|
661
|
+
// Preserve original error for callers that inspect stack/cause
|
|
662
|
+
wrapped.cause = err;
|
|
663
|
+
// Dynamic-add rollback (PR#5): the caller invoked spawnRole at runtime
|
|
664
|
+
// via swarm_add_role, not as part of the initial create() batch. The
|
|
665
|
+
// outer create() does not run cleanupPartialCreate for these calls, so
|
|
666
|
+
// we MUST locally undo the in-memory mutations (role row + seq bump)
|
|
667
|
+
// and free the freshly allocated instanceId for reuse. Initial-batch
|
|
668
|
+
// callers (isDynamicAdd=false) keep the legacy behaviour where the row
|
|
669
|
+
// stays in place and cleanupPartialCreate at the create() layer tears
|
|
670
|
+
// it down on outer failure — mixing the two would double-cleanup.
|
|
671
|
+
if (isDynamicAdd) {
|
|
672
|
+
try {
|
|
673
|
+
swarm.roles.delete(instanceId);
|
|
674
|
+
}
|
|
675
|
+
catch { /* ignore */ }
|
|
676
|
+
if (!presetInstanceId) {
|
|
677
|
+
const cur = swarm.nextInstanceSeq.get(roleName) ?? 0;
|
|
678
|
+
if (cur > 0)
|
|
679
|
+
swarm.nextInstanceSeq.set(roleName, cur - 1);
|
|
680
|
+
}
|
|
681
|
+
await this.notifyMembershipChangeFailed(swarm, roleName, wrapped);
|
|
682
|
+
}
|
|
683
|
+
throw wrapped;
|
|
684
|
+
}
|
|
467
685
|
roleInstance.status = "active";
|
|
468
686
|
// Persistence: save snapshot after role spawned. Skipped during recover()
|
|
469
687
|
// where the caller saves once at the end (avoids N writes for N roles).
|
|
470
688
|
if (!swarm._suppressSnapshot) {
|
|
471
|
-
|
|
689
|
+
try {
|
|
690
|
+
this.persistSwarm(swarm);
|
|
691
|
+
}
|
|
692
|
+
catch (err) {
|
|
693
|
+
// Dynamic-add rollback also covers persistSwarm failure — without
|
|
694
|
+
// this branch a disk-full / lockfile crash would leave an in-memory
|
|
695
|
+
// role with no snapshot, surfacing as a phantom worker on the next
|
|
696
|
+
// restart. Initial-batch callers continue to surface the error
|
|
697
|
+
// through cleanupPartialCreate.
|
|
698
|
+
if (isDynamicAdd) {
|
|
699
|
+
if (sessionOpened) {
|
|
700
|
+
try {
|
|
701
|
+
await this.sessionAdapter.closeSession(roleSessionId);
|
|
702
|
+
}
|
|
703
|
+
catch { /* ignore */ }
|
|
704
|
+
}
|
|
705
|
+
try {
|
|
706
|
+
swarm.roles.delete(instanceId);
|
|
707
|
+
}
|
|
708
|
+
catch { /* ignore */ }
|
|
709
|
+
if (!presetInstanceId) {
|
|
710
|
+
const cur = swarm.nextInstanceSeq.get(roleName) ?? 0;
|
|
711
|
+
if (cur > 0)
|
|
712
|
+
swarm.nextInstanceSeq.set(roleName, cur - 1);
|
|
713
|
+
}
|
|
714
|
+
await this.notifyMembershipChangeFailed(swarm, roleName, err);
|
|
715
|
+
}
|
|
716
|
+
throw err;
|
|
717
|
+
}
|
|
472
718
|
}
|
|
473
719
|
// Send swarm status update to Hub
|
|
474
720
|
this.sendStatusUpdate(swarm);
|
|
475
721
|
// Flush any pending inbox messages (fire-and-forget).
|
|
476
722
|
void this.inboxRelay.deliver(swarmId, instanceId);
|
|
723
|
+
// Dynamic-add: notify the queen via the unified membership-change
|
|
724
|
+
// protocol so she can decide whether to dispatch the new worker.
|
|
725
|
+
// Best-effort — failure here must not unwind the spawn; the worker is
|
|
726
|
+
// already running and the swarm is in a consistent state.
|
|
727
|
+
if (isDynamicAdd) {
|
|
728
|
+
try {
|
|
729
|
+
await this.notifyMembershipChange(swarm, { added: [roleInstance] });
|
|
730
|
+
}
|
|
731
|
+
catch (err) {
|
|
732
|
+
log.warn({ err, swarmId, instanceId }, "notifyMembershipChange failed after dynamic spawn (non-fatal)");
|
|
733
|
+
}
|
|
734
|
+
}
|
|
477
735
|
log.info({ swarmId, instanceId, roleName, roleId }, "role spawned");
|
|
478
736
|
return roleInstance;
|
|
479
737
|
}
|
|
@@ -481,9 +739,9 @@ export class SwarmCoordinator {
|
|
|
481
739
|
* Recover a previously persisted swarm by id.
|
|
482
740
|
*
|
|
483
741
|
* Locates the snapshot via `listRecoverableSwarmIds()`, then for each role
|
|
484
|
-
* in the snapshot spawns a Claude session via `--resume role.
|
|
742
|
+
* in the snapshot spawns a Claude session via `--resume role.backendSessionId`
|
|
485
743
|
* (when present) so the per-role conversation continues. Roles without a
|
|
486
|
-
* stored
|
|
744
|
+
* stored backendSessionId start fresh.
|
|
487
745
|
*
|
|
488
746
|
* After all roles are spawned, drains each role's offline inbox via
|
|
489
747
|
* `inboxRelay.deliver`. Drain failures are best-effort: warn but never throw.
|
|
@@ -507,12 +765,13 @@ export class SwarmCoordinator {
|
|
|
507
765
|
}
|
|
508
766
|
// Bootstrap the swarm shell. We deliberately bypass create() because
|
|
509
767
|
// create() would respawn eager roles from template defaults — losing the
|
|
510
|
-
// per-role instanceId /
|
|
768
|
+
// per-role instanceId / backendSessionId from the snapshot.
|
|
511
769
|
const swarm = {
|
|
512
770
|
id: swarmId,
|
|
513
771
|
hubSessionId: snapshot.hubSessionId,
|
|
514
772
|
workDir: snapshot.workDir,
|
|
515
773
|
teamName: snapshot.teamName,
|
|
774
|
+
displayName: snapshot.displayName,
|
|
516
775
|
roles: new Map(),
|
|
517
776
|
plan: snapshot.plan ?? null,
|
|
518
777
|
nextInstanceSeq: new Map(Object.entries(snapshot.nextInstanceSeq ?? {})),
|
|
@@ -523,7 +782,7 @@ export class SwarmCoordinator {
|
|
|
523
782
|
planStatus: snapshot.planStatus ?? "none",
|
|
524
783
|
};
|
|
525
784
|
this.swarms.set(swarmId, swarm);
|
|
526
|
-
// Respawn each role with the same instanceId, passing
|
|
785
|
+
// Respawn each role with the same instanceId, passing backendSessionId
|
|
527
786
|
// through as resumeId so SessionAdapter can `--resume` the conversation.
|
|
528
787
|
// Suppress per-role snapshot writes — we save once at the end with the
|
|
529
788
|
// full role set (and partialRecover marker if any role failed).
|
|
@@ -531,10 +790,13 @@ export class SwarmCoordinator {
|
|
|
531
790
|
let partialRecover = false;
|
|
532
791
|
for (const r of snapshot.roles) {
|
|
533
792
|
try {
|
|
534
|
-
await this.spawnRole(swarmId, r.roleName,
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
793
|
+
await this.spawnRole(swarmId, r.roleName, {
|
|
794
|
+
taskPrompt: r.currentTask,
|
|
795
|
+
resumeId: r.backendSessionId,
|
|
796
|
+
presetInstanceId: r.instanceId,
|
|
797
|
+
backendOverride: r.backend,
|
|
798
|
+
sandboxOverride: r.sandbox,
|
|
799
|
+
});
|
|
538
800
|
}
|
|
539
801
|
catch (err) {
|
|
540
802
|
partialRecover = true;
|
|
@@ -543,7 +805,7 @@ export class SwarmCoordinator {
|
|
|
543
805
|
}
|
|
544
806
|
delete swarm._suppressSnapshot;
|
|
545
807
|
swarm.partialRecover = partialRecover;
|
|
546
|
-
|
|
808
|
+
this.persistSwarm(swarm);
|
|
547
809
|
// Drain offline inboxes — best-effort per role.
|
|
548
810
|
for (const role of swarm.roles.values()) {
|
|
549
811
|
try {
|
|
@@ -587,7 +849,7 @@ export class SwarmCoordinator {
|
|
|
587
849
|
await this.sessionAdapter.closeSession(role.roleSessionId);
|
|
588
850
|
swarm.roles.delete(instanceId);
|
|
589
851
|
// Persistence: save snapshot after role stopped
|
|
590
|
-
|
|
852
|
+
this.persistSwarm(swarm);
|
|
591
853
|
this.sendStatusUpdate(swarm);
|
|
592
854
|
log.info({ swarmId, instanceId }, "role stopped");
|
|
593
855
|
}
|
|
@@ -721,7 +983,7 @@ export class SwarmCoordinator {
|
|
|
721
983
|
log.warn({ swarmId: swarm.id, instanceId: role.instanceId, roleName: role.roleName, reason }, "role crashed — flipping status and notifying queen");
|
|
722
984
|
role.status = "stopped";
|
|
723
985
|
try {
|
|
724
|
-
|
|
986
|
+
this.persistSwarm(swarm);
|
|
725
987
|
}
|
|
726
988
|
catch (err) {
|
|
727
989
|
log.warn({ err, swarmId: swarm.id }, "handleRoleCrashed: saveSwarmSnapshot failed");
|
|
@@ -969,7 +1231,7 @@ export class SwarmCoordinator {
|
|
|
969
1231
|
swarm.planReviewTimer = undefined;
|
|
970
1232
|
}
|
|
971
1233
|
swarm.planStatus = args.verdict === "approved" ? "approved" : "rejected";
|
|
972
|
-
|
|
1234
|
+
this.persistSwarm(swarm);
|
|
973
1235
|
this.sendStatusUpdate(swarm);
|
|
974
1236
|
const queen = this.findQueen(swarm);
|
|
975
1237
|
if (!queen) {
|
|
@@ -1224,6 +1486,60 @@ export class SwarmCoordinator {
|
|
|
1224
1486
|
await this.deliverInbox(swarm, role.instanceId, msg);
|
|
1225
1487
|
}
|
|
1226
1488
|
}
|
|
1489
|
+
/**
|
|
1490
|
+
* Deliver a unified "成员变更" envelope to the queen describing one
|
|
1491
|
+
* membership delta (added and/or removed roles) plus the post-change
|
|
1492
|
+
* roster. Used both by the initial spawn flow (create()) and by the
|
|
1493
|
+
* dynamic add path (spawnRole with isDynamicAdd:true) so queen-side
|
|
1494
|
+
* handling is the same regardless of when membership changed.
|
|
1495
|
+
*
|
|
1496
|
+
* No-op when the swarm has no queen (e.g. mid-shutdown), matching the
|
|
1497
|
+
* defensive behaviour of other helpers that target the queen.
|
|
1498
|
+
*/
|
|
1499
|
+
async notifyMembershipChange(swarm, change) {
|
|
1500
|
+
const queen = this.findQueen(swarm);
|
|
1501
|
+
if (!queen)
|
|
1502
|
+
return;
|
|
1503
|
+
const lines = ["[系统] 成员变更:"];
|
|
1504
|
+
for (const r of change.added ?? []) {
|
|
1505
|
+
const backend = r.definition?.backend ?? "claude";
|
|
1506
|
+
const sandbox = r.definition?.sandbox ?? "workspace-write";
|
|
1507
|
+
lines.push(` + 新增 ${r.instanceId} (${r.roleName}, ${backend}, ${sandbox})`);
|
|
1508
|
+
}
|
|
1509
|
+
for (const id of change.removed ?? []) {
|
|
1510
|
+
lines.push(` - 移除 ${id}`);
|
|
1511
|
+
}
|
|
1512
|
+
lines.push("", "当前成员:", this.buildRoleListString(swarm));
|
|
1513
|
+
await this.deliverInbox(swarm, queen.instanceId, {
|
|
1514
|
+
from: "system",
|
|
1515
|
+
type: "system",
|
|
1516
|
+
data: lines.join("\n"),
|
|
1517
|
+
});
|
|
1518
|
+
}
|
|
1519
|
+
/**
|
|
1520
|
+
* Dynamic-add rollback companion to {@link notifyMembershipChange}: tell the
|
|
1521
|
+
* queen a runtime "+ 添加成员" attempt failed and the roster did NOT change,
|
|
1522
|
+
* so she does not sit waiting for a worker that never came online or assume
|
|
1523
|
+
* her dispatch plan can target a not-yet-existent instanceId. Best-effort —
|
|
1524
|
+
* any inbox failure is swallowed so the caller's `throw` (which surfaces the
|
|
1525
|
+
* underlying spawn error to the UI) is preserved.
|
|
1526
|
+
*/
|
|
1527
|
+
async notifyMembershipChangeFailed(swarm, roleName, err) {
|
|
1528
|
+
try {
|
|
1529
|
+
const queen = this.findQueen(swarm);
|
|
1530
|
+
if (!queen)
|
|
1531
|
+
return;
|
|
1532
|
+
const reason = err instanceof Error ? err.message : String(err);
|
|
1533
|
+
await this.deliverInbox(swarm, queen.instanceId, {
|
|
1534
|
+
from: "system",
|
|
1535
|
+
type: "system",
|
|
1536
|
+
data: `[系统] 添加成员失败:尝试为蜂群添加 ${roleName} 角色失败,原因:${reason}。当前名单未变更。`,
|
|
1537
|
+
});
|
|
1538
|
+
}
|
|
1539
|
+
catch (notifyErr) {
|
|
1540
|
+
log.warn({ err: notifyErr, swarmId: swarm.id, roleName }, "notifyMembershipChangeFailed: deliverInbox to queen failed (non-fatal)");
|
|
1541
|
+
}
|
|
1542
|
+
}
|
|
1227
1543
|
findQueen(swarm) {
|
|
1228
1544
|
for (const role of swarm.roles.values()) {
|
|
1229
1545
|
if (role.definition.type === "queen" && role.status !== "stopped")
|
|
@@ -1290,13 +1606,13 @@ export class SwarmCoordinator {
|
|
|
1290
1606
|
if (!reviewer) {
|
|
1291
1607
|
// No reviewer — auto-approve
|
|
1292
1608
|
swarm.planStatus = "approved";
|
|
1293
|
-
|
|
1609
|
+
this.persistSwarm(swarm);
|
|
1294
1610
|
this.sendStatusUpdate(swarm);
|
|
1295
1611
|
log.info({ swarmId: swarm.id }, "plan auto-approved (no reviewer)");
|
|
1296
1612
|
return;
|
|
1297
1613
|
}
|
|
1298
1614
|
swarm.planStatus = "reviewing";
|
|
1299
|
-
|
|
1615
|
+
this.persistSwarm(swarm);
|
|
1300
1616
|
this.sendStatusUpdate(swarm);
|
|
1301
1617
|
const planJson = JSON.stringify(plan, null, 2);
|
|
1302
1618
|
await this.deliverInbox(swarm, reviewer.instanceId, {
|
|
@@ -1394,7 +1710,7 @@ ${planJson}
|
|
|
1394
1710
|
if (swarm.idleCheckCount >= swarm.maxIdleChecks) {
|
|
1395
1711
|
swarm.isPaused = true;
|
|
1396
1712
|
swarm.status = "paused";
|
|
1397
|
-
|
|
1713
|
+
this.persistSwarm(swarm);
|
|
1398
1714
|
this.sendStatusUpdate(swarm);
|
|
1399
1715
|
await this.deliverInbox(swarm, queen.instanceId, {
|
|
1400
1716
|
from: "system",
|
|
@@ -1442,6 +1758,22 @@ ${planJson}
|
|
|
1442
1758
|
status: r.status,
|
|
1443
1759
|
currentTask: r.currentTask,
|
|
1444
1760
|
color: r.definition.color,
|
|
1761
|
+
// CRITICAL: include backend so the hub's swarm.status handler stores
|
|
1762
|
+
// the per-role backend in `crewConfig.roles` on the DB session. Hub
|
|
1763
|
+
// overwrites the existing crewConfig.roles on each status update
|
|
1764
|
+
// (chat-handler.ts), so omitting backend here silently corrupts the
|
|
1765
|
+
// DB — any path that later recreates the swarm from crewConfig
|
|
1766
|
+
// (continuation after finish, race during recovery) spawns roles
|
|
1767
|
+
// with default backend=claude. Recovered codex roles were silently
|
|
1768
|
+
// downgraded to claude with no error.
|
|
1769
|
+
backend: r.definition.backend,
|
|
1770
|
+
// Same anti-downgrade reasoning as `backend` above — without
|
|
1771
|
+
// including sandbox in the wire payload, the hub's swarm.status
|
|
1772
|
+
// handler writes crewConfig.roles WITHOUT sandbox, and any path
|
|
1773
|
+
// that later recreates the swarm from crewConfig (continuation,
|
|
1774
|
+
// race during recovery) would silently downgrade roles to the
|
|
1775
|
+
// default workspace-write sandbox.
|
|
1776
|
+
sandbox: r.definition.sandbox,
|
|
1445
1777
|
})),
|
|
1446
1778
|
plan: swarm.plan,
|
|
1447
1779
|
};
|