@triflux/remote 10.0.0-alpha.2 → 10.0.0
This diff shows the changes between publicly available package versions as they appear in their respective public registries; it is provided for informational purposes only.
- package/hub/index.mjs +5 -5
- package/hub/team/notify.mjs +1 -1
- package/hub/team/remote-session.mjs +296 -0
- package/package.json +1 -1
- package/hub/team/swarm-hypervisor.mjs +0 -554
- package/hub/team/swarm-locks.mjs +0 -204
- package/hub/team/swarm-planner.mjs +0 -256
- package/hub/team/swarm-reconciler.mjs +0 -137
- package/hub/team/worktree-lifecycle.mjs +0 -172
@@ -1,554 +0,0 @@
-// hub/team/swarm-hypervisor.mjs — Multi-model swarm orchestration hypervisor
-// Consumes a SwarmPlan (from swarm-planner.mjs) and orchestrates parallel
-// conductor sessions with file-lease enforcement, result validation,
-// and ordered integration.
-//
-// Failure modes handled:
-// F1: Worker crash → conductor auto-restart (maxRestarts)
-// F2: Rate limit → account-broker cooldown + fallback agent
-// F3: Stall → health probe L1 detection + kill + restart
-// F4: File lease violation → revert worker changes, flag shard as failed
-// F5: Merge conflict → retry integration with conflict resolution
-
-import { EventEmitter } from 'node:events';
-import { join } from 'node:path';
-import { mkdirSync, readFileSync, existsSync } from 'node:fs';
-import { execSync } from 'node:child_process';
-
-import { createConductor, STATES } from './conductor.mjs';
-import { createSwarmLocks } from './swarm-locks.mjs';
-import { createEventLog } from './event-log.mjs';
-
-// ── Swarm states ──────────────────────────────────────────────
-
-export const SWARM_STATES = Object.freeze({
-  PLANNING: 'planning',
-  LAUNCHING: 'launching',
-  RUNNING: 'running',
-  INTEGRATING: 'integrating',
-  VALIDATING: 'validating',
-  COMPLETED: 'completed',
-  FAILED: 'failed',
-});
-
-// ── Failure mode classification ───────────────────────────────
-
-const FAILURE_MODES = Object.freeze({
-  F1_CRASH: 'F1_crash',
-  F2_RATE_LIMIT: 'F2_rate_limit',
-  F3_STALL: 'F3_stall',
-  F4_LEASE_VIOLATION: 'F4_lease_violation',
-  F5_MERGE_CONFLICT: 'F5_merge_conflict',
-});
-
-const FALLBACK_AGENTS = Object.freeze({
-  codex: 'gemini',
-  gemini: 'codex',
-  claude: 'codex',
-});
-
-/**
- * Create a swarm hypervisor.
- * @param {object} opts
- * @param {string} opts.workdir — repository root / working directory
- * @param {string} opts.logsDir — base directory for all logs
- * @param {number} [opts.maxRestarts=2] — per-shard max restarts
- * @param {number} [opts.graceMs=10000] — conductor shutdown grace period
- * @param {number} [opts.integrationTimeoutMs=60000] — max time for integration phase
- * @param {object} [opts.probeOpts] — health probe overrides
- * @param {object} [opts.deps] — dependency injection for testing
- * @returns {SwarmHypervisor}
- */
-export function createSwarmHypervisor(opts) {
-  const {
-    workdir,
-    logsDir,
-    maxRestarts = 2,
-    graceMs = 10_000,
-    integrationTimeoutMs = 60_000,
-    probeOpts = {},
-    deps = {},
-  } = opts;
-
-  if (!workdir) throw new Error('workdir is required');
-  if (!logsDir) throw new Error('logsDir is required');
-
-  mkdirSync(logsDir, { recursive: true });
-
-  const emitter = new EventEmitter();
-  const eventLog = createEventLog(join(logsDir, 'swarm-events.jsonl'));
-
-  let state = SWARM_STATES.PLANNING;
-  let plan = null;
-  let lockManager = null;
-
-  /** @type {Map<string, { conductor, shardConfig, result, status }>} */
-  const workers = new Map();
-
-  /** @type {Map<string, { conductor, shardConfig }>} redundant workers for critical shards */
-  const redundantWorkers = new Map();
-
-  const results = new Map(); // shardName → validated result
-  const failures = new Map(); // shardName → failure info
-
-  // ── State machine ───────────────────────────────────────────
-
-  function setState(next, reason = '') {
-    const prev = state;
-    state = next;
-    eventLog.append('swarm_state', { from: prev, to: next, reason });
-    emitter.emit('stateChange', { from: prev, to: next, reason });
-  }
-
-  // ── Worker lifecycle ────────────────────────────────────────
-
-  function buildSessionConfig(shard) {
-    return {
-      id: `swarm-${shard.name}-${Date.now()}`,
-      agent: shard.agent,
-      prompt: shard.prompt,
-      workdir,
-      mcpServers: shard.mcp,
-    };
-  }
-
-  function launchShard(shard, isRedundant = false) {
-    const shardLogsDir = join(logsDir, isRedundant ? `${shard.name}-redundant` : shard.name);
-    mkdirSync(shardLogsDir, { recursive: true });
-
-    const conductor = createConductor({
-      logsDir: shardLogsDir,
-      maxRestarts,
-      graceMs,
-      probeOpts,
-      onCompleted: (sessionId) => handleShardCompleted(shard.name, sessionId, isRedundant),
-    });
-
-    const sessionConfig = buildSessionConfig(shard);
-
-    // Acquire file leases
-    if (!isRedundant) {
-      const leaseResult = lockManager.acquire(shard.name, shard.files);
-      if (!leaseResult.ok) {
-        eventLog.append('lease_denied', {
-          shard: shard.name,
-          conflicts: leaseResult.conflicts,
-        });
-        failures.set(shard.name, {
-          mode: FAILURE_MODES.F4_LEASE_VIOLATION,
-          conflicts: leaseResult.conflicts,
-        });
-        return null;
-      }
-    }
-
-    conductor.spawnSession(sessionConfig);
-
-    eventLog.append('shard_launched', {
-      shard: shard.name,
-      agent: shard.agent,
-      sessionId: sessionConfig.id,
-      isRedundant,
-      files: shard.files,
-    });
-
-    const entry = { conductor, shardConfig: shard, sessionConfig, startedAt: Date.now() };
-
-    if (isRedundant) {
-      redundantWorkers.set(shard.name, entry);
-    } else {
-      workers.set(shard.name, entry);
-    }
-
-    // Listen for dead events (F1/F2/F3)
-    conductor.on('dead', ({ sessionId, reason }) => {
-      handleShardFailed(shard.name, sessionId, reason, isRedundant);
-    });
-
-    return entry;
-  }
-
-  // ── Completion handling ─────────────────────────────────────
-
-  function handleShardCompleted(shardName, sessionId, isRedundant) {
-    eventLog.append('shard_completed', { shard: shardName, sessionId, isRedundant });
-
-    if (isRedundant) {
-      // Redundant worker completed first — kill primary if still running
-      const primary = workers.get(shardName);
-      if (primary && !isTerminal(primary)) {
-        eventLog.append('redundant_wins', { shard: shardName });
-        void primary.conductor.shutdown('redundant_completed_first');
-      }
-    } else {
-      // Primary completed — kill redundant if exists
-      const redundant = redundantWorkers.get(shardName);
-      if (redundant) {
-        void redundant.conductor.shutdown('primary_completed_first');
-      }
-    }
-
-    emitter.emit('shardCompleted', { shardName, sessionId, isRedundant });
-    checkAllShardsCompleted();
-  }
-
-  function handleShardFailed(shardName, sessionId, reason, isRedundant) {
-    const failureMode = classifyFailure(reason);
-
-    eventLog.append('shard_failed', {
-      shard: shardName,
-      sessionId,
-      reason,
-      failureMode,
-      isRedundant,
-    });
-
-    if (isRedundant) return; // redundant failure is non-critical
-
-    // F2: Rate limit — try fallback agent
-    if (failureMode === FAILURE_MODES.F2_RATE_LIMIT) {
-      const shard = plan.shards.find((s) => s.name === shardName);
-      if (shard) {
-        const fallbackAgent = FALLBACK_AGENTS[shard.agent];
-        if (fallbackAgent) {
-          eventLog.append('fallback_agent', {
-            shard: shardName,
-            from: shard.agent,
-            to: fallbackAgent,
-          });
-          const fallbackShard = { ...shard, agent: fallbackAgent };
-          lockManager.release(shardName);
-          launchShard(fallbackShard);
-          return;
-        }
-      }
-    }
-
-    failures.set(shardName, { mode: failureMode, reason, sessionId });
-    lockManager.release(shardName);
-
-    emitter.emit('shardFailed', { shardName, failureMode, reason });
-    checkAllShardsCompleted();
-  }
-
-  function classifyFailure(reason) {
-    if (!reason) return FAILURE_MODES.F1_CRASH;
-    const r = String(reason).toLowerCase();
-    if (/rate.?limit|cooldown/u.test(r)) return FAILURE_MODES.F2_RATE_LIMIT;
-    if (/stall|l1_stall|timeout/u.test(r)) return FAILURE_MODES.F3_STALL;
-    if (/lease|violation/u.test(r)) return FAILURE_MODES.F4_LEASE_VIOLATION;
-    if (/merge|conflict/u.test(r)) return FAILURE_MODES.F5_MERGE_CONFLICT;
-    return FAILURE_MODES.F1_CRASH;
-  }
-
-  function isTerminal(entry) {
-    const snap = entry.conductor.getSnapshot();
-    return snap.every((s) => s.state === STATES.COMPLETED || s.state === STATES.DEAD);
-  }
-
-  // ── Integration ─────────────────────────────────────────────
-
-  function checkAllShardsCompleted() {
-    if (state !== SWARM_STATES.RUNNING) return;
-
-    const allDone = plan.mergeOrder.every((name) => {
-      const w = workers.get(name);
-      return (w && isTerminal(w)) || failures.has(name);
-    });
-
-    if (allDone) {
-      void integrateResults();
-    }
-  }
-
-  /**
-   * Validate a shard's output — check for file lease violations.
-   * @param {string} shardName
-   * @param {string[]} changedFiles — files the shard actually modified
-   * @returns {{ ok: boolean, violations: Array }}
-   */
-  function validateResult(shardName, changedFiles) {
-    const violations = lockManager.validateChanges(shardName, changedFiles);
-
-    eventLog.append('validate_result', {
-      shard: shardName,
-      changedFiles,
-      violations,
-      ok: violations.length === 0,
-    });
-
-    return {
-      ok: violations.length === 0,
-      violations,
-    };
-  }
-
-  /**
-   * Integrate results from all completed shards in merge order.
-   * Uses git operations for conflict detection.
-   */
-  async function integrateResults() {
-    setState(SWARM_STATES.INTEGRATING, 'all_shards_done');
-
-    const integrated = [];
-    const integrationFailures = [];
-
-    for (const shardName of plan.mergeOrder) {
-      if (failures.has(shardName)) {
-        eventLog.append('skip_failed_shard', { shard: shardName });
-        continue;
-      }
-
-      const worker = workers.get(shardName);
-      if (!worker) continue;
-
-      // Read shard output log for changed files
-      const changedFiles = detectChangedFiles(shardName, worker);
-
-      // Validate against lease map
-      const validation = validateResult(shardName, changedFiles);
-      if (!validation.ok) {
-        failures.set(shardName, {
-          mode: FAILURE_MODES.F4_LEASE_VIOLATION,
-          violations: validation.violations,
-        });
-        eventLog.append('lease_violation_revert', {
-          shard: shardName,
-          violations: validation.violations,
-        });
-        integrationFailures.push(shardName);
-        continue;
-      }
-
-      results.set(shardName, {
-        shard: shardName,
-        changedFiles,
-        completedAt: Date.now(),
-      });
-      integrated.push(shardName);
-    }
-
-    eventLog.append('integration_complete', {
-      integrated,
-      failed: integrationFailures,
-      skipped: [...failures.keys()].filter((n) => !integrationFailures.includes(n)),
-    });
-
-    if (integrationFailures.length > 0 && integrated.length === 0) {
-      setState(SWARM_STATES.FAILED, 'all_shards_failed_integration');
-    } else {
-      setState(SWARM_STATES.COMPLETED, `${integrated.length}/${plan.shards.length} integrated`);
-    }
-
-    emitter.emit('integrationComplete', {
-      integrated,
-      failed: integrationFailures,
-      results: [...results.values()],
-    });
-  }
-
-  /**
-   * Detect which files a shard modified by reading its output logs.
-   * Falls back to an empty list if detection fails.
-   * @param {string} shardName
-   * @param {object} worker
-   * @returns {string[]}
-   */
-  function detectChangedFiles(shardName, worker) {
-    // Best-effort: parse output log for file paths
-    const outPath = join(logsDir, shardName);
-    try {
-      const snap = worker.conductor.getSnapshot();
-      for (const session of snap) {
-        if (session.outPath && existsSync(session.outPath)) {
-          const output = readFileSync(session.outPath, 'utf8');
-          return extractFilePathsFromOutput(output, plan.leaseMap.get(shardName) || []);
-        }
-      }
-    } catch { /* best-effort */ }
-
-    // Fallback: trust the lease map (shard was allowed these files)
-    return plan.leaseMap.get(shardName) || [];
-  }
-
-  /**
-   * Extract modified file paths from worker output text.
-   * Looks for common patterns: "wrote file.mjs", "modified file.mjs", diff headers.
-   * @param {string} output
-   * @param {string[]} allowedFiles — lease map files to match against
-   * @returns {string[]}
-   */
-  function extractFilePathsFromOutput(output, allowedFiles) {
-    if (!output) return allowedFiles;
-
-    const found = new Set();
-    const lines = output.split(/\r?\n/);
-
-    for (const line of lines) {
-      // Match common patterns
-      const patterns = [
-        /(?:wrote|created|modified|updated|edited)\s+['"]?([^\s'"]+\.\w+)/i,
-        /^[+-]{3}\s+[ab]\/(.+)/, // diff headers
-        /^diff --git a\/(.+)\s+b\//, // git diff headers
-      ];
-
-      for (const re of patterns) {
-        const match = line.match(re);
-        if (match) found.add(match[1]);
-      }
-    }
-
-    // Intersect with allowed files if we found anything
-    if (found.size > 0) {
-      return [...found].filter((f) => allowedFiles.some(
-        (a) => f.endsWith(a) || a.endsWith(f) || f === a,
-      ));
-    }
-
-    return allowedFiles;
-  }
-
-  // ── Status monitor ──────────────────────────────────────────
-
-  /**
-   * Get current swarm status snapshot.
-   * @returns {SwarmStatus}
-   */
-  function getStatus() {
-    const workerStatuses = [];
-
-    for (const [name, w] of workers) {
-      const snap = w.conductor.getSnapshot();
-      workerStatuses.push({
-        shard: name,
-        agent: w.shardConfig.agent,
-        sessions: snap,
-        failed: failures.has(name),
-        failureInfo: failures.get(name) || null,
-        integrated: results.has(name),
-      });
-    }
-
-    return Object.freeze({
-      state,
-      totalShards: plan?.shards.length || 0,
-      completedShards: results.size,
-      failedShards: failures.size,
-      workers: workerStatuses,
-      mergeOrder: plan?.mergeOrder || [],
-      criticalShards: plan?.criticalShards || [],
-      locks: lockManager?.snapshot() || [],
-    });
-  }
-
-  // ── Public API ──────────────────────────────────────────────
-
-  /**
-   * Launch the swarm from a pre-built plan.
-   * @param {SwarmPlan} swarmPlan — from planSwarm()
-   * @returns {SwarmStatus}
-   */
-  function launch(swarmPlan) {
-    if (state !== SWARM_STATES.PLANNING) {
-      throw new Error(`Cannot launch in state "${state}"`);
-    }
-
-    plan = swarmPlan;
-
-    // Warn about file conflicts but don't block
-    if (plan.conflicts.length > 0) {
-      eventLog.append('file_conflicts_warning', { conflicts: plan.conflicts });
-      emitter.emit('warning', {
-        type: 'file_conflicts',
-        conflicts: plan.conflicts,
-      });
-    }
-
-    // Initialize lock manager
-    lockManager = createSwarmLocks({
-      repoRoot: workdir,
-      persistPath: join(workdir, '.triflux', 'swarm-locks.json'),
-    });
-
-    setState(SWARM_STATES.LAUNCHING, `${plan.shards.length} shards`);
-
-    // Launch shards respecting dependency order
-    const launched = new Set();
-    const pending = new Set(plan.mergeOrder);
-
-    function launchReady() {
-      for (const name of pending) {
-        const shard = plan.shards.find((s) => s.name === name);
-        if (!shard) continue;
-
-        // Check all dependencies are launched (not necessarily completed)
-        const depsReady = shard.depends.every((d) => launched.has(d));
-        if (!depsReady) continue;
-
-        pending.delete(name);
-        launched.add(name);
-        launchShard(shard);
-
-        // Launch redundant worker for critical shards
-        if (shard.critical) {
-          const redundantShard = {
-            ...shard,
-            agent: FALLBACK_AGENTS[shard.agent] || shard.agent,
-          };
-          launchShard(redundantShard, true);
-        }
-      }
-    }
-
-    launchReady();
-
-    // Re-check pending on each shard completion (dependency chains)
-    emitter.on('shardCompleted', () => {
-      if (pending.size > 0) launchReady();
-    });
-
-    setState(SWARM_STATES.RUNNING, `${launched.size} launched, ${pending.size} pending deps`);
-
-    return getStatus();
-  }
-
-  /**
-   * Graceful shutdown — kill all workers and release locks.
-   * @param {string} [reason]
-   */
-  async function shutdown(reason = 'shutdown') {
-    eventLog.append('swarm_shutdown', { reason, state });
-
-    const shutdowns = [];
-    for (const [, w] of workers) {
-      shutdowns.push(w.conductor.shutdown(reason));
-    }
-    for (const [, w] of redundantWorkers) {
-      shutdowns.push(w.conductor.shutdown(reason));
-    }
-
-    await Promise.allSettled(shutdowns);
-
-    lockManager?.releaseAll();
-    await eventLog.flush();
-    await eventLog.close();
-
-    if (state !== SWARM_STATES.COMPLETED && state !== SWARM_STATES.FAILED) {
-      setState(SWARM_STATES.FAILED, reason);
-    }
-
-    emitter.emit('shutdown', { reason });
-  }
-
-  return Object.freeze({
-    launch,
-    shutdown,
-    getStatus,
-    validateResult,
-    on: emitter.on.bind(emitter),
-    off: emitter.off.bind(emitter),
-    get state() { return state; },
-    get plan() { return plan; },
-    get eventLogPath() { return eventLog.filePath; },
-  });
-}
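For context on what this removal takes away: the deleted module's public surface was createSwarmHypervisor(), which returned { launch, shutdown, getStatus, validateResult, on, off, state, plan, eventLogPath }. The sketch below shows how a caller might have driven that API in 10.0.0-alpha.2. It is inferred only from the code in this hunk: the import subpath, the plan object shape (shards, mergeOrder, conflicts, criticalShards, leaseMap), and the shard fields (name, agent, prompt, files, depends, critical, mcp) are assumptions reconstructed from how launch() and launchShard() read them, since swarm-planner.mjs is not shown here.

// Hypothetical driver for the removed hypervisor (10.0.0-alpha.2); shapes inferred from this diff.
import { createSwarmHypervisor } from '@triflux/remote/hub/team/swarm-hypervisor.mjs'; // assumed subpath export

// Plan shape assumed from launch(): shards, mergeOrder, conflicts, criticalShards, leaseMap.
const plan = {
  shards: [
    { name: 'core', agent: 'codex', prompt: 'Refactor core module', files: ['src/core.mjs'], depends: [], critical: true, mcp: [] },
    { name: 'docs', agent: 'claude', prompt: 'Update README', files: ['README.md'], depends: ['core'], critical: false, mcp: [] },
  ],
  mergeOrder: ['core', 'docs'],
  conflicts: [],
  criticalShards: ['core'],
  leaseMap: new Map([['core', ['src/core.mjs']], ['docs', ['README.md']]]),
};

const swarm = createSwarmHypervisor({ workdir: process.cwd(), logsDir: './logs/swarm' });

// Observe the swarm via the events the hypervisor emitted.
swarm.on('stateChange', ({ from, to, reason }) => console.log(`swarm: ${from} -> ${to} (${reason})`));
swarm.on('shardFailed', ({ shardName, failureMode }) => console.warn(`${shardName} failed: ${failureMode}`));
swarm.on('integrationComplete', ({ integrated, failed }) => console.log({ integrated, failed }));

swarm.launch(plan); // LAUNCHING -> RUNNING; acquires file leases, spawns one conductor per shard

// Graceful teardown: stop all conductor sessions and release file leases.
process.on('SIGINT', () => void swarm.shutdown('sigint'));

In 10.0.0 this orchestration surface is gone along with swarm-locks.mjs, swarm-planner.mjs, swarm-reconciler.mjs, and worktree-lifecycle.mjs; only the new hub/team/remote-session.mjs is added in their place, so any code following the pattern above must migrate.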