agent-pool-mcp 1.5.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/scheduler/daemon.js +321 -70
- package/src/scheduler/pipeline.js +82 -74
- package/src/scheduler/run-signals.js +81 -0
- package/src/server.js +61 -1
- package/src/tool-definitions.js +45 -1
- package/src/tools/messaging.js +104 -0
package/package.json
CHANGED
package/src/scheduler/daemon.js
CHANGED
@@ -11,10 +11,15 @@
  * @module agent-pool/scheduler/daemon
  */

-import { readFileSync, writeFileSync, existsSync, mkdirSync, unlinkSync } from 'node:fs';
+import { readFileSync, writeFileSync, existsSync, mkdirSync, unlinkSync, readdirSync, renameSync } from 'node:fs';
 import { spawn } from 'node:child_process';
 import { join, dirname } from 'node:path';
 import { matchesCron } from './cron.js';
+import { getGroup } from '../tools/groups.js';
+import { getRunner } from '../runner/config.js';
+import { buildSshSpawn } from '../runner/ssh.js';
+import { killGroup } from '../runner/process-manager.js';
+import { consumeSignals, deleteSignals } from './run-signals.js';

 const POLL_INTERVAL_MS = 30_000; // Check schedules every 30 seconds
 const PID_FILE = '.agents/scheduler.pid';
@@ -159,62 +164,233 @@ function executeSchedule(schedule) {
   console.error(`[scheduler] Started: ${schedule.id} → gemini pid ${child.pid}`);
 }

-// ─── Pipeline tick
-
-import { readdirSync } from 'node:fs';
+// ─── Pipeline tick ──────────────────────────────────────────────────

 const PIPELINES_DIR = '.agents/pipelines';
 const RUNS_DIR = '.agents/runs';

 /**
- *
+ * In-memory pipeline state cache.
+ * Loaded from disk on startup, updated in-place during ticks.
+ * Written to disk on state transitions (write-through).
+ * @type {Map<string, object>}
+ */
+const runCache = new Map();
+
+/**
+ * Load all active runs from disk into the in-memory cache.
+ * Called once on daemon startup.
+ */
+function loadRunCache() {
+  const dir = join(cwd, RUNS_DIR);
+  if (!existsSync(dir)) return;
+  for (const f of readdirSync(dir).filter(f => f.endsWith('.json') && !f.includes('.signal-'))) {
+    try {
+      const run = JSON.parse(readFileSync(join(dir, f), 'utf-8'));
+      const runId = f.replace('.json', '');
+      runCache.set(runId, run);
+    } catch { /* skip corrupted */ }
+  }
+  console.error(`[pipeline] Loaded ${runCache.size} runs into memory cache`);
+}
+
+/**
+ * Persist a run to disk atomically (write-then-rename).
+ * Prevents corruption if daemon crashes mid-write.
+ * @param {string} runId
+ * @param {object} run
+ */
+function persistRun(runId, run) {
+  const dir = join(cwd, RUNS_DIR);
+  mkdirSync(dir, { recursive: true });
+  const target = join(dir, `${runId}.json`);
+  const tmp = join(dir, `${runId}.json.tmp`);
+  writeFileSync(tmp, JSON.stringify(run, null, 2));
+  // Atomic rename (same filesystem) — prevents corruption on crash
+  try { renameSync(tmp, target); }
+  catch { writeFileSync(target, JSON.stringify(run, null, 2)); }
+}
+
+/**
+ * Apply consumed signal files to a run's in-memory state.
+ * @param {object} run - Run state object (mutated in place)
+ * @param {Array} signals - Consumed signal objects
+ * @param {object} pipeline - Pipeline definition
+ * @returns {boolean} true if any signal was applied
+ */
+function applySignals(run, signals, pipeline) {
+  let modified = false;
+  for (const signal of signals) {
+    if (signal.type === 'STEP_COMPLETE') {
+      const step = run.steps[signal.stepName];
+      if (step && step.status === 'running') {
+        step.status = 'success';
+        step.signaled = true;
+        step.completedAt = new Date().toISOString();
+        if (signal.output) step.output = signal.output;
+        modified = true;
+        console.error(`[pipeline] Signal: step "${signal.stepName}" completed`);
+      }
+    } else if (signal.type === 'BOUNCE_BACK') {
+      const targetStep = run.steps[signal.stepName];
+      if (!targetStep) continue;
+
+      const stepDef = pipeline?.steps.find(s => s.name === signal.stepName);
+      const maxBounces = stepDef?.maxBounces ?? 2;
+
+      if (targetStep.bounces >= maxBounces) {
+        // Bounce limit reached
+        targetStep.status = 'failed';
+        targetStep.lastBounceReason = `Bounce limit (${maxBounces}) reached. Last: ${signal.reason}`;
+        run.status = 'failed';
+        run.completedAt = new Date().toISOString();
+        console.error(`[pipeline] Bounce limit reached for "${signal.stepName}"`);
+      } else {
+        // Reset target step
+        targetStep.status = 'bounce_pending';
+        targetStep.bounces = (targetStep.bounces || 0) + 1;
+        targetStep.lastBounceReason = signal.reason;
+
+        // Kill running processes for this step
+        const pidsToKill = [...(targetStep.pids || [])];
+        if (targetStep.pid && !pidsToKill.includes(targetStep.pid)) pidsToKill.push(targetStep.pid);
+        for (const pid of pidsToKill) killGroup(pid);
+
+        targetStep.pid = null;
+        targetStep.pids = [];
+        targetStep.exitCode = null;
+        targetStep.signaled = false;
+
+        // Reset calling step
+        if (signal.callingStepName && run.steps[signal.callingStepName]) {
+          run.steps[signal.callingStepName].status = 'waiting_bounce';
+        }
+        console.error(`[pipeline] Bounce: step "${signal.stepName}" reset (reason: ${signal.reason})`);
+      }
+      modified = true;
+    } else if (signal.type === 'CANCEL_RUN') {
+      // Cancel the entire run
+      for (const [name, step] of Object.entries(run.steps)) {
+        if (step.status === 'running') step.status = 'cancelled';
+        if (step.status === 'pending') step.status = 'skipped';
+      }
+      run.status = 'cancelled';
+      run.completedAt = new Date().toISOString();
+      console.error(`[pipeline] Signal: run cancelled`);
+      modified = true;
+    }
+  }
+  return modified;
+}
+
+/**
+ * Spawn Gemini CLI agent(s) for a pipeline step.
  * @param {object} stepDef - Step definition from pipeline
  * @param {object} run - Current run state
  * @param {string} runId
  * @param {string} [bounceReason] - If bouncing back, the reason
- * @returns {number} child
+ * @returns {number[]} Array of child PIDs
  */
 function spawnStep(stepDef, run, runId, bounceReason) {
-
-
-
+  const count = stepDef.count || 1;
+  const pids = [];
+
+  // Resolve group
+  let groupConfig = {};
+  if (stepDef.group) {
+    groupConfig = getGroup(run.cwd || cwd, stepDef.group) || {};
   }

-
-
+  const skill = stepDef.skill || groupConfig.skill;
+  const policy = groupConfig.policy; // currently policy only from group
+  const runnerId = groupConfig.runner;
+  const runner = runnerId ? getRunner(runnerId) : { type: 'local' };
+  const isRemote = runner && runner.type === 'ssh';

-
-
-
-
-
+  for (let i = 0; i < count; i++) {
+    let prompt = stepDef.prompt;
+    if (bounceReason) {
+      prompt = `${stepDef.prompt}\n\n⚠️ BOUNCE BACK: предыдущая попытка была отклонена следующим шагом.\nПричина: ${bounceReason}\nДополни и улучши результат.`;
+    }

-
-
-
-      stdio: ['pipe', 'pipe', 'pipe'],
-      detached: true,
-    });
+    if (count > 1) {
+      prompt = `[Agent ${i + 1}/${count}]\n\n${prompt}`;
+    }

-
-
-
-
-
-
+    // Inject pipeline context
+    prompt = `[Pipeline: ${run.pipelineName}, Step: ${stepDef.name}, Run: ${runId}]\n\nTask:\n${prompt}\n\nWhen finished, call signal_step_complete with step_name "${stepDef.name}" and run_id "${runId}".`;
+
+    const args = [
+      '-p', prompt,
+      '--output-format', 'stream-json',
+      '--approval-mode', stepDef.approvalMode || 'yolo',
+    ];
+
+    if (skill) {
+      // Skills can be active via prompt injection, as we do for scheduled tasks
+      args[1] = `Activate skill "${skill}" first.\n\n${args[1]}`;
+    }
+    if (policy) {
+      args.push('--policy', policy);
+    }
+    if (groupConfig.include_dirs?.length > 0) {
+      for (const dir of groupConfig.include_dirs) {
+        args.push('--include-directories', dir);
       }
-
-    } catch { /* ignore */ }
-    console.error(`[pipeline] Step "${stepDef.name}" exited (code: ${code}, run: ${runId})`);
-  });
+    }

-
-
+    let spawnCmd, spawnArgs, spawnOpts;
+    if (isRemote) {
+      const ssh = buildSshSpawn(runner, args, run.cwd || cwd);
+      spawnCmd = ssh.command;
+      spawnArgs = ssh.args;
+      spawnOpts = { stdio: ['pipe', 'pipe', 'pipe'], detached: true };
+    } else {
+      spawnCmd = 'gemini';
+      spawnArgs = args;
+      const currentDepth = parseInt(process.env.AGENT_POOL_DEPTH ?? '0');
+      spawnOpts = {
+        cwd: run.cwd || cwd,
+        env: {
+          ...process.env,
+          TERM: 'dumb',
+          CI: '1',
+          AGENT_POOL_DEPTH: String(currentDepth + 1)
+        },
+        stdio: ['pipe', 'pipe', 'pipe'],
+        detached: true,
+      };
+      if (count > 1) spawnOpts.env.AGENT_INDEX = String(i);
+    }

-
-
+    const child = spawn(spawnCmd, spawnArgs, spawnOpts);
+
+    child.on('close', (code) => {
+      // Update step exit code in in-memory state directly (same process)
+      const currentRun = runCache.get(runId);
+      if (currentRun?.steps[stepDef.name]) {
+        if (code !== 0) {
+          currentRun.steps[stepDef.name].exitCode = code;
+        } else if (currentRun.steps[stepDef.name].exitCode === null) {
+          currentRun.steps[stepDef.name].exitCode = 0;
+        }
+        // Write-through to disk
+        persistRun(runId, currentRun);
+      }
+      console.error(`[pipeline] Step "${stepDef.name}" [pid ${child.pid}] exited (code: ${code}, run: ${runId})`);
+    });
+
+    child.stdin.end();
+    child.unref();
+
+    console.error(`[pipeline] Started step "${stepDef.name}" → pid ${child.pid} (run: ${runId})`);
+    pids.push(child.pid);
+  }
+
+  return pids;
 }

+
 /**
  * Check if a process is alive.
  * @param {number} pid
@@ -226,33 +402,69 @@ function isAlive(pid) {
 }

 /**
- * Process pipeline runs — check triggers, advance steps.
+ * Process pipeline runs — consume signals, check triggers, advance steps.
+ * Uses in-memory cache for state; persists to disk on changes.
  * @returns {boolean} true if any pipeline is actively running
  */
 function tickPipelines() {
+  // Pick up new runs added to disk since last tick (e.g., from runPipeline)
   const runsDir = join(cwd, RUNS_DIR);
-  if (
+  if (existsSync(runsDir)) {
+    for (const f of readdirSync(runsDir).filter(f => f.endsWith('.json') && !f.includes('.signal-') && !f.endsWith('.tmp'))) {
+      const runId = f.replace('.json', '');
+      if (!runCache.has(runId)) {
+        try {
+          const run = JSON.parse(readFileSync(join(runsDir, f), 'utf-8'));
+          runCache.set(runId, run);
+          console.error(`[pipeline] Picked up new run: ${runId}`);
+        } catch { /* skip corrupted */ }
+      }
+    }
+  }

   const pipelinesDir = join(cwd, PIPELINES_DIR);
   let hasActive = false;

-
-
-
-
-
-    if (run.status !== 'running')
+  // Iterate over a copy of keys to allow modification of runCache during iteration
+  for (const runId of Array.from(runCache.keys())) {
+    const run = runCache.get(runId);
+
+    // Evict completed runs from cache (memory leak fix)
+    if (run.status !== 'running') {
+      // Clean up any orphaned/late signals for completed runs
+      const lateSignals = consumeSignals(cwd, runId);
+      if (lateSignals.length > 0) {
+        deleteSignals(cwd, lateSignals);
+        console.error(`[pipeline] Cleaned ${lateSignals.length} orphaned signal(s) for completed run ${runId}`);
+      }
+      runCache.delete(runId);
+      continue;
+    }
     hasActive = true;

     // Load pipeline definition
     let pipeline;
     try {
       pipeline = JSON.parse(readFileSync(join(pipelinesDir, `${run.pipeline}.json`), 'utf-8'));
-    } catch {
+    } catch {
+      console.error(`[pipeline] Could not load pipeline definition for run ${runId}: ${run.pipeline}.json`);
+      continue;
+    }

-
+    // 1. Consume and apply signal files
+    const signals = consumeSignals(cwd, runId);
     let modified = false;

+    if (signals.length > 0) {
+      modified = applySignals(run, signals, pipeline);
+      if (modified) {
+        // Durability: persist state BEFORE deleting signals
+        persistRun(runId, run);
+        deleteSignals(cwd, signals);
+      }
+    }
+
+    // 2. Process each step
     for (const stepDef of pipeline.steps) {
       const step = run.steps[stepDef.name];
       if (!step) continue;
@@ -261,33 +473,67 @@ function tickPipelines() {
       if (step.status === 'bounce_pending') {
         step.status = 'running';
         step.startedAt = new Date().toISOString();
-
+        const pids = spawnStep(stepDef, run, runId, step.lastBounceReason);
+        step.pids = pids;
+        if (pids.length > 0) step.pid = pids[0];
         modified = true;
         continue;
       }

       // ── Handle running steps: check if process died ──
-      if (step.status === 'running'
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+      if (step.status === 'running') {
+        const pids = step.pids?.length > 0 ? step.pids : (step.pid ? [step.pid] : []);
+        if (pids.length === 0) continue;
+
+        let livingPids = 0;
+        for (const pid of pids) if (isAlive(pid)) livingPids++;
+
+        const isParallel = pids.length > 1;
+
+        if (isParallel) {
+          // Parallel semantics: rely entirely on exit codes
+          if (step.exitCode !== null && step.exitCode !== 0) {
+            // Fail fast: kill siblings
+            for (const pid of pids) if (isAlive(pid)) killGroup(pid);
+            step.status = 'failed';
+            step.completedAt = new Date().toISOString();
+            console.error(`[pipeline] Step "${stepDef.name}" parallel failed (exit: ${step.exitCode})`);
+            if (pipeline.onError === 'stop') {
+              run.status = 'failed';
+              run.completedAt = new Date().toISOString();
             }
             modified = true;
+          } else if (livingPids === 0) {
+            // All dead and no errors
+            step.status = 'success';
+            step.completedAt = new Date().toISOString();
+            console.error(`[pipeline] Step "${stepDef.name}" parallel completed successfully`);
+            modified = true;
+          }
+        } else {
+          // Sequential semantics (count 1)
+          const pid = pids[0];
+          if (!isAlive(pid)) {
+            // Process is dead — did agent signal?
+            if (!step.signaled) {
+              // Auto-fallback: check exit code
+              if (step.exitCode === 0 || step.exitCode === null) {
+                // Treat as success (agent forgot to signal)
+                step.status = 'success';
+                step.completedAt = new Date().toISOString();
+                console.error(`[pipeline] Step "${stepDef.name}" auto-completed (pid dead, exit: ${step.exitCode})`);
+              } else {
+                // Failed
+                step.status = 'failed';
+                step.completedAt = new Date().toISOString();
+                console.error(`[pipeline] Step "${stepDef.name}" failed (exit: ${step.exitCode})`);
+                if (pipeline.onError === 'stop') {
+                  run.status = 'failed';
+                  run.completedAt = new Date().toISOString();
+                }
+              }
+              modified = true;
+            }
           }
         }
         continue;
@@ -324,7 +570,9 @@ function tickPipelines() {
       if (shouldStart && run.status === 'running') {
         step.status = 'running';
         step.startedAt = new Date().toISOString();
-
+        const pids = spawnStep(stepDef, run, runId);
+        step.pids = pids;
+        if (pids.length > 0) step.pid = pids[0];
         modified = true;
       }
     }
@@ -335,7 +583,9 @@ function tickPipelines() {
       if (depStepName && run.steps[depStepName]?.status === 'success') {
         step.status = 'running';
         step.startedAt = new Date().toISOString();
-
+        const pids = spawnStep(stepDef, run, runId);
+        step.pids = pids;
+        if (pids.length > 0) step.pid = pids[0];
         modified = true;
       }
     }
@@ -354,7 +604,7 @@ function tickPipelines() {
     }

     if (modified) {
-
+      persistRun(runId, run);
     }
   }

@@ -415,9 +665,10 @@ function tick() {
   setTimeout(tick, nextTickMs);
 }

-// ─── Startup
+// ─── Startup ────────────────────────────────────────────────────

 acquireLock();
+loadRunCache();

 process.on('SIGINT', () => { releaseLock(); process.exit(0); });
 process.on('SIGTERM', () => { releaseLock(); process.exit(0); });
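
The persistRun/loadRunCache pair above is a write-through cache over plain JSON files. A minimal standalone sketch of the same write-then-rename idea, with illustrative names that are not part of the package:

// Sketch of the crash-safe persistence pattern used by persistRun (illustrative names).
import { writeFileSync, renameSync, readFileSync, mkdirSync } from 'node:fs';
import { join } from 'node:path';

function saveJsonAtomic(dir, id, state) {
  mkdirSync(dir, { recursive: true });
  const target = join(dir, `${id}.json`);
  const tmp = join(dir, `${id}.json.tmp`);
  // Write the whole payload to a temp file first; readers never observe a half-written target.
  writeFileSync(tmp, JSON.stringify(state, null, 2));
  // rename() is atomic on the same filesystem: after a crash, either the old or the new file exists.
  renameSync(tmp, target);
}

const loadJson = (dir, id) => JSON.parse(readFileSync(join(dir, `${id}.json`), 'utf-8'));

Note that the daemon only falls back to a plain writeFileSync if the rename itself fails, trading atomicity for not dropping the update entirely.
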
package/src/scheduler/pipeline.js
CHANGED
@@ -11,6 +11,8 @@ import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync, unlink
 import { join, dirname } from 'node:path';
 import { randomUUID } from 'node:crypto';
 import { ensureDaemon } from './scheduler.js';
+import { killGroup } from '../runner/process-manager.js';
+import { writeSignal } from './run-signals.js';

 const PIPELINES_DIR = '.agents/pipelines';
 const RUNS_DIR = '.agents/runs';
@@ -69,6 +71,8 @@ export function createPipeline(cwd, { name, steps, onError }) {
       name: s.name,
       prompt: s.prompt,
       skill: s.skill || null,
+      group: s.group || null,
+      count: s.count ? parseInt(s.count, 10) : 1,
       approvalMode: s.approval_mode || 'yolo',
       timeout: s.timeout || 600,
       maxBounces: s.maxBounces ?? s.max_bounces ?? 2,
@@ -134,7 +138,8 @@ export function runPipeline(cwd, pipelineId) {
   for (const step of pipeline.steps) {
     steps[step.name] = {
       status: 'pending',
-      pid: null,
+      pid: null,   // Legacy / single pid
+      pids: [],    // Array for parallel execution
       exitCode: null,
       signaled: false,
       bounces: 0,
@@ -198,7 +203,7 @@ export function listRuns(cwd, pipelineId) {
   const dir = join(cwd, RUNS_DIR);
   if (!existsSync(dir)) return [];
   return readdirSync(dir)
-    .filter(f => f.endsWith('.json'))
+    .filter(f => f.endsWith('.json') && !f.includes('.signal-'))
     .map(f => {
       try { return JSON.parse(readFileSync(join(dir, f), 'utf-8')); }
       catch { return null; }
@@ -208,7 +213,8 @@ export function listRuns(cwd, pipelineId) {
 }

 /**
- * Cancel a pipeline run.
+ * Cancel a pipeline run. Writes a signal file for the daemon.
+ * Kills running processes immediately for responsiveness.
  * @param {string} cwd
  * @param {string} runId
  * @returns {boolean}
@@ -217,19 +223,22 @@ export function cancelRun(cwd, runId) {
   const run = getRun(cwd, runId);
   if (!run || run.status !== 'running') return false;

-  // Kill
+  // Kill running processes immediately (side-effect safe)
   for (const [name, step] of Object.entries(run.steps)) {
-    if (step.status === 'running'
-
-      step.
-
-
-
+    if (step.status === 'running') {
+      const pidsToKill = [...(step.pids || [])];
+      if (step.pid && !pidsToKill.includes(step.pid)) pidsToKill.push(step.pid);
+      for (const pid of pidsToKill) {
+        killGroup(pid);
+      }
     }
   }
-
-
-
+
+  // Write signal file — daemon will apply the state change
+  writeSignal(cwd, runId, {
+    type: 'CANCEL_RUN',
+  });
+
   return true;
 }

@@ -245,7 +254,7 @@ export function findActiveRunByStep(cwd, stepName) {
   const dir = join(cwd, RUNS_DIR);
   if (!existsSync(dir)) return null;

-  for (const f of readdirSync(dir).filter(f => f.endsWith('.json'))) {
+  for (const f of readdirSync(dir).filter(f => f.endsWith('.json') && !f.includes('.signal-'))) {
     try {
       const run = JSON.parse(readFileSync(join(dir, f), 'utf-8'));
       if (run.status === 'running' && run.steps[stepName]) {
@@ -258,42 +267,42 @@ export function findActiveRunByStep(cwd, stepName) {

 /**
  * Signal step completion. Called by agent via MCP tool.
+ * Writes a signal file instead of mutating run state directly.
+ * The daemon will consume this signal on its next tick.
  * @param {string} cwd
  * @param {string} stepName
  * @param {string} [output]
  * @param {string} [runId] - Specific run ID (recommended)
- * @returns {{ success: boolean
+ * @returns {{ success: boolean }}
  */
 export function signalStepComplete(cwd, stepName, output, runId) {
-  let
+  let resolvedRunId = runId;

-  if (
-    // Direct lookup by run ID
-    run = getRun(cwd, runId);
-    resolvedRunId = runId;
-  } else {
+  if (!resolvedRunId) {
     // Fallback: search by step name
     const found = findActiveRunByStep(cwd, stepName);
     if (!found) return { success: false };
-    run = found.run;
     resolvedRunId = found.runId;
   }

+  // Verify run exists and is active
+  const run = getRun(cwd, resolvedRunId);
   if (!run || run.status !== 'running') return { success: false };
-
-  if (!step || step.status !== 'running') return { success: false };
+  if (!run.steps[stepName] || run.steps[stepName].status !== 'running') return { success: false };

-
-
-
-
+  // Write signal file — daemon will apply it
+  writeSignal(cwd, resolvedRunId, {
+    type: 'STEP_COMPLETE',
+    stepName,
+    output: output || null,
+  });

-  saveRun(cwd, resolvedRunId, run);
   return { success: true };
 }

 /**
  * Bounce back to a previous step. Called by agent via MCP tool.
+ * Writes a signal file instead of mutating run state directly.
  * @param {string} cwd
  * @param {string} targetStepName - Step to re-run
  * @param {string} reason - Why bouncing back
@@ -301,54 +310,53 @@ export function signalStepComplete(cwd, stepName, output, runId) {
  * @returns {{ success: boolean, bounceCount?: number, maxBounces?: number }}
  */
 export function bounceBack(cwd, targetStepName, reason, runId) {
-  // Find active run
-
-
+  // Find the active run containing this step
+  let resolvedRunId = runId;
+  let run;

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        targetStep.lastBounceReason = `Bounce limit (${maxBounces}) reached. Last: ${reason}`;
-        run.status = 'failed';
-        run.completedAt = new Date().toISOString();
-        saveRun(cwd, f.replace('.json', ''), run);
-        return { success: false, bounceCount: targetStep.bounces, maxBounces };
-      }
+  if (resolvedRunId) {
+    run = getRun(cwd, resolvedRunId);
+  } else {
+    const dir = join(cwd, RUNS_DIR);
+    if (!existsSync(dir)) return { success: false };
+    for (const f of readdirSync(dir).filter(f => f.endsWith('.json') && !f.includes('.signal-'))) {
+      try {
+        const r = JSON.parse(readFileSync(join(dir, f), 'utf-8'));
+        if (r.status === 'running' && r.steps[targetStepName]) {
+          run = r;
+          resolvedRunId = f.replace('.json', '');
+          break;
+        }
+      } catch { /* skip */ }
+    }
+  }

-
-  targetStep.status = 'bounce_pending';
-  targetStep.bounces += 1;
-  targetStep.lastBounceReason = reason;
-  targetStep.pid = null;
-  targetStep.exitCode = null;
-  targetStep.signaled = false;
-
-  // Reset the calling step too
-  const callingStepName = Object.keys(run.steps).find(name => {
-    const s = run.steps[name];
-    return s.status === 'running';
-  });
-  if (callingStepName) {
-    run.steps[callingStepName].status = 'waiting_bounce';
-  }
+  if (!run || run.status !== 'running') return { success: false };

-
-
-
+  const targetStep = run.steps[targetStepName];
+  if (!targetStep) return { success: false };
+
+  // Check bounce limit (read-only check — safe without lock)
+  const pipeline = getPipeline(run.cwd || cwd, run.pipeline);
+  const stepDef = pipeline?.steps.find(s => s.name === targetStepName);
+  const maxBounces = stepDef?.maxBounces ?? 2;
+
+  if (targetStep.bounces >= maxBounces) {
+    return { success: false, bounceCount: targetStep.bounces, maxBounces };
   }

-
+  // Find the calling step name (the step that's bouncing back)
+  const callingStepName = Object.keys(run.steps).find(name =>
+    run.steps[name].status === 'running' && name !== targetStepName,
+  );
+
+  // Write signal file — daemon will apply the state changes and kill processes
+  writeSignal(cwd, resolvedRunId, {
+    type: 'BOUNCE_BACK',
+    stepName: targetStepName,
+    callingStepName: callingStepName || null,
+    reason,
+  });
+
+  return { success: true, bounceCount: targetStep.bounces + 1, maxBounces };
 }
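
With this change, cancelRun, signalStepComplete, and bounceBack no longer mutate run JSON in place; they only emit signal files. Based on the calls above (writeSignal appends the timestamp itself), the payloads they write look roughly like this; the field values here are invented for illustration:

// Illustrative payload shapes only; field names come from the diff above, values are made up.
const stepComplete = { type: 'STEP_COMPLETE', stepName: 'build', output: 'dist/ ready' };
const bounce       = { type: 'BOUNCE_BACK', stepName: 'draft', callingStepName: 'review', reason: 'tests missing' };
const cancel       = { type: 'CANCEL_RUN' };
// The daemon's applySignals() switches on `type` and performs the matching state transition.
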
package/src/scheduler/run-signals.js
ADDED
@@ -0,0 +1,81 @@
+/**
+ * Run signal files — atomic communication between MCP server and daemon.
+ *
+ * Instead of MCP tools writing directly to run JSON (race condition),
+ * they write small signal files that the daemon consumes on each tick.
+ *
+ * Signal types: STEP_COMPLETE, BOUNCE_BACK
+ *
+ * @module agent-pool/scheduler/run-signals
+ */
+
+import { writeFileSync, readFileSync, readdirSync, unlinkSync, existsSync, mkdirSync } from 'node:fs';
+import { join } from 'node:path';
+import { randomUUID } from 'node:crypto';
+
+const RUNS_DIR = '.agents/runs';
+
+/**
+ * Write a signal file for a specific run.
+ * Signal files are atomic — no concurrent read-modify-write.
+ * @param {string} cwd
+ * @param {string} runId
+ * @param {object} signal - { type, stepName, output?, reason?, targetStep? }
+ */
+export function writeSignal(cwd, runId, signal) {
+  const dir = join(cwd, RUNS_DIR);
+  mkdirSync(dir, { recursive: true });
+
+  const id = randomUUID().split('-')[0];
+  const fileName = `${runId}.signal-${id}.json`;
+  const payload = {
+    ...signal,
+    timestamp: new Date().toISOString(),
+  };
+
+  writeFileSync(join(dir, fileName), JSON.stringify(payload));
+}
+
+/**
+ * Consume all pending signal files for a run.
+ * Returns signals sorted by timestamp. Does NOT delete them —
+ * caller must call deleteSignals() after persisting state.
+ * @param {string} cwd
+ * @param {string} runId
+ * @returns {Array<{ type: string, stepName: string, fileName: string, [key: string]: any }>}
+ */
+export function consumeSignals(cwd, runId) {
+  const dir = join(cwd, RUNS_DIR);
+  if (!existsSync(dir)) return [];
+
+  const prefix = `${runId}.signal-`;
+  const signalFiles = readdirSync(dir).filter(f => f.startsWith(prefix) && f.endsWith('.json'));
+
+  const signals = [];
+  for (const f of signalFiles) {
+    try {
+      const data = JSON.parse(readFileSync(join(dir, f), 'utf-8'));
+      signals.push({ ...data, fileName: f });
+    } catch {
+      // Include corrupted files so they get cleaned up by deleteSignals
+      signals.push({ type: '_corrupted', fileName: f });
+    }
+  }
+
+  // Sort by timestamp for deterministic processing
+  signals.sort((a, b) => (a.timestamp || '').localeCompare(b.timestamp || ''));
+  return signals;
+}
+
+/**
+ * Delete signal files after state has been persisted to disk.
+ * @param {string} cwd
+ * @param {Array<{ fileName: string }>} signals
+ */
+export function deleteSignals(cwd, signals) {
+  const dir = join(cwd, RUNS_DIR);
+  for (const s of signals) {
+    try { unlinkSync(join(dir, s.fileName)); }
+    catch { /* ignore */ }
+  }
+}
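
A minimal end-to-end sketch of the intended lifecycle, assuming the module is imported from its path inside the package. The ordering (persist run state before deleteSignals) is what lets a crash replay a signal instead of losing it:

import { writeSignal, consumeSignals, deleteSignals } from './run-signals.js';

const cwd = process.cwd();

// Producer side (an MCP tool handler): drop a one-shot instruction for the daemon.
writeSignal(cwd, 'run-123', { type: 'STEP_COMPLETE', stepName: 'build', output: null });

// Consumer side (daemon tick): read every pending signal for the run...
const signals = consumeSignals(cwd, 'run-123');
// ...apply them to the in-memory run and persist that state to disk
// (applySignals/persistRun in daemon.js), and only then remove the signal files.
deleteSignals(cwd, signals);
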
package/src/server.js
CHANGED
@@ -23,6 +23,7 @@ import { consultPeer } from './tools/consult.js';
 import { addSchedule, listSchedules, removeSchedule, getScheduledResults, getDaemonStatus } from './scheduler/scheduler.js';
 import { createPipeline, listPipelines, runPipeline, getRun, listRuns, cancelRun, signalStepComplete, bounceBack } from './scheduler/pipeline.js';
 import { createGroup, listGroups, getGroup } from './tools/groups.js';
+import { sendMessage, getMessages } from './tools/messaging.js';

 import { TOOL_DEFINITIONS } from './tool-definitions.js';

@@ -112,7 +113,7 @@ export function createServer() {
   }

   const server = new Server(
-    { name: 'agent-pool', version: '1.
+    { name: 'agent-pool', version: '1.7.0' },
     { capabilities: { tools: {}, resources: {} } },
   );

@@ -208,6 +209,10 @@ export function createServer() {
         response = handleListGroups(args); break;
       case 'delegate_to_group':
         response = handleDelegateToGroup(args); break;
+      case 'send_message':
+        response = handleSendMessage(args); break;
+      case 'get_messages':
+        response = handleGetMessages(args); break;
       default:
         response = { content: [{ type: 'text', text: `Unknown tool: ${name}` }], isError: true };
     }
@@ -803,3 +808,58 @@ function handleDelegateToGroup(args) {
     }],
   };
 }
+
+// ─── Messaging handlers ─────────────────────────────────────
+
+function handleSendMessage(args) {
+  const cwd = args.cwd ?? defaultCwd;
+  const result = sendMessage(cwd, {
+    channel: args.channel,
+    payload: args.payload,
+    from: args.from,
+  });
+
+  if (!result.success) {
+    return {
+      content: [{ type: 'text', text: `❌ Failed to send message: ${result.error || 'unknown error'}` }],
+      isError: true,
+    };
+  }
+
+  return {
+    content: [{ type: 'text', text: `📨 Message sent to channel \`${result.channel}\`.` }],
+  };
+}
+
+function handleGetMessages(args) {
+  const cwd = args.cwd ?? defaultCwd;
+  const result = getMessages(cwd, {
+    channel: args.channel,
+    clear: args.clear,
+  });
+
+  if (result.error) {
+    return {
+      content: [{ type: 'text', text: `❌ ${result.error}` }],
+      isError: true,
+    };
+  }
+
+  if (result.count === 0) {
+    return {
+      content: [{ type: 'text', text: `📭 No messages on channel \`${args.channel}\`.` }],
+    };
+  }
+
+  const lines = result.messages.map((m, i) =>
+    `**${i + 1}.** [${m.timestamp}] from \`${m.from}\`:\n\`\`\`json\n${JSON.stringify(m.payload, null, 2)}\n\`\`\``
+  );
+
+  return {
+    content: [{
+      type: 'text',
+      text: `📬 **${result.count}** message(s) on channel \`${args.channel}\`${args.clear ? ' (cleared)' : ''}:\n\n${lines.join('\n\n')}`,
+    }],
+  };
+}
+
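
For orientation, these are the argument objects the two new handlers expect, per the switch above and the schemas in tool-definitions.js below; `from` and `cwd` are optional, and the values here are only examples:

// handleSendMessage(args): publish a payload to a channel
const sendArgs = {
  channel: 'run-123:review',                          // targeted: {run_id}:{step_name}
  payload: { verdict: 'approved', notes: ['lgtm'] },  // any JSON-serializable value
  from: 'draft',
};

// handleGetMessages(args): read the channel and, with clear=true, consume it
const getArgs = { channel: 'run-123:review', clear: true };
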
package/src/tool-definitions.js
CHANGED
@@ -408,5 +408,49 @@ export const TOOL_DEFINITIONS = [
       required: ['group', 'prompt'],
     },
   },
+  {
+    name: 'send_message',
+    description: [
+      'Send a message to a channel for inter-agent communication.',
+      'Use this to pass structured data between pipeline steps or between any agents.',
+      '',
+      'Channel conventions:',
+      '  - {run_id} — broadcast to all steps in a pipeline run',
+      '  - {run_id}:{step_name} — targeted to a specific step',
+      '  - any string — ad-hoc channel for custom messaging',
+      '',
+      'Messages are persisted to disk (survives restarts). Uses JSONL format for concurrent-write safety.',
+    ].join('\n'),
+    inputSchema: {
+      type: 'object',
+      properties: {
+        channel: { type: 'string', description: 'Target channel. Use run_id for broadcast, run_id:step_name for targeted.' },
+        payload: { description: 'Message payload (any JSON-serializable value).' },
+        from: { type: 'string', description: 'Sender identifier (e.g., step name or task description).' },
+        cwd: { type: 'string', description: 'Working directory. Defaults to current working directory.' },
+      },
+      required: ['channel', 'payload'],
+    },
+  },
+  {
+    name: 'get_messages',
+    description: [
+      'Read messages from a channel. Returns all messages in chronological order.',
+      '',
+      'Channel conventions:',
+      '  - {run_id} — read broadcast messages for a pipeline run',
+      '  - {run_id}:{step_name} — read messages targeted to a specific step',
+      '',
+      'Use clear=true to consume messages (delete after reading).',
+    ].join('\n'),
+    inputSchema: {
+      type: 'object',
+      properties: {
+        channel: { type: 'string', description: 'Channel to read messages from.' },
+        clear: { type: 'boolean', description: 'If true, clear the channel after reading (consume mode). Default: false.' },
+        cwd: { type: 'string', description: 'Working directory. Defaults to current working directory.' },
+      },
+      required: ['channel'],
+    },
+  },
 ];
-
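
The channel conventions in the two schemas compose naturally inside a single run. A hedged sketch of how two steps might use them; the run ID and step names are invented:

// Step "draft" publishes its result where only "review" is expected to look:
//   send_message { channel: `${runId}:review`, payload: { draftPath: 'out/draft.md' }, from: 'draft' }
// Step "review" consumes it exactly once:
//   get_messages { channel: `${runId}:review`, clear: true }
const runId = '1f2e3d4c';
const broadcastChannel = runId;            // readable by every step in the run
const reviewChannel = `${runId}:review`;   // targeted at the "review" step
const adHocChannel = 'nightly-metrics';    // any string works for cross-run messaging
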
package/src/tools/messaging.js
ADDED
@@ -0,0 +1,104 @@
+/**
+ * Inter-agent messaging — file-based JSONL mailboxes.
+ *
+ * Provides send_message / get_messages tools for agents
+ * to pass structured data between pipeline steps or tasks.
+ *
+ * Uses JSONL format (one JSON object per line) with appendFileSync()
+ * to avoid read-modify-write race conditions on concurrent writes.
+ *
+ * Channel addressing:
+ *   - {run_id}         → broadcast to all steps in a pipeline run
+ *   - {run_id}:{step}  → targeted to a specific step
+ *   - {custom_channel} → any string for ad-hoc messaging
+ *
+ * @module agent-pool/tools/messaging
+ */
+
+import { appendFileSync, readFileSync, writeFileSync, existsSync, mkdirSync, renameSync, unlinkSync } from 'node:fs';
+import { join, dirname } from 'node:path';
+
+const MESSAGES_DIR = '.agents/messages';
+
+/**
+ * Sanitize channel name for use as filename.
+ * @param {string} channel
+ * @returns {string}
+ */
+function sanitizeChannel(channel) {
+  return channel.replace(/[^a-zA-Z0-9_:-]/g, '_');
+}
+
+/**
+ * Send a message to a channel.
+ * Uses appendFileSync for atomic writes (no read-modify-write).
+ * @param {string} cwd
+ * @param {object} opts
+ * @param {string} opts.channel - Target channel (e.g., "run_id:step_name")
+ * @param {*} opts.payload - Message payload (any JSON-serializable value)
+ * @param {string} [opts.from] - Sender identifier
+ * @returns {{ success: boolean, channel: string }}
+ */
+export function sendMessage(cwd, { channel, payload, from }) {
+  if (!channel) return { success: false, error: 'channel is required' };
+
+  const dir = join(cwd, MESSAGES_DIR);
+  mkdirSync(dir, { recursive: true });
+
+  const filePath = join(dir, `${sanitizeChannel(channel)}.jsonl`);
+  const message = {
+    timestamp: new Date().toISOString(),
+    from: from || 'unknown',
+    payload,
+  };
+
+  // JSONL: one JSON object per line, appended atomically
+  appendFileSync(filePath, JSON.stringify(message) + '\n');
+
+  return { success: true, channel };
+}
+
+/**
+ * Get messages from a channel.
+ * @param {string} cwd
+ * @param {object} opts
+ * @param {string} opts.channel - Channel to read from
+ * @param {boolean} [opts.clear] - If true, clear the channel after reading
+ * @returns {{ messages: Array<{ timestamp: string, from: string, payload: any }>, count: number }}
+ */
+export function getMessages(cwd, { channel, clear }) {
+  if (!channel) return { messages: [], count: 0, error: 'channel is required' };
+
+  const filePath = join(cwd, MESSAGES_DIR, `${sanitizeChannel(channel)}.jsonl`);
+  if (!existsSync(filePath)) return { messages: [], count: 0 };
+
+  let content;
+  if (clear) {
+    // Atomic consume: rename file first, then read. Any new messages
+    // appended after rename go to a NEW file (no data loss).
+    const tmpPath = filePath + '.consuming';
+    try {
+      renameSync(filePath, tmpPath);
+      content = readFileSync(tmpPath, 'utf-8').trim();
+      unlinkSync(tmpPath);
+    } catch {
+      // File was deleted or renamed between check and read
+      return { messages: [], count: 0 };
+    }
+  } else {
+    try {
+      content = readFileSync(filePath, 'utf-8').trim();
+    } catch {
+      return { messages: [], count: 0 };
+    }
+  }
+
+  if (!content) return { messages: [], count: 0 };
+
+  const messages = content.split('\n').map(line => {
+    try { return JSON.parse(line); }
+    catch { return null; }
+  }).filter(Boolean);
+
+  return { messages, count: messages.length };
+}