@agent-relay/sdk 3.1.23 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agent-relay-broker-darwin-arm64 +0 -0
- package/bin/agent-relay-broker-darwin-x64 +0 -0
- package/bin/agent-relay-broker-linux-arm64 +0 -0
- package/bin/agent-relay-broker-linux-x64 +0 -0
- package/dist/__tests__/completion-pipeline.test.d.ts +14 -0
- package/dist/__tests__/completion-pipeline.test.d.ts.map +1 -0
- package/dist/__tests__/completion-pipeline.test.js +1476 -0
- package/dist/__tests__/completion-pipeline.test.js.map +1 -0
- package/dist/__tests__/e2e-owner-review.test.js +2 -2
- package/dist/__tests__/e2e-owner-review.test.js.map +1 -1
- package/dist/__tests__/unit.test.js +8 -0
- package/dist/__tests__/unit.test.js.map +1 -1
- package/dist/client.js +2 -2
- package/dist/client.js.map +1 -1
- package/dist/examples/example.js +1 -1
- package/dist/examples/example.js.map +1 -1
- package/dist/examples/ralph-loop.js +6 -6
- package/dist/examples/ralph-loop.js.map +1 -1
- package/dist/relay-adapter.js +4 -4
- package/dist/relay-adapter.js.map +1 -1
- package/dist/relay.d.ts +1 -0
- package/dist/relay.d.ts.map +1 -1
- package/dist/relay.js +2 -0
- package/dist/relay.js.map +1 -1
- package/dist/workflows/runner.d.ts +53 -2
- package/dist/workflows/runner.d.ts.map +1 -1
- package/dist/workflows/runner.js +1277 -94
- package/dist/workflows/runner.js.map +1 -1
- package/dist/workflows/trajectory.d.ts +6 -2
- package/dist/workflows/trajectory.d.ts.map +1 -1
- package/dist/workflows/trajectory.js +37 -2
- package/dist/workflows/trajectory.js.map +1 -1
- package/dist/workflows/types.d.ts +88 -0
- package/dist/workflows/types.d.ts.map +1 -1
- package/dist/workflows/types.js.map +1 -1
- package/dist/workflows/validator.js +4 -4
- package/dist/workflows/validator.js.map +1 -1
- package/package.json +2 -2
package/dist/workflows/runner.js
CHANGED
|
@@ -5,11 +5,12 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import { spawn as cpSpawn, execFileSync } from 'node:child_process';
|
|
7
7
|
import { randomBytes } from 'node:crypto';
|
|
8
|
-
import { createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from 'node:fs';
|
|
8
|
+
import { createWriteStream, existsSync, mkdirSync, readFileSync, readdirSync, renameSync, statSync, writeFileSync, } from 'node:fs';
|
|
9
9
|
import { readFile, writeFile } from 'node:fs/promises';
|
|
10
10
|
import path from 'node:path';
|
|
11
11
|
import { parse as parseYaml } from 'yaml';
|
|
12
12
|
import { stripAnsi as stripAnsiFn } from '../pty.js';
|
|
13
|
+
import { resolveSpawnPolicy } from '../spawn-from-env.js';
|
|
13
14
|
import { loadCustomSteps, resolveAllCustomSteps, validateCustomStepsUsage, CustomStepsParseError, CustomStepResolutionError, } from './custom-steps.js';
|
|
14
15
|
import { InMemoryWorkflowDb } from './memory-db.js';
|
|
15
16
|
import { WorkflowTrajectory } from './trajectory.js';
|
|
@@ -28,6 +29,14 @@ class SpawnExitError extends Error {
|
|
|
28
29
|
this.exitSignal = exitSignal ?? undefined;
|
|
29
30
|
}
|
|
30
31
|
}
|
|
32
|
+
/**
 * Error subtype that carries a `completionReason` next to the usual message,
 * so a catch site can read the reason off the error instance.
 */
class WorkflowCompletionError extends Error {
    /** Reason value supplied by the thrower; stored exactly as given. */
    completionReason;
    /**
     * @param {string} message Human-readable failure description.
     * @param {*} completionReason Reason value to expose on the error.
     */
    constructor(message, completionReason) {
        super(message);
        Object.assign(this, { name: 'WorkflowCompletionError', completionReason });
    }
}
|
|
31
40
|
// ── CLI resolution ───────────────────────────────────────────────────────────
|
|
32
41
|
/**
|
|
33
42
|
* Resolve `cursor` to the concrete cursor agent binary available in PATH.
|
|
@@ -101,6 +110,12 @@ export class WorkflowRunner {
|
|
|
101
110
|
lastActivity = new Map();
|
|
102
111
|
/** Runtime-name lookup for agents participating in supervised owner flows. */
|
|
103
112
|
supervisedRuntimeAgents = new Map();
|
|
113
|
+
/** Runtime-name lookup for active step agents so channel messages can be attributed to a step. */
|
|
114
|
+
runtimeStepAgents = new Map();
|
|
115
|
+
/** Per-step completion evidence collected across output, channel, files, and tool side-effects. */
|
|
116
|
+
stepCompletionEvidence = new Map();
|
|
117
|
+
/** Expected owner/worker identities per step so coordination signals can be validated by sender. */
|
|
118
|
+
stepSignalParticipants = new Map();
|
|
104
119
|
/** Resolved named paths from the top-level `paths` config, keyed by name → absolute directory. */
|
|
105
120
|
resolvedPaths = new Map();
|
|
106
121
|
constructor(options = {}) {
|
|
@@ -182,6 +197,441 @@ export class WorkflowRunner {
|
|
|
182
197
|
}
|
|
183
198
|
return resolved;
|
|
184
199
|
}
|
|
200
|
+
static EVIDENCE_IGNORED_DIRS = new Set([
|
|
201
|
+
'.git',
|
|
202
|
+
'.agent-relay',
|
|
203
|
+
'.trajectories',
|
|
204
|
+
'node_modules',
|
|
205
|
+
]);
|
|
206
|
+
getStepCompletionEvidence(stepName) {
|
|
207
|
+
const record = this.stepCompletionEvidence.get(stepName);
|
|
208
|
+
if (!record)
|
|
209
|
+
return undefined;
|
|
210
|
+
const evidence = structuredClone(record.evidence);
|
|
211
|
+
return this.filterStepEvidenceBySignalProvenance(stepName, evidence);
|
|
212
|
+
}
|
|
213
|
+
getOrCreateStepEvidenceRecord(stepName) {
|
|
214
|
+
const existing = this.stepCompletionEvidence.get(stepName);
|
|
215
|
+
if (existing)
|
|
216
|
+
return existing;
|
|
217
|
+
const now = new Date().toISOString();
|
|
218
|
+
const record = {
|
|
219
|
+
evidence: {
|
|
220
|
+
stepName,
|
|
221
|
+
lastUpdatedAt: now,
|
|
222
|
+
roots: [],
|
|
223
|
+
output: {
|
|
224
|
+
stdout: '',
|
|
225
|
+
stderr: '',
|
|
226
|
+
combined: '',
|
|
227
|
+
},
|
|
228
|
+
channelPosts: [],
|
|
229
|
+
files: [],
|
|
230
|
+
process: {},
|
|
231
|
+
toolSideEffects: [],
|
|
232
|
+
coordinationSignals: [],
|
|
233
|
+
},
|
|
234
|
+
baselineSnapshots: new Map(),
|
|
235
|
+
filesCaptured: false,
|
|
236
|
+
};
|
|
237
|
+
this.stepCompletionEvidence.set(stepName, record);
|
|
238
|
+
return record;
|
|
239
|
+
}
|
|
240
|
+
initializeStepSignalParticipants(stepName, ownerSender, workerSender) {
|
|
241
|
+
this.stepSignalParticipants.set(stepName, {
|
|
242
|
+
ownerSenders: new Set(),
|
|
243
|
+
workerSenders: new Set(),
|
|
244
|
+
});
|
|
245
|
+
this.rememberStepSignalSender(stepName, 'owner', ownerSender);
|
|
246
|
+
this.rememberStepSignalSender(stepName, 'worker', workerSender);
|
|
247
|
+
}
|
|
248
|
+
rememberStepSignalSender(stepName, participant, ...senders) {
|
|
249
|
+
const participants = this.stepSignalParticipants.get(stepName) ??
|
|
250
|
+
{
|
|
251
|
+
ownerSenders: new Set(),
|
|
252
|
+
workerSenders: new Set(),
|
|
253
|
+
};
|
|
254
|
+
this.stepSignalParticipants.set(stepName, participants);
|
|
255
|
+
const target = participant === 'owner' ? participants.ownerSenders : participants.workerSenders;
|
|
256
|
+
for (const sender of senders) {
|
|
257
|
+
const trimmed = sender?.trim();
|
|
258
|
+
if (trimmed)
|
|
259
|
+
target.add(trimmed);
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
resolveSignalParticipantKind(role) {
|
|
263
|
+
const roleLC = role?.toLowerCase().trim();
|
|
264
|
+
if (!roleLC)
|
|
265
|
+
return undefined;
|
|
266
|
+
if (/\b(owner|lead|supervisor)\b/.test(roleLC))
|
|
267
|
+
return 'owner';
|
|
268
|
+
if (/\b(worker|specialist|engineer|implementer)\b/.test(roleLC))
|
|
269
|
+
return 'worker';
|
|
270
|
+
return undefined;
|
|
271
|
+
}
|
|
272
|
+
isSignalFromExpectedSender(stepName, signal) {
|
|
273
|
+
const expectedParticipant = signal.kind === 'worker_done'
|
|
274
|
+
? 'worker'
|
|
275
|
+
: signal.kind === 'lead_done'
|
|
276
|
+
? 'owner'
|
|
277
|
+
: undefined;
|
|
278
|
+
if (!expectedParticipant)
|
|
279
|
+
return true;
|
|
280
|
+
const participants = this.stepSignalParticipants.get(stepName);
|
|
281
|
+
if (!participants)
|
|
282
|
+
return true;
|
|
283
|
+
const allowedSenders = expectedParticipant === 'owner' ? participants.ownerSenders : participants.workerSenders;
|
|
284
|
+
if (allowedSenders.size === 0)
|
|
285
|
+
return true;
|
|
286
|
+
const sender = signal.sender ?? signal.actor;
|
|
287
|
+
if (sender) {
|
|
288
|
+
return allowedSenders.has(sender);
|
|
289
|
+
}
|
|
290
|
+
const observedParticipant = this.resolveSignalParticipantKind(signal.role);
|
|
291
|
+
if (observedParticipant) {
|
|
292
|
+
return observedParticipant === expectedParticipant;
|
|
293
|
+
}
|
|
294
|
+
return signal.source !== 'channel';
|
|
295
|
+
}
|
|
296
|
+
filterStepEvidenceBySignalProvenance(stepName, evidence) {
|
|
297
|
+
evidence.channelPosts = evidence.channelPosts.map((post) => {
|
|
298
|
+
const signals = post.signals.filter((signal) => this.isSignalFromExpectedSender(stepName, signal));
|
|
299
|
+
return {
|
|
300
|
+
...post,
|
|
301
|
+
completionRelevant: signals.length > 0,
|
|
302
|
+
signals,
|
|
303
|
+
};
|
|
304
|
+
});
|
|
305
|
+
evidence.coordinationSignals = evidence.coordinationSignals.filter((signal) => this.isSignalFromExpectedSender(stepName, signal));
|
|
306
|
+
return evidence;
|
|
307
|
+
}
|
|
308
|
+
beginStepEvidence(stepName, roots, startedAt) {
|
|
309
|
+
const record = this.getOrCreateStepEvidenceRecord(stepName);
|
|
310
|
+
const evidence = record.evidence;
|
|
311
|
+
const now = startedAt ?? new Date().toISOString();
|
|
312
|
+
evidence.startedAt ??= now;
|
|
313
|
+
evidence.status = 'running';
|
|
314
|
+
evidence.lastUpdatedAt = now;
|
|
315
|
+
for (const root of this.uniqueEvidenceRoots(roots)) {
|
|
316
|
+
if (!evidence.roots.includes(root)) {
|
|
317
|
+
evidence.roots.push(root);
|
|
318
|
+
}
|
|
319
|
+
if (!record.baselineSnapshots.has(root)) {
|
|
320
|
+
record.baselineSnapshots.set(root, this.captureFileSnapshot(root));
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
captureStepTerminalEvidence(stepName, output, process, meta) {
|
|
325
|
+
const record = this.getOrCreateStepEvidenceRecord(stepName);
|
|
326
|
+
const evidence = record.evidence;
|
|
327
|
+
const observedAt = new Date().toISOString();
|
|
328
|
+
const append = (current, next) => {
|
|
329
|
+
if (!next)
|
|
330
|
+
return current;
|
|
331
|
+
return current ? `${current}\n${next}` : next;
|
|
332
|
+
};
|
|
333
|
+
if (output.stdout) {
|
|
334
|
+
evidence.output.stdout = append(evidence.output.stdout, output.stdout);
|
|
335
|
+
for (const signal of this.extractCompletionSignals(output.stdout, 'stdout', observedAt, meta)) {
|
|
336
|
+
evidence.coordinationSignals.push(signal);
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
if (output.stderr) {
|
|
340
|
+
evidence.output.stderr = append(evidence.output.stderr, output.stderr);
|
|
341
|
+
for (const signal of this.extractCompletionSignals(output.stderr, 'stderr', observedAt, meta)) {
|
|
342
|
+
evidence.coordinationSignals.push(signal);
|
|
343
|
+
}
|
|
344
|
+
}
|
|
345
|
+
const combinedOutput = output.combined ??
|
|
346
|
+
[output.stdout, output.stderr].filter((value) => Boolean(value)).join('\n');
|
|
347
|
+
if (combinedOutput) {
|
|
348
|
+
evidence.output.combined = append(evidence.output.combined, combinedOutput);
|
|
349
|
+
}
|
|
350
|
+
if (process) {
|
|
351
|
+
if (process.exitCode !== undefined) {
|
|
352
|
+
evidence.process.exitCode = process.exitCode;
|
|
353
|
+
evidence.coordinationSignals.push({
|
|
354
|
+
kind: 'process_exit',
|
|
355
|
+
source: 'process',
|
|
356
|
+
text: `Process exited with code ${process.exitCode}`,
|
|
357
|
+
observedAt,
|
|
358
|
+
value: String(process.exitCode),
|
|
359
|
+
});
|
|
360
|
+
}
|
|
361
|
+
if (process.exitSignal !== undefined) {
|
|
362
|
+
evidence.process.exitSignal = process.exitSignal;
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
evidence.lastUpdatedAt = observedAt;
|
|
366
|
+
}
|
|
367
|
+
finalizeStepEvidence(stepName, status, completedAt, completionReason) {
|
|
368
|
+
const record = this.stepCompletionEvidence.get(stepName);
|
|
369
|
+
if (!record)
|
|
370
|
+
return;
|
|
371
|
+
const evidence = record.evidence;
|
|
372
|
+
const observedAt = completedAt ?? new Date().toISOString();
|
|
373
|
+
evidence.status = status;
|
|
374
|
+
if (status !== 'running') {
|
|
375
|
+
evidence.completedAt = observedAt;
|
|
376
|
+
}
|
|
377
|
+
evidence.lastUpdatedAt = observedAt;
|
|
378
|
+
if (!record.filesCaptured) {
|
|
379
|
+
const existing = new Set(evidence.files.map((file) => `${file.kind}:${file.path}`));
|
|
380
|
+
for (const root of evidence.roots) {
|
|
381
|
+
const before = record.baselineSnapshots.get(root) ?? new Map();
|
|
382
|
+
const after = this.captureFileSnapshot(root);
|
|
383
|
+
for (const change of this.diffFileSnapshots(before, after, root, observedAt)) {
|
|
384
|
+
const key = `${change.kind}:${change.path}`;
|
|
385
|
+
if (existing.has(key))
|
|
386
|
+
continue;
|
|
387
|
+
existing.add(key);
|
|
388
|
+
evidence.files.push(change);
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
record.filesCaptured = true;
|
|
392
|
+
}
|
|
393
|
+
if (completionReason) {
|
|
394
|
+
const decision = this.buildStepCompletionDecision(stepName, completionReason);
|
|
395
|
+
if (decision) {
|
|
396
|
+
void this.trajectory?.stepCompletionDecision(stepName, decision);
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
recordStepToolSideEffect(stepName, effect) {
|
|
401
|
+
const record = this.getOrCreateStepEvidenceRecord(stepName);
|
|
402
|
+
const observedAt = effect.observedAt ?? new Date().toISOString();
|
|
403
|
+
record.evidence.toolSideEffects.push({
|
|
404
|
+
...effect,
|
|
405
|
+
observedAt,
|
|
406
|
+
});
|
|
407
|
+
record.evidence.lastUpdatedAt = observedAt;
|
|
408
|
+
}
|
|
409
|
+
recordChannelEvidence(text, options = {}) {
|
|
410
|
+
const stepName = options.stepName ??
|
|
411
|
+
this.inferStepNameFromChannelText(text) ??
|
|
412
|
+
(options.actor ? this.runtimeStepAgents.get(options.actor)?.stepName : undefined);
|
|
413
|
+
if (!stepName)
|
|
414
|
+
return;
|
|
415
|
+
const record = this.getOrCreateStepEvidenceRecord(stepName);
|
|
416
|
+
const postedAt = new Date().toISOString();
|
|
417
|
+
const sender = options.sender ?? options.actor;
|
|
418
|
+
const signals = this.extractCompletionSignals(text, 'channel', postedAt, {
|
|
419
|
+
sender,
|
|
420
|
+
actor: options.actor,
|
|
421
|
+
role: options.role,
|
|
422
|
+
});
|
|
423
|
+
const channelPost = {
|
|
424
|
+
stepName,
|
|
425
|
+
text,
|
|
426
|
+
postedAt,
|
|
427
|
+
origin: options.origin ?? 'runner_post',
|
|
428
|
+
completionRelevant: signals.length > 0,
|
|
429
|
+
sender,
|
|
430
|
+
actor: options.actor,
|
|
431
|
+
role: options.role,
|
|
432
|
+
target: options.target,
|
|
433
|
+
signals,
|
|
434
|
+
};
|
|
435
|
+
record.evidence.channelPosts.push(channelPost);
|
|
436
|
+
record.evidence.coordinationSignals.push(...signals);
|
|
437
|
+
record.evidence.lastUpdatedAt = postedAt;
|
|
438
|
+
}
|
|
439
|
+
extractCompletionSignals(text, source, observedAt, meta) {
|
|
440
|
+
const signals = [];
|
|
441
|
+
const seen = new Set();
|
|
442
|
+
const add = (kind, signalText, value) => {
|
|
443
|
+
const trimmed = signalText.trim().slice(0, 280);
|
|
444
|
+
if (!trimmed)
|
|
445
|
+
return;
|
|
446
|
+
const key = `${kind}:${trimmed}:${value ?? ''}`;
|
|
447
|
+
if (seen.has(key))
|
|
448
|
+
return;
|
|
449
|
+
seen.add(key);
|
|
450
|
+
signals.push({
|
|
451
|
+
kind,
|
|
452
|
+
source,
|
|
453
|
+
text: trimmed,
|
|
454
|
+
observedAt,
|
|
455
|
+
sender: meta?.sender,
|
|
456
|
+
actor: meta?.actor,
|
|
457
|
+
role: meta?.role,
|
|
458
|
+
value,
|
|
459
|
+
});
|
|
460
|
+
};
|
|
461
|
+
for (const match of text.matchAll(/\bWORKER_DONE\b(?::\s*([^\n]+))?/gi)) {
|
|
462
|
+
add('worker_done', match[0], match[1]?.trim());
|
|
463
|
+
}
|
|
464
|
+
for (const match of text.matchAll(/\bLEAD_DONE\b(?::\s*([^\n]+))?/gi)) {
|
|
465
|
+
add('lead_done', match[0], match[1]?.trim());
|
|
466
|
+
}
|
|
467
|
+
for (const match of text.matchAll(/\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/g)) {
|
|
468
|
+
add('step_complete', match[0], match[1]);
|
|
469
|
+
}
|
|
470
|
+
for (const match of text.matchAll(/\bOWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi)) {
|
|
471
|
+
add('owner_decision', match[0], match[1].toUpperCase());
|
|
472
|
+
}
|
|
473
|
+
for (const match of text.matchAll(/\bREVIEW_DECISION:\s*(APPROVE|REJECT)\b/gi)) {
|
|
474
|
+
add('review_decision', match[0], match[1].toUpperCase());
|
|
475
|
+
}
|
|
476
|
+
if (/\bverification gate observed\b|\bverification passed\b/i.test(text)) {
|
|
477
|
+
add('verification_passed', this.firstMeaningfulLine(text) ?? text);
|
|
478
|
+
}
|
|
479
|
+
if (/\bverification failed\b/i.test(text)) {
|
|
480
|
+
add('verification_failed', this.firstMeaningfulLine(text) ?? text);
|
|
481
|
+
}
|
|
482
|
+
if (/\b(summary|handoff|ready for review|ready for handoff|task complete|work complete|completed work|finished work)\b/i.test(text)) {
|
|
483
|
+
add('task_summary', this.firstMeaningfulLine(text) ?? text);
|
|
484
|
+
}
|
|
485
|
+
return signals;
|
|
486
|
+
}
|
|
487
|
+
inferStepNameFromChannelText(text) {
|
|
488
|
+
const bracketMatch = text.match(/^\*\*\[([^\]]+)\]/);
|
|
489
|
+
if (bracketMatch?.[1])
|
|
490
|
+
return bracketMatch[1];
|
|
491
|
+
const markerMatch = text.match(/\bSTEP_COMPLETE:([A-Za-z0-9_.:-]+)/);
|
|
492
|
+
if (markerMatch?.[1])
|
|
493
|
+
return markerMatch[1];
|
|
494
|
+
return undefined;
|
|
495
|
+
}
|
|
496
|
+
uniqueEvidenceRoots(roots) {
|
|
497
|
+
return [...new Set(roots.filter((root) => Boolean(root)).map((root) => path.resolve(root)))];
|
|
498
|
+
}
|
|
499
|
+
captureFileSnapshot(root) {
|
|
500
|
+
const snapshot = new Map();
|
|
501
|
+
if (!existsSync(root))
|
|
502
|
+
return snapshot;
|
|
503
|
+
const visit = (currentPath) => {
|
|
504
|
+
let entries;
|
|
505
|
+
try {
|
|
506
|
+
entries = readdirSync(currentPath, { withFileTypes: true });
|
|
507
|
+
}
|
|
508
|
+
catch {
|
|
509
|
+
return;
|
|
510
|
+
}
|
|
511
|
+
for (const entry of entries) {
|
|
512
|
+
if (entry.isDirectory() && WorkflowRunner.EVIDENCE_IGNORED_DIRS.has(entry.name)) {
|
|
513
|
+
continue;
|
|
514
|
+
}
|
|
515
|
+
const fullPath = path.join(currentPath, entry.name);
|
|
516
|
+
if (entry.isDirectory()) {
|
|
517
|
+
visit(fullPath);
|
|
518
|
+
continue;
|
|
519
|
+
}
|
|
520
|
+
try {
|
|
521
|
+
const stats = statSync(fullPath);
|
|
522
|
+
if (!stats.isFile())
|
|
523
|
+
continue;
|
|
524
|
+
snapshot.set(fullPath, { mtimeMs: stats.mtimeMs, size: stats.size });
|
|
525
|
+
}
|
|
526
|
+
catch {
|
|
527
|
+
// Best-effort evidence collection only.
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
};
|
|
531
|
+
try {
|
|
532
|
+
const stats = statSync(root);
|
|
533
|
+
if (stats.isFile()) {
|
|
534
|
+
snapshot.set(root, { mtimeMs: stats.mtimeMs, size: stats.size });
|
|
535
|
+
return snapshot;
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
catch {
|
|
539
|
+
return snapshot;
|
|
540
|
+
}
|
|
541
|
+
visit(root);
|
|
542
|
+
return snapshot;
|
|
543
|
+
}
|
|
544
|
+
diffFileSnapshots(before, after, root, observedAt) {
|
|
545
|
+
const allPaths = new Set([...before.keys(), ...after.keys()]);
|
|
546
|
+
const changes = [];
|
|
547
|
+
for (const filePath of allPaths) {
|
|
548
|
+
const prior = before.get(filePath);
|
|
549
|
+
const next = after.get(filePath);
|
|
550
|
+
let kind;
|
|
551
|
+
if (!prior && next) {
|
|
552
|
+
kind = 'created';
|
|
553
|
+
}
|
|
554
|
+
else if (prior && !next) {
|
|
555
|
+
kind = 'deleted';
|
|
556
|
+
}
|
|
557
|
+
else if (prior && next && (prior.mtimeMs !== next.mtimeMs || prior.size !== next.size)) {
|
|
558
|
+
kind = 'modified';
|
|
559
|
+
}
|
|
560
|
+
if (!kind)
|
|
561
|
+
continue;
|
|
562
|
+
changes.push({
|
|
563
|
+
path: this.normalizeEvidencePath(filePath),
|
|
564
|
+
kind,
|
|
565
|
+
observedAt,
|
|
566
|
+
root,
|
|
567
|
+
});
|
|
568
|
+
}
|
|
569
|
+
return changes.sort((a, b) => a.path.localeCompare(b.path));
|
|
570
|
+
}
|
|
571
|
+
normalizeEvidencePath(filePath) {
|
|
572
|
+
const relative = path.relative(this.cwd, filePath);
|
|
573
|
+
if (!relative || relative === '')
|
|
574
|
+
return path.basename(filePath);
|
|
575
|
+
return relative.startsWith('..') ? filePath : relative;
|
|
576
|
+
}
|
|
577
|
+
buildStepCompletionDecision(stepName, completionReason) {
|
|
578
|
+
let reason;
|
|
579
|
+
let mode;
|
|
580
|
+
switch (completionReason) {
|
|
581
|
+
case 'completed_verified':
|
|
582
|
+
mode = 'verification';
|
|
583
|
+
reason = 'Verification passed';
|
|
584
|
+
break;
|
|
585
|
+
case 'completed_by_evidence':
|
|
586
|
+
mode = 'evidence';
|
|
587
|
+
reason = 'Completion inferred from collected evidence';
|
|
588
|
+
break;
|
|
589
|
+
case 'completed_by_owner_decision': {
|
|
590
|
+
const evidence = this.getStepCompletionEvidence(stepName);
|
|
591
|
+
const markerObserved = evidence?.coordinationSignals.some((signal) => signal.kind === 'step_complete');
|
|
592
|
+
mode = markerObserved ? 'marker' : 'owner_decision';
|
|
593
|
+
reason = markerObserved ? 'Legacy STEP_COMPLETE marker observed' : 'Owner approved completion';
|
|
594
|
+
break;
|
|
595
|
+
}
|
|
596
|
+
default:
|
|
597
|
+
return undefined;
|
|
598
|
+
}
|
|
599
|
+
return {
|
|
600
|
+
mode,
|
|
601
|
+
reason,
|
|
602
|
+
evidence: this.buildTrajectoryCompletionEvidence(stepName),
|
|
603
|
+
};
|
|
604
|
+
}
|
|
605
|
+
buildTrajectoryCompletionEvidence(stepName) {
|
|
606
|
+
const evidence = this.getStepCompletionEvidence(stepName);
|
|
607
|
+
if (!evidence)
|
|
608
|
+
return undefined;
|
|
609
|
+
const signals = evidence.coordinationSignals
|
|
610
|
+
.slice(-6)
|
|
611
|
+
.map((signal) => signal.value ?? signal.text);
|
|
612
|
+
const channelPosts = evidence.channelPosts
|
|
613
|
+
.filter((post) => post.completionRelevant)
|
|
614
|
+
.slice(-3)
|
|
615
|
+
.map((post) => post.text.slice(0, 160));
|
|
616
|
+
const files = evidence.files.slice(0, 6).map((file) => `${file.kind}:${file.path}`);
|
|
617
|
+
const summaryParts = [];
|
|
618
|
+
if (signals.length > 0)
|
|
619
|
+
summaryParts.push(`${signals.length} signal(s)`);
|
|
620
|
+
if (channelPosts.length > 0)
|
|
621
|
+
summaryParts.push(`${channelPosts.length} relevant channel post(s)`);
|
|
622
|
+
if (files.length > 0)
|
|
623
|
+
summaryParts.push(`${files.length} file change(s)`);
|
|
624
|
+
if (evidence.process.exitCode !== undefined) {
|
|
625
|
+
summaryParts.push(`exit=${evidence.process.exitCode}`);
|
|
626
|
+
}
|
|
627
|
+
return {
|
|
628
|
+
summary: summaryParts.length > 0 ? summaryParts.join(', ') : undefined,
|
|
629
|
+
signals: signals.length > 0 ? signals : undefined,
|
|
630
|
+
channelPosts: channelPosts.length > 0 ? channelPosts : undefined,
|
|
631
|
+
files: files.length > 0 ? files : undefined,
|
|
632
|
+
exitCode: evidence.process.exitCode,
|
|
633
|
+
};
|
|
634
|
+
}
|
|
185
635
|
// ── Progress logging ────────────────────────────────────────────────────
|
|
186
636
|
/** Log a progress message with elapsed time since run start. */
|
|
187
637
|
log(msg) {
|
|
@@ -985,9 +1435,11 @@ export class WorkflowRunner {
|
|
|
985
1435
|
if (state.row.status === 'failed') {
|
|
986
1436
|
state.row.status = 'pending';
|
|
987
1437
|
state.row.error = undefined;
|
|
1438
|
+
state.row.completionReason = undefined;
|
|
988
1439
|
await this.db.updateStep(state.row.id, {
|
|
989
1440
|
status: 'pending',
|
|
990
1441
|
error: undefined,
|
|
1442
|
+
completionReason: undefined,
|
|
991
1443
|
updatedAt: new Date().toISOString(),
|
|
992
1444
|
});
|
|
993
1445
|
}
|
|
@@ -1007,6 +1459,8 @@ export class WorkflowRunner {
|
|
|
1007
1459
|
this.currentConfig = config;
|
|
1008
1460
|
this.currentRunId = runId;
|
|
1009
1461
|
this.runStartTime = Date.now();
|
|
1462
|
+
this.runtimeStepAgents.clear();
|
|
1463
|
+
this.stepCompletionEvidence.clear();
|
|
1010
1464
|
this.log(`Starting workflow "${workflow.name}" (${workflow.steps.length} steps)`);
|
|
1011
1465
|
// Initialize trajectory recording
|
|
1012
1466
|
this.trajectory = new WorkflowTrajectory(config.trajectories, runId, this.cwd);
|
|
@@ -1132,8 +1586,24 @@ export class WorkflowRunner {
|
|
|
1132
1586
|
const fromShort = msg.from.replace(/-[a-f0-9]{6,}$/, '');
|
|
1133
1587
|
const toShort = msg.to.replace(/-[a-f0-9]{6,}$/, '');
|
|
1134
1588
|
this.log(`[msg] ${fromShort} → ${toShort}: ${body}`);
|
|
1589
|
+
if (this.channel && (msg.to === this.channel || msg.to === `#${this.channel}`)) {
|
|
1590
|
+
const runtimeAgent = this.runtimeStepAgents.get(msg.from);
|
|
1591
|
+
this.recordChannelEvidence(msg.text, {
|
|
1592
|
+
sender: runtimeAgent?.logicalName ?? msg.from,
|
|
1593
|
+
actor: msg.from,
|
|
1594
|
+
role: runtimeAgent?.role,
|
|
1595
|
+
target: msg.to,
|
|
1596
|
+
origin: 'relay_message',
|
|
1597
|
+
stepName: runtimeAgent?.stepName,
|
|
1598
|
+
});
|
|
1599
|
+
}
|
|
1135
1600
|
const supervision = this.supervisedRuntimeAgents.get(msg.from);
|
|
1136
1601
|
if (supervision?.role === 'owner') {
|
|
1602
|
+
this.recordStepToolSideEffect(supervision.stepName, {
|
|
1603
|
+
type: 'owner_monitoring',
|
|
1604
|
+
detail: `Owner messaged ${msg.to}: ${msg.text.slice(0, 120)}`,
|
|
1605
|
+
raw: { to: msg.to, text: msg.text },
|
|
1606
|
+
});
|
|
1137
1607
|
void this.trajectory?.ownerMonitoringEvent(supervision.stepName, supervision.logicalName, `Messaged ${msg.to}: ${msg.text.slice(0, 120)}`, { to: msg.to, text: msg.text });
|
|
1138
1608
|
}
|
|
1139
1609
|
};
|
|
@@ -1288,6 +1758,7 @@ export class WorkflowRunner {
|
|
|
1288
1758
|
updatedAt: new Date().toISOString(),
|
|
1289
1759
|
});
|
|
1290
1760
|
this.emit({ type: 'step:failed', runId, stepName, error: 'Cancelled' });
|
|
1761
|
+
this.finalizeStepEvidence(stepName, 'failed');
|
|
1291
1762
|
}
|
|
1292
1763
|
}
|
|
1293
1764
|
this.emit({ type: 'run:cancelled', runId });
|
|
@@ -1328,6 +1799,7 @@ export class WorkflowRunner {
|
|
|
1328
1799
|
this.lastIdleLog.clear();
|
|
1329
1800
|
this.lastActivity.clear();
|
|
1330
1801
|
this.supervisedRuntimeAgents.clear();
|
|
1802
|
+
this.runtimeStepAgents.clear();
|
|
1331
1803
|
this.log('Shutting down broker...');
|
|
1332
1804
|
await this.relay?.shutdown();
|
|
1333
1805
|
this.relay = undefined;
|
|
@@ -1435,6 +1907,9 @@ export class WorkflowRunner {
|
|
|
1435
1907
|
attempts: (state?.row.retryCount ?? 0) + 1,
|
|
1436
1908
|
output: state?.row.output,
|
|
1437
1909
|
verificationPassed: state?.row.status === 'completed' && step.verification !== undefined,
|
|
1910
|
+
completionMode: state?.row.completionReason
|
|
1911
|
+
? this.buildStepCompletionDecision(step.name, state.row.completionReason)?.mode
|
|
1912
|
+
: undefined,
|
|
1438
1913
|
});
|
|
1439
1914
|
}
|
|
1440
1915
|
}
|
|
@@ -1595,11 +2070,21 @@ export class WorkflowRunner {
|
|
|
1595
2070
|
const maxRetries = step.retries ?? errorHandling?.maxRetries ?? 0;
|
|
1596
2071
|
const retryDelay = errorHandling?.retryDelayMs ?? 1000;
|
|
1597
2072
|
let lastError;
|
|
2073
|
+
let lastCompletionReason;
|
|
2074
|
+
let lastExitCode;
|
|
2075
|
+
let lastExitSignal;
|
|
1598
2076
|
for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
|
|
1599
2077
|
this.checkAborted();
|
|
2078
|
+
lastExitCode = undefined;
|
|
2079
|
+
lastExitSignal = undefined;
|
|
1600
2080
|
if (attempt > 0) {
|
|
1601
2081
|
this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
|
|
1602
2082
|
this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
|
|
2083
|
+
this.recordStepToolSideEffect(step.name, {
|
|
2084
|
+
type: 'retry',
|
|
2085
|
+
detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
|
|
2086
|
+
raw: { attempt, maxRetries },
|
|
2087
|
+
});
|
|
1603
2088
|
state.row.retryCount = attempt;
|
|
1604
2089
|
await this.db.updateStep(state.row.id, {
|
|
1605
2090
|
retryCount: attempt,
|
|
@@ -1609,9 +2094,13 @@ export class WorkflowRunner {
|
|
|
1609
2094
|
}
|
|
1610
2095
|
// Mark step as running
|
|
1611
2096
|
state.row.status = 'running';
|
|
2097
|
+
state.row.error = undefined;
|
|
2098
|
+
state.row.completionReason = undefined;
|
|
1612
2099
|
state.row.startedAt = new Date().toISOString();
|
|
1613
2100
|
await this.db.updateStep(state.row.id, {
|
|
1614
2101
|
status: 'running',
|
|
2102
|
+
error: undefined,
|
|
2103
|
+
completionReason: undefined,
|
|
1615
2104
|
startedAt: state.row.startedAt,
|
|
1616
2105
|
updatedAt: new Date().toISOString(),
|
|
1617
2106
|
});
|
|
@@ -1629,32 +2118,40 @@ export class WorkflowRunner {
|
|
|
1629
2118
|
});
|
|
1630
2119
|
// Resolve step workdir (named path reference) for deterministic steps
|
|
1631
2120
|
const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
|
|
2121
|
+
this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
|
|
1632
2122
|
try {
|
|
1633
2123
|
// Delegate to executor if present
|
|
1634
2124
|
if (this.executor?.executeDeterministicStep) {
|
|
1635
2125
|
const result = await this.executor.executeDeterministicStep(step, resolvedCommand, stepCwd);
|
|
2126
|
+
lastExitCode = result.exitCode;
|
|
1636
2127
|
const failOnError = step.failOnError !== false;
|
|
1637
2128
|
if (failOnError && result.exitCode !== 0) {
|
|
1638
2129
|
throw new Error(`Command failed with exit code ${result.exitCode}: ${result.output.slice(0, 500)}`);
|
|
1639
2130
|
}
|
|
1640
2131
|
const output = step.captureOutput !== false ? result.output : `Command completed (exit code ${result.exitCode})`;
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
2132
|
+
this.captureStepTerminalEvidence(step.name, { stdout: result.output, combined: result.output }, { exitCode: result.exitCode });
|
|
2133
|
+
const verificationResult = step.verification
|
|
2134
|
+
? this.runVerification(step.verification, output, step.name)
|
|
2135
|
+
: undefined;
|
|
1644
2136
|
// Mark completed
|
|
1645
2137
|
state.row.status = 'completed';
|
|
1646
2138
|
state.row.output = output;
|
|
2139
|
+
state.row.completionReason = verificationResult?.completionReason;
|
|
1647
2140
|
state.row.completedAt = new Date().toISOString();
|
|
1648
2141
|
await this.db.updateStep(state.row.id, {
|
|
1649
2142
|
status: 'completed',
|
|
1650
2143
|
output,
|
|
2144
|
+
completionReason: verificationResult?.completionReason,
|
|
1651
2145
|
completedAt: state.row.completedAt,
|
|
1652
2146
|
updatedAt: new Date().toISOString(),
|
|
1653
2147
|
});
|
|
1654
2148
|
await this.persistStepOutput(runId, step.name, output);
|
|
1655
2149
|
this.emit({ type: 'step:completed', runId, stepName: step.name, output });
|
|
2150
|
+
this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, verificationResult?.completionReason);
|
|
1656
2151
|
return;
|
|
1657
2152
|
}
|
|
2153
|
+
let commandStdout = '';
|
|
2154
|
+
let commandStderr = '';
|
|
1658
2155
|
const output = await new Promise((resolve, reject) => {
|
|
1659
2156
|
const child = cpSpawn('sh', ['-c', resolvedCommand], {
|
|
1660
2157
|
stdio: 'pipe',
|
|
@@ -1689,7 +2186,7 @@ export class WorkflowRunner {
|
|
|
1689
2186
|
child.stderr?.on('data', (chunk) => {
|
|
1690
2187
|
stderrChunks.push(chunk.toString());
|
|
1691
2188
|
});
|
|
1692
|
-
child.on('close', (code) => {
|
|
2189
|
+
child.on('close', (code, signal) => {
|
|
1693
2190
|
if (timer)
|
|
1694
2191
|
clearTimeout(timer);
|
|
1695
2192
|
if (abortHandler && abortSignal) {
|
|
@@ -1705,6 +2202,10 @@ export class WorkflowRunner {
|
|
|
1705
2202
|
}
|
|
1706
2203
|
const stdout = stdoutChunks.join('');
|
|
1707
2204
|
const stderr = stderrChunks.join('');
|
|
2205
|
+
commandStdout = stdout;
|
|
2206
|
+
commandStderr = stderr;
|
|
2207
|
+
lastExitCode = code ?? undefined;
|
|
2208
|
+
lastExitSignal = signal ?? undefined;
|
|
1708
2209
|
// Check exit code unless failOnError is explicitly false
|
|
1709
2210
|
const failOnError = step.failOnError !== false;
|
|
1710
2211
|
if (failOnError && code !== 0 && code !== null) {
|
|
@@ -1722,31 +2223,41 @@ export class WorkflowRunner {
|
|
|
1722
2223
|
reject(new Error(`Failed to execute command: ${err.message}`));
|
|
1723
2224
|
});
|
|
1724
2225
|
});
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
2226
|
+
this.captureStepTerminalEvidence(step.name, {
|
|
2227
|
+
stdout: commandStdout || output,
|
|
2228
|
+
stderr: commandStderr,
|
|
2229
|
+
combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
|
|
2230
|
+
}, { exitCode: lastExitCode, exitSignal: lastExitSignal });
|
|
2231
|
+
const verificationResult = step.verification
|
|
2232
|
+
? this.runVerification(step.verification, output, step.name)
|
|
2233
|
+
: undefined;
|
|
1728
2234
|
// Mark completed
|
|
1729
2235
|
state.row.status = 'completed';
|
|
1730
2236
|
state.row.output = output;
|
|
2237
|
+
state.row.completionReason = verificationResult?.completionReason;
|
|
1731
2238
|
state.row.completedAt = new Date().toISOString();
|
|
1732
2239
|
await this.db.updateStep(state.row.id, {
|
|
1733
2240
|
status: 'completed',
|
|
1734
2241
|
output,
|
|
2242
|
+
completionReason: verificationResult?.completionReason,
|
|
1735
2243
|
completedAt: state.row.completedAt,
|
|
1736
2244
|
updatedAt: new Date().toISOString(),
|
|
1737
2245
|
});
|
|
1738
2246
|
// Persist step output
|
|
1739
2247
|
await this.persistStepOutput(runId, step.name, output);
|
|
1740
2248
|
this.emit({ type: 'step:completed', runId, stepName: step.name, output });
|
|
2249
|
+
this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, verificationResult?.completionReason);
|
|
1741
2250
|
return;
|
|
1742
2251
|
}
|
|
1743
2252
|
catch (err) {
|
|
1744
2253
|
lastError = err instanceof Error ? err.message : String(err);
|
|
2254
|
+
lastCompletionReason =
|
|
2255
|
+
err instanceof WorkflowCompletionError ? err.completionReason : undefined;
|
|
1745
2256
|
}
|
|
1746
2257
|
}
|
|
1747
2258
|
const errorMsg = lastError ?? 'Unknown error';
|
|
1748
2259
|
this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
|
|
1749
|
-
await this.markStepFailed(state, errorMsg, runId);
|
|
2260
|
+
await this.markStepFailed(state, errorMsg, runId, { exitCode: lastExitCode, exitSignal: lastExitSignal }, lastCompletionReason);
|
|
1750
2261
|
throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
|
|
1751
2262
|
}
|
|
1752
2263
|
/**
|
|
@@ -1758,12 +2269,18 @@ export class WorkflowRunner {
|
|
|
1758
2269
|
const state = stepStates.get(step.name);
|
|
1759
2270
|
if (!state)
|
|
1760
2271
|
throw new Error(`Step state not found: ${step.name}`);
|
|
2272
|
+
let lastExitCode;
|
|
2273
|
+
let lastExitSignal;
|
|
1761
2274
|
this.checkAborted();
|
|
1762
2275
|
// Mark step as running
|
|
1763
2276
|
state.row.status = 'running';
|
|
2277
|
+
state.row.error = undefined;
|
|
2278
|
+
state.row.completionReason = undefined;
|
|
1764
2279
|
state.row.startedAt = new Date().toISOString();
|
|
1765
2280
|
await this.db.updateStep(state.row.id, {
|
|
1766
2281
|
status: 'running',
|
|
2282
|
+
error: undefined,
|
|
2283
|
+
completionReason: undefined,
|
|
1767
2284
|
startedAt: state.row.startedAt,
|
|
1768
2285
|
updatedAt: new Date().toISOString(),
|
|
1769
2286
|
});
|
|
@@ -1781,6 +2298,7 @@ export class WorkflowRunner {
|
|
|
1781
2298
|
const createBranch = step.createBranch !== false;
|
|
1782
2299
|
// Resolve workdir for worktree steps (same as deterministic/agent steps)
|
|
1783
2300
|
const stepCwd = this.resolveStepWorkdir(step) ?? this.cwd;
|
|
2301
|
+
this.beginStepEvidence(step.name, [stepCwd], state.row.startedAt);
|
|
1784
2302
|
if (!branch) {
|
|
1785
2303
|
const errorMsg = 'Worktree step missing required "branch" field';
|
|
1786
2304
|
await this.markStepFailed(state, errorMsg, runId);
|
|
@@ -1821,6 +2339,10 @@ export class WorkflowRunner {
|
|
|
1821
2339
|
await this.markStepFailed(state, errorMsg, runId);
|
|
1822
2340
|
throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
|
|
1823
2341
|
}
|
|
2342
|
+
let commandStdout = '';
|
|
2343
|
+
let commandStderr = '';
|
|
2344
|
+
let commandExitCode;
|
|
2345
|
+
let commandExitSignal;
|
|
1824
2346
|
const output = await new Promise((resolve, reject) => {
|
|
1825
2347
|
const child = cpSpawn('sh', ['-c', worktreeCmd], {
|
|
1826
2348
|
stdio: 'pipe',
|
|
@@ -1855,7 +2377,7 @@ export class WorkflowRunner {
|
|
|
1855
2377
|
child.stderr?.on('data', (chunk) => {
|
|
1856
2378
|
stderrChunks.push(chunk.toString());
|
|
1857
2379
|
});
|
|
1858
|
-
child.on('close', (code) => {
|
|
2380
|
+
child.on('close', (code, signal) => {
|
|
1859
2381
|
if (timer)
|
|
1860
2382
|
clearTimeout(timer);
|
|
1861
2383
|
if (abortHandler && abortSignal) {
|
|
@@ -1869,7 +2391,13 @@ export class WorkflowRunner {
|
|
|
1869
2391
|
reject(new Error(`Step "${step.name}" timed out (no step timeout set, check global swarm.timeoutMs)`));
|
|
1870
2392
|
return;
|
|
1871
2393
|
}
|
|
2394
|
+
commandStdout = stdoutChunks.join('');
|
|
1872
2395
|
const stderr = stderrChunks.join('');
|
|
2396
|
+
commandStderr = stderr;
|
|
2397
|
+
commandExitCode = code ?? undefined;
|
|
2398
|
+
commandExitSignal = signal ?? undefined;
|
|
2399
|
+
lastExitCode = commandExitCode;
|
|
2400
|
+
lastExitSignal = commandExitSignal;
|
|
1873
2401
|
if (code !== 0 && code !== null) {
|
|
1874
2402
|
reject(new Error(`git worktree add failed with exit code ${code}${stderr ? `: ${stderr.slice(0, 500)}` : ''}`));
|
|
1875
2403
|
return;
|
|
@@ -1886,6 +2414,11 @@ export class WorkflowRunner {
|
|
|
1886
2414
|
reject(new Error(`Failed to execute git worktree command: ${err.message}`));
|
|
1887
2415
|
});
|
|
1888
2416
|
});
|
|
2417
|
+
this.captureStepTerminalEvidence(step.name, {
|
|
2418
|
+
stdout: commandStdout || output,
|
|
2419
|
+
stderr: commandStderr,
|
|
2420
|
+
combined: [commandStdout || output, commandStderr].filter(Boolean).join('\n'),
|
|
2421
|
+
}, { exitCode: commandExitCode, exitSignal: commandExitSignal });
|
|
1889
2422
|
// Mark completed
|
|
1890
2423
|
state.row.status = 'completed';
|
|
1891
2424
|
state.row.output = output;
|
|
@@ -1900,11 +2433,20 @@ export class WorkflowRunner {
|
|
|
1900
2433
|
await this.persistStepOutput(runId, step.name, output);
|
|
1901
2434
|
this.emit({ type: 'step:completed', runId, stepName: step.name, output });
|
|
1902
2435
|
this.postToChannel(`**[${step.name}]** Worktree created at: ${output}\n Branch: ${branch}${!branchExists && createBranch ? ' (created)' : ''}`);
|
|
2436
|
+
this.recordStepToolSideEffect(step.name, {
|
|
2437
|
+
type: 'worktree_created',
|
|
2438
|
+
detail: `Worktree created at ${output}`,
|
|
2439
|
+
raw: { branch, createdBranch: !branchExists && createBranch },
|
|
2440
|
+
});
|
|
2441
|
+
this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt);
|
|
1903
2442
|
}
|
|
1904
2443
|
catch (err) {
|
|
1905
2444
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
1906
2445
|
this.postToChannel(`**[${step.name}]** Failed: ${errorMsg}`);
|
|
1907
|
-
await this.markStepFailed(state, errorMsg, runId
|
|
2446
|
+
await this.markStepFailed(state, errorMsg, runId, {
|
|
2447
|
+
exitCode: lastExitCode,
|
|
2448
|
+
exitSignal: lastExitSignal,
|
|
2449
|
+
});
|
|
1908
2450
|
throw new Error(`Step "${step.name}" failed: ${errorMsg}`);
|
|
1909
2451
|
}
|
|
1910
2452
|
}
|
|
@@ -1925,8 +2467,9 @@ export class WorkflowRunner {
|
|
|
1925
2467
|
}
|
|
1926
2468
|
const specialistDef = WorkflowRunner.resolveAgentDef(rawAgentDef);
|
|
1927
2469
|
const usesOwnerFlow = specialistDef.interactive !== false;
|
|
1928
|
-
const
|
|
1929
|
-
const
|
|
2470
|
+
const usesAutoHardening = usesOwnerFlow && !this.isExplicitInteractiveWorker(specialistDef);
|
|
2471
|
+
const ownerDef = usesAutoHardening ? this.resolveAutoStepOwner(specialistDef, agentMap) : specialistDef;
|
|
2472
|
+
const reviewDef = usesAutoHardening ? this.resolveAutoReviewAgent(ownerDef, agentMap) : undefined;
|
|
1930
2473
|
const supervised = {
|
|
1931
2474
|
specialist: specialistDef,
|
|
1932
2475
|
owner: ownerDef,
|
|
@@ -1946,6 +2489,12 @@ export class WorkflowRunner {
|
|
|
1946
2489
|
let lastError;
|
|
1947
2490
|
let lastExitCode;
|
|
1948
2491
|
let lastExitSignal;
|
|
2492
|
+
let lastCompletionReason;
|
|
2493
|
+
// OWNER_DECISION: INCOMPLETE_RETRY is enforced here at the attempt-loop level so every
|
|
2494
|
+
// interactive execution path shares the same contract:
|
|
2495
|
+
// - retries remaining => throw back into the loop and retry
|
|
2496
|
+
// - maxRetries = 0 => fail immediately after the first retry request
|
|
2497
|
+
// - retry budget exhausted => fail with retry_requested_by_owner, never "completed"
|
|
1949
2498
|
for (let attempt = 0; attempt <= maxRetries; attempt++) {
|
|
1950
2499
|
this.checkAborted();
|
|
1951
2500
|
// Reset per-attempt exit info so stale values don't leak across retries
|
|
@@ -1954,6 +2503,11 @@ export class WorkflowRunner {
|
|
|
1954
2503
|
if (attempt > 0) {
|
|
1955
2504
|
this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
|
|
1956
2505
|
this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
|
|
2506
|
+
this.recordStepToolSideEffect(step.name, {
|
|
2507
|
+
type: 'retry',
|
|
2508
|
+
detail: `Retrying attempt ${attempt + 1}/${maxRetries + 1}`,
|
|
2509
|
+
raw: { attempt, maxRetries },
|
|
2510
|
+
});
|
|
1957
2511
|
state.row.retryCount = attempt;
|
|
1958
2512
|
await this.db.updateStep(state.row.id, {
|
|
1959
2513
|
retryCount: attempt,
|
|
@@ -1965,14 +2519,19 @@ export class WorkflowRunner {
|
|
|
1965
2519
|
try {
|
|
1966
2520
|
// Mark step as running
|
|
1967
2521
|
state.row.status = 'running';
|
|
2522
|
+
state.row.error = undefined;
|
|
2523
|
+
state.row.completionReason = undefined;
|
|
1968
2524
|
state.row.startedAt = new Date().toISOString();
|
|
1969
2525
|
await this.db.updateStep(state.row.id, {
|
|
1970
2526
|
status: 'running',
|
|
2527
|
+
error: undefined,
|
|
2528
|
+
completionReason: undefined,
|
|
1971
2529
|
startedAt: state.row.startedAt,
|
|
1972
2530
|
updatedAt: new Date().toISOString(),
|
|
1973
2531
|
});
|
|
1974
2532
|
this.emit({ type: 'step:started', runId, stepName: step.name });
|
|
1975
|
-
this.
|
|
2533
|
+
this.log(`[${step.name}] Started (owner: ${ownerDef.name}, specialist: ${specialistDef.name})`);
|
|
2534
|
+
this.initializeStepSignalParticipants(step.name, ownerDef.name, specialistDef.name);
|
|
1976
2535
|
await this.trajectory?.stepStarted(step, ownerDef.name, {
|
|
1977
2536
|
role: usesDedicatedOwner ? 'owner' : 'specialist',
|
|
1978
2537
|
owner: ownerDef.name,
|
|
@@ -2021,37 +2580,104 @@ export class WorkflowRunner {
|
|
|
2021
2580
|
};
|
|
2022
2581
|
const effectiveSpecialist = applyStepWorkdir(specialistDef);
|
|
2023
2582
|
const effectiveOwner = applyStepWorkdir(ownerDef);
|
|
2583
|
+
const effectiveReviewer = reviewDef ? applyStepWorkdir(reviewDef) : undefined;
|
|
2584
|
+
this.beginStepEvidence(step.name, [
|
|
2585
|
+
this.resolveAgentCwd(effectiveSpecialist),
|
|
2586
|
+
this.resolveAgentCwd(effectiveOwner),
|
|
2587
|
+
effectiveReviewer ? this.resolveAgentCwd(effectiveReviewer) : undefined,
|
|
2588
|
+
], state.row.startedAt);
|
|
2024
2589
|
let specialistOutput;
|
|
2025
2590
|
let ownerOutput;
|
|
2026
2591
|
let ownerElapsed;
|
|
2592
|
+
let completionReason;
|
|
2027
2593
|
if (usesDedicatedOwner) {
|
|
2028
2594
|
const result = await this.executeSupervisedAgentStep(step, { specialist: effectiveSpecialist, owner: effectiveOwner, reviewer: reviewDef }, resolvedTask, timeoutMs);
|
|
2029
2595
|
specialistOutput = result.specialistOutput;
|
|
2030
2596
|
ownerOutput = result.ownerOutput;
|
|
2031
2597
|
ownerElapsed = result.ownerElapsed;
|
|
2598
|
+
completionReason = result.completionReason;
|
|
2032
2599
|
}
|
|
2033
2600
|
else {
|
|
2034
2601
|
const ownerTask = this.injectStepOwnerContract(step, resolvedTask, effectiveOwner, effectiveSpecialist);
|
|
2602
|
+
const explicitInteractiveWorker = this.isExplicitInteractiveWorker(effectiveOwner);
|
|
2603
|
+
let explicitWorkerHandle;
|
|
2604
|
+
let explicitWorkerCompleted = false;
|
|
2605
|
+
let explicitWorkerOutput = '';
|
|
2035
2606
|
this.log(`[${step.name}] Spawning owner "${effectiveOwner.name}" (cli: ${effectiveOwner.cli})${step.workdir ? ` [workdir: ${step.workdir}]` : ''}`);
|
|
2036
2607
|
const resolvedStep = { ...step, task: ownerTask };
|
|
2037
2608
|
const ownerStartTime = Date.now();
|
|
2038
2609
|
const spawnResult = this.executor
|
|
2039
2610
|
? await this.executor.executeAgentStep(resolvedStep, effectiveOwner, ownerTask, timeoutMs)
|
|
2040
|
-
: await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs
|
|
2611
|
+
: await this.spawnAndWait(effectiveOwner, resolvedStep, timeoutMs, {
|
|
2612
|
+
evidenceStepName: step.name,
|
|
2613
|
+
evidenceRole: usesOwnerFlow ? 'owner' : 'specialist',
|
|
2614
|
+
logicalName: effectiveOwner.name,
|
|
2615
|
+
onSpawned: explicitInteractiveWorker
|
|
2616
|
+
? ({ agent }) => {
|
|
2617
|
+
explicitWorkerHandle = agent;
|
|
2618
|
+
}
|
|
2619
|
+
: undefined,
|
|
2620
|
+
onChunk: explicitInteractiveWorker
|
|
2621
|
+
? ({ chunk }) => {
|
|
2622
|
+
explicitWorkerOutput += WorkflowRunner.stripAnsi(chunk);
|
|
2623
|
+
if (!explicitWorkerCompleted &&
|
|
2624
|
+
this.hasExplicitInteractiveWorkerCompletionEvidence(step, explicitWorkerOutput, ownerTask, resolvedTask)) {
|
|
2625
|
+
explicitWorkerCompleted = true;
|
|
2626
|
+
void explicitWorkerHandle?.release().catch(() => undefined);
|
|
2627
|
+
}
|
|
2628
|
+
}
|
|
2629
|
+
: undefined,
|
|
2630
|
+
});
|
|
2041
2631
|
const output = typeof spawnResult === 'string' ? spawnResult : spawnResult.output;
|
|
2042
2632
|
lastExitCode = typeof spawnResult === 'string' ? undefined : spawnResult.exitCode;
|
|
2043
2633
|
lastExitSignal = typeof spawnResult === 'string' ? undefined : spawnResult.exitSignal;
|
|
2044
2634
|
ownerElapsed = Date.now() - ownerStartTime;
|
|
2045
2635
|
this.log(`[${step.name}] Owner "${effectiveOwner.name}" exited`);
|
|
2046
2636
|
if (usesOwnerFlow) {
|
|
2047
|
-
|
|
2637
|
+
try {
|
|
2638
|
+
const completionDecision = this.resolveOwnerCompletionDecision(step, output, output, ownerTask, resolvedTask);
|
|
2639
|
+
completionReason = completionDecision.completionReason;
|
|
2640
|
+
}
|
|
2641
|
+
catch (error) {
|
|
2642
|
+
const canUseVerificationFallback = !usesDedicatedOwner &&
|
|
2643
|
+
step.verification &&
|
|
2644
|
+
error instanceof WorkflowCompletionError &&
|
|
2645
|
+
error.completionReason === 'failed_no_evidence';
|
|
2646
|
+
if (!canUseVerificationFallback) {
|
|
2647
|
+
throw error;
|
|
2648
|
+
}
|
|
2649
|
+
}
|
|
2048
2650
|
}
|
|
2049
2651
|
specialistOutput = output;
|
|
2050
2652
|
ownerOutput = output;
|
|
2051
2653
|
}
|
|
2052
|
-
//
|
|
2053
|
-
|
|
2054
|
-
|
|
2654
|
+
// Even non-interactive steps can emit an explicit OWNER_DECISION contract.
|
|
2655
|
+
// Honor retry/fail/clarification signals before verification-driven success so
|
|
2656
|
+
// real runs stay consistent with interactive owner flows.
|
|
2657
|
+
if (!usesOwnerFlow) {
|
|
2658
|
+
const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
|
|
2659
|
+
if (explicitOwnerDecision?.decision === 'INCOMPLETE_RETRY') {
|
|
2660
|
+
throw new WorkflowCompletionError(`Step "${step.name}" owner requested retry${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
|
|
2661
|
+
}
|
|
2662
|
+
if (explicitOwnerDecision?.decision === 'INCOMPLETE_FAIL') {
|
|
2663
|
+
throw new WorkflowCompletionError(`Step "${step.name}" owner marked the step incomplete${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'failed_owner_decision');
|
|
2664
|
+
}
|
|
2665
|
+
if (explicitOwnerDecision?.decision === 'NEEDS_CLARIFICATION') {
|
|
2666
|
+
throw new WorkflowCompletionError(`Step "${step.name}" owner requested clarification before completion${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
|
|
2667
|
+
}
|
|
2668
|
+
}
|
|
2669
|
+
// Run verification if configured.
|
|
2670
|
+
// Self-owned interactive steps still need verification fallback so
|
|
2671
|
+
// explicit OWNER_DECISION output is not mandatory for the happy path.
|
|
2672
|
+
if (step.verification && (!usesOwnerFlow || !usesDedicatedOwner) && !completionReason) {
|
|
2673
|
+
const verificationResult = this.runVerification(step.verification, specialistOutput, step.name, effectiveOwner.interactive === false ? undefined : resolvedTask);
|
|
2674
|
+
completionReason = verificationResult.completionReason;
|
|
2675
|
+
}
|
|
2676
|
+
// Retry-style owner decisions are control-flow signals, not terminal success states.
|
|
2677
|
+
// Guard here so they cannot accidentally fall through into review or completed-step
|
|
2678
|
+
// persistence if a future branch returns a completionReason instead of throwing.
|
|
2679
|
+
if (completionReason === 'retry_requested_by_owner') {
|
|
2680
|
+
throw new WorkflowCompletionError(`Step "${step.name}" owner requested another attempt`, 'retry_requested_by_owner');
|
|
2055
2681
|
}
|
|
2056
2682
|
// Every interactive step gets a review pass; pick a dedicated reviewer when available.
|
|
2057
2683
|
let combinedOutput = specialistOutput;
|
|
@@ -2063,21 +2689,29 @@ export class WorkflowRunner {
|
|
|
2063
2689
|
// Mark completed
|
|
2064
2690
|
state.row.status = 'completed';
|
|
2065
2691
|
state.row.output = combinedOutput;
|
|
2692
|
+
state.row.completionReason = completionReason;
|
|
2066
2693
|
state.row.completedAt = new Date().toISOString();
|
|
2067
2694
|
await this.db.updateStep(state.row.id, {
|
|
2068
2695
|
status: 'completed',
|
|
2069
2696
|
output: combinedOutput,
|
|
2697
|
+
completionReason,
|
|
2070
2698
|
completedAt: state.row.completedAt,
|
|
2071
2699
|
updatedAt: new Date().toISOString(),
|
|
2072
2700
|
});
|
|
2073
2701
|
// Persist step output to disk so it survives restarts and is inspectable
|
|
2074
2702
|
await this.persistStepOutput(runId, step.name, combinedOutput);
|
|
2075
2703
|
this.emit({ type: 'step:completed', runId, stepName: step.name, output: combinedOutput, exitCode: lastExitCode, exitSignal: lastExitSignal });
|
|
2704
|
+
this.finalizeStepEvidence(step.name, 'completed', state.row.completedAt, completionReason);
|
|
2076
2705
|
await this.trajectory?.stepCompleted(step, combinedOutput, attempt + 1);
|
|
2077
2706
|
return;
|
|
2078
2707
|
}
|
|
2079
2708
|
catch (err) {
|
|
2080
2709
|
lastError = err instanceof Error ? err.message : String(err);
|
|
2710
|
+
lastCompletionReason =
|
|
2711
|
+
err instanceof WorkflowCompletionError ? err.completionReason : undefined;
|
|
2712
|
+
if (lastCompletionReason === 'retry_requested_by_owner' && attempt >= maxRetries) {
|
|
2713
|
+
lastError = this.buildOwnerRetryBudgetExceededMessage(step.name, maxRetries, lastError);
|
|
2714
|
+
}
|
|
2081
2715
|
if (err instanceof SpawnExitError) {
|
|
2082
2716
|
lastExitCode = err.exitCode;
|
|
2083
2717
|
lastExitSignal = err.exitSignal;
|
|
@@ -2104,9 +2738,27 @@ export class WorkflowRunner {
|
|
|
2104
2738
|
await this.markStepFailed(state, lastError ?? 'Unknown error', runId, {
|
|
2105
2739
|
exitCode: lastExitCode,
|
|
2106
2740
|
exitSignal: lastExitSignal,
|
|
2107
|
-
});
|
|
2741
|
+
}, lastCompletionReason);
|
|
2108
2742
|
throw new Error(`Step "${step.name}" failed after ${maxRetries} retries: ${lastError ?? 'Unknown error'}`);
|
|
2109
2743
|
}
|
|
2744
|
+
buildOwnerRetryBudgetExceededMessage(stepName, maxRetries, ownerDecisionError) {
|
|
2745
|
+
const attempts = maxRetries + 1;
|
|
2746
|
+
const prefix = `Step "${stepName}" `;
|
|
2747
|
+
const normalizedDecision = ownerDecisionError?.startsWith(prefix)
|
|
2748
|
+
? ownerDecisionError.slice(prefix.length).trim()
|
|
2749
|
+
: ownerDecisionError?.trim();
|
|
2750
|
+
const decisionSuffix = normalizedDecision
|
|
2751
|
+
? ` Latest owner decision: ${normalizedDecision}`
|
|
2752
|
+
: '';
|
|
2753
|
+
if (maxRetries === 0) {
|
|
2754
|
+
return (`Step "${stepName}" owner requested another attempt, but no retries are configured ` +
|
|
2755
|
+
`(maxRetries=0). Configure retries > 0 to allow OWNER_DECISION: INCOMPLETE_RETRY.` +
|
|
2756
|
+
decisionSuffix);
|
|
2757
|
+
}
|
|
2758
|
+
return (`Step "${stepName}" owner requested another attempt after ${attempts} total attempts, ` +
|
|
2759
|
+
`but the retry budget is exhausted (maxRetries=${maxRetries}).` +
|
|
2760
|
+
decisionSuffix);
|
|
2761
|
+
}
|
|
2110
2762
|
injectStepOwnerContract(step, resolvedTask, ownerDef, specialistDef) {
|
|
2111
2763
|
if (ownerDef.interactive === false)
|
|
2112
2764
|
return resolvedTask;
|
|
@@ -2119,12 +2771,19 @@ export class WorkflowRunner {
|
|
|
2119
2771
|
`- You are the accountable owner for step "${step.name}".\n` +
|
|
2120
2772
|
(specialistNote ? `- ${specialistNote}\n` : '') +
|
|
2121
2773
|
`- If you delegate, you must still verify completion yourself.\n` +
|
|
2122
|
-
`-
|
|
2774
|
+
`- Preferred final decision format:\n` +
|
|
2775
|
+
` OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
|
|
2776
|
+
` REASON: <one sentence>\n` +
|
|
2777
|
+
`- Legacy completion marker still supported: STEP_COMPLETE:${step.name}\n` +
|
|
2123
2778
|
`- Then self-terminate immediately with /exit.`);
|
|
2124
2779
|
}
|
|
2125
2780
|
buildOwnerSupervisorTask(step, originalTask, supervised, workerRuntimeName) {
|
|
2126
2781
|
const verificationGuide = this.buildSupervisorVerificationGuide(step.verification);
|
|
2127
2782
|
const channelLine = this.channel ? `#${this.channel}` : '(workflow channel unavailable)';
|
|
2783
|
+
const channelContract = this.channel
|
|
2784
|
+
? `- Prefer Relaycast/group-chat handoff signals over terminal sentinels: wait for the worker to post \`WORKER_DONE: <brief summary>\` in ${channelLine}\n` +
|
|
2785
|
+
`- When you have validated the handoff, post \`LEAD_DONE: <brief summary>\` to ${channelLine} before you exit\n`
|
|
2786
|
+
: '';
|
|
2128
2787
|
return (`You are the step owner/supervisor for step "${step.name}".\n\n` +
|
|
2129
2788
|
`Worker: ${supervised.specialist.name} (runtime: ${workerRuntimeName}) on ${channelLine}\n` +
|
|
2130
2789
|
`Task: ${originalTask}\n\n` +
|
|
@@ -2133,9 +2792,22 @@ export class WorkflowRunner {
|
|
|
2133
2792
|
`- Watch ${channelLine} for the worker's progress messages and mirrored PTY output\n` +
|
|
2134
2793
|
`- Check file changes: run \`git diff --stat\` or inspect expected files directly\n` +
|
|
2135
2794
|
`- Ask the worker directly on ${channelLine} if you need a status update\n` +
|
|
2795
|
+
channelContract +
|
|
2136
2796
|
verificationGuide +
|
|
2137
|
-
`\nWhen you
|
|
2138
|
-
`
|
|
2797
|
+
`\nWhen you have enough evidence, return:\n` +
|
|
2798
|
+
`OWNER_DECISION: <one of COMPLETE, INCOMPLETE_RETRY, INCOMPLETE_FAIL, NEEDS_CLARIFICATION>\n` +
|
|
2799
|
+
`REASON: <one sentence>\n` +
|
|
2800
|
+
`Legacy completion marker still supported: STEP_COMPLETE:${step.name}`);
|
|
2801
|
+
}
|
|
2802
|
+
buildWorkerHandoffTask(step, originalTask, supervised) {
|
|
2803
|
+
if (!this.channel)
|
|
2804
|
+
return originalTask;
|
|
2805
|
+
return (`${originalTask}\n\n---\n` +
|
|
2806
|
+
`WORKER COMPLETION CONTRACT:\n` +
|
|
2807
|
+
`- You are handing work off to owner "${supervised.owner.name}" for step "${step.name}".\n` +
|
|
2808
|
+
`- When your work is ready for review, post to #${this.channel}: \`WORKER_DONE: <brief summary>\`\n` +
|
|
2809
|
+
`- Do not rely on terminal output alone for handoff; use the workflow group chat signal above.\n` +
|
|
2810
|
+
`- After posting your handoff signal, self-terminate with /exit unless the owner asks for follow-up.`);
|
|
2139
2811
|
}
|
|
2140
2812
|
buildSupervisorVerificationGuide(verification) {
|
|
2141
2813
|
if (!verification)
|
|
@@ -2155,8 +2827,9 @@ export class WorkflowRunner {
|
|
|
2155
2827
|
}
|
|
2156
2828
|
async executeSupervisedAgentStep(step, supervised, resolvedTask, timeoutMs) {
|
|
2157
2829
|
if (this.executor) {
|
|
2830
|
+
const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
|
|
2158
2831
|
const supervisorTask = this.buildOwnerSupervisorTask(step, resolvedTask, supervised, supervised.specialist.name);
|
|
2159
|
-
const specialistStep = { ...step, task:
|
|
2832
|
+
const specialistStep = { ...step, task: specialistTask };
|
|
2160
2833
|
const ownerStep = {
|
|
2161
2834
|
...step,
|
|
2162
2835
|
name: `${step.name}-owner`,
|
|
@@ -2164,16 +2837,21 @@ export class WorkflowRunner {
|
|
|
2164
2837
|
task: supervisorTask,
|
|
2165
2838
|
};
|
|
2166
2839
|
this.log(`[${step.name}] Spawning specialist "${supervised.specialist.name}" and owner "${supervised.owner.name}"`);
|
|
2167
|
-
const specialistPromise = this.executor.executeAgentStep(specialistStep, supervised.specialist,
|
|
2840
|
+
const specialistPromise = this.executor.executeAgentStep(specialistStep, supervised.specialist, specialistTask, timeoutMs);
|
|
2168
2841
|
// Guard against unhandled rejection if owner fails before specialist settles
|
|
2169
2842
|
const specialistSettled = specialistPromise.catch(() => undefined);
|
|
2170
2843
|
try {
|
|
2171
2844
|
const ownerStartTime = Date.now();
|
|
2172
2845
|
const ownerOutput = await this.executor.executeAgentStep(ownerStep, supervised.owner, supervisorTask, timeoutMs);
|
|
2173
2846
|
const ownerElapsed = Date.now() - ownerStartTime;
|
|
2174
|
-
this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
|
|
2175
2847
|
const specialistOutput = await specialistPromise;
|
|
2176
|
-
|
|
2848
|
+
const completionDecision = this.resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, supervisorTask, resolvedTask);
|
|
2849
|
+
return {
|
|
2850
|
+
specialistOutput,
|
|
2851
|
+
ownerOutput,
|
|
2852
|
+
ownerElapsed,
|
|
2853
|
+
completionReason: completionDecision.completionReason,
|
|
2854
|
+
};
|
|
2177
2855
|
}
|
|
2178
2856
|
catch (error) {
|
|
2179
2857
|
await specialistSettled;
|
|
@@ -2190,10 +2868,14 @@ export class WorkflowRunner {
|
|
|
2190
2868
|
resolveWorkerSpawn = resolve;
|
|
2191
2869
|
rejectWorkerSpawn = reject;
|
|
2192
2870
|
});
|
|
2193
|
-
const
|
|
2871
|
+
const specialistTask = this.buildWorkerHandoffTask(step, resolvedTask, supervised);
|
|
2872
|
+
const specialistStep = { ...step, task: specialistTask };
|
|
2194
2873
|
this.log(`[${step.name}] Spawning specialist "${supervised.specialist.name}" (cli: ${supervised.specialist.cli})`);
|
|
2195
2874
|
const workerPromise = this.spawnAndWait(supervised.specialist, specialistStep, timeoutMs, {
|
|
2196
2875
|
agentNameSuffix: 'worker',
|
|
2876
|
+
evidenceStepName: step.name,
|
|
2877
|
+
evidenceRole: 'worker',
|
|
2878
|
+
logicalName: supervised.specialist.name,
|
|
2197
2879
|
onSpawned: ({ actualName, agent }) => {
|
|
2198
2880
|
workerHandle = agent;
|
|
2199
2881
|
workerRuntimeName = actualName;
|
|
@@ -2208,7 +2890,7 @@ export class WorkflowRunner {
|
|
|
2208
2890
|
}
|
|
2209
2891
|
},
|
|
2210
2892
|
onChunk: ({ agentName, chunk }) => {
|
|
2211
|
-
this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk);
|
|
2893
|
+
this.forwardAgentChunkToChannel(step.name, 'Worker', agentName, chunk, supervised.specialist.name);
|
|
2212
2894
|
},
|
|
2213
2895
|
}).catch((error) => {
|
|
2214
2896
|
if (!workerSpawned) {
|
|
@@ -2221,14 +2903,24 @@ export class WorkflowRunner {
|
|
|
2221
2903
|
workerPromise
|
|
2222
2904
|
.then((result) => {
|
|
2223
2905
|
workerReleased = true;
|
|
2224
|
-
this.
|
|
2906
|
+
this.log(`[${step.name}] Worker ${workerRuntimeName} exited`);
|
|
2907
|
+
this.recordStepToolSideEffect(step.name, {
|
|
2908
|
+
type: 'worker_exit',
|
|
2909
|
+
detail: `Worker ${workerRuntimeName} exited`,
|
|
2910
|
+
raw: { worker: workerRuntimeName, exitCode: result.exitCode, exitSignal: result.exitSignal },
|
|
2911
|
+
});
|
|
2225
2912
|
if (step.verification?.type === 'output_contains' && result.output.includes(step.verification.value)) {
|
|
2226
|
-
this.
|
|
2913
|
+
this.log(`[${step.name}] Verification gate observed: output contains ${JSON.stringify(step.verification.value)}`);
|
|
2227
2914
|
}
|
|
2228
2915
|
})
|
|
2229
2916
|
.catch((error) => {
|
|
2230
2917
|
const message = error instanceof Error ? error.message : String(error);
|
|
2231
2918
|
this.postToChannel(`**[${step.name}]** Worker \`${workerRuntimeName}\` exited with error: ${message}`);
|
|
2919
|
+
this.recordStepToolSideEffect(step.name, {
|
|
2920
|
+
type: 'worker_error',
|
|
2921
|
+
detail: `Worker ${workerRuntimeName} exited with error: ${message}`,
|
|
2922
|
+
raw: { worker: workerRuntimeName, error: message },
|
|
2923
|
+
});
|
|
2232
2924
|
});
|
|
2233
2925
|
await workerReady;
|
|
2234
2926
|
const supervisorTask = this.buildOwnerSupervisorTask(step, resolvedTask, supervised, workerRuntimeName);
|
|
@@ -2243,6 +2935,9 @@ export class WorkflowRunner {
|
|
|
2243
2935
|
try {
|
|
2244
2936
|
const ownerResultObj = await this.spawnAndWait(supervised.owner, ownerStep, timeoutMs, {
|
|
2245
2937
|
agentNameSuffix: 'owner',
|
|
2938
|
+
evidenceStepName: step.name,
|
|
2939
|
+
evidenceRole: 'owner',
|
|
2940
|
+
logicalName: supervised.owner.name,
|
|
2246
2941
|
onSpawned: ({ actualName }) => {
|
|
2247
2942
|
this.supervisedRuntimeAgents.set(actualName, {
|
|
2248
2943
|
stepName: step.name,
|
|
@@ -2257,9 +2952,14 @@ export class WorkflowRunner {
|
|
|
2257
2952
|
const ownerElapsed = Date.now() - ownerStartTime;
|
|
2258
2953
|
const ownerOutput = ownerResultObj.output;
|
|
2259
2954
|
this.log(`[${step.name}] Owner "${supervised.owner.name}" exited`);
|
|
2260
|
-
this.assertOwnerCompletionMarker(step, ownerOutput, supervisorTask);
|
|
2261
2955
|
const specialistOutput = (await workerPromise).output;
|
|
2262
|
-
|
|
2956
|
+
const completionDecision = this.resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, supervisorTask, resolvedTask);
|
|
2957
|
+
return {
|
|
2958
|
+
specialistOutput,
|
|
2959
|
+
ownerOutput,
|
|
2960
|
+
ownerElapsed,
|
|
2961
|
+
completionReason: completionDecision.completionReason,
|
|
2962
|
+
};
|
|
2263
2963
|
}
|
|
2264
2964
|
catch (error) {
|
|
2265
2965
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -2273,14 +2973,20 @@ export class WorkflowRunner {
|
|
|
2273
2973
|
throw error;
|
|
2274
2974
|
}
|
|
2275
2975
|
}
|
|
2276
|
-
forwardAgentChunkToChannel(stepName, roleLabel, agentName, chunk) {
|
|
2277
|
-
const lines = WorkflowRunner.
|
|
2976
|
+
forwardAgentChunkToChannel(stepName, roleLabel, agentName, chunk, sender) {
|
|
2977
|
+
const lines = WorkflowRunner.scrubForChannel(chunk)
|
|
2278
2978
|
.split('\n')
|
|
2279
2979
|
.map((line) => line.trim())
|
|
2280
2980
|
.filter(Boolean)
|
|
2281
2981
|
.slice(0, 3);
|
|
2282
2982
|
for (const line of lines) {
|
|
2283
|
-
this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}
|
|
2983
|
+
this.postToChannel(`**[${stepName}]** ${roleLabel} \`${agentName}\`: ${line.slice(0, 280)}`, {
|
|
2984
|
+
stepName,
|
|
2985
|
+
sender,
|
|
2986
|
+
actor: agentName,
|
|
2987
|
+
role: roleLabel,
|
|
2988
|
+
origin: 'forwarded_chunk',
|
|
2989
|
+
});
|
|
2284
2990
|
}
|
|
2285
2991
|
}
|
|
2286
2992
|
async recordOwnerMonitoringChunk(step, ownerDef, chunk) {
|
|
@@ -2295,6 +3001,11 @@ export class WorkflowRunner {
|
|
|
2295
3001
|
if (/STEP_COMPLETE:/i.test(stripped))
|
|
2296
3002
|
details.push('Declared the step complete');
|
|
2297
3003
|
for (const detail of details) {
|
|
3004
|
+
this.recordStepToolSideEffect(step.name, {
|
|
3005
|
+
type: 'owner_monitoring',
|
|
3006
|
+
detail,
|
|
3007
|
+
raw: { output: stripped.slice(0, 240), owner: ownerDef.name },
|
|
3008
|
+
});
|
|
2298
3009
|
await this.trajectory?.ownerMonitoringEvent(step.name, ownerDef.name, detail, {
|
|
2299
3010
|
output: stripped.slice(0, 240),
|
|
2300
3011
|
});
|
|
@@ -2335,6 +3046,7 @@ export class WorkflowRunner {
|
|
|
2335
3046
|
}
|
|
2336
3047
|
resolveAutoReviewAgent(ownerDef, agentMap) {
|
|
2337
3048
|
const allDefs = [...agentMap.values()].map((d) => WorkflowRunner.resolveAgentDef(d));
|
|
3049
|
+
const eligible = (def) => def.name !== ownerDef.name && !this.isExplicitInteractiveWorker(def);
|
|
2338
3050
|
const isReviewer = (def) => {
|
|
2339
3051
|
const roleLC = def.role?.toLowerCase() ?? '';
|
|
2340
3052
|
const nameLC = def.name.toLowerCase();
|
|
@@ -2359,32 +3071,239 @@ export class WorkflowRunner {
|
|
|
2359
3071
|
return isReviewer(def) ? 1 : 0;
|
|
2360
3072
|
};
|
|
2361
3073
|
const dedicated = allDefs
|
|
2362
|
-
.filter((d) => d
|
|
3074
|
+
.filter((d) => eligible(d) && isReviewer(d))
|
|
2363
3075
|
.sort((a, b) => reviewerPriority(b) - reviewerPriority(a) || a.name.localeCompare(b.name))[0];
|
|
2364
3076
|
if (dedicated)
|
|
2365
3077
|
return dedicated;
|
|
2366
|
-
const alternate = allDefs.find((d) => d
|
|
3078
|
+
const alternate = allDefs.find((d) => eligible(d) && d.interactive !== false);
|
|
2367
3079
|
if (alternate)
|
|
2368
3080
|
return alternate;
|
|
2369
3081
|
// Self-review fallback — log a warning since owner reviewing itself is weak.
|
|
2370
3082
|
return ownerDef;
|
|
2371
3083
|
}
|
|
2372
|
-
|
|
3084
|
+
isExplicitInteractiveWorker(agentDef) {
|
|
3085
|
+
return agentDef.preset === 'worker' && agentDef.interactive !== false;
|
|
3086
|
+
}
|
|
3087
|
+
resolveOwnerCompletionDecision(step, ownerOutput, specialistOutput, injectedTaskText, verificationTaskText) {
|
|
3088
|
+
const hasMarker = this.hasOwnerCompletionMarker(step, ownerOutput, injectedTaskText);
|
|
3089
|
+
const explicitOwnerDecision = this.parseOwnerDecision(step, ownerOutput, false);
|
|
3090
|
+
// INCOMPLETE_RETRY / NEEDS_CLARIFICATION are non-terminal owner outcomes. They never mark
|
|
3091
|
+
// the step complete here; instead they throw back to executeAgentStep(), which decides
|
|
3092
|
+
// whether to retry or fail based on the remaining retry budget for this step.
|
|
3093
|
+
if (explicitOwnerDecision?.decision === 'INCOMPLETE_RETRY') {
|
|
3094
|
+
throw new WorkflowCompletionError(`Step "${step.name}" owner requested retry${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
|
|
3095
|
+
}
|
|
3096
|
+
if (explicitOwnerDecision?.decision === 'INCOMPLETE_FAIL') {
|
|
3097
|
+
throw new WorkflowCompletionError(`Step "${step.name}" owner marked the step incomplete${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'failed_owner_decision');
|
|
3098
|
+
}
|
|
3099
|
+
if (explicitOwnerDecision?.decision === 'NEEDS_CLARIFICATION') {
|
|
3100
|
+
throw new WorkflowCompletionError(`Step "${step.name}" owner requested clarification before completion${explicitOwnerDecision.reason ? `: ${explicitOwnerDecision.reason}` : ''}`, 'retry_requested_by_owner');
|
|
3101
|
+
}
|
|
3102
|
+
const verificationResult = step.verification
|
|
3103
|
+
? this.runVerification(step.verification, specialistOutput, step.name, verificationTaskText, {
|
|
3104
|
+
allowFailure: true,
|
|
3105
|
+
completionMarkerFound: hasMarker,
|
|
3106
|
+
})
|
|
3107
|
+
: { passed: false };
|
|
3108
|
+
if (verificationResult.error) {
|
|
3109
|
+
throw new WorkflowCompletionError(`Step "${step.name}" verification failed and no owner decision or evidence established completion: ${verificationResult.error}`, 'failed_verification');
|
|
3110
|
+
}
|
|
3111
|
+
if (explicitOwnerDecision?.decision === 'COMPLETE') {
|
|
3112
|
+
if (!hasMarker) {
|
|
3113
|
+
this.log(`[${step.name}] Structured OWNER_DECISION completed the step without legacy STEP_COMPLETE marker`);
|
|
3114
|
+
}
|
|
3115
|
+
return {
|
|
3116
|
+
completionReason: 'completed_by_owner_decision',
|
|
3117
|
+
ownerDecision: explicitOwnerDecision.decision,
|
|
3118
|
+
reason: explicitOwnerDecision.reason,
|
|
3119
|
+
};
|
|
3120
|
+
}
|
|
3121
|
+
if (verificationResult.passed) {
|
|
3122
|
+
return { completionReason: 'completed_verified' };
|
|
3123
|
+
}
|
|
3124
|
+
const ownerDecision = this.parseOwnerDecision(step, ownerOutput, hasMarker);
|
|
3125
|
+
if (ownerDecision?.decision === 'COMPLETE') {
|
|
3126
|
+
return {
|
|
3127
|
+
completionReason: 'completed_by_owner_decision',
|
|
3128
|
+
ownerDecision: ownerDecision.decision,
|
|
3129
|
+
reason: ownerDecision.reason,
|
|
3130
|
+
};
|
|
3131
|
+
}
|
|
3132
|
+
if (!explicitOwnerDecision) {
|
|
3133
|
+
const evidenceReason = this.judgeOwnerCompletionByEvidence(step.name, ownerOutput);
|
|
3134
|
+
if (evidenceReason) {
|
|
3135
|
+
if (!hasMarker) {
|
|
3136
|
+
this.log(`[${step.name}] Evidence-based completion resolved without legacy STEP_COMPLETE marker`);
|
|
3137
|
+
}
|
|
3138
|
+
return {
|
|
3139
|
+
completionReason: 'completed_by_evidence',
|
|
3140
|
+
reason: evidenceReason,
|
|
3141
|
+
};
|
|
3142
|
+
}
|
|
3143
|
+
}
|
|
3144
|
+
// Process-exit fallback: if the agent exited cleanly (code 0) and verification
|
|
3145
|
+
// passes (or no verification is configured), infer completion rather than failing.
|
|
3146
|
+
// This reduces dependence on agents posting exact coordination signals.
|
|
3147
|
+
const processExitFallback = this.tryProcessExitFallback(step, specialistOutput, verificationTaskText, ownerOutput);
|
|
3148
|
+
if (processExitFallback) {
|
|
3149
|
+
this.log(`[${step.name}] Completion inferred from clean process exit (code 0)` +
|
|
3150
|
+
(step.verification ? ' + verification passed' : '') +
|
|
3151
|
+
' — no coordination signal was required');
|
|
3152
|
+
return processExitFallback;
|
|
3153
|
+
}
|
|
3154
|
+
throw new WorkflowCompletionError(`Step "${step.name}" owner completion decision missing: no OWNER_DECISION, legacy STEP_COMPLETE marker, or evidence-backed completion signal`, 'failed_no_evidence');
|
|
3155
|
+
}
|
|
3156
|
+
hasExplicitInteractiveWorkerCompletionEvidence(step, output, injectedTaskText, verificationTaskText) {
|
|
3157
|
+
try {
|
|
3158
|
+
this.resolveOwnerCompletionDecision(step, output, output, injectedTaskText, verificationTaskText);
|
|
3159
|
+
return true;
|
|
3160
|
+
}
|
|
3161
|
+
catch {
|
|
3162
|
+
return false;
|
|
3163
|
+
}
|
|
3164
|
+
}
|
|
3165
|
+
hasOwnerCompletionMarker(step, output, injectedTaskText) {
|
|
2373
3166
|
const marker = `STEP_COMPLETE:${step.name}`;
|
|
2374
3167
|
const taskHasMarker = injectedTaskText.includes(marker);
|
|
2375
3168
|
const first = output.indexOf(marker);
|
|
2376
3169
|
if (first === -1) {
|
|
2377
|
-
|
|
2378
|
-
}
|
|
2379
|
-
// PTY output includes
|
|
2380
|
-
//
|
|
2381
|
-
|
|
3170
|
+
return false;
|
|
3171
|
+
}
|
|
3172
|
+
// PTY output often includes echoed prompt text, so when the injected task
|
|
3173
|
+
// itself contains the legacy marker require a second occurrence from the
|
|
3174
|
+
// agent response.
|
|
3175
|
+
const outputLikelyContainsInjectedPrompt = output.includes('STEP OWNER CONTRACT') ||
|
|
3176
|
+
output.includes('Preferred final decision format') ||
|
|
3177
|
+
output.includes('Legacy completion marker still supported') ||
|
|
3178
|
+
output.includes('Output exactly: STEP_COMPLETE:');
|
|
2382
3179
|
if (taskHasMarker && outputLikelyContainsInjectedPrompt) {
|
|
2383
|
-
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
3180
|
+
return output.includes(marker, first + marker.length);
|
|
3181
|
+
}
|
|
3182
|
+
return true;
|
|
3183
|
+
}
|
|
3184
|
+
parseOwnerDecision(step, ownerOutput, hasMarker) {
|
|
3185
|
+
const decisionPattern = /OWNER_DECISION:\s*(COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/gi;
|
|
3186
|
+
const decisionMatches = [...ownerOutput.matchAll(decisionPattern)];
|
|
3187
|
+
const outputLikelyContainsEchoedPrompt = ownerOutput.includes('STEP OWNER CONTRACT') ||
|
|
3188
|
+
ownerOutput.includes('Preferred final decision format') ||
|
|
3189
|
+
ownerOutput.includes('one of COMPLETE, INCOMPLETE_RETRY') ||
|
|
3190
|
+
ownerOutput.includes('COMPLETE|INCOMPLETE_RETRY');
|
|
3191
|
+
if (decisionMatches.length === 0) {
|
|
3192
|
+
if (!hasMarker)
|
|
3193
|
+
return null;
|
|
3194
|
+
return {
|
|
3195
|
+
decision: 'COMPLETE',
|
|
3196
|
+
reason: `Legacy completion marker observed: STEP_COMPLETE:${step.name}`,
|
|
3197
|
+
};
|
|
3198
|
+
}
|
|
3199
|
+
// Filter out matches that appear on a template/instruction line (e.g.
|
|
3200
|
+
// "COMPLETE|INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION") to avoid
|
|
3201
|
+
// picking up the template format as the agent's actual decision.
|
|
3202
|
+
const realMatches = outputLikelyContainsEchoedPrompt
|
|
3203
|
+
? decisionMatches.filter((m) => {
|
|
3204
|
+
const lineStart = ownerOutput.lastIndexOf('\n', m.index) + 1;
|
|
3205
|
+
const lineEnd = ownerOutput.indexOf('\n', m.index);
|
|
3206
|
+
const line = ownerOutput.slice(lineStart, lineEnd === -1 ? undefined : lineEnd);
|
|
3207
|
+
return !line.includes('COMPLETE|INCOMPLETE_RETRY');
|
|
3208
|
+
})
|
|
3209
|
+
: decisionMatches;
|
|
3210
|
+
const decisionMatch = realMatches.length > 0
|
|
3211
|
+
? realMatches[realMatches.length - 1]
|
|
3212
|
+
: decisionMatches[decisionMatches.length - 1];
|
|
3213
|
+
const decision = decisionMatch?.[1]?.toUpperCase();
|
|
3214
|
+
if (decision !== 'COMPLETE' &&
|
|
3215
|
+
decision !== 'INCOMPLETE_RETRY' &&
|
|
3216
|
+
decision !== 'INCOMPLETE_FAIL' &&
|
|
3217
|
+
decision !== 'NEEDS_CLARIFICATION') {
|
|
3218
|
+
return null;
|
|
3219
|
+
}
|
|
3220
|
+
const reasonPattern = /(?:^|\n)REASON:\s*(.+)/gi;
|
|
3221
|
+
const reasonMatches = [...ownerOutput.matchAll(reasonPattern)];
|
|
3222
|
+
const reasonMatch = outputLikelyContainsEchoedPrompt && reasonMatches.length > 1
|
|
3223
|
+
? reasonMatches[reasonMatches.length - 1]
|
|
3224
|
+
: reasonMatches[0];
|
|
3225
|
+
const reason = reasonMatch?.[1]?.trim();
|
|
3226
|
+
return {
|
|
3227
|
+
decision,
|
|
3228
|
+
reason: reason && reason !== '<one sentence>' ? reason : undefined,
|
|
3229
|
+
};
|
|
3230
|
+
}
|
|
3231
|
+
stripEchoedPromptLines(output, patterns) {
|
|
3232
|
+
return output
|
|
3233
|
+
.split('\n')
|
|
3234
|
+
.map((line) => line.trim())
|
|
3235
|
+
.filter(Boolean)
|
|
3236
|
+
.filter((line) => patterns.every((pattern) => !pattern.test(line)))
|
|
3237
|
+
.join('\n');
|
|
3238
|
+
}
|
|
3239
|
+
firstMeaningfulLine(output) {
|
|
3240
|
+
return output
|
|
3241
|
+
.split('\n')
|
|
3242
|
+
.map((line) => line.trim())
|
|
3243
|
+
.find(Boolean);
|
|
3244
|
+
}
|
|
3245
|
+
judgeOwnerCompletionByEvidence(stepName, ownerOutput) {
|
|
3246
|
+
// Never infer completion when the raw output contains an explicit retry/fail/clarification signal.
|
|
3247
|
+
if (/OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(ownerOutput)) {
|
|
3248
|
+
return null;
|
|
2387
3249
|
}
|
|
3250
|
+
const sanitized = this.stripEchoedPromptLines(ownerOutput, [
|
|
3251
|
+
/^STEP OWNER CONTRACT:?$/i,
|
|
3252
|
+
/^Preferred final decision format:?$/i,
|
|
3253
|
+
/^OWNER_DECISION:\s*(?:COMPLETE\|INCOMPLETE_RETRY|<one of COMPLETE, INCOMPLETE_RETRY)/i,
|
|
3254
|
+
/^REASON:\s*<one sentence>$/i,
|
|
3255
|
+
/^Legacy completion marker still supported:/i,
|
|
3256
|
+
/^STEP_COMPLETE:/i,
|
|
3257
|
+
]);
|
|
3258
|
+
if (!sanitized)
|
|
3259
|
+
return null;
|
|
3260
|
+
const hasExplicitSelfRelease = /Calling\s+(?:[\w.-]+\.)?remove_agent\(\{[^<\n]*"reason":"task completed"/i.test(sanitized);
|
|
3261
|
+
const hasPositiveConclusion = /\b(complete(?:d)?|done|verified|looks correct|safe handoff|artifact verified)\b/i.test(sanitized) ||
|
|
3262
|
+
/\bartifacts?\b.*\b(correct|verified|complete)\b/i.test(sanitized) ||
|
|
3263
|
+
hasExplicitSelfRelease;
|
|
3264
|
+
const evidence = this.getStepCompletionEvidence(stepName);
|
|
3265
|
+
const hasValidatedCoordinationSignal = evidence?.coordinationSignals.some((signal) => signal.kind === 'worker_done' ||
|
|
3266
|
+
signal.kind === 'lead_done' ||
|
|
3267
|
+
signal.kind === 'verification_passed' ||
|
|
3268
|
+
(signal.kind === 'process_exit' && signal.value === '0')) ?? false;
|
|
3269
|
+
const hasValidatedInspectionSignal = evidence?.toolSideEffects.some((effect) => effect.type === 'owner_monitoring' &&
|
|
3270
|
+
(/Checked git diff stats/i.test(effect.detail) ||
|
|
3271
|
+
/Listed files for verification/i.test(effect.detail))) ?? false;
|
|
3272
|
+
const hasEvidenceSignal = hasValidatedCoordinationSignal || hasValidatedInspectionSignal;
|
|
3273
|
+
if (!hasPositiveConclusion || !hasEvidenceSignal) {
|
|
3274
|
+
return null;
|
|
3275
|
+
}
|
|
3276
|
+
return this.firstMeaningfulLine(sanitized) ?? 'Evidence-backed completion';
|
|
3277
|
+
}
|
|
3278
|
+
/**
|
|
3279
|
+
* Process-exit fallback: when agent exits with code 0 but posts no coordination
|
|
3280
|
+
* signal, check if verification passes (or no verification is configured) and
|
|
3281
|
+
* infer completion. This is the key mechanism for reducing agent compliance
|
|
3282
|
+
* dependence — the runner trusts a clean exit + passing verification over
|
|
3283
|
+
* requiring exact signal text.
|
|
3284
|
+
*/
|
|
3285
|
+
tryProcessExitFallback(step, specialistOutput, verificationTaskText, ownerOutput) {
|
|
3286
|
+
const gracePeriodMs = this.currentConfig?.swarm.completionGracePeriodMs ?? 5000;
|
|
3287
|
+
if (gracePeriodMs === 0)
|
|
3288
|
+
return null;
|
|
3289
|
+
// Never infer completion when the owner explicitly requested retry/fail/clarification.
|
|
3290
|
+
if (ownerOutput && /OWNER_DECISION:\s*(?:INCOMPLETE_RETRY|INCOMPLETE_FAIL|NEEDS_CLARIFICATION)\b/i.test(ownerOutput)) {
|
|
3291
|
+
return null;
|
|
3292
|
+
}
|
|
3293
|
+
const evidence = this.getStepCompletionEvidence(step.name);
|
|
3294
|
+
const hasCleanExit = evidence?.coordinationSignals.some((signal) => signal.kind === 'process_exit' && signal.value === '0') ?? false;
|
|
3295
|
+
if (!hasCleanExit)
|
|
3296
|
+
return null;
|
|
3297
|
+
// If verification is configured, it must pass for the fallback to succeed.
|
|
3298
|
+
if (step.verification) {
|
|
3299
|
+
const verificationResult = this.runVerification(step.verification, specialistOutput, step.name, verificationTaskText, { allowFailure: true });
|
|
3300
|
+
if (!verificationResult.passed)
|
|
3301
|
+
return null;
|
|
3302
|
+
}
|
|
3303
|
+
return {
|
|
3304
|
+
completionReason: 'completed_by_process_exit',
|
|
3305
|
+
reason: `Process exited with code 0${step.verification ? ' and verification passed' : ''} — coordination signal not required`,
|
|
3306
|
+
};
|
|
2388
3307
|
}
|
|
2389
3308
|
async runStepReviewGate(step, resolvedTask, specialistOutput, ownerOutput, ownerDef, reviewerDef, timeoutMs) {
|
|
2390
3309
|
const reviewSnippetMax = 12_000;
|
|
@@ -2426,7 +3345,17 @@ export class WorkflowRunner {
|
|
|
2426
3345
|
};
|
|
2427
3346
|
await this.trajectory?.registerAgent(reviewerDef.name, 'reviewer');
|
|
2428
3347
|
this.postToChannel(`**[${step.name}]** Review started (reviewer: ${reviewerDef.name})`);
|
|
3348
|
+
this.recordStepToolSideEffect(step.name, {
|
|
3349
|
+
type: 'review_started',
|
|
3350
|
+
detail: `Review started with ${reviewerDef.name}`,
|
|
3351
|
+
raw: { reviewer: reviewerDef.name },
|
|
3352
|
+
});
|
|
2429
3353
|
const emitReviewCompleted = async (decision, reason) => {
|
|
3354
|
+
this.recordStepToolSideEffect(step.name, {
|
|
3355
|
+
type: 'review_completed',
|
|
3356
|
+
detail: `Review ${decision} by ${reviewerDef.name}${reason ? `: ${reason}` : ''}`,
|
|
3357
|
+
raw: { reviewer: reviewerDef.name, decision, reason },
|
|
3358
|
+
});
|
|
2430
3359
|
await this.trajectory?.reviewCompleted(step.name, reviewerDef.name, decision, reason);
|
|
2431
3360
|
this.emit({
|
|
2432
3361
|
type: 'step:review-completed',
|
|
@@ -2470,6 +3399,9 @@ export class WorkflowRunner {
|
|
|
2470
3399
|
};
|
|
2471
3400
|
try {
|
|
2472
3401
|
await this.spawnAndWait(reviewerDef, reviewStep, safetyTimeoutMs, {
|
|
3402
|
+
evidenceStepName: step.name,
|
|
3403
|
+
evidenceRole: 'reviewer',
|
|
3404
|
+
logicalName: reviewerDef.name,
|
|
2473
3405
|
onSpawned: ({ agent }) => {
|
|
2474
3406
|
reviewerHandle = agent;
|
|
2475
3407
|
},
|
|
@@ -2507,15 +3439,34 @@ export class WorkflowRunner {
|
|
|
2507
3439
|
return reviewOutput;
|
|
2508
3440
|
}
|
|
2509
3441
|
parseReviewDecision(reviewOutput) {
|
|
3442
|
+
const strict = this.parseStrictReviewDecision(reviewOutput);
|
|
3443
|
+
if (strict) {
|
|
3444
|
+
return strict;
|
|
3445
|
+
}
|
|
3446
|
+
const tolerant = this.parseTolerantReviewDecision(reviewOutput);
|
|
3447
|
+
if (tolerant) {
|
|
3448
|
+
return tolerant;
|
|
3449
|
+
}
|
|
3450
|
+
return this.judgeReviewDecisionFromEvidence(reviewOutput);
|
|
3451
|
+
}
|
|
3452
|
+
parseStrictReviewDecision(reviewOutput) {
|
|
2510
3453
|
const decisionPattern = /REVIEW_DECISION:\s*(APPROVE|REJECT)/gi;
|
|
2511
3454
|
const decisionMatches = [...reviewOutput.matchAll(decisionPattern)];
|
|
2512
3455
|
if (decisionMatches.length === 0) {
|
|
2513
3456
|
return null;
|
|
2514
3457
|
}
|
|
2515
3458
|
const outputLikelyContainsEchoedPrompt = reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
|
|
2516
|
-
const
|
|
2517
|
-
? decisionMatches
|
|
2518
|
-
|
|
3459
|
+
const realReviewMatches = outputLikelyContainsEchoedPrompt
|
|
3460
|
+
? decisionMatches.filter((m) => {
|
|
3461
|
+
const lineStart = reviewOutput.lastIndexOf('\n', m.index) + 1;
|
|
3462
|
+
const lineEnd = reviewOutput.indexOf('\n', m.index);
|
|
3463
|
+
const line = reviewOutput.slice(lineStart, lineEnd === -1 ? undefined : lineEnd);
|
|
3464
|
+
return !line.includes('APPROVE or REJECT');
|
|
3465
|
+
})
|
|
3466
|
+
: decisionMatches;
|
|
3467
|
+
const decisionMatch = realReviewMatches.length > 0
|
|
3468
|
+
? realReviewMatches[realReviewMatches.length - 1]
|
|
3469
|
+
: decisionMatches[decisionMatches.length - 1];
|
|
2519
3470
|
const decision = decisionMatch?.[1]?.toUpperCase();
|
|
2520
3471
|
if (decision !== 'APPROVE' && decision !== 'REJECT') {
|
|
2521
3472
|
return null;
|
|
@@ -2531,6 +3482,85 @@ export class WorkflowRunner {
|
|
|
2531
3482
|
reason: reason && reason !== '<one sentence>' ? reason : undefined,
|
|
2532
3483
|
};
|
|
2533
3484
|
}
|
|
3485
|
+
parseTolerantReviewDecision(reviewOutput) {
|
|
3486
|
+
const sanitized = this.stripEchoedPromptLines(reviewOutput, [
|
|
3487
|
+
/^Return exactly:?$/i,
|
|
3488
|
+
/^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
|
|
3489
|
+
/^REVIEW_REASON:\s*<one sentence>$/i,
|
|
3490
|
+
]);
|
|
3491
|
+
if (!sanitized) {
|
|
3492
|
+
return null;
|
|
3493
|
+
}
|
|
3494
|
+
const lines = sanitized
|
|
3495
|
+
.split('\n')
|
|
3496
|
+
.map((line) => line.trim())
|
|
3497
|
+
.filter(Boolean);
|
|
3498
|
+
for (const line of lines) {
|
|
3499
|
+
const candidate = line.replace(/^REVIEW_DECISION:\s*/i, '').trim();
|
|
3500
|
+
const decision = this.normalizeReviewDecisionCandidate(candidate);
|
|
3501
|
+
if (decision) {
|
|
3502
|
+
return {
|
|
3503
|
+
decision,
|
|
3504
|
+
reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
|
|
3505
|
+
};
|
|
3506
|
+
}
|
|
3507
|
+
}
|
|
3508
|
+
const decision = this.normalizeReviewDecisionCandidate(lines.join(' '));
|
|
3509
|
+
if (!decision) {
|
|
3510
|
+
return null;
|
|
3511
|
+
}
|
|
3512
|
+
return {
|
|
3513
|
+
decision,
|
|
3514
|
+
reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
|
|
3515
|
+
};
|
|
3516
|
+
}
|
|
3517
|
+
normalizeReviewDecisionCandidate(candidate) {
|
|
3518
|
+
const value = candidate.trim().toLowerCase();
|
|
3519
|
+
if (!value)
|
|
3520
|
+
return null;
|
|
3521
|
+
if (/^(approve|approved|complete|completed|pass|passed|accept|accepted|lgtm|ship it|looks good|looks fine)\b/i.test(value)) {
|
|
3522
|
+
return 'approved';
|
|
3523
|
+
}
|
|
3524
|
+
if (/^(reject|rejected|retry|retry requested|fail|failed|incomplete|needs clarification|not complete|not ready|insufficient evidence)\b/i.test(value)) {
|
|
3525
|
+
return 'rejected';
|
|
3526
|
+
}
|
|
3527
|
+
return null;
|
|
3528
|
+
}
|
|
3529
|
+
parseReviewReason(reviewOutput) {
|
|
3530
|
+
const reasonPattern = /REVIEW_REASON:\s*(.+)/gi;
|
|
3531
|
+
const reasonMatches = [...reviewOutput.matchAll(reasonPattern)];
|
|
3532
|
+
const outputLikelyContainsEchoedPrompt = reviewOutput.includes('Return exactly') || reviewOutput.includes('REVIEW_DECISION: APPROVE or REJECT');
|
|
3533
|
+
const reasonMatch = outputLikelyContainsEchoedPrompt && reasonMatches.length > 1
|
|
3534
|
+
? reasonMatches[reasonMatches.length - 1]
|
|
3535
|
+
: reasonMatches[0];
|
|
3536
|
+
const reason = reasonMatch?.[1]?.trim();
|
|
3537
|
+
return reason && reason !== '<one sentence>' ? reason : undefined;
|
|
3538
|
+
}
|
|
3539
|
+
judgeReviewDecisionFromEvidence(reviewOutput) {
|
|
3540
|
+
const sanitized = this.stripEchoedPromptLines(reviewOutput, [
|
|
3541
|
+
/^Return exactly:?$/i,
|
|
3542
|
+
/^REVIEW_DECISION:\s*APPROVE\s+or\s+REJECT$/i,
|
|
3543
|
+
/^REVIEW_REASON:\s*<one sentence>$/i,
|
|
3544
|
+
]);
|
|
3545
|
+
if (!sanitized) {
|
|
3546
|
+
return null;
|
|
3547
|
+
}
|
|
3548
|
+
const hasPositiveEvidence = /\b(approved?|complete(?:d)?|verified|looks good|looks fine|safe handoff|pass(?:ed)?)\b/i.test(sanitized);
|
|
3549
|
+
const hasNegativeEvidence = /\b(reject(?:ed)?|retry|fail(?:ed)?|incomplete|missing checks|insufficient evidence|not safe)\b/i.test(sanitized);
|
|
3550
|
+
if (hasNegativeEvidence) {
|
|
3551
|
+
return {
|
|
3552
|
+
decision: 'rejected',
|
|
3553
|
+
reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
|
|
3554
|
+
};
|
|
3555
|
+
}
|
|
3556
|
+
if (!hasPositiveEvidence) {
|
|
3557
|
+
return null;
|
|
3558
|
+
}
|
|
3559
|
+
return {
|
|
3560
|
+
decision: 'approved',
|
|
3561
|
+
reason: this.parseReviewReason(sanitized) ?? this.firstMeaningfulLine(sanitized),
|
|
3562
|
+
};
|
|
3563
|
+
}
|
|
2534
3564
|
combineStepAndReviewOutput(stepOutput, reviewOutput) {
|
|
2535
3565
|
const primary = stepOutput.trimEnd();
|
|
2536
3566
|
const review = reviewOutput.trim();
|
|
@@ -2600,8 +3630,8 @@ export class WorkflowRunner {
|
|
|
2600
3630
|
switch (preset) {
|
|
2601
3631
|
case 'worker':
|
|
2602
3632
|
return ('You are a non-interactive worker agent. Produce clean, structured output to stdout.\n' +
|
|
2603
|
-
'Do NOT use
|
|
2604
|
-
'Do NOT use
|
|
3633
|
+
'Do NOT use mcp__relaycast__agent_add, add_agent, or any MCP tool to spawn sub-agents.\n' +
|
|
3634
|
+
'Do NOT use mcp__relaycast__dm_send or any Relaycast messaging tools — you have no relay connection.\n\n');
|
|
2605
3635
|
case 'reviewer':
|
|
2606
3636
|
return ('You are a non-interactive reviewer agent. Read the specified files/artifacts and produce a clear verdict.\n' +
|
|
2607
3637
|
'Do NOT spawn sub-agents or use any Relaycast messaging tools.\n\n');
|
|
@@ -2627,7 +3657,7 @@ export class WorkflowRunner {
|
|
|
2627
3657
|
step.task +
|
|
2628
3658
|
'\n\n---\n' +
|
|
2629
3659
|
'IMPORTANT: You are running as a non-interactive subprocess. ' +
|
|
2630
|
-
'Do NOT call
|
|
3660
|
+
'Do NOT call mcp__relaycast__agent_add, add_agent, or any MCP tool to spawn or manage other agents.\n\n' +
|
|
2631
3661
|
'CRITICAL REQUIREMENT — YOU MUST FOLLOW THIS EXACTLY:\n' +
|
|
2632
3662
|
'You are running in non-interactive mode. There is NO opportunity for follow-up, ' +
|
|
2633
3663
|
'clarification, or additional input. Your stdout output is your ONLY deliverable.\n\n' +
|
|
@@ -2759,11 +3789,19 @@ export class WorkflowRunner {
|
|
|
2759
3789
|
reject(new Error(`Failed to spawn ${cmd}: ${err.message}`));
|
|
2760
3790
|
});
|
|
2761
3791
|
});
|
|
3792
|
+
this.captureStepTerminalEvidence(step.name, {}, { exitCode, exitSignal });
|
|
2762
3793
|
return { output, exitCode, exitSignal };
|
|
2763
3794
|
}
|
|
2764
3795
|
finally {
|
|
2765
|
-
const
|
|
3796
|
+
const stdout = stdoutChunks.join('');
|
|
3797
|
+
const stderr = stderrChunks.join('');
|
|
3798
|
+
const combinedOutput = stdout + stderr;
|
|
2766
3799
|
this.lastFailedStepOutput.set(step.name, combinedOutput);
|
|
3800
|
+
this.captureStepTerminalEvidence(step.name, {
|
|
3801
|
+
stdout,
|
|
3802
|
+
stderr,
|
|
3803
|
+
combined: combinedOutput,
|
|
3804
|
+
});
|
|
2767
3805
|
stopHeartbeat?.();
|
|
2768
3806
|
logStream.end();
|
|
2769
3807
|
this.unregisterWorker(agentName);
|
|
@@ -2777,6 +3815,7 @@ export class WorkflowRunner {
|
|
|
2777
3815
|
if (!this.relay) {
|
|
2778
3816
|
throw new Error('AgentRelay not initialized');
|
|
2779
3817
|
}
|
|
3818
|
+
const evidenceStepName = options.evidenceStepName ?? step.name;
|
|
2780
3819
|
// Deterministic name: step name + optional role suffix + first 8 chars of run ID.
|
|
2781
3820
|
const requestedName = `${step.name}${options.agentNameSuffix ? `-${options.agentNameSuffix}` : ''}-${(this.currentRunId ?? this.generateShortId()).slice(0, 8)}`;
|
|
2782
3821
|
let agentName = requestedName;
|
|
@@ -2823,11 +3862,17 @@ export class WorkflowRunner {
|
|
|
2823
3862
|
let ptyChunks = [];
|
|
2824
3863
|
try {
|
|
2825
3864
|
const agentCwd = this.resolveAgentCwd(agentDef);
|
|
3865
|
+
const interactiveSpawnPolicy = resolveSpawnPolicy({
|
|
3866
|
+
AGENT_NAME: agentName,
|
|
3867
|
+
AGENT_CLI: agentDef.cli,
|
|
3868
|
+
RELAY_API_KEY: this.relayApiKey ?? 'workflow-runner',
|
|
3869
|
+
AGENT_CHANNELS: (agentChannels ?? []).join(','),
|
|
3870
|
+
});
|
|
2826
3871
|
agent = await this.relay.spawnPty({
|
|
2827
3872
|
name: agentName,
|
|
2828
3873
|
cli: agentDef.cli,
|
|
2829
3874
|
model: agentDef.constraints?.model,
|
|
2830
|
-
args:
|
|
3875
|
+
args: interactiveSpawnPolicy.args,
|
|
2831
3876
|
channels: agentChannels,
|
|
2832
3877
|
task: taskWithExit,
|
|
2833
3878
|
idleThresholdSecs: agentDef.constraints?.idleThresholdSecs,
|
|
@@ -2859,16 +3904,27 @@ export class WorkflowRunner {
|
|
|
2859
3904
|
const oldListener = this.ptyListeners.get(oldName);
|
|
2860
3905
|
if (oldListener) {
|
|
2861
3906
|
this.ptyListeners.delete(oldName);
|
|
2862
|
-
|
|
3907
|
+
const resolvedAgentName = agent.name;
|
|
3908
|
+
this.ptyListeners.set(resolvedAgentName, (chunk) => {
|
|
2863
3909
|
const stripped = WorkflowRunner.stripAnsi(chunk);
|
|
2864
|
-
this.ptyOutputBuffers.get(
|
|
3910
|
+
this.ptyOutputBuffers.get(resolvedAgentName)?.push(stripped);
|
|
2865
3911
|
newLogStream.write(chunk);
|
|
2866
|
-
options.onChunk?.({ agentName:
|
|
3912
|
+
options.onChunk?.({ agentName: resolvedAgentName, chunk });
|
|
2867
3913
|
});
|
|
2868
3914
|
}
|
|
2869
3915
|
agentName = agent.name;
|
|
2870
3916
|
}
|
|
2871
|
-
|
|
3917
|
+
const liveAgent = agent;
|
|
3918
|
+
await options.onSpawned?.({ requestedName, actualName: liveAgent.name, agent: liveAgent });
|
|
3919
|
+
this.runtimeStepAgents.set(liveAgent.name, {
|
|
3920
|
+
stepName: evidenceStepName,
|
|
3921
|
+
role: options.evidenceRole ?? agentDef.role ?? 'agent',
|
|
3922
|
+
logicalName: options.logicalName ?? agentDef.name,
|
|
3923
|
+
});
|
|
3924
|
+
const signalParticipant = this.resolveSignalParticipantKind(options.evidenceRole ?? agentDef.role ?? 'agent');
|
|
3925
|
+
if (signalParticipant) {
|
|
3926
|
+
this.rememberStepSignalSender(evidenceStepName, signalParticipant, liveAgent.name, options.logicalName ?? agentDef.name);
|
|
3927
|
+
}
|
|
2872
3928
|
// Register in workers.json so `agents:kill` can find this agent
|
|
2873
3929
|
let workerPid;
|
|
2874
3930
|
try {
|
|
@@ -2881,8 +3937,8 @@ export class WorkflowRunner {
|
|
|
2881
3937
|
this.registerWorker(agentName, agentDef.cli, step.task ?? '', workerPid);
|
|
2882
3938
|
// Register the spawned agent in Relaycast for observability + start heartbeat
|
|
2883
3939
|
if (this.relayApiKey) {
|
|
2884
|
-
const agentClient = await this.registerRelaycastExternalAgent(
|
|
2885
|
-
console.warn(`[WorkflowRunner] Failed to register ${
|
|
3940
|
+
const agentClient = await this.registerRelaycastExternalAgent(liveAgent.name, `Workflow agent for step "${step.name}" (${agentDef.cli})`).catch((err) => {
|
|
3941
|
+
console.warn(`[WorkflowRunner] Failed to register ${liveAgent.name} in Relaycast:`, err?.message ?? err);
|
|
2886
3942
|
return null;
|
|
2887
3943
|
});
|
|
2888
3944
|
// Keep the agent online in the dashboard while it's working
|
|
@@ -2895,30 +3951,30 @@ export class WorkflowRunner {
|
|
|
2895
3951
|
const channelAgent = await this.ensureRelaycastRunnerAgent().catch(() => null);
|
|
2896
3952
|
await channelAgent?.channels.invite(this.channel, agent.name).catch(() => { });
|
|
2897
3953
|
}
|
|
2898
|
-
//
|
|
2899
|
-
this.
|
|
3954
|
+
// Keep operational assignment chatter out of the agent coordination channel.
|
|
3955
|
+
this.log(`[${step.name}] Assigned to ${agent.name}`);
|
|
2900
3956
|
// Register agent handle for hub-mediated nudging
|
|
2901
3957
|
this.activeAgentHandles.set(agentName, agent);
|
|
2902
3958
|
// Wait for agent to exit, with idle nudging if configured
|
|
2903
|
-
exitResult = await this.waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs);
|
|
3959
|
+
exitResult = await this.waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs, options.preserveOnIdle ?? this.shouldPreserveIdleSupervisor(agentDef, step, options.evidenceRole));
|
|
2904
3960
|
// Stop heartbeat now that agent has exited
|
|
2905
3961
|
stopHeartbeat?.();
|
|
2906
3962
|
if (exitResult === 'timeout') {
|
|
2907
|
-
//
|
|
2908
|
-
//
|
|
2909
|
-
|
|
2910
|
-
|
|
2911
|
-
|
|
2912
|
-
|
|
2913
|
-
|
|
2914
|
-
|
|
2915
|
-
|
|
2916
|
-
|
|
3963
|
+
// Grace-period fallback: before failing, check if the agent completed
|
|
3964
|
+
// its work but just failed to self-terminate. Run verification if
|
|
3965
|
+
// configured — a passing gate + timeout is better than a hard failure.
|
|
3966
|
+
let timeoutRecovered = false;
|
|
3967
|
+
if (step.verification) {
|
|
3968
|
+
const ptyOutput = (this.ptyOutputBuffers.get(agentName) ?? []).join('');
|
|
3969
|
+
const verificationResult = this.runVerification(step.verification, ptyOutput, step.name, undefined, { allowFailure: true });
|
|
3970
|
+
if (verificationResult.passed) {
|
|
3971
|
+
this.log(`[${step.name}] Agent timed out but verification passed — treating as complete`);
|
|
3972
|
+
this.postToChannel(`**[${step.name}]** Agent idle after completing work — verification passed, releasing`);
|
|
2917
3973
|
await agent.release();
|
|
2918
|
-
|
|
3974
|
+
timeoutRecovered = true;
|
|
2919
3975
|
}
|
|
2920
3976
|
}
|
|
2921
|
-
|
|
3977
|
+
if (!timeoutRecovered) {
|
|
2922
3978
|
await agent.release();
|
|
2923
3979
|
throw new Error(`Step "${step.name}" timed out after ${timeoutMs ?? 'unknown'}ms`);
|
|
2924
3980
|
}
|
|
@@ -2931,6 +3987,19 @@ export class WorkflowRunner {
|
|
|
2931
3987
|
// Snapshot PTY chunks before cleanup — we need them for output reading below
|
|
2932
3988
|
ptyChunks = this.ptyOutputBuffers.get(agentName) ?? [];
|
|
2933
3989
|
this.lastFailedStepOutput.set(step.name, ptyChunks.join(''));
|
|
3990
|
+
if (ptyChunks.length > 0 || agent?.exitCode !== undefined || agent?.exitSignal !== undefined) {
|
|
3991
|
+
this.captureStepTerminalEvidence(evidenceStepName, {
|
|
3992
|
+
stdout: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
|
|
3993
|
+
combined: ptyChunks.length > 0 ? ptyChunks.join('') : undefined,
|
|
3994
|
+
}, {
|
|
3995
|
+
exitCode: agent?.exitCode,
|
|
3996
|
+
exitSignal: agent?.exitSignal,
|
|
3997
|
+
}, {
|
|
3998
|
+
sender: options.logicalName ?? agentDef.name,
|
|
3999
|
+
actor: agent?.name ?? agentName,
|
|
4000
|
+
role: options.evidenceRole ?? agentDef.role ?? 'agent',
|
|
4001
|
+
});
|
|
4002
|
+
}
|
|
2934
4003
|
// Always clean up PTY resources — prevents fd leaks if spawnPty or waitForExit throws
|
|
2935
4004
|
stopHeartbeat?.();
|
|
2936
4005
|
this.activeAgentHandles.delete(agentName);
|
|
@@ -2943,6 +4012,7 @@ export class WorkflowRunner {
|
|
|
2943
4012
|
}
|
|
2944
4013
|
this.unregisterWorker(agentName);
|
|
2945
4014
|
this.supervisedRuntimeAgents.delete(agentName);
|
|
4015
|
+
this.runtimeStepAgents.delete(agentName);
|
|
2946
4016
|
}
|
|
2947
4017
|
let output;
|
|
2948
4018
|
if (ptyChunks.length > 0) {
|
|
@@ -2959,6 +4029,13 @@ export class WorkflowRunner {
|
|
|
2959
4029
|
? 'Agent completed (idle — treated as done)'
|
|
2960
4030
|
: `Agent exited (${exitResult})`;
|
|
2961
4031
|
}
|
|
4032
|
+
if (ptyChunks.length === 0) {
|
|
4033
|
+
this.captureStepTerminalEvidence(evidenceStepName, { stdout: output, combined: output }, { exitCode: agent?.exitCode, exitSignal: agent?.exitSignal }, {
|
|
4034
|
+
sender: options.logicalName ?? agentDef.name,
|
|
4035
|
+
actor: agent?.name ?? agentName,
|
|
4036
|
+
role: options.evidenceRole ?? agentDef.role ?? 'agent',
|
|
4037
|
+
});
|
|
4038
|
+
}
|
|
2962
4039
|
return {
|
|
2963
4040
|
output,
|
|
2964
4041
|
exitCode: agent?.exitCode,
|
|
@@ -2986,13 +4063,35 @@ export class WorkflowRunner {
|
|
|
2986
4063
|
'orchestrator',
|
|
2987
4064
|
'auctioneer',
|
|
2988
4065
|
]);
|
|
4066
|
+
isLeadLikeAgent(agentDef, roleOverride) {
|
|
4067
|
+
if (agentDef.preset === 'lead')
|
|
4068
|
+
return true;
|
|
4069
|
+
const role = (roleOverride ?? agentDef.role ?? '').toLowerCase();
|
|
4070
|
+
const nameLC = agentDef.name.toLowerCase();
|
|
4071
|
+
return [...WorkflowRunner.HUB_ROLES].some((hubRole) => new RegExp(`\\b${hubRole}\\b`, 'i').test(nameLC) ||
|
|
4072
|
+
new RegExp(`\\b${hubRole}\\b`, 'i').test(role));
|
|
4073
|
+
}
|
|
4074
|
+
shouldPreserveIdleSupervisor(agentDef, step, evidenceRole) {
|
|
4075
|
+
if (evidenceRole && /\bowner\b/i.test(evidenceRole)) {
|
|
4076
|
+
return true;
|
|
4077
|
+
}
|
|
4078
|
+
if (!this.isLeadLikeAgent(agentDef, evidenceRole)) {
|
|
4079
|
+
return false;
|
|
4080
|
+
}
|
|
4081
|
+
const task = step.task ?? '';
|
|
4082
|
+
return /\b(wait|waiting|monitor|supervis|check inbox|check.*channel|poll|DONE|_DONE|signal|handoff)\b/i.test(task);
|
|
4083
|
+
}
|
|
2989
4084
|
/**
|
|
2990
4085
|
* Wait for agent exit with idle detection and nudging.
|
|
2991
4086
|
* If no idle nudge config is set, falls through to simple waitForExit.
|
|
2992
4087
|
*/
|
|
2993
|
-
async waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs) {
|
|
4088
|
+
async waitForExitWithIdleNudging(agent, agentDef, step, timeoutMs, preserveIdleSupervisor = false) {
|
|
2994
4089
|
const nudgeConfig = this.currentConfig?.swarm.idleNudge;
|
|
2995
4090
|
if (!nudgeConfig) {
|
|
4091
|
+
if (preserveIdleSupervisor) {
|
|
4092
|
+
this.log(`[${step.name}] Supervising agent "${agent.name}" may idle while waiting — using exit-only completion`);
|
|
4093
|
+
return agent.waitForExit(timeoutMs);
|
|
4094
|
+
}
|
|
2996
4095
|
// Idle = done: race exit against idle. Whichever fires first completes the step.
|
|
2997
4096
|
const result = await Promise.race([
|
|
2998
4097
|
agent.waitForExit(timeoutMs).then((r) => ({ kind: 'exit', result: r })),
|
|
@@ -3011,6 +4110,7 @@ export class WorkflowRunner {
|
|
|
3011
4110
|
const escalateAfterMs = nudgeConfig.escalateAfterMs ?? 120_000;
|
|
3012
4111
|
const maxNudges = nudgeConfig.maxNudges ?? 1;
|
|
3013
4112
|
let nudgeCount = 0;
|
|
4113
|
+
let preservedSupervisorNoticeSent = false;
|
|
3014
4114
|
const startTime = Date.now();
|
|
3015
4115
|
while (true) {
|
|
3016
4116
|
// Calculate remaining time from overall timeout
|
|
@@ -3045,6 +4145,14 @@ export class WorkflowRunner {
|
|
|
3045
4145
|
this.emit({ type: 'step:nudged', runId: this.currentRunId ?? '', stepName: step.name, nudgeCount });
|
|
3046
4146
|
continue;
|
|
3047
4147
|
}
|
|
4148
|
+
if (preserveIdleSupervisor) {
|
|
4149
|
+
if (!preservedSupervisorNoticeSent) {
|
|
4150
|
+
this.log(`[${step.name}] Supervising agent "${agent.name}" stayed idle after ${nudgeCount} nudge(s) — preserving until exit or timeout`);
|
|
4151
|
+
this.postToChannel(`**[${step.name}]** Supervising agent \`${agent.name}\` is waiting on handoff — keeping it alive until it exits or the step times out`);
|
|
4152
|
+
preservedSupervisorNoticeSent = true;
|
|
4153
|
+
}
|
|
4154
|
+
continue;
|
|
4155
|
+
}
|
|
3048
4156
|
// Exhausted nudges — force-release
|
|
3049
4157
|
this.postToChannel(`**[${step.name}]** Agent \`${agent.name}\` still idle after ${nudgeCount} nudge(s) — force-releasing`);
|
|
3050
4158
|
this.emit({ type: 'step:force-released', runId: this.currentRunId ?? '', stepName: step.name });
|
|
@@ -3114,7 +4222,31 @@ export class WorkflowRunner {
|
|
|
3114
4222
|
return undefined;
|
|
3115
4223
|
}
|
|
3116
4224
|
// ── Verification ────────────────────────────────────────────────────────
|
|
3117
|
-
runVerification(check, output, stepName, injectedTaskText) {
|
|
4225
|
+
runVerification(check, output, stepName, injectedTaskText, options) {
|
|
4226
|
+
const fail = (message) => {
|
|
4227
|
+
const observedAt = new Date().toISOString();
|
|
4228
|
+
this.recordStepToolSideEffect(stepName, {
|
|
4229
|
+
type: 'verification_observed',
|
|
4230
|
+
detail: message,
|
|
4231
|
+
observedAt,
|
|
4232
|
+
raw: { passed: false, type: check.type, value: check.value },
|
|
4233
|
+
});
|
|
4234
|
+
this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
|
|
4235
|
+
kind: 'verification_failed',
|
|
4236
|
+
source: 'verification',
|
|
4237
|
+
text: message,
|
|
4238
|
+
observedAt,
|
|
4239
|
+
value: check.value,
|
|
4240
|
+
});
|
|
4241
|
+
if (options?.allowFailure) {
|
|
4242
|
+
return {
|
|
4243
|
+
passed: false,
|
|
4244
|
+
completionReason: 'failed_verification',
|
|
4245
|
+
error: message,
|
|
4246
|
+
};
|
|
4247
|
+
}
|
|
4248
|
+
throw new WorkflowCompletionError(message, 'failed_verification');
|
|
4249
|
+
};
|
|
3118
4250
|
switch (check.type) {
|
|
3119
4251
|
case 'output_contains': {
|
|
3120
4252
|
// Guard against false positives: the PTY captures the injected task text
|
|
@@ -3128,12 +4260,12 @@ export class WorkflowRunner {
|
|
|
3128
4260
|
const first = output.indexOf(token);
|
|
3129
4261
|
const hasSecond = first !== -1 && output.includes(token, first + token.length);
|
|
3130
4262
|
if (!hasSecond) {
|
|
3131
|
-
|
|
4263
|
+
return fail(`Verification failed for "${stepName}": output does not contain "${token}" ` +
|
|
3132
4264
|
`(token found only in task injection — agent must output it explicitly)`);
|
|
3133
4265
|
}
|
|
3134
4266
|
}
|
|
3135
4267
|
else if (!output.includes(token)) {
|
|
3136
|
-
|
|
4268
|
+
return fail(`Verification failed for "${stepName}": output does not contain "${token}"`);
|
|
3137
4269
|
}
|
|
3138
4270
|
break;
|
|
3139
4271
|
}
|
|
@@ -3142,13 +4274,37 @@ export class WorkflowRunner {
|
|
|
3142
4274
|
break;
|
|
3143
4275
|
case 'file_exists':
|
|
3144
4276
|
if (!existsSync(path.resolve(this.cwd, check.value))) {
|
|
3145
|
-
|
|
4277
|
+
return fail(`Verification failed for "${stepName}": file "${check.value}" does not exist`);
|
|
3146
4278
|
}
|
|
3147
4279
|
break;
|
|
3148
4280
|
case 'custom':
|
|
3149
4281
|
// Custom verifications are evaluated by callers; no-op here
|
|
3150
|
-
|
|
3151
|
-
}
|
|
4282
|
+
return { passed: false };
|
|
4283
|
+
}
|
|
4284
|
+
if (options?.completionMarkerFound === false) {
|
|
4285
|
+
this.log(`[${stepName}] Verification passed without legacy STEP_COMPLETE marker; allowing completion`);
|
|
4286
|
+
}
|
|
4287
|
+
const successMessage = options?.completionMarkerFound === false
|
|
4288
|
+
? `Verification passed without legacy STEP_COMPLETE marker`
|
|
4289
|
+
: `Verification passed`;
|
|
4290
|
+
const observedAt = new Date().toISOString();
|
|
4291
|
+
this.recordStepToolSideEffect(stepName, {
|
|
4292
|
+
type: 'verification_observed',
|
|
4293
|
+
detail: successMessage,
|
|
4294
|
+
observedAt,
|
|
4295
|
+
raw: { passed: true, type: check.type, value: check.value },
|
|
4296
|
+
});
|
|
4297
|
+
this.getOrCreateStepEvidenceRecord(stepName).evidence.coordinationSignals.push({
|
|
4298
|
+
kind: 'verification_passed',
|
|
4299
|
+
source: 'verification',
|
|
4300
|
+
text: successMessage,
|
|
4301
|
+
observedAt,
|
|
4302
|
+
value: check.value,
|
|
4303
|
+
});
|
|
4304
|
+
return {
|
|
4305
|
+
passed: true,
|
|
4306
|
+
completionReason: 'completed_verified',
|
|
4307
|
+
};
|
|
3152
4308
|
}
|
|
3153
4309
|
// ── State helpers ─────────────────────────────────────────────────────
|
|
3154
4310
|
async updateRunStatus(runId, status, error) {
|
|
@@ -3164,13 +4320,16 @@ export class WorkflowRunner {
|
|
|
3164
4320
|
}
|
|
3165
4321
|
await this.db.updateRun(runId, patch);
|
|
3166
4322
|
}
|
|
3167
|
-
async markStepFailed(state, error, runId, exitInfo) {
|
|
4323
|
+
async markStepFailed(state, error, runId, exitInfo, completionReason) {
|
|
4324
|
+
this.captureStepTerminalEvidence(state.row.stepName, {}, exitInfo);
|
|
3168
4325
|
state.row.status = 'failed';
|
|
3169
4326
|
state.row.error = error;
|
|
4327
|
+
state.row.completionReason = completionReason;
|
|
3170
4328
|
state.row.completedAt = new Date().toISOString();
|
|
3171
4329
|
await this.db.updateStep(state.row.id, {
|
|
3172
4330
|
status: 'failed',
|
|
3173
4331
|
error,
|
|
4332
|
+
completionReason,
|
|
3174
4333
|
completedAt: state.row.completedAt,
|
|
3175
4334
|
updatedAt: new Date().toISOString(),
|
|
3176
4335
|
});
|
|
@@ -3182,6 +4341,7 @@ export class WorkflowRunner {
|
|
|
3182
4341
|
exitCode: exitInfo?.exitCode,
|
|
3183
4342
|
exitSignal: exitInfo?.exitSignal,
|
|
3184
4343
|
});
|
|
4344
|
+
this.finalizeStepEvidence(state.row.stepName, 'failed', state.row.completedAt, completionReason);
|
|
3185
4345
|
}
|
|
3186
4346
|
async markDownstreamSkipped(failedStepName, allSteps, stepStates, runId) {
|
|
3187
4347
|
const queue = [failedStepName];
|
|
@@ -3275,7 +4435,7 @@ export class WorkflowRunner {
|
|
|
3275
4435
|
'RELAY SETUP — do this FIRST before any other relay tool:\n' +
|
|
3276
4436
|
`1. Call: register(name="${agentName}")\n` +
|
|
3277
4437
|
' This authenticates you in the Relaycast workspace.\n' +
|
|
3278
|
-
' ALL relay tools (
|
|
4438
|
+
' ALL relay tools (mcp__relaycast__dm_send, mcp__relaycast__inbox_check, mcp__relaycast__message_post, etc.) require\n' +
|
|
3279
4439
|
' registration first — they will fail with "Not registered" otherwise.\n' +
|
|
3280
4440
|
`2. Your agent name is "${agentName}" — use this exact name when registering.`);
|
|
3281
4441
|
}
|
|
@@ -3297,10 +4457,10 @@ export class WorkflowRunner {
|
|
|
3297
4457
|
'If it involves multiple independent subtasks, touches many files, or could take a long time, ' +
|
|
3298
4458
|
'you should break it down and delegate to helper agents to avoid timeouts.\n\n' +
|
|
3299
4459
|
'Option 1 — Spawn relay agents (for real parallel coding work):\n' +
|
|
3300
|
-
' -
|
|
3301
|
-
' - Coordinate via
|
|
3302
|
-
' - Check on them with
|
|
3303
|
-
' - Clean up when done:
|
|
4460
|
+
' - mcp__relaycast__agent_add(name="helper-1", cli="claude", task="Specific subtask description")\n' +
|
|
4461
|
+
' - Coordinate via mcp__relaycast__dm_send(to="helper-1", text="...")\n' +
|
|
4462
|
+
' - Check on them with mcp__relaycast__inbox_check()\n' +
|
|
4463
|
+
' - Clean up when done: mcp__relaycast__agent_remove(name="helper-1")\n\n' +
|
|
3304
4464
|
subAgentOption +
|
|
3305
4465
|
'Guidelines:\n' +
|
|
3306
4466
|
'- You are the lead — delegate but stay in control, track progress, integrate results\n' +
|
|
@@ -3311,9 +4471,23 @@ export class WorkflowRunner {
|
|
|
3311
4471
|
' "RELAY SETUP: First call register(name=\'<exact-agent-name>\') before any other relay tool."');
|
|
3312
4472
|
}
|
|
3313
4473
|
/** Post a message to the workflow channel. Fire-and-forget — never throws or blocks. */
|
|
3314
|
-
postToChannel(text) {
|
|
4474
|
+
postToChannel(text, options = {}) {
|
|
3315
4475
|
if (!this.relayApiKey || !this.channel)
|
|
3316
4476
|
return;
|
|
4477
|
+
this.recordChannelEvidence(text, options);
|
|
4478
|
+
const stepName = options.stepName ?? this.inferStepNameFromChannelText(text);
|
|
4479
|
+
if (stepName) {
|
|
4480
|
+
this.recordStepToolSideEffect(stepName, {
|
|
4481
|
+
type: 'post_channel_message',
|
|
4482
|
+
detail: text.slice(0, 240),
|
|
4483
|
+
raw: {
|
|
4484
|
+
actor: options.actor,
|
|
4485
|
+
role: options.role,
|
|
4486
|
+
target: options.target ?? this.channel,
|
|
4487
|
+
origin: options.origin ?? 'runner_post',
|
|
4488
|
+
},
|
|
4489
|
+
});
|
|
4490
|
+
}
|
|
3317
4491
|
this.ensureRelaycastRunnerAgent()
|
|
3318
4492
|
.then((agent) => agent.send(this.channel, text))
|
|
3319
4493
|
.catch(() => {
|
|
@@ -3471,6 +4645,9 @@ export class WorkflowRunner {
|
|
|
3471
4645
|
output: state.row.output,
|
|
3472
4646
|
error: state.row.error,
|
|
3473
4647
|
verificationPassed: state.row.status === 'completed' && stepsWithVerification.has(name),
|
|
4648
|
+
completionMode: state.row.completionReason
|
|
4649
|
+
? this.buildStepCompletionDecision(name, state.row.completionReason)?.mode
|
|
4650
|
+
: undefined,
|
|
3474
4651
|
});
|
|
3475
4652
|
}
|
|
3476
4653
|
return outcomes;
|
|
@@ -3603,24 +4780,30 @@ export class WorkflowRunner {
|
|
|
3603
4780
|
/** Persist step output to disk and post full output as a channel message. */
|
|
3604
4781
|
async persistStepOutput(runId, stepName, output) {
|
|
3605
4782
|
// 1. Write to disk
|
|
4783
|
+
const outputPath = path.join(this.getStepOutputDir(runId), `${stepName}.md`);
|
|
3606
4784
|
try {
|
|
3607
4785
|
const dir = this.getStepOutputDir(runId);
|
|
3608
4786
|
mkdirSync(dir, { recursive: true });
|
|
3609
4787
|
const cleaned = WorkflowRunner.stripAnsi(output);
|
|
3610
|
-
await writeFile(
|
|
4788
|
+
await writeFile(outputPath, cleaned);
|
|
3611
4789
|
}
|
|
3612
4790
|
catch {
|
|
3613
4791
|
// Non-critical
|
|
3614
4792
|
}
|
|
4793
|
+
this.recordStepToolSideEffect(stepName, {
|
|
4794
|
+
type: 'persist_step_output',
|
|
4795
|
+
detail: `Persisted step output to ${this.normalizeEvidencePath(outputPath)}`,
|
|
4796
|
+
raw: { path: outputPath },
|
|
4797
|
+
});
|
|
3615
4798
|
// 2. Post scrubbed output as a single channel message (most recent tail only)
|
|
3616
4799
|
const scrubbed = WorkflowRunner.scrubForChannel(output);
|
|
3617
4800
|
if (scrubbed.length === 0) {
|
|
3618
|
-
this.postToChannel(`**[${stepName}]** Step completed — output written to disk
|
|
4801
|
+
this.postToChannel(`**[${stepName}]** Step completed — output written to disk`, { stepName });
|
|
3619
4802
|
return;
|
|
3620
4803
|
}
|
|
3621
4804
|
const maxMsg = 2000;
|
|
3622
4805
|
const preview = scrubbed.length > maxMsg ? scrubbed.slice(-maxMsg) : scrubbed;
|
|
3623
|
-
this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n
|
|
4806
|
+
this.postToChannel(`**[${stepName}] Output:**\n\`\`\`\n${preview}\n\`\`\``, { stepName });
|
|
3624
4807
|
}
|
|
3625
4808
|
/** Load persisted step output from disk. */
|
|
3626
4809
|
loadStepOutput(runId, stepName) {
|