agent-relay 2.3.12 → 2.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +20 -20
- package/packages/acp-bridge/package.json +2 -2
- package/packages/bridge/package.json +7 -7
- package/packages/broker-sdk/dist/protocol.d.ts +4 -0
- package/packages/broker-sdk/dist/protocol.d.ts.map +1 -1
- package/packages/broker-sdk/dist/relay.d.ts +6 -0
- package/packages/broker-sdk/dist/relay.d.ts.map +1 -1
- package/packages/broker-sdk/dist/relay.js +8 -0
- package/packages/broker-sdk/dist/relay.js.map +1 -1
- package/packages/broker-sdk/dist/relaycast.d.ts +17 -1
- package/packages/broker-sdk/dist/relaycast.d.ts.map +1 -1
- package/packages/broker-sdk/dist/relaycast.js +79 -5
- package/packages/broker-sdk/dist/relaycast.js.map +1 -1
- package/packages/broker-sdk/dist/workflows/index.d.ts +1 -0
- package/packages/broker-sdk/dist/workflows/index.d.ts.map +1 -1
- package/packages/broker-sdk/dist/workflows/index.js +1 -0
- package/packages/broker-sdk/dist/workflows/index.js.map +1 -1
- package/packages/broker-sdk/dist/workflows/run.d.ts +3 -1
- package/packages/broker-sdk/dist/workflows/run.d.ts.map +1 -1
- package/packages/broker-sdk/dist/workflows/run.js +4 -0
- package/packages/broker-sdk/dist/workflows/run.js.map +1 -1
- package/packages/broker-sdk/dist/workflows/runner.d.ts +13 -0
- package/packages/broker-sdk/dist/workflows/runner.d.ts.map +1 -1
- package/packages/broker-sdk/dist/workflows/runner.js +279 -29
- package/packages/broker-sdk/dist/workflows/runner.js.map +1 -1
- package/packages/broker-sdk/dist/workflows/trajectory.d.ts +80 -0
- package/packages/broker-sdk/dist/workflows/trajectory.d.ts.map +1 -0
- package/packages/broker-sdk/dist/workflows/trajectory.js +362 -0
- package/packages/broker-sdk/dist/workflows/trajectory.js.map +1 -0
- package/packages/broker-sdk/dist/workflows/types.d.ts +12 -0
- package/packages/broker-sdk/dist/workflows/types.d.ts.map +1 -1
- package/packages/broker-sdk/package.json +2 -2
- package/packages/broker-sdk/src/__tests__/workflow-trajectory.test.ts +408 -0
- package/packages/broker-sdk/src/protocol.ts +5 -0
- package/packages/broker-sdk/src/relay.ts +11 -0
- package/packages/broker-sdk/src/relaycast.ts +83 -5
- package/packages/broker-sdk/src/workflows/README.md +30 -0
- package/packages/broker-sdk/src/workflows/index.ts +1 -0
- package/packages/broker-sdk/src/workflows/run.ts +9 -1
- package/packages/broker-sdk/src/workflows/runner.ts +349 -28
- package/packages/broker-sdk/src/workflows/trajectory.ts +507 -0
- package/packages/broker-sdk/src/workflows/types.ts +15 -0
- package/packages/broker-sdk/tsconfig.json +1 -0
- package/packages/broker-sdk/vitest.config.ts +9 -0
- package/packages/config/package.json +2 -2
- package/packages/continuity/package.json +2 -2
- package/packages/daemon/package.json +12 -12
- package/packages/hooks/package.json +4 -4
- package/packages/mcp/package.json +5 -5
- package/packages/memory/package.json +2 -2
- package/packages/policy/package.json +2 -2
- package/packages/protocol/package.json +1 -1
- package/packages/resiliency/package.json +1 -1
- package/packages/sdk/package.json +3 -3
- package/packages/spawner/package.json +1 -1
- package/packages/state/package.json +1 -1
- package/packages/storage/package.json +2 -2
- package/packages/telemetry/package.json +1 -1
- package/packages/trajectory/package.json +2 -2
- package/packages/user-directory/package.json +2 -2
- package/packages/utils/package.json +3 -3
- package/packages/wrapper/package.json +5 -5
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { AgentRelayOptions } from '../relay.js';
|
|
2
|
-
import type { WorkflowRunRow } from './types.js';
|
|
2
|
+
import type { TrajectoryConfig, WorkflowRunRow } from './types.js';
|
|
3
3
|
import { WorkflowRunner, type WorkflowEventListener, type VariableContext } from './runner.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -16,6 +16,8 @@ export interface RunWorkflowOptions {
|
|
|
16
16
|
relay?: AgentRelayOptions;
|
|
17
17
|
/** Progress callback for workflow events. */
|
|
18
18
|
onEvent?: WorkflowEventListener;
|
|
19
|
+
/** Override trajectory config. Set to false to disable trajectory recording. */
|
|
20
|
+
trajectories?: TrajectoryConfig | false;
|
|
19
21
|
}
|
|
20
22
|
|
|
21
23
|
/**
|
|
@@ -43,5 +45,11 @@ export async function runWorkflow(
|
|
|
43
45
|
}
|
|
44
46
|
|
|
45
47
|
const config = await runner.parseYamlFile(yamlPath);
|
|
48
|
+
|
|
49
|
+
// Allow programmatic trajectory override
|
|
50
|
+
if (options.trajectories !== undefined) {
|
|
51
|
+
config.trajectories = options.trajectories;
|
|
52
|
+
}
|
|
53
|
+
|
|
46
54
|
return runner.execute(config, options.workflow, options.vars);
|
|
47
55
|
}
|
|
@@ -27,12 +27,14 @@ import type {
|
|
|
27
27
|
WorkflowStepRow,
|
|
28
28
|
WorkflowStepStatus,
|
|
29
29
|
} from './types.js';
|
|
30
|
+
import { WorkflowTrajectory, type StepOutcome } from './trajectory.js';
|
|
30
31
|
|
|
31
32
|
// ── AgentRelay SDK imports ──────────────────────────────────────────────────
|
|
32
33
|
|
|
33
34
|
// Import from sub-paths to avoid pulling in the full @relaycast/sdk dependency.
|
|
34
35
|
import { AgentRelay } from '../relay.js';
|
|
35
36
|
import type { Agent, AgentRelayOptions } from '../relay.js';
|
|
37
|
+
import { RelaycastApi } from '../relaycast.js';
|
|
36
38
|
|
|
37
39
|
// ── DB adapter interface ────────────────────────────────────────────────────
|
|
38
40
|
|
|
@@ -95,6 +97,9 @@ export class WorkflowRunner {
|
|
|
95
97
|
private readonly summaryDir: string;
|
|
96
98
|
|
|
97
99
|
private relay?: AgentRelay;
|
|
100
|
+
private relaycastApi?: RelaycastApi;
|
|
101
|
+
private channel?: string;
|
|
102
|
+
private trajectory?: WorkflowTrajectory;
|
|
98
103
|
private abortController?: AbortController;
|
|
99
104
|
private paused = false;
|
|
100
105
|
private pauseResolver?: () => void;
|
|
@@ -120,18 +125,24 @@ export class WorkflowRunner {
|
|
|
120
125
|
private async ensureRelaycastApiKey(channel: string): Promise<void> {
|
|
121
126
|
if (process.env.RELAY_API_KEY) return;
|
|
122
127
|
|
|
123
|
-
// Check cached credentials
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
128
|
+
// Check cached credentials — prefer per-project cache (written by the local
|
|
129
|
+
// relay daemon) over the legacy global cache so concurrent workflows from
|
|
130
|
+
// different repos never stomp each other's credentials.
|
|
131
|
+
const projectCachePath = path.join(this.cwd, '.agent-relay', 'relaycast.json');
|
|
132
|
+
const globalCachePath = path.join(homedir(), '.agent-relay', 'relaycast.json');
|
|
133
|
+
|
|
134
|
+
for (const cachePath of [projectCachePath, globalCachePath]) {
|
|
135
|
+
if (existsSync(cachePath)) {
|
|
136
|
+
try {
|
|
137
|
+
const raw = await readFile(cachePath, 'utf-8');
|
|
138
|
+
const creds = JSON.parse(raw);
|
|
139
|
+
if (creds.api_key) {
|
|
140
|
+
process.env.RELAY_API_KEY = creds.api_key;
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
143
|
+
} catch {
|
|
144
|
+
// Cache corrupt — try next path
|
|
132
145
|
}
|
|
133
|
-
} catch {
|
|
134
|
-
// Cache corrupt — fall through to auto-create
|
|
135
146
|
}
|
|
136
147
|
}
|
|
137
148
|
|
|
@@ -159,11 +170,12 @@ export class WorkflowRunner {
|
|
|
159
170
|
throw new Error('Relaycast workspace response missing api_key');
|
|
160
171
|
}
|
|
161
172
|
|
|
162
|
-
// Cache credentials
|
|
163
|
-
|
|
173
|
+
// Cache credentials in the per-project directory so concurrent workflows
|
|
174
|
+
// from different repos each get their own workspace credentials.
|
|
175
|
+
const cacheDir = path.dirname(projectCachePath);
|
|
164
176
|
await mkdir(cacheDir, { recursive: true, mode: 0o700 });
|
|
165
177
|
await writeFile(
|
|
166
|
-
|
|
178
|
+
projectCachePath,
|
|
167
179
|
JSON.stringify({
|
|
168
180
|
workspace_id: workspaceId,
|
|
169
181
|
api_key: apiKey,
|
|
@@ -468,11 +480,19 @@ export class WorkflowRunner {
|
|
|
468
480
|
this.abortController = new AbortController();
|
|
469
481
|
this.paused = false;
|
|
470
482
|
|
|
483
|
+
// Initialize trajectory recording
|
|
484
|
+
this.trajectory = new WorkflowTrajectory(resolved.trajectories, runId, this.cwd);
|
|
485
|
+
|
|
471
486
|
try {
|
|
472
487
|
await this.updateRunStatus(runId, 'running');
|
|
473
488
|
this.emit({ type: 'run:started', runId });
|
|
474
489
|
|
|
490
|
+
// Analyze DAG for trajectory context
|
|
491
|
+
const dagInfo = this.analyzeDAG(workflow.steps);
|
|
492
|
+
await this.trajectory.start(workflow.name, workflow.steps.length, dagInfo);
|
|
493
|
+
|
|
475
494
|
const channel = resolved.swarm.channel ?? 'general';
|
|
495
|
+
this.channel = channel;
|
|
476
496
|
await this.ensureRelaycastApiKey(channel);
|
|
477
497
|
|
|
478
498
|
this.relay = new AgentRelay({
|
|
@@ -480,6 +500,17 @@ export class WorkflowRunner {
|
|
|
480
500
|
channels: [channel],
|
|
481
501
|
});
|
|
482
502
|
|
|
503
|
+
// Create the dedicated workflow channel and join it
|
|
504
|
+
this.relaycastApi = new RelaycastApi({
|
|
505
|
+
agentName: 'WorkflowRunner',
|
|
506
|
+
cachePath: path.join(this.cwd, '.agent-relay', 'relaycast.json'),
|
|
507
|
+
});
|
|
508
|
+
await this.relaycastApi.createChannel(channel, workflow.description);
|
|
509
|
+
await this.relaycastApi.joinChannel(channel);
|
|
510
|
+
this.postToChannel(
|
|
511
|
+
`Workflow **${workflow.name}** started — ${workflow.steps.length} steps, pattern: ${resolved.swarm.pattern}`,
|
|
512
|
+
);
|
|
513
|
+
|
|
483
514
|
const agentMap = new Map<string, AgentDefinition>();
|
|
484
515
|
for (const agent of resolved.agents) {
|
|
485
516
|
agentMap.set(agent.name, agent);
|
|
@@ -501,11 +532,29 @@ export class WorkflowRunner {
|
|
|
501
532
|
if (allCompleted) {
|
|
502
533
|
await this.updateRunStatus(runId, 'completed');
|
|
503
534
|
this.emit({ type: 'run:completed', runId });
|
|
535
|
+
|
|
536
|
+
// Complete trajectory with summary
|
|
537
|
+
const outcomes = this.collectOutcomes(stepStates, workflow.steps);
|
|
538
|
+
const summary = this.trajectory.buildRunSummary(outcomes);
|
|
539
|
+
const confidence = this.trajectory.computeConfidence(outcomes);
|
|
540
|
+
await this.trajectory.complete(summary, confidence, {
|
|
541
|
+
learnings: this.trajectory.extractLearnings(outcomes),
|
|
542
|
+
challenges: this.trajectory.extractChallenges(outcomes),
|
|
543
|
+
});
|
|
544
|
+
|
|
545
|
+
// Post rich completion report to channel
|
|
546
|
+
this.postCompletionReport(workflow.name, outcomes, summary, confidence);
|
|
504
547
|
} else {
|
|
505
548
|
const failedStep = [...stepStates.values()].find((s) => s.row.status === 'failed');
|
|
506
549
|
const errorMsg = failedStep?.row.error ?? 'One or more steps failed';
|
|
507
550
|
await this.updateRunStatus(runId, 'failed', errorMsg);
|
|
508
551
|
this.emit({ type: 'run:failed', runId, error: errorMsg });
|
|
552
|
+
|
|
553
|
+
const outcomes = this.collectOutcomes(stepStates, workflow.steps);
|
|
554
|
+
this.postFailureReport(workflow.name, outcomes, errorMsg);
|
|
555
|
+
|
|
556
|
+
// Abandon trajectory on failure
|
|
557
|
+
await this.trajectory.abandon(errorMsg);
|
|
509
558
|
}
|
|
510
559
|
} catch (err) {
|
|
511
560
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
@@ -514,12 +563,19 @@ export class WorkflowRunner {
|
|
|
514
563
|
|
|
515
564
|
if (status === 'cancelled') {
|
|
516
565
|
this.emit({ type: 'run:cancelled', runId });
|
|
566
|
+
this.postToChannel(`Workflow **${workflow.name}** cancelled`);
|
|
567
|
+
await this.trajectory.abandon('Cancelled by user');
|
|
517
568
|
} else {
|
|
518
569
|
this.emit({ type: 'run:failed', runId, error: errorMsg });
|
|
570
|
+
this.postToChannel(`Workflow failed: ${errorMsg}`);
|
|
571
|
+
await this.trajectory.abandon(errorMsg);
|
|
519
572
|
}
|
|
520
573
|
} finally {
|
|
521
574
|
await this.relay?.shutdown();
|
|
522
575
|
this.relay = undefined;
|
|
576
|
+
this.relaycastApi = undefined;
|
|
577
|
+
this.channel = undefined;
|
|
578
|
+
this.trajectory = undefined;
|
|
523
579
|
this.abortController = undefined;
|
|
524
580
|
}
|
|
525
581
|
|
|
@@ -567,10 +623,21 @@ export class WorkflowRunner {
|
|
|
567
623
|
this.abortController = new AbortController();
|
|
568
624
|
this.paused = false;
|
|
569
625
|
|
|
626
|
+
// Initialize trajectory for resumed run
|
|
627
|
+
this.trajectory = new WorkflowTrajectory(config.trajectories, runId, this.cwd);
|
|
628
|
+
|
|
570
629
|
try {
|
|
571
630
|
await this.updateRunStatus(runId, 'running');
|
|
572
631
|
|
|
632
|
+
const pendingCount = [...stepStates.values()].filter((s) => s.row.status === 'pending').length;
|
|
633
|
+
await this.trajectory.start(
|
|
634
|
+
workflow.name,
|
|
635
|
+
workflow.steps.length,
|
|
636
|
+
`Resumed run: ${pendingCount} pending steps of ${workflow.steps.length} total`,
|
|
637
|
+
);
|
|
638
|
+
|
|
573
639
|
const resumeChannel = config.swarm.channel ?? 'general';
|
|
640
|
+
this.channel = resumeChannel;
|
|
574
641
|
await this.ensureRelaycastApiKey(resumeChannel);
|
|
575
642
|
|
|
576
643
|
this.relay = new AgentRelay({
|
|
@@ -578,6 +645,17 @@ export class WorkflowRunner {
|
|
|
578
645
|
channels: [resumeChannel],
|
|
579
646
|
});
|
|
580
647
|
|
|
648
|
+
// Ensure channel exists and join it for resumed runs
|
|
649
|
+
this.relaycastApi = new RelaycastApi({
|
|
650
|
+
agentName: 'WorkflowRunner',
|
|
651
|
+
cachePath: path.join(this.cwd, '.agent-relay', 'relaycast.json'),
|
|
652
|
+
});
|
|
653
|
+
await this.relaycastApi.createChannel(resumeChannel);
|
|
654
|
+
await this.relaycastApi.joinChannel(resumeChannel);
|
|
655
|
+
this.postToChannel(
|
|
656
|
+
`Workflow **${workflow.name}** resumed — ${pendingCount} pending steps`,
|
|
657
|
+
);
|
|
658
|
+
|
|
581
659
|
const agentMap = new Map<string, AgentDefinition>();
|
|
582
660
|
for (const agent of config.agents) {
|
|
583
661
|
agentMap.set(agent.name, agent);
|
|
@@ -592,19 +670,38 @@ export class WorkflowRunner {
|
|
|
592
670
|
if (allCompleted) {
|
|
593
671
|
await this.updateRunStatus(runId, 'completed');
|
|
594
672
|
this.emit({ type: 'run:completed', runId });
|
|
673
|
+
|
|
674
|
+
const outcomes = this.collectOutcomes(stepStates, workflow.steps);
|
|
675
|
+
const summary = this.trajectory.buildRunSummary(outcomes);
|
|
676
|
+
const confidence = this.trajectory.computeConfidence(outcomes);
|
|
677
|
+
await this.trajectory.complete(summary, confidence, {
|
|
678
|
+
learnings: this.trajectory.extractLearnings(outcomes),
|
|
679
|
+
challenges: this.trajectory.extractChallenges(outcomes),
|
|
680
|
+
});
|
|
681
|
+
|
|
682
|
+
this.postCompletionReport(workflow.name, outcomes, summary, confidence);
|
|
595
683
|
} else {
|
|
596
684
|
const failedStep = [...stepStates.values()].find((s) => s.row.status === 'failed');
|
|
597
685
|
const errorMsg = failedStep?.row.error ?? 'One or more steps failed';
|
|
598
686
|
await this.updateRunStatus(runId, 'failed', errorMsg);
|
|
599
687
|
this.emit({ type: 'run:failed', runId, error: errorMsg });
|
|
688
|
+
|
|
689
|
+
const outcomes = this.collectOutcomes(stepStates, workflow.steps);
|
|
690
|
+
this.postFailureReport(workflow.name, outcomes, errorMsg);
|
|
691
|
+
await this.trajectory.abandon(errorMsg);
|
|
600
692
|
}
|
|
601
693
|
} catch (err) {
|
|
602
694
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
603
695
|
await this.updateRunStatus(runId, 'failed', errorMsg);
|
|
604
696
|
this.emit({ type: 'run:failed', runId, error: errorMsg });
|
|
697
|
+
this.postToChannel(`Workflow failed: ${errorMsg}`);
|
|
698
|
+
await this.trajectory.abandon(errorMsg);
|
|
605
699
|
} finally {
|
|
606
700
|
await this.relay?.shutdown();
|
|
607
701
|
this.relay = undefined;
|
|
702
|
+
this.relaycastApi = undefined;
|
|
703
|
+
this.channel = undefined;
|
|
704
|
+
this.trajectory = undefined;
|
|
608
705
|
this.abortController = undefined;
|
|
609
706
|
}
|
|
610
707
|
|
|
@@ -660,23 +757,40 @@ export class WorkflowRunner {
|
|
|
660
757
|
break;
|
|
661
758
|
}
|
|
662
759
|
|
|
760
|
+
// Begin a track chapter if multiple parallel steps are starting
|
|
761
|
+
if (readySteps.length > 1 && this.trajectory) {
|
|
762
|
+
const trackNames = readySteps.map((s) => s.name).join(', ');
|
|
763
|
+
await this.trajectory.beginTrack(trackNames);
|
|
764
|
+
}
|
|
765
|
+
|
|
663
766
|
const results = await Promise.allSettled(
|
|
664
767
|
readySteps.map((step) =>
|
|
665
768
|
this.executeStep(step, stepStates, agentMap, errorHandling, runId),
|
|
666
769
|
),
|
|
667
770
|
);
|
|
668
771
|
|
|
772
|
+
// Collect outcomes from this batch for convergence reflection
|
|
773
|
+
const batchOutcomes: StepOutcome[] = [];
|
|
774
|
+
|
|
669
775
|
for (let i = 0; i < results.length; i++) {
|
|
670
776
|
const result = results[i];
|
|
671
777
|
const step = readySteps[i];
|
|
778
|
+
const state = stepStates.get(step.name);
|
|
672
779
|
|
|
673
780
|
if (result.status === 'rejected') {
|
|
674
781
|
const error = result.reason instanceof Error ? result.reason.message : String(result.reason);
|
|
675
|
-
const state = stepStates.get(step.name);
|
|
676
782
|
if (state && state.row.status !== 'failed') {
|
|
677
783
|
await this.markStepFailed(state, error, runId);
|
|
678
784
|
}
|
|
679
785
|
|
|
786
|
+
batchOutcomes.push({
|
|
787
|
+
name: step.name,
|
|
788
|
+
agent: step.agent,
|
|
789
|
+
status: 'failed',
|
|
790
|
+
attempts: (state?.row.retryCount ?? 0) + 1,
|
|
791
|
+
error,
|
|
792
|
+
});
|
|
793
|
+
|
|
680
794
|
if (strategy === 'fail-fast') {
|
|
681
795
|
// Mark all pending downstream steps as skipped
|
|
682
796
|
await this.markDownstreamSkipped(step.name, workflow.steps, stepStates, runId);
|
|
@@ -686,8 +800,33 @@ export class WorkflowRunner {
|
|
|
686
800
|
if (strategy === 'continue') {
|
|
687
801
|
await this.markDownstreamSkipped(step.name, workflow.steps, stepStates, runId);
|
|
688
802
|
}
|
|
803
|
+
} else {
|
|
804
|
+
batchOutcomes.push({
|
|
805
|
+
name: step.name,
|
|
806
|
+
agent: step.agent,
|
|
807
|
+
status: state?.row.status === 'completed' ? 'completed' : 'failed',
|
|
808
|
+
attempts: (state?.row.retryCount ?? 0) + 1,
|
|
809
|
+
output: state?.row.output,
|
|
810
|
+
verificationPassed: state?.row.status === 'completed' && step.verification !== undefined,
|
|
811
|
+
});
|
|
689
812
|
}
|
|
690
813
|
}
|
|
814
|
+
|
|
815
|
+
// Reflect at convergence when a parallel batch completes
|
|
816
|
+
if (readySteps.length > 1 && this.trajectory?.shouldReflectOnConverge()) {
|
|
817
|
+
const label = readySteps.map((s) => s.name).join(' + ');
|
|
818
|
+
// Find steps that this batch unblocks
|
|
819
|
+
const completedNames = new Set(batchOutcomes.filter((o) => o.status === 'completed').map((o) => o.name));
|
|
820
|
+
const unblocked = workflow.steps
|
|
821
|
+
.filter((s) => s.dependsOn?.some((dep) => completedNames.has(dep)))
|
|
822
|
+
.filter((s) => {
|
|
823
|
+
const st = stepStates.get(s.name);
|
|
824
|
+
return st && st.row.status === 'pending';
|
|
825
|
+
})
|
|
826
|
+
.map((s) => s.name);
|
|
827
|
+
|
|
828
|
+
await this.trajectory.synthesizeAndReflect(label, batchOutcomes, unblocked.length > 0 ? unblocked : undefined);
|
|
829
|
+
}
|
|
691
830
|
}
|
|
692
831
|
}
|
|
693
832
|
|
|
@@ -733,11 +872,13 @@ export class WorkflowRunner {
|
|
|
733
872
|
|
|
734
873
|
if (attempt > 0) {
|
|
735
874
|
this.emit({ type: 'step:retrying', runId, stepName: step.name, attempt });
|
|
875
|
+
this.postToChannel(`**[${step.name}]** Retrying (attempt ${attempt + 1}/${maxRetries + 1})`);
|
|
736
876
|
state.row.retryCount = attempt;
|
|
737
877
|
await this.db.updateStep(state.row.id, {
|
|
738
878
|
retryCount: attempt,
|
|
739
879
|
updatedAt: new Date().toISOString(),
|
|
740
880
|
});
|
|
881
|
+
await this.trajectory?.stepRetrying(step, attempt, maxRetries);
|
|
741
882
|
await this.delay(retryDelay);
|
|
742
883
|
}
|
|
743
884
|
|
|
@@ -751,6 +892,8 @@ export class WorkflowRunner {
|
|
|
751
892
|
updatedAt: new Date().toISOString(),
|
|
752
893
|
});
|
|
753
894
|
this.emit({ type: 'step:started', runId, stepName: step.name });
|
|
895
|
+
this.postToChannel(`**[${step.name}]** Started (agent: ${agentDef.name})`);
|
|
896
|
+
await this.trajectory?.stepStarted(step, agentDef.name);
|
|
754
897
|
|
|
755
898
|
// Resolve step-output variables (e.g. {{steps.plan.output}}) at execution time
|
|
756
899
|
const stepOutputContext = this.buildStepOutputContext(stepStates);
|
|
@@ -776,13 +919,24 @@ export class WorkflowRunner {
|
|
|
776
919
|
updatedAt: new Date().toISOString(),
|
|
777
920
|
});
|
|
778
921
|
this.emit({ type: 'step:completed', runId, stepName: step.name, output });
|
|
922
|
+
this.postToChannel(
|
|
923
|
+
`**[${step.name}]** Completed\n${output.slice(0, 500)}${output.length > 500 ? '\n...(truncated)' : ''}`,
|
|
924
|
+
);
|
|
925
|
+
await this.trajectory?.stepCompleted(step, output, attempt + 1);
|
|
779
926
|
return;
|
|
780
927
|
} catch (err) {
|
|
781
928
|
lastError = err instanceof Error ? err.message : String(err);
|
|
782
929
|
}
|
|
783
930
|
}
|
|
784
931
|
|
|
785
|
-
// All retries exhausted —
|
|
932
|
+
// All retries exhausted — record decision and mark failed
|
|
933
|
+
await this.trajectory?.stepFailed(step, lastError ?? 'Unknown error', maxRetries + 1, maxRetries);
|
|
934
|
+
await this.trajectory?.decide(
|
|
935
|
+
`How to handle ${step.name} failure`,
|
|
936
|
+
'exhausted',
|
|
937
|
+
`All ${maxRetries + 1} attempts failed: ${lastError ?? 'Unknown error'}`,
|
|
938
|
+
);
|
|
939
|
+
this.postToChannel(`**[${step.name}]** Failed: ${lastError ?? 'Unknown error'}`);
|
|
786
940
|
await this.markStepFailed(state, lastError ?? 'Unknown error', runId);
|
|
787
941
|
throw new Error(`Step "${step.name}" failed after ${maxRetries} retries: ${lastError ?? 'Unknown error'}`);
|
|
788
942
|
}
|
|
@@ -796,33 +950,85 @@ export class WorkflowRunner {
|
|
|
796
950
|
throw new Error('AgentRelay not initialized');
|
|
797
951
|
}
|
|
798
952
|
|
|
953
|
+
// Append self-termination instructions to the task
|
|
954
|
+
const agentName = `${step.name}-${this.generateShortId()}`;
|
|
955
|
+
const taskWithExit = step.task + '\n\n---\n' +
|
|
956
|
+
'IMPORTANT: When you have fully completed this task, you MUST self-terminate by calling ' +
|
|
957
|
+
`the MCP tool: remove_agent(name="${agentName}", reason="Task completed"). ` +
|
|
958
|
+
'Do not wait for further input — release yourself immediately after finishing.';
|
|
959
|
+
|
|
960
|
+
const agentChannels = this.channel ? [this.channel] : agentDef.channels;
|
|
961
|
+
|
|
799
962
|
const agent = await this.relay.spawnPty({
|
|
800
|
-
name:
|
|
963
|
+
name: agentName,
|
|
801
964
|
cli: agentDef.cli,
|
|
802
965
|
args: agentDef.constraints?.model ? ['--model', agentDef.constraints.model] : [],
|
|
803
|
-
channels:
|
|
966
|
+
channels: agentChannels,
|
|
967
|
+
task: taskWithExit,
|
|
804
968
|
idleThresholdSecs: agentDef.constraints?.idleThresholdSecs,
|
|
805
969
|
});
|
|
806
970
|
|
|
807
|
-
//
|
|
808
|
-
|
|
809
|
-
|
|
971
|
+
// Register the spawned agent in Relaycast for observability + start heartbeat
|
|
972
|
+
let stopHeartbeat: (() => void) | undefined;
|
|
973
|
+
if (this.relaycastApi) {
|
|
974
|
+
const agentClient = await this.relaycastApi.registerExternalAgent(
|
|
975
|
+
agent.name,
|
|
976
|
+
`Workflow agent for step "${step.name}" (${agentDef.cli})`,
|
|
977
|
+
).catch(() => null);
|
|
978
|
+
|
|
979
|
+
// Keep the agent online in the dashboard while it's working
|
|
980
|
+
if (agentClient) {
|
|
981
|
+
stopHeartbeat = this.relaycastApi.startHeartbeat(agentClient);
|
|
982
|
+
}
|
|
983
|
+
}
|
|
810
984
|
|
|
811
|
-
//
|
|
985
|
+
// Invite the spawned agent to the workflow channel
|
|
986
|
+
if (this.channel && this.relaycastApi) {
|
|
987
|
+
await this.relaycastApi.inviteToChannel(this.channel, agent.name).catch(() => {});
|
|
988
|
+
}
|
|
989
|
+
|
|
990
|
+
// Post task assignment to channel for observability
|
|
991
|
+
const taskPreview = step.task.slice(0, 500) + (step.task.length > 500 ? '...' : '');
|
|
992
|
+
this.postToChannel(`**[${step.name}]** Assigned to \`${agent.name}\`:\n${taskPreview}`);
|
|
993
|
+
|
|
994
|
+
// Task was already delivered as initial_task via spawnPty above.
|
|
995
|
+
|
|
996
|
+
// Wait for agent to exit (self-termination via /exit)
|
|
812
997
|
const exitResult = await agent.waitForExit(timeoutMs);
|
|
813
998
|
|
|
999
|
+
// Stop heartbeat now that agent has exited
|
|
1000
|
+
stopHeartbeat?.();
|
|
1001
|
+
|
|
814
1002
|
if (exitResult === 'timeout') {
|
|
815
|
-
|
|
816
|
-
|
|
1003
|
+
// Safety net: check if the verification file exists before giving up.
|
|
1004
|
+
// The agent may have completed work but failed to /exit.
|
|
1005
|
+
if (step.verification?.type === 'file_exists') {
|
|
1006
|
+
const verifyPath = path.resolve(this.cwd, step.verification.value);
|
|
1007
|
+
if (existsSync(verifyPath)) {
|
|
1008
|
+
this.postToChannel(
|
|
1009
|
+
`**[${step.name}]** Agent idle after completing work — releasing`,
|
|
1010
|
+
);
|
|
1011
|
+
await agent.release();
|
|
1012
|
+
// Fall through to read output below
|
|
1013
|
+
} else {
|
|
1014
|
+
await agent.release();
|
|
1015
|
+
throw new Error(`Step "${step.name}" timed out after ${timeoutMs}ms`);
|
|
1016
|
+
}
|
|
1017
|
+
} else {
|
|
1018
|
+
await agent.release();
|
|
1019
|
+
throw new Error(`Step "${step.name}" timed out after ${timeoutMs}ms`);
|
|
1020
|
+
}
|
|
817
1021
|
}
|
|
818
1022
|
|
|
819
1023
|
// Read output from summary file if it exists
|
|
820
1024
|
const summaryPath = path.join(this.summaryDir, `${step.name}.md`);
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
1025
|
+
const output = existsSync(summaryPath)
|
|
1026
|
+
? await readFile(summaryPath, 'utf-8')
|
|
1027
|
+
: exitResult === 'timeout'
|
|
1028
|
+
? 'Agent completed (released after idle timeout)'
|
|
1029
|
+
: `Agent exited (${exitResult})`;
|
|
824
1030
|
|
|
825
|
-
return
|
|
1031
|
+
return output;
|
|
826
1032
|
}
|
|
827
1033
|
|
|
828
1034
|
// ── Verification ────────────────────────────────────────────────────────
|
|
@@ -912,6 +1118,13 @@ export class WorkflowRunner {
|
|
|
912
1118
|
updatedAt: new Date().toISOString(),
|
|
913
1119
|
});
|
|
914
1120
|
this.emit({ type: 'step:skipped', runId, stepName: step.name });
|
|
1121
|
+
this.postToChannel(`**[${step.name}]** Skipped — upstream dependency "${current}" failed`);
|
|
1122
|
+
await this.trajectory?.stepSkipped(step, `Upstream dependency "${current}" failed`);
|
|
1123
|
+
await this.trajectory?.decide(
|
|
1124
|
+
`Whether to skip ${step.name}`,
|
|
1125
|
+
'skip',
|
|
1126
|
+
`Upstream dependency "${current}" failed`,
|
|
1127
|
+
);
|
|
915
1128
|
queue.push(step.name);
|
|
916
1129
|
}
|
|
917
1130
|
}
|
|
@@ -938,6 +1151,114 @@ export class WorkflowRunner {
|
|
|
938
1151
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
939
1152
|
}
|
|
940
1153
|
|
|
1154
|
+
// ── Channel messaging ──────────────────────────────────────────────────
|
|
1155
|
+
|
|
1156
|
+
/**
 * Post a message to the workflow channel without awaiting delivery.
 *
 * Does nothing when either the Relaycast API client or the channel is unset.
 * Channel posts are purely informational, so every delivery failure is
 * discarded: both a rejected promise and a synchronous throw from
 * `sendToChannel` are swallowed so this method never throws into the
 * workflow execution path.
 *
 * @param text - Message body to send to the current workflow channel.
 */
private postToChannel(text: string): void {
  const api = this.relaycastApi;
  const channel = this.channel;
  if (!api || !channel) return;

  try {
    // `void` marks the promise as intentionally not awaited (fire-and-forget).
    void api.sendToChannel(channel, text).catch(() => {
      // Non-critical — don't break workflow execution
    });
  } catch {
    // sendToChannel failed before returning a promise — equally non-critical.
  }
}
|
|
1163
|
+
|
|
1164
|
+
/**
 * Build a markdown completion report and post it to the workflow channel.
 *
 * The report contains a heading, the run summary, the confidence as a whole
 * percentage, one bullet per completed step (annotated when verified and/or
 * when it needed more than one attempt), one bullet per skipped step, and,
 * if any outcome took more than one attempt, a trailing "Retries" section.
 *
 * @param workflowName - Name shown in the report heading.
 * @param outcomes - Per-step outcomes to list.
 * @param summary - Summary text placed under the heading.
 * @param confidence - Confidence value; multiplied by 100 and rounded for display.
 */
private postCompletionReport(
  workflowName: string,
  outcomes: StepOutcome[],
  summary: string,
  confidence: number,
): void {
  const report: string[] = [
    `## Workflow **${workflowName}** — Complete`,
    '',
    summary,
    `Confidence: ${Math.round(confidence * 100)}%`,
    '',
    '### Steps',
  ];

  // Completed steps are listed first, in outcome order.
  for (const o of outcomes) {
    if (o.status !== 'completed') continue;
    const verifiedNote = o.verificationPassed ? ' (verified)' : '';
    const attemptsNote = o.attempts > 1 ? ` after ${o.attempts} attempts` : '';
    report.push(`- **${o.name}** (${o.agent}) — passed${verifiedNote}${attemptsNote}`);
  }

  // Skipped steps follow the completed ones.
  for (const o of outcomes) {
    if (o.status === 'skipped') {
      report.push(`- **${o.name}** — skipped`);
    }
  }

  // The retry section covers every outcome with more than one attempt, whatever its status.
  const retried = outcomes.filter((o) => o.attempts > 1);
  if (retried.length > 0) {
    report.push('', '### Retries', ...retried.map((o) => `- ${o.name}: ${o.attempts} attempts`));
  }

  this.postToChannel(report.join('\n'));
}
|
|
1197
|
+
|
|
1198
|
+
/**
 * Build a markdown failure report and post it to the workflow channel.
 *
 * The report contains a heading, a "passed/total" count together with the
 * error message, and then the steps grouped as passed, failed (with each
 * step's own error, or 'unknown' when it has none) and skipped.
 *
 * @param workflowName - Name shown in the report heading.
 * @param outcomes - Per-step outcomes to list.
 * @param errorMsg - Error message shown on the summary line.
 */
private postFailureReport(
  workflowName: string,
  outcomes: StepOutcome[],
  errorMsg: string,
): void {
  const passedLines: string[] = [];
  const failedLines: string[] = [];
  const skippedLines: string[] = [];

  // Sort each outcome into its group in one pass; order within a group is preserved.
  for (const o of outcomes) {
    switch (o.status) {
      case 'completed':
        passedLines.push(`- **${o.name}** (${o.agent}) — passed`);
        break;
      case 'failed':
        failedLines.push(`- **${o.name}** (${o.agent}) — FAILED: ${o.error ?? 'unknown'}`);
        break;
      case 'skipped':
        skippedLines.push(`- **${o.name}** — skipped`);
        break;
    }
  }

  const report: string[] = [
    `## Workflow **${workflowName}** — Failed`,
    '',
    `${passedLines.length}/${outcomes.length} steps passed. Error: ${errorMsg}`,
    '',
    '### Steps',
    ...passedLines,
    ...failedLines,
    ...skippedLines,
  ];

  this.postToChannel(report.join('\n'));
}
|
|
1221
|
+
|
|
1222
|
+
// ── Trajectory helpers ────────────────────────────────────────────────
|
|
1223
|
+
|
|
1224
|
+
/**
 * Summarize the shape of the step graph as a one-line description for the
 * trajectory record.
 *
 * A step with no (or an empty) `dependsOn` list counts as a root; any other
 * step counts as dependent. The root count is only mentioned when there is
 * more than one root, and the dependent count only when it is non-zero.
 *
 * NOTE(review): this method appends 'DAG validated, no cycles' without doing
 * any validation itself — presumably the caller has validated already; confirm.
 *
 * @param steps - Workflow steps to summarize.
 * @returns Comma-separated summary, e.g. "Parsed 3 steps, 2 parallel tracks, ...".
 */
private analyzeDAG(steps: WorkflowStep[]): string {
  let rootCount = 0;
  let dependentCount = 0;
  for (const step of steps) {
    if (step.dependsOn?.length) {
      dependentCount += 1;
    } else {
      rootCount += 1;
    }
  }

  const segments = [`Parsed ${steps.length} steps`];
  if (rootCount > 1) segments.push(`${rootCount} parallel tracks`);
  if (dependentCount > 0) segments.push(`${dependentCount} dependent steps`);
  segments.push('DAG validated, no cycles');

  return segments.join(', ');
}
|
|
1239
|
+
|
|
1240
|
+
/**
 * Convert the tracked step states into `StepOutcome` records, in the map's
 * iteration order.
 *
 * Status mapping: 'completed' and 'skipped' are kept as-is; every other row
 * status is reported as 'failed'.
 * NOTE(review): that includes any step still pending or running when this is
 * called, if such states can occur at the call sites — confirm.
 *
 * `attempts` is the row's retry count plus one. `verificationPassed` is true
 * only for completed steps whose definition (looked up by name in `steps`)
 * declares a verification.
 *
 * @param stepStates - Step states keyed by step name.
 * @param steps - Optional step definitions, used only to find which steps declare a verification.
 * @returns One outcome per entry in `stepStates`.
 */
private collectOutcomes(stepStates: Map<string, StepState>, steps?: WorkflowStep[]): StepOutcome[] {
  // Names of the steps that declare a verification.
  const verifiedNames = new Set<string>();
  for (const s of steps ?? []) {
    if (s.verification) verifiedNames.add(s.name);
  }

  return [...stepStates].map(([name, { row }]): StepOutcome => {
    const isCompleted = row.status === 'completed';

    let status: StepOutcome['status'];
    if (isCompleted) {
      status = 'completed';
    } else if (row.status === 'skipped') {
      status = 'skipped';
    } else {
      status = 'failed';
    }

    return {
      name,
      agent: row.agentName,
      status,
      attempts: row.retryCount + 1,
      output: row.output,
      error: row.error,
      verificationPassed: isCompleted && verifiedNames.has(name),
    };
  });
}
|
|
1261
|
+
|
|
941
1262
|
// ── ID generation ─────────────────────────────────────────────────────
|
|
942
1263
|
|
|
943
1264
|
private generateId(): string {
|