@defai.digital/agent-parallel 13.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,595 @@
+ /**
+  * Parallel Agent Orchestrator
+  *
+  * Main orchestration logic for executing multiple agents in parallel
+  * with DAG-based dependency management.
+  *
+  * Invariants:
+  * - INV-APE-001: Concurrent agents MUST NOT exceed maxConcurrentAgents
+  * - INV-APE-002: Dependencies honored (DAG ordering)
+  * - INV-APE-003: Shared context immutable during execution
+  * - INV-APE-004: Result aggregation follows configured strategy
+  * - INV-APE-005: Timeout enforced per-agent independently
+  */
+
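+ // Illustrative note (inferred from this file, not from package docs): given tasks A, B
+ // and C where C depends on A and B, the DAG analyzer below yields layers [[A, B], [C]].
+ // Layers run sequentially; A and B run concurrently (capped by maxConcurrentAgents),
+ // and C starts only after both have completed.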
+ import {
+   type AgentParallelTask,
+   type AgentParallelTaskResult,
+   type AgentParallelGroupResult,
+   type AgentParallelExecutionConfig,
+   type ExecutionPlan,
+   type ExecutionLayer,
+   createDefaultAgentParallelExecutionConfig,
+   ParallelExecutionErrorCodes,
+ } from '@defai.digital/contracts';
+
+ import type {
+   AgentParallelOrchestrator,
+   AgentParallelOrchestratorOptions,
+   ParallelProgressEvent,
+   TaskLayer,
+ } from './types.js';
+
+ import { createDAGAnalyzer, DAGAnalysisError } from './dag-analyzer.js';
+ import { createContextManager } from './context-manager.js';
+ import { createResultAggregator } from './result-aggregator.js';
+
+ /**
+  * Error thrown during parallel execution
+  */
+ export class ParallelExecutionError extends Error {
+   constructor(
+     public readonly code: string,
+     message: string,
+     public readonly taskId?: string
+   ) {
+     super(message);
+     this.name = 'ParallelExecutionError';
+   }
+ }
+
+ /**
+  * Creates a parallel agent orchestrator
+  */
+ export function createAgentParallelOrchestrator(
+   options: AgentParallelOrchestratorOptions
+ ): AgentParallelOrchestrator {
+   const { agentExecutor, defaultConfig, onProgress } = options;
+
+   const dagAnalyzer = createDAGAnalyzer();
+   const contextManager = createContextManager();
+   const resultAggregator = createResultAggregator();
+
+   const config: AgentParallelExecutionConfig = {
+     ...createDefaultAgentParallelExecutionConfig(),
+     ...defaultConfig,
+   };
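+   // Example (illustrative): with defaultConfig = { maxConcurrentAgents: 4 }, the defaults
+   // from createDefaultAgentParallelExecutionConfig() are overridden here, and a per-call
+   // configOverride passed to executeParallel() later takes precedence over both.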
+
+   // Track active executions for proper cancellation scoping
+   // Each execution has its own cancelled state to prevent cross-execution interference
+   const activeExecutions = new Map<string, { cancelled: boolean }>();
+   let currentGroupId: string | null = null;
+
+   /**
+    * Check if a specific execution is cancelled
+    */
+   function isCancelled(groupId: string): boolean {
+     return activeExecutions.get(groupId)?.cancelled ?? false;
+   }
+
+   /**
+    * Mark an execution as cancelled
+    */
+   function setCancelled(groupId: string): void {
+     const execution = activeExecutions.get(groupId);
+     if (execution) {
+       execution.cancelled = true;
+     }
+   }
+
+   /**
+    * Emit progress event
+    */
+   function emitProgress(event: Omit<ParallelProgressEvent, 'timestamp' | 'groupId'>, groupId?: string): void {
+     const gId = groupId ?? currentGroupId;
+     if (onProgress && gId) {
+       onProgress({
+         ...event,
+         timestamp: new Date().toISOString(),
+         groupId: gId,
+       });
+     }
+   }
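+   // Note: executeTask and executeLayer pass their groupId explicitly; the currentGroupId
+   // fallback only matters for callers that omit it and would be ambiguous when several
+   // executions run concurrently.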
+
+   /**
+    * Execute a single task with timeout
+    * INV-APE-005: Timeout enforced independently
+    */
+   async function executeTask(
+     task: AgentParallelTask,
+     sharedContext: Record<string, unknown> | undefined,
+     taskConfig: AgentParallelExecutionConfig,
+     layerIndex: number,
+     groupId: string
+   ): Promise<AgentParallelTaskResult> {
+     const startTime = Date.now();
+     const startedAt = new Date().toISOString();
+
+     emitProgress({
+       type: 'task.started',
+       taskId: task.taskId,
+       agentId: task.agentId,
+       layerIndex,
+     }, groupId);
+
+     // Task-specific timeout or default
+     const timeout = task.timeout ?? taskConfig.agentTimeout;
+
+     try {
+       // Check cancellation (scoped to this execution)
+       if (isCancelled(groupId)) {
+         return {
+           taskId: task.taskId,
+           agentId: task.agentId,
+           status: 'cancelled',
+           success: false,
+           errorCode: ParallelExecutionErrorCodes.CANCELLED,
+           durationMs: Date.now() - startTime,
+           layer: layerIndex,
+           startedAt,
+           completedAt: new Date().toISOString(),
+           retryCount: 0,
+         };
+       }
+
+       // Check if agent exists
+       const exists = await agentExecutor.exists(task.agentId);
+       if (!exists) {
+         const errorMsg = `Agent "${task.agentId}" not found`;
+         const result: AgentParallelTaskResult = {
+           taskId: task.taskId,
+           agentId: task.agentId,
+           status: 'failed',
+           success: false,
+           error: errorMsg,
+           errorCode: ParallelExecutionErrorCodes.AGENT_NOT_FOUND,
+           durationMs: Date.now() - startTime,
+           layer: layerIndex,
+           startedAt,
+           completedAt: new Date().toISOString(),
+           retryCount: 0,
+         };
+
+         emitProgress({
+           type: 'task.failed',
+           taskId: task.taskId,
+           agentId: task.agentId,
+           layerIndex,
+           message: errorMsg,
+         }, groupId);
+
+         return result;
+       }
+
+       // Build input with shared context
+       const input = taskConfig.shareContext && sharedContext
+         ? { ...sharedContext, __taskInput: task.input }
+         : task.input;
+
+       // Execute with timeout
+       // Build execution request, only include optional fields if defined
+       const execRequest: Parameters<typeof agentExecutor.execute>[0] = {
+         agentId: task.agentId,
+         input,
+         timeout,
+       };
+       if (task.provider) execRequest.provider = task.provider;
+       if (task.model) execRequest.model = task.model;
+
+       const executePromise = agentExecutor.execute(execRequest);
+
+       // Create timeout with cleanup to prevent memory leak
+       let timeoutId: ReturnType<typeof setTimeout> | undefined;
+       const timeoutPromise = new Promise<never>((_, reject) => {
+         timeoutId = setTimeout(() => {
+           reject(new Error(`Task timeout after ${timeout}ms`));
+         }, timeout);
+       });
+
+       let execResult;
+       try {
+         execResult = await Promise.race([executePromise, timeoutPromise]);
+       } finally {
+         // Clean up timeout to prevent memory leak
+         if (timeoutId !== undefined) {
+           clearTimeout(timeoutId);
+         }
+       }
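+       // Note: Promise.race settles with whichever promise finishes first. The clearTimeout
+       // in the finally block releases the timer when the agent wins the race; the losing
+       // promise is not aborted, only ignored, so a slow agent keeps running in the
+       // background even after a timeout result has been returned.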
+
+       const completedAt = new Date().toISOString();
+       const durationMs = Date.now() - startTime;
+
+       if (execResult.success) {
+         emitProgress({
+           type: 'task.completed',
+           taskId: task.taskId,
+           agentId: task.agentId,
+           layerIndex,
+         }, groupId);
+
+         return {
+           taskId: task.taskId,
+           agentId: task.agentId,
+           status: 'completed',
+           success: true,
+           output: execResult.output,
+           durationMs,
+           layer: layerIndex,
+           startedAt,
+           completedAt,
+           retryCount: 0,
+         };
+       } else {
+         emitProgress({
+           type: 'task.failed',
+           taskId: task.taskId,
+           agentId: task.agentId,
+           layerIndex,
+           message: execResult.error ?? 'Task execution failed',
+         }, groupId);
+
+         return {
+           taskId: task.taskId,
+           agentId: task.agentId,
+           status: 'failed',
+           success: false,
+           error: execResult.error,
+           errorCode: execResult.errorCode ?? ParallelExecutionErrorCodes.TASK_FAILED,
+           durationMs,
+           layer: layerIndex,
+           startedAt,
+           completedAt,
+           retryCount: 0,
+         };
+       }
+     } catch (error) {
+       const completedAt = new Date().toISOString();
+       const durationMs = Date.now() - startTime;
+       const isTimeout = error instanceof Error && error.message.includes('timeout');
+
+       emitProgress({
+         type: 'task.failed',
+         taskId: task.taskId,
+         agentId: task.agentId,
+         layerIndex,
+         message: error instanceof Error ? error.message : 'Unknown error',
+       }, groupId);
+
+       return {
+         taskId: task.taskId,
+         agentId: task.agentId,
+         status: isTimeout ? 'timeout' : 'failed',
+         success: false,
+         error: error instanceof Error ? error.message : 'Unknown error',
+         errorCode: isTimeout
+           ? ParallelExecutionErrorCodes.TASK_TIMEOUT
+           : ParallelExecutionErrorCodes.TASK_FAILED,
+         durationMs,
+         layer: layerIndex,
+         startedAt,
+         completedAt,
+         retryCount: 0,
+       };
+     }
+   }
+
+   /**
+    * Execute a layer of tasks with concurrency limit
+    * INV-APE-001: Respects maxConcurrentAgents
+    */
+   async function executeLayer(
+     layer: TaskLayer,
+     sharedContext: Record<string, unknown> | undefined,
+     taskConfig: AgentParallelExecutionConfig,
+     failedTaskIds: Set<string>,
+     groupId: string
+   ): Promise<AgentParallelTaskResult[]> {
+     const results: AgentParallelTaskResult[] = [];
+     const pending = new Set<Promise<void>>();
+
+     emitProgress({
+       type: 'layer.started',
+       layerIndex: layer.index,
+       totalTasks: layer.tasks.length,
+     }, groupId);
+
+     // Filter out tasks whose dependencies failed
+     // INV-APE-201: Dependency cascading
+     const tasksToExecute: AgentParallelTask[] = [];
+     const skippedTasks: AgentParallelTask[] = [];
+
+     for (const task of layer.tasks) {
+       const dependencyFailed = task.dependencies.some((depId) => failedTaskIds.has(depId));
+       if (dependencyFailed) {
+         skippedTasks.push(task);
+       } else {
+         tasksToExecute.push(task);
+       }
+     }
+
+     // Mark skipped tasks
+     for (const task of skippedTasks) {
+       const now = new Date().toISOString();
+       const result: AgentParallelTaskResult = {
+         taskId: task.taskId,
+         agentId: task.agentId,
+         status: 'skipped',
+         success: false,
+         error: 'Dependency failed',
+         errorCode: ParallelExecutionErrorCodes.DEPENDENCY_FAILED,
+         durationMs: 0,
+         layer: layer.index,
+         startedAt: now,
+         completedAt: now,
+         retryCount: 0,
+       };
+       results.push(result);
+       failedTaskIds.add(task.taskId);
+
+       emitProgress({
+         type: 'task.skipped',
+         taskId: task.taskId,
+         agentId: task.agentId,
+         layerIndex: layer.index,
+         message: 'Dependency failed',
+       }, groupId);
+     }
+
+     // Execute remaining tasks with concurrency control
+     let taskIndex = 0;
+
+     async function executeAndCollect(task: AgentParallelTask): Promise<void> {
+       const result = await executeTask(task, sharedContext, taskConfig, layer.index, groupId);
+       results.push(result);
+
+       if (!result.success) {
+         failedTaskIds.add(task.taskId);
+
+         // Handle failFast strategy (scoped to this execution)
+         if (taskConfig.failureStrategy === 'failFast' && !isCancelled(groupId)) {
+           setCancelled(groupId);
+         }
+       }
+     }
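+     // The loop below acts as a bounded worker pool: at most taskConfig.maxConcurrentAgents
+     // promises live in `pending` at once; Promise.race waits for any one of them to settle,
+     // and the freed slot is refilled on the next pass.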
+
+     // INV-APE-001: Concurrency limit enforcement
+     while (taskIndex < tasksToExecute.length || pending.size > 0) {
+       // Check cancellation (scoped to this execution)
+       if (isCancelled(groupId) && taskConfig.failureStrategy === 'failFast') {
+         // Mark remaining as cancelled
+         while (taskIndex < tasksToExecute.length) {
+           const task = tasksToExecute[taskIndex++]!;
+           const now = new Date().toISOString();
+           results.push({
+             taskId: task.taskId,
+             agentId: task.agentId,
+             status: 'cancelled',
+             success: false,
+             errorCode: ParallelExecutionErrorCodes.CANCELLED,
+             durationMs: 0,
+             layer: layer.index,
+             startedAt: now,
+             completedAt: now,
+             retryCount: 0,
+           });
+         }
+         break;
+       }
+
+       // Start new tasks up to concurrency limit
+       while (
+         pending.size < taskConfig.maxConcurrentAgents &&
+         taskIndex < tasksToExecute.length
+       ) {
+         const task = tasksToExecute[taskIndex++]!;
+         const promise = executeAndCollect(task).finally(() => {
+           pending.delete(promise);
+         });
+         pending.add(promise);
+       }
+
+       // Wait for at least one to complete
+       if (pending.size > 0) {
+         await Promise.race(pending);
+       }
+     }
+
+     emitProgress({
+       type: 'layer.completed',
+       layerIndex: layer.index,
+       completedTasks: results.filter((r) => r.success).length,
+       failedTasks: results.filter((r) => !r.success).length,
+     }, groupId);
+
+     return results;
+   }
+
+   return {
+     getConfig(): AgentParallelExecutionConfig {
+       return { ...config };
+     },
+
+     buildExecutionPlan(tasks: AgentParallelTask[]): ExecutionPlan {
+       const planId = crypto.randomUUID();
+       const analysis = dagAnalyzer.analyze(tasks);
+
+       const layers: ExecutionLayer[] = analysis.layers.map((layer) => ({
+         index: layer.index,
+         tasks: layer.tasks,
+         taskCount: layer.tasks.length,
+       }));
+
+       return {
+         planId,
+         layers,
+         totalTasks: tasks.length,
+         totalLayers: analysis.totalLayers,
+         maxParallelism: analysis.maxParallelism,
+         hasCycles: analysis.hasCycles,
+         createdAt: new Date().toISOString(),
+       };
+     },
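+     // Illustrative (values inferred, not from package docs): for tasks A, B and C where C
+     // depends on A and B, the plan returned above would have totalLayers: 2 and
+     // hasCycles: false, with A and B in layer 0 and C in layer 1; maxParallelism would
+     // report the widest layer (here 2), assuming the DAG analyzer defines it that way.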
+
+     async executeParallel(
+       tasks: AgentParallelTask[],
+       configOverride?: Partial<AgentParallelExecutionConfig>,
+       sharedContext?: Record<string, unknown>
+     ): Promise<AgentParallelGroupResult> {
+       const groupId = crypto.randomUUID();
+       currentGroupId = groupId;
+       // Register this execution with its own cancellation state
+       activeExecutions.set(groupId, { cancelled: false });
+       const startTime = Date.now();
+       const startedAt = new Date().toISOString();
+
+       // Merge config
+       const taskConfig: AgentParallelExecutionConfig = {
+         ...config,
+         ...configOverride,
+       };
+
+       // Setup context if provided
+       // INV-APE-003: Context frozen before execution
+       // INV-APE-300: Context snapshot timing
+       if (sharedContext && taskConfig.shareContext) {
+         contextManager.create(sharedContext);
+       }
+
+       emitProgress({
+         type: 'execution.started',
+         totalTasks: tasks.length,
+       }, groupId);
+
+       try {
+         // Analyze DAG
+         // INV-APE-002: Dependencies honored
+         // INV-APE-200: Cycles detected
+         const analysis = dagAnalyzer.analyze(tasks);
+
+         // Track failed tasks for dependency cascading
+         const failedTaskIds = new Set<string>();
+
+         // Execute layers sequentially, tasks within layers in parallel
+         const allResults: AgentParallelTaskResult[] = [];
+         let peakConcurrency = 0;
+
+         for (const layer of analysis.layers) {
+           if (isCancelled(groupId) && taskConfig.failureStrategy === 'failFast') {
+             // Mark remaining layers as cancelled
+             for (const task of layer.tasks) {
+               const now = new Date().toISOString();
+               allResults.push({
+                 taskId: task.taskId,
+                 agentId: task.agentId,
+                 status: 'cancelled',
+                 success: false,
+                 errorCode: ParallelExecutionErrorCodes.CANCELLED,
+                 durationMs: 0,
+                 layer: layer.index,
+                 startedAt: now,
+                 completedAt: now,
+                 retryCount: 0,
+               });
+             }
+             continue;
+           }
+
+           const layerResults = await executeLayer(
+             layer,
+             sharedContext,
+             taskConfig,
+             failedTaskIds,
+             groupId
+           );
+           allResults.push(...layerResults);
+
+           // Track peak concurrency
+           const layerConcurrency = Math.min(layer.tasks.length, taskConfig.maxConcurrentAgents);
+           if (layerConcurrency > peakConcurrency) {
+             peakConcurrency = layerConcurrency;
+           }
+         }
+
+         // Aggregate results
+         // INV-APE-004: Follows configured strategy
+         const aggregatedOutput = resultAggregator.aggregate(allResults, {
+           strategy: taskConfig.resultAggregation,
+         });
+
+         const completedAt = new Date().toISOString();
+         const totalDurationMs = Date.now() - startTime;
+
+         const failedTasks = allResults
+           .filter((r) => !r.success && r.status !== 'cancelled' && r.status !== 'skipped')
+           .map((r) => r.taskId);
+
+         const cancelledTasks = allResults
+           .filter((r) => r.status === 'cancelled')
+           .map((r) => r.taskId);
+
+         const skippedTasks = allResults
+           .filter((r) => r.status === 'skipped')
+           .map((r) => r.taskId);
+
+         emitProgress({
+           type: 'execution.completed',
+           totalTasks: tasks.length,
+           completedTasks: allResults.filter((r) => r.success).length,
+           failedTasks: failedTasks.length,
+         }, groupId);
+
+         return {
+           groupId,
+           taskResults: allResults,
+           aggregatedOutput,
+           allSucceeded: failedTasks.length === 0 && cancelledTasks.length === 0,
+           failedTasks,
+           cancelledTasks: cancelledTasks.length > 0 ? cancelledTasks : undefined,
+           skippedTasks: skippedTasks.length > 0 ? skippedTasks : undefined,
+           totalDurationMs,
+           tasksExecuted: allResults.filter((r) => r.status !== 'skipped' && r.status !== 'cancelled').length,
+           tasksSkipped: skippedTasks.length + cancelledTasks.length,
+           layerCount: analysis.totalLayers,
+           peakConcurrency,
+           config: taskConfig,
+           startedAt,
+           completedAt,
+         };
+       } catch (error) {
+         // Handle DAG analysis errors
+         if (error instanceof DAGAnalysisError) {
+           throw new ParallelExecutionError(
+             error.code,
+             error.message
+           );
+         }
+         throw error;
+       } finally {
+         // Cleanup - remove execution state to prevent memory leak
+         contextManager.clear();
+         activeExecutions.delete(groupId);
+         currentGroupId = null;
+       }
+     },
+
+     cancel(): void {
+       // Cancel all active executions
+       for (const [gId, state] of activeExecutions) {
+         state.cancelled = true;
+         emitProgress({
+           type: 'execution.cancelled',
+         }, gId);
+       }
+     },
+   };
+ }
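
For orientation, a minimal usage sketch follows. It is illustrative only: the executor, task, and config shapes are inferred from the source above rather than from the package's published typings, so field names here are assumptions.

import { createAgentParallelOrchestrator } from '@defai.digital/agent-parallel';

// Stub executor with the two methods the orchestrator calls (exists/execute);
// the real contract lives in @defai.digital/contracts.
const agentExecutor = {
  exists: async (_agentId: string) => true,
  execute: async (request: { agentId: string; input: unknown; timeout: number }) => ({
    success: true,
    output: { agentId: request.agentId, echoed: request.input },
  }),
};

const orchestrator = createAgentParallelOrchestrator({
  agentExecutor,
  defaultConfig: { maxConcurrentAgents: 2 },
  onProgress: (event) => console.log(event.type, event.groupId),
});

// Task 'c' depends on 'a' and 'b', so it runs in a second layer after both complete.
const result = await orchestrator.executeParallel([
  { taskId: 'a', agentId: 'research-agent', dependencies: [], input: { topic: 'alpha' } },
  { taskId: 'b', agentId: 'research-agent', dependencies: [], input: { topic: 'beta' } },
  { taskId: 'c', agentId: 'merge-agent', dependencies: ['a', 'b'], input: {} },
]);

console.log(result.allSucceeded, result.layerCount, result.peakConcurrency);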