claude-flow 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +612 -0
- package/bin/claude-flow +0 -0
- package/bin/claude-flow-simple +0 -0
- package/bin/claude-flow-typecheck +0 -0
- package/deno.json +84 -0
- package/package.json +45 -0
- package/scripts/check-links.ts +274 -0
- package/scripts/check-performance-regression.ts +168 -0
- package/scripts/claude-sparc.sh +562 -0
- package/scripts/coverage-report.ts +692 -0
- package/scripts/demo-task-system.ts +224 -0
- package/scripts/install.js +72 -0
- package/scripts/test-batch-tasks.ts +29 -0
- package/scripts/test-coordination-features.ts +238 -0
- package/scripts/test-mcp.ts +251 -0
- package/scripts/test-runner.ts +571 -0
- package/scripts/validate-examples.ts +288 -0
- package/src/cli/cli-core.ts +273 -0
- package/src/cli/commands/agent.ts +83 -0
- package/src/cli/commands/config.ts +442 -0
- package/src/cli/commands/help.ts +765 -0
- package/src/cli/commands/index.ts +963 -0
- package/src/cli/commands/mcp.ts +191 -0
- package/src/cli/commands/memory.ts +74 -0
- package/src/cli/commands/monitor.ts +403 -0
- package/src/cli/commands/session.ts +595 -0
- package/src/cli/commands/start.ts +156 -0
- package/src/cli/commands/status.ts +345 -0
- package/src/cli/commands/task.ts +79 -0
- package/src/cli/commands/workflow.ts +763 -0
- package/src/cli/completion.ts +553 -0
- package/src/cli/formatter.ts +310 -0
- package/src/cli/index.ts +211 -0
- package/src/cli/main.ts +23 -0
- package/src/cli/repl.ts +1050 -0
- package/src/cli/simple-cli.js +211 -0
- package/src/cli/simple-cli.ts +211 -0
- package/src/coordination/README.md +400 -0
- package/src/coordination/advanced-scheduler.ts +487 -0
- package/src/coordination/circuit-breaker.ts +366 -0
- package/src/coordination/conflict-resolution.ts +490 -0
- package/src/coordination/dependency-graph.ts +475 -0
- package/src/coordination/index.ts +63 -0
- package/src/coordination/manager.ts +460 -0
- package/src/coordination/messaging.ts +290 -0
- package/src/coordination/metrics.ts +585 -0
- package/src/coordination/resources.ts +322 -0
- package/src/coordination/scheduler.ts +390 -0
- package/src/coordination/work-stealing.ts +224 -0
- package/src/core/config.ts +627 -0
- package/src/core/event-bus.ts +186 -0
- package/src/core/json-persistence.ts +183 -0
- package/src/core/logger.ts +262 -0
- package/src/core/orchestrator-fixed.ts +312 -0
- package/src/core/orchestrator.ts +1234 -0
- package/src/core/persistence.ts +276 -0
- package/src/mcp/auth.ts +438 -0
- package/src/mcp/claude-flow-tools.ts +1280 -0
- package/src/mcp/load-balancer.ts +510 -0
- package/src/mcp/router.ts +240 -0
- package/src/mcp/server.ts +548 -0
- package/src/mcp/session-manager.ts +418 -0
- package/src/mcp/tools.ts +180 -0
- package/src/mcp/transports/base.ts +21 -0
- package/src/mcp/transports/http.ts +457 -0
- package/src/mcp/transports/stdio.ts +254 -0
- package/src/memory/backends/base.ts +22 -0
- package/src/memory/backends/markdown.ts +283 -0
- package/src/memory/backends/sqlite.ts +329 -0
- package/src/memory/cache.ts +238 -0
- package/src/memory/indexer.ts +238 -0
- package/src/memory/manager.ts +572 -0
- package/src/terminal/adapters/base.ts +29 -0
- package/src/terminal/adapters/native.ts +504 -0
- package/src/terminal/adapters/vscode.ts +340 -0
- package/src/terminal/manager.ts +308 -0
- package/src/terminal/pool.ts +271 -0
- package/src/terminal/session.ts +250 -0
- package/src/terminal/vscode-bridge.ts +242 -0
- package/src/utils/errors.ts +231 -0
- package/src/utils/helpers.ts +476 -0
- package/src/utils/types.ts +493 -0
|
@@ -0,0 +1,1234 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Main orchestrator for Claude-Flow
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
Config,
|
|
7
|
+
SystemEvents,
|
|
8
|
+
AgentProfile,
|
|
9
|
+
AgentSession,
|
|
10
|
+
Task,
|
|
11
|
+
HealthStatus,
|
|
12
|
+
ComponentHealth,
|
|
13
|
+
TaskStatus,
|
|
14
|
+
OrchestratorMetrics,
|
|
15
|
+
} from '../utils/types.ts';
|
|
16
|
+
import { IEventBus } from './event-bus.ts';
|
|
17
|
+
import { ILogger } from './logger.ts';
|
|
18
|
+
import { ITerminalManager } from '../terminal/manager.ts';
|
|
19
|
+
import { IMemoryManager } from '../memory/manager.ts';
|
|
20
|
+
import { ICoordinationManager } from '../coordination/manager.ts';
|
|
21
|
+
import { IMCPServer } from '../mcp/server.ts';
|
|
22
|
+
import { SystemError, InitializationError, ShutdownError } from '../utils/errors.ts';
|
|
23
|
+
import { delay, retry, circuitBreaker, CircuitBreaker } from '../utils/helpers.ts';
|
|
24
|
+
import { ensureDir, exists } from 'https://deno.land/std@0.208.0/fs/mod.ts';
|
|
25
|
+
import { join, dirname } from 'https://deno.land/std@0.208.0/path/mod.ts';
|
|
26
|
+
|
|
27
|
+
/**
 * Contract for the component that owns agent sessions: creating them,
 * looking them up, tearing them down, and saving/restoring them.
 */
export interface ISessionManager {
  // --- Lifecycle -----------------------------------------------------------

  /** Creates a new session for the agent described by `profile`. */
  createSession(profile: AgentProfile): Promise<AgentSession>;

  /** Terminates the session with the given id. */
  terminateSession(sessionId: string): Promise<void>;

  /** Terminates every session currently reported by `getActiveSessions()`. */
  terminateAllSessions(): Promise<void>;

  /** Drops the session with the given id from the manager's bookkeeping. */
  removeSession(sessionId: string): void;

  // --- Lookup --------------------------------------------------------------

  /** Returns the session with the given id, or `undefined` when unknown. */
  getSession(sessionId: string): AgentSession | undefined;

  /** Returns the sessions that are still considered live. */
  getActiveSessions(): AgentSession[];

  // --- Persistence ---------------------------------------------------------

  /** Saves the current sessions to durable storage. */
  persistSessions(): Promise<void>;

  /** Re-creates sessions from previously saved state. */
  restoreSessions(): Promise<void>;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Public surface of the orchestrator: system lifecycle, agent and task
 * management, and operational introspection.
 */
export interface IOrchestrator {
  // --- System lifecycle ----------------------------------------------------

  /** Brings the orchestrator and its components up. */
  initialize(): Promise<void>;

  /** Shuts the orchestrator and its components down. */
  shutdown(): Promise<void>;

  // --- Agents and tasks ----------------------------------------------------

  /** Starts an agent for `profile` and resolves to a string identifier for it. */
  spawnAgent(profile: AgentProfile): Promise<string>;

  /** Stops the agent with the given id. */
  terminateAgent(agentId: string): Promise<void>;

  /** Submits a task for execution. */
  assignTask(task: Task): Promise<void>;

  // --- Introspection and upkeep --------------------------------------------

  /** Reports the aggregated health of the system. */
  getHealthStatus(): Promise<HealthStatus>;

  /** Reports runtime metrics for the orchestrator. */
  getMetrics(): Promise<OrchestratorMetrics>;

  /** Runs one round of housekeeping. */
  performMaintenance(): Promise<void>;
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
/**
 * Shape of the snapshot that is serialized to disk for session persistence.
 */
export interface SessionPersistence {
  /** Saved sessions, each bundled with the agent profile it was created from. */
  sessions: Array<AgentSession & { profile: AgentProfile }>;

  /** Tasks that were queued when the snapshot was taken. */
  taskQueue: Task[];

  /** Task counters captured alongside the sessions. */
  metrics: Record<'completedTasks' | 'failedTasks' | 'totalTaskDuration', number>;

  /**
   * Moment the snapshot was written.
   * NOTE(review): after a JSON round-trip this is presumably an ISO string rather
   * than a `Date` instance — confirm before relying on `Date` methods.
   */
  savedAt: Date;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Session manager implementation with persistence.
 *
 * Keeps agent sessions in memory, remembers the profile each session was
 * created from, and — when `config.orchestrator.persistSessions` is enabled —
 * snapshots them to `sessions.json` inside `config.orchestrator.dataDir`
 * (falling back to `./data`).
 */
class SessionManager implements ISessionManager {
  private sessions = new Map<string, AgentSession>();
  private sessionProfiles = new Map<string, AgentProfile>();
  private persistencePath: string;
  private persistenceCircuitBreaker: CircuitBreaker;

  constructor(
    private terminalManager: ITerminalManager,
    private memoryManager: IMemoryManager,
    private eventBus: IEventBus,
    private logger: ILogger,
    private config: Config,
  ) {
    this.persistencePath = join(
      config.orchestrator.dataDir || './data',
      'sessions.json'
    );

    // Circuit breaker for persistence operations
    this.persistenceCircuitBreaker = circuitBreaker(
      'SessionPersistence',
      { threshold: 5, timeout: 30000, resetTimeout: 60000 }
    );
  }

  /**
   * Spawns a terminal and a memory bank for `profile` and registers a new
   * active session that ties them together.
   *
   * @param profile Agent profile the session is created for.
   * @returns The newly registered session.
   * @throws SystemError when the terminal or the memory bank cannot be created.
   */
  async createSession(profile: AgentProfile): Promise<AgentSession> {
    try {
      // Create terminal with retry logic
      const terminalId = await retry(
        () => this.terminalManager.spawnTerminal(profile),
        { maxAttempts: 3, initialDelay: 1000 }
      );

      // Create memory bank with retry logic (releases the terminal on failure)
      const memoryBankId = await this.createMemoryBank(profile, terminalId);

      const session: AgentSession = {
        // `slice(2, 11)` yields the same nine characters the deprecated `substr(2, 9)` did.
        id: `session_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`,
        agentId: profile.id,
        terminalId,
        startTime: new Date(),
        status: 'active',
        lastActivity: new Date(),
        memoryBankId,
      };

      this.sessions.set(session.id, session);
      this.sessionProfiles.set(session.id, profile);

      this.logger.info('Session created', {
        sessionId: session.id,
        agentId: profile.id,
        terminalId,
        memoryBankId
      });

      // Persist sessions asynchronously; creation does not wait for the write.
      this.persistSessions().catch(error =>
        this.logger.error('Failed to persist sessions', error)
      );

      return session;
    } catch (error) {
      this.logger.error('Failed to create session', { agentId: profile.id, error });
      throw new SystemError(`Failed to create session for agent ${profile.id}`, { error });
    }
  }

  /**
   * Creates the memory bank for a session whose terminal already exists.
   * If creation fails, the terminal is terminated (best effort) so it is not
   * orphaned, and the original error is rethrown.
   */
  private async createMemoryBank(
    profile: AgentProfile,
    terminalId: AgentSession['terminalId'],
  ): Promise<AgentSession['memoryBankId']> {
    try {
      return await retry(
        () => this.memoryManager.createBank(profile.id),
        { maxAttempts: 3, initialDelay: 1000 }
      );
    } catch (error) {
      try {
        await this.terminalManager.terminateTerminal(terminalId);
      } catch (cleanupError) {
        this.logger.error('Failed to release terminal after memory bank error', {
          agentId: profile.id,
          terminalId,
          error: cleanupError,
        });
      }
      throw error;
    }
  }

  /** Returns the session registered under `sessionId`, if any. */
  getSession(sessionId: string): AgentSession | undefined {
    return this.sessions.get(sessionId);
  }

  /** Returns every session whose status is `'active'` or `'idle'`. */
  getActiveSessions(): AgentSession[] {
    return Array.from(this.sessions.values()).filter(
      (session) => session.status === 'active' || session.status === 'idle',
    );
  }

  /**
   * Marks a session as terminated and releases its terminal and memory bank.
   *
   * Each release step is given five seconds; a failure or timeout is logged
   * and does not abort the termination. The session record itself stays in
   * the map (with status `'terminated'`) until `removeSession` is called.
   *
   * @throws Error when no session is registered under `sessionId`.
   */
  async terminateSession(sessionId: string): Promise<void> {
    const session = this.sessions.get(sessionId);
    if (!session) {
      throw new Error(`Session not found: ${sessionId}`);
    }

    try {
      // Update session status first
      session.status = 'terminated';
      session.endTime = new Date();

      // Terminate terminal with timeout
      await Promise.race([
        this.terminalManager.terminateTerminal(session.terminalId),
        delay(5000).then(() => {
          throw new Error('Terminal termination timeout');
        })
      ]).catch(error => {
        this.logger.error('Error terminating terminal', { sessionId, error });
      });

      // Close memory bank with timeout
      await Promise.race([
        this.memoryManager.closeBank(session.memoryBankId),
        delay(5000).then(() => {
          throw new Error('Memory bank close timeout');
        })
      ]).catch(error => {
        this.logger.error('Error closing memory bank', { sessionId, error });
      });

      // Clean up
      this.sessionProfiles.delete(sessionId);

      this.logger.info('Session terminated', { sessionId, duration: session.endTime.getTime() - session.startTime.getTime() });

      // Persist sessions asynchronously
      this.persistSessions().catch(error =>
        this.logger.error('Failed to persist sessions', error)
      );
    } catch (error) {
      this.logger.error('Error during session termination', { sessionId, error });
      throw error;
    }
  }

  /**
   * Terminates all active/idle sessions, five at a time, so the system is not
   * overwhelmed. Individual failures do not stop the remaining terminations.
   */
  async terminateAllSessions(): Promise<void> {
    const sessions = this.getActiveSessions();

    const batchSize = 5;
    for (let i = 0; i < sessions.length; i += batchSize) {
      const batch = sessions.slice(i, i + batchSize);
      await Promise.allSettled(
        batch.map((session) => this.terminateSession(session.id))
      );
    }
  }

  /** Forgets the session and its profile without releasing any resources. */
  removeSession(sessionId: string): void {
    this.sessions.delete(sessionId);
    this.sessionProfiles.delete(sessionId);
  }

  /**
   * Writes a snapshot of all sessions that still have a profile to
   * `persistencePath`. Does nothing when persistence is disabled. Failures
   * are logged and never thrown to the caller.
   */
  async persistSessions(): Promise<void> {
    if (!this.config.orchestrator.persistSessions) {
      return;
    }

    try {
      await this.persistenceCircuitBreaker.execute(async () => {
        const data: SessionPersistence = {
          // Sessions without a profile (e.g. already terminated) are left out.
          sessions: Array.from(this.sessions.values()).flatMap((session) => {
            const profile = this.sessionProfiles.get(session.id);
            return profile ? [{ ...session, profile }] : [];
          }),
          taskQueue: [],
          metrics: {
            completedTasks: 0,
            failedTasks: 0,
            totalTaskDuration: 0,
          },
          savedAt: new Date(),
        };

        await ensureDir(dirname(this.persistencePath));
        await Deno.writeTextFile(this.persistencePath, JSON.stringify(data, null, 2));

        this.logger.debug('Sessions persisted', { count: data.sessions.length });
      });
    } catch (error) {
      this.logger.error('Failed to persist sessions', error);
    }
  }

  /**
   * Re-creates the active/idle sessions found in the persisted snapshot.
   *
   * Each restored session gets a fresh terminal and memory bank via
   * `createSession`, then takes over the persisted id and timestamps. A
   * missing snapshot file is not an error; any other failure is logged.
   */
  async restoreSessions(): Promise<void> {
    if (!this.config.orchestrator.persistSessions) {
      return;
    }

    try {
      const data = await Deno.readTextFile(this.persistencePath);
      const persistence: SessionPersistence = JSON.parse(data);

      // Restore only active/idle sessions
      const sessionsToRestore = persistence.sessions.filter(
        s => s.status === 'active' || s.status === 'idle'
      );

      for (const sessionData of sessionsToRestore) {
        try {
          // Recreate session
          const session = await this.createSession(sessionData.profile);
          const generatedId = session.id;

          // Update with persisted data
          Object.assign(session, {
            id: sessionData.id,
            startTime: new Date(sessionData.startTime),
            lastActivity: new Date(sessionData.lastActivity),
          });

          // createSession registered the session under its generated id; move
          // both map entries to the restored id so lookups by `session.id`
          // (getSession, terminateSession, persistSessions) keep working.
          if (generatedId !== session.id) {
            this.sessions.delete(generatedId);
            this.sessionProfiles.delete(generatedId);
            this.sessions.set(session.id, session);
            this.sessionProfiles.set(session.id, sessionData.profile);
          }

          this.logger.info('Session restored', { sessionId: session.id });
        } catch (error) {
          this.logger.error('Failed to restore session', {
            sessionId: sessionData.id,
            error
          });
        }
      }
    } catch (error) {
      // A missing snapshot simply means there is nothing to restore.
      const snapshotMissing =
        error instanceof Deno.errors.NotFound ||
        (error as { code?: unknown } | null)?.code === 'ENOENT';
      if (!snapshotMissing) {
        this.logger.error('Failed to restore sessions', error);
      }
    }
  }
}
|
|
282
|
+
|
|
283
|
+
/**
|
|
284
|
+
* Main orchestrator implementation with enhanced features
|
|
285
|
+
*/
|
|
286
|
+
export class Orchestrator implements IOrchestrator {
|
|
287
|
+
private initialized = false;
|
|
288
|
+
private shutdownInProgress = false;
|
|
289
|
+
private sessionManager: ISessionManager;
|
|
290
|
+
private healthCheckInterval?: number;
|
|
291
|
+
private maintenanceInterval?: number;
|
|
292
|
+
private metricsInterval?: number;
|
|
293
|
+
private agents = new Map<string, AgentProfile>();
|
|
294
|
+
private taskQueue: Task[] = [];
|
|
295
|
+
private taskHistory = new Map<string, Task>();
|
|
296
|
+
private startTime = Date.now();
|
|
297
|
+
|
|
298
|
+
// Metrics tracking
|
|
299
|
+
private metrics = {
|
|
300
|
+
completedTasks: 0,
|
|
301
|
+
failedTasks: 0,
|
|
302
|
+
totalTaskDuration: 0,
|
|
303
|
+
};
|
|
304
|
+
|
|
305
|
+
// Circuit breakers for critical operations
|
|
306
|
+
private healthCheckCircuitBreaker: CircuitBreaker;
|
|
307
|
+
private taskAssignmentCircuitBreaker: CircuitBreaker;
|
|
308
|
+
|
|
309
|
+
/**
 * Wires the orchestrator to its collaborators, builds the internal session
 * manager from them, and creates the circuit breakers that guard health
 * checks and task assignment.
 */
constructor(
  private config: Config,
  private terminalManager: ITerminalManager,
  private memoryManager: IMemoryManager,
  private coordinationManager: ICoordinationManager,
  private mcpServer: IMCPServer,
  private eventBus: IEventBus,
  private logger: ILogger,
) {
  // Session bookkeeping shares the orchestrator's own collaborators.
  this.sessionManager = new SessionManager(terminalManager, memoryManager, eventBus, logger, config);

  // Breaker settings, named so the two call sites below read at a glance.
  const healthCheckBreakerOptions = { threshold: 3, timeout: 10000, resetTimeout: 30000 };
  const taskAssignmentBreakerOptions = { threshold: 5, timeout: 5000, resetTimeout: 20000 };

  this.healthCheckCircuitBreaker = circuitBreaker('HealthCheck', healthCheckBreakerOptions);
  this.taskAssignmentCircuitBreaker = circuitBreaker('TaskAssignment', taskAssignmentBreakerOptions);
}
|
|
337
|
+
|
|
338
|
+
/**
 * Starts the orchestrator: initializes the terminal, memory and coordination
 * managers together, then starts the MCP server, restores persisted sessions,
 * registers event handlers and kicks off the background tasks.
 *
 * On any failure an emergency shutdown is attempted before the error is
 * rethrown wrapped in an `InitializationError`.
 *
 * @throws InitializationError when already initialized or when start-up fails.
 */
async initialize(): Promise<void> {
  if (this.initialized) {
    throw new InitializationError('Orchestrator already initialized');
  }

  this.logger.info('Initializing orchestrator...');
  const initStartedAt = Date.now();

  try {
    // These three do not depend on each other, so they start side by side.
    const independentComponents: Array<[string, () => Promise<void>]> = [
      ['Terminal Manager', () => this.terminalManager.initialize()],
      ['Memory Manager', () => this.memoryManager.initialize()],
      ['Coordination Manager', () => this.coordinationManager.initialize()],
    ];
    await Promise.all(
      independentComponents.map(([label, start]) => this.initializeComponent(label, start)),
    );

    // The MCP server is only started once the managers above are up.
    await this.initializeComponent('MCP Server', () => this.mcpServer.start());

    await this.sessionManager.restoreSessions();

    this.setupEventHandlers();

    this.startHealthChecks();
    this.startMaintenanceTasks();
    this.startMetricsCollection();

    this.initialized = true;

    const elapsedMs = Date.now() - initStartedAt;
    this.eventBus.emit(SystemEvents.SYSTEM_READY, { timestamp: new Date() });
    this.logger.info('Orchestrator initialized successfully', { duration: elapsedMs });
  } catch (error) {
    this.logger.error('Failed to initialize orchestrator', error);

    // Tear down whatever was started before reporting the failure.
    await this.emergencyShutdown();

    throw new InitializationError('Orchestrator', { error });
  }
}
|
|
382
|
+
|
|
383
|
+
/**
 * Gracefully shuts the orchestrator down.
 *
 * Order of operations: stop background tasks, persist sessions, process the
 * remaining shutdown tasks, terminate all sessions, then shut components
 * down, waiting at most `config.orchestrator.shutdownTimeout` for that last
 * step. A timeout is logged as a warning instead of passing silently.
 *
 * Calling this while not initialized, or while another shutdown is running,
 * is a no-op. Whatever happens, `initialized` and `shutdownInProgress` are
 * reset to `false` afterwards.
 *
 * @throws ShutdownError when a step fails; an emergency shutdown is attempted first.
 */
async shutdown(): Promise<void> {
  if (!this.initialized || this.shutdownInProgress) {
    return;
  }

  this.shutdownInProgress = true;
  this.logger.info('Shutting down orchestrator...');
  const shutdownStart = Date.now();

  try {
    // Stop background tasks
    this.stopBackgroundTasks();

    // Save current state
    await this.sessionManager.persistSessions();

    // Process any remaining critical tasks
    await this.processShutdownTasks();

    // Terminate all sessions
    await this.sessionManager.terminateAllSessions();

    // Shutdown components with timeout. Each contender reports whether it was
    // the timer, so a timeout can be told apart from a completed shutdown.
    const timeoutMs = this.config.orchestrator.shutdownTimeout;
    const timedOut = await Promise.race([
      Promise.resolve(this.shutdownComponents()).then(() => false),
      Promise.resolve(delay(timeoutMs)).then(() => true),
    ]);
    if (timedOut) {
      this.logger.warn('Component shutdown timed out', { timeout: timeoutMs });
    }

    const shutdownDuration = Date.now() - shutdownStart;
    this.eventBus.emit(SystemEvents.SYSTEM_SHUTDOWN, { reason: 'Graceful shutdown' });
    this.logger.info('Orchestrator shutdown complete', { duration: shutdownDuration });
  } catch (error) {
    this.logger.error('Error during shutdown', error);

    // Force shutdown if graceful shutdown fails
    await this.emergencyShutdown();

    throw new ShutdownError('Failed to shutdown gracefully', { error });
  } finally {
    this.initialized = false;
    this.shutdownInProgress = false;
  }
}
|
|
426
|
+
|
|
427
|
+
/**
 * Spawns an agent: validates the profile, creates a session for it (retrying
 * up to three times), registers the profile, announces `AGENT_SPAWNED` and
 * starts health monitoring for the agent.
 *
 * @param profile Profile of the agent to spawn.
 * @returns The id of the session created for the agent.
 * @throws SystemError when the orchestrator is not initialized or the
 *   concurrent-agent limit has been reached; session creation errors are
 *   logged and rethrown.
 */
async spawnAgent(profile: AgentProfile): Promise<string> {
  if (!this.initialized) {
    throw new SystemError('Orchestrator not initialized');
  }

  // Refuse new agents once the configured ceiling is hit.
  const agentLimit = this.config.orchestrator.maxConcurrentAgents;
  if (this.agents.size >= agentLimit) {
    throw new SystemError('Maximum concurrent agents reached');
  }

  this.validateAgentProfile(profile);

  this.logger.info('Spawning agent', { agentId: profile.id, type: profile.type });

  try {
    const createSession = () => this.sessionManager.createSession(profile);
    const session = await retry(createSession, { maxAttempts: 3, initialDelay: 2000 });

    this.agents.set(profile.id, profile);

    this.eventBus.emit(SystemEvents.AGENT_SPAWNED, {
      agentId: profile.id,
      profile,
      sessionId: session.id,
    });

    this.startAgentHealthMonitoring(profile.id);

    return session.id;
  } catch (error) {
    this.logger.error('Failed to spawn agent', { agentId: profile.id, error });
    throw error;
  }
}
|
|
468
|
+
|
|
469
|
+
/**
 * Terminates an agent: cancels its tasks, terminates every active/idle
 * session that belongs to it, unregisters the agent and announces
 * `AGENT_TERMINATED`.
 *
 * Individual session termination failures do not abort the operation.
 *
 * @param agentId Id of the agent to terminate.
 * @throws SystemError when the orchestrator is not initialized or the agent
 *   is unknown; other errors are logged and rethrown.
 */
async terminateAgent(agentId: string): Promise<void> {
  if (!this.initialized) {
    throw new SystemError('Orchestrator not initialized');
  }

  const registeredProfile = this.agents.get(agentId);
  if (!registeredProfile) {
    throw new SystemError(`Agent not found: ${agentId}`);
  }

  this.logger.info('Terminating agent', { agentId });

  try {
    await this.cancelAgentTasks(agentId);

    // Collect the agent's live sessions, then terminate them concurrently.
    const ownedSessionIds = this.sessionManager
      .getActiveSessions()
      .filter((candidate) => candidate.agentId === agentId)
      .map((owned) => owned.id);

    await Promise.allSettled(
      ownedSessionIds.map((sessionId) => this.sessionManager.terminateSession(sessionId)),
    );

    this.agents.delete(agentId);

    this.eventBus.emit(SystemEvents.AGENT_TERMINATED, {
      agentId,
      reason: 'User requested',
    });
  } catch (error) {
    this.logger.error('Failed to terminate agent', { agentId, error });
    throw error;
  }
}
|
|
507
|
+
|
|
508
|
+
/**
 * Accepts a task for execution.
 *
 * The task is validated and recorded in the task history. Inside the
 * task-assignment circuit breaker it is then either handed to the agent
 * named in `task.assignedAgent`, or — when no agent is named — appended to
 * the queue, after which queue processing is triggered right away.
 *
 * @param task Task to assign or enqueue.
 * @throws SystemError when the orchestrator is not initialized, the queue is
 *   full, or the named agent is unknown; failures are logged and rethrown.
 */
async assignTask(task: Task): Promise<void> {
  if (!this.initialized) {
    throw new SystemError('Orchestrator not initialized');
  }

  this.validateTask(task);

  // Recorded before assignment so lifecycle events can find the task.
  this.taskHistory.set(task.id, task);

  try {
    await this.taskAssignmentCircuitBreaker.execute(async () => {
      if (task.assignedAgent) {
        // Direct assignment to a specific, known agent.
        if (!this.agents.get(task.assignedAgent)) {
          throw new SystemError(`Agent not found: ${task.assignedAgent}`);
        }

        await this.coordinationManager.assignTask(task, task.assignedAgent);

        this.eventBus.emit(SystemEvents.TASK_ASSIGNED, {
          taskId: task.id,
          agentId: task.assignedAgent,
        });
        return;
      }

      // No agent named: queue the task, respecting the configured capacity.
      const queueCapacity = this.config.orchestrator.taskQueueSize;
      if (this.taskQueue.length >= queueCapacity) {
        throw new SystemError('Task queue is full');
      }

      this.taskQueue.push(task);
      this.eventBus.emit(SystemEvents.TASK_CREATED, { task });

      // An agent may already be free, so try to drain the queue immediately.
      await this.processTaskQueue();
    });
  } catch (error) {
    this.logger.error('Failed to assign task', { taskId: task.id, error });
    throw error;
  }
}
|
|
553
|
+
|
|
554
|
+
/**
 * Collects the health of every component plus an orchestrator self-check.
 *
 * The four component checks run in parallel inside the health-check circuit
 * breaker. The overall status is `'unhealthy'` if any component is unhealthy,
 * otherwise `'degraded'` if any is degraded, otherwise `'healthy'`.
 *
 * This method never throws: if the check itself fails (including when the
 * circuit breaker rejects the call) a `'degraded'` status carrying the
 * failure message is returned.
 *
 * @returns The aggregated health status.
 */
async getHealthStatus(): Promise<HealthStatus> {
  try {
    return await this.healthCheckCircuitBreaker.execute(async () => {
      const components: Record<string, ComponentHealth> = {};

      // Check all components in parallel
      const [terminal, memory, coordination, mcp] = await Promise.allSettled([
        this.getComponentHealth(
          'Terminal Manager',
          async () => await this.terminalManager.getHealthStatus(),
        ),
        this.getComponentHealth(
          'Memory Manager',
          async () => await this.memoryManager.getHealthStatus(),
        ),
        this.getComponentHealth(
          'Coordination Manager',
          async () => await this.coordinationManager.getHealthStatus(),
        ),
        this.getComponentHealth(
          'MCP Server',
          async () => await this.mcpServer.getHealthStatus(),
        ),
      ]);

      // Process results
      components.terminal = this.processHealthResult(terminal, 'Terminal Manager');
      components.memory = this.processHealthResult(memory, 'Memory Manager');
      components.coordination = this.processHealthResult(coordination, 'Coordination Manager');
      components.mcp = this.processHealthResult(mcp, 'MCP Server');

      // Add orchestrator self-check
      components.orchestrator = {
        name: 'Orchestrator',
        status: 'healthy',
        lastCheck: new Date(),
        metrics: {
          uptime: Date.now() - this.startTime,
          activeAgents: this.agents.size,
          queuedTasks: this.taskQueue.length,
          // This module targets Deno (it already uses Deno file APIs), where the
          // Node `process` global is not guaranteed to exist; use the Deno API.
          memoryUsage: Deno.memoryUsage().heapUsed / 1024 / 1024, // MB
        },
      };

      // Determine overall status: the worst component status wins.
      const statuses = Object.values(components).map((c) => c.status);
      let overallStatus: HealthStatus['status'] = 'healthy';

      if (statuses.includes('unhealthy')) {
        overallStatus = 'unhealthy';
      } else if (statuses.includes('degraded')) {
        overallStatus = 'degraded';
      }

      return {
        status: overallStatus,
        components,
        timestamp: new Date(),
      };
    });
  } catch (error) {
    this.logger.error('Health check failed', error);

    // Return degraded status if health check fails. Report the real cause when
    // one is available instead of always blaming the circuit breaker.
    return {
      status: 'degraded',
      components: {
        orchestrator: {
          name: 'Orchestrator',
          status: 'degraded',
          lastCheck: new Date(),
          error: error instanceof Error && error.message
            ? error.message
            : 'Health check circuit breaker open',
        },
      },
      timestamp: new Date(),
    };
  }
}
|
|
632
|
+
|
|
633
|
+
/**
 * Builds a snapshot of orchestrator metrics: uptime, agent and task counts,
 * average task duration, and process memory/CPU usage.
 *
 * The average duration is `totalTaskDuration / completedTasks`, or `0` when
 * no task has completed yet.
 *
 * NOTE(review): this relies on a Node-style `process` global; the rest of the
 * file uses Deno APIs, so confirm `process` is available in the target runtime.
 *
 * @returns The current metrics snapshot.
 */
async getMetrics(): Promise<OrchestratorMetrics> {
  const memorySnapshot = process.memoryUsage();
  const cpuSnapshot = process.cpuUsage();

  const { completedTasks, failedTasks, totalTaskDuration } = this.metrics;

  // Guard against dividing by zero before the first task completes.
  let avgTaskDuration = 0;
  if (completedTasks > 0) {
    avgTaskDuration = totalTaskDuration / completedTasks;
  }

  return {
    uptime: Date.now() - this.startTime,
    totalAgents: this.agents.size,
    activeAgents: this.sessionManager.getActiveSessions().length,
    totalTasks: this.taskHistory.size,
    completedTasks,
    failedTasks,
    queuedTasks: this.taskQueue.length,
    avgTaskDuration,
    memoryUsage: memorySnapshot,
    cpuUsage: cpuSnapshot,
    timestamp: new Date(),
  };
}
|
|
655
|
+
|
|
656
|
+
/**
 * Runs one round of housekeeping: cleans up terminated sessions and old task
 * history, lets the terminal, memory and coordination managers perform their
 * own maintenance, persists sessions, and triggers garbage collection when
 * the runtime exposes a `gc` function.
 *
 * A failing component does not stop the others; its failure is logged as a
 * warning. Any other error is logged and not rethrown.
 */
async performMaintenance(): Promise<void> {
  this.logger.debug('Performing maintenance tasks');

  try {
    // Clean up terminated sessions
    await this.cleanupTerminatedSessions();

    // Clean up old task history
    await this.cleanupTaskHistory();

    // Perform component maintenance; report failures instead of dropping them.
    const componentNames = ['Terminal Manager', 'Memory Manager', 'Coordination Manager'];
    const componentResults = await Promise.allSettled([
      this.terminalManager.performMaintenance(),
      this.memoryManager.performMaintenance(),
      this.coordinationManager.performMaintenance(),
    ]);
    componentResults.forEach((result, index) => {
      if (result.status === 'rejected') {
        this.logger.warn('Component maintenance failed', {
          component: componentNames[index],
          error: result.reason,
        });
      }
    });

    // Persist current state
    await this.sessionManager.persistSessions();

    // Force garbage collection if available. `globalThis` exists in every
    // runtime, whereas the Node-only `global` identifier throws a
    // ReferenceError elsewhere and would abort the maintenance round here.
    const gc: unknown = Reflect.get(globalThis, 'gc');
    if (typeof gc === 'function') {
      gc();
    }

    this.logger.debug('Maintenance tasks completed');
  } catch (error) {
    this.logger.error('Error during maintenance', error);
  }
}
|
|
686
|
+
|
|
687
|
+
/**
 * Subscribes the orchestrator to task, agent, system and deadlock events.
 *
 * - `TASK_STARTED` / `TASK_COMPLETED` / `TASK_FAILED` update the task kept in
 *   the task history and the completed/failed counters; completion triggers
 *   queue processing and failure triggers the task-failure handling.
 * - `AGENT_ERROR` logs and hands over to agent error handling.
 * - `AGENT_IDLE` marks the agent's live sessions idle and processes the queue.
 * - `SYSTEM_ERROR` and `DEADLOCK_DETECTED` log and hand over to their handlers.
 *
 * Every handler body runs through `runSafely`, so an error thrown while
 * reacting to an event is logged instead of becoming an unhandled rejection.
 */
private setupEventHandlers(): void {
  // Runs a handler body and logs, rather than propagates, anything it throws.
  const runSafely = async (context: string, work: () => unknown): Promise<void> => {
    try {
      await work();
    } catch (error) {
      this.logger.error('Event handler failed', { context, error });
    }
  };

  // Handle task lifecycle events
  this.eventBus.on(SystemEvents.TASK_STARTED, (data: unknown) =>
    runSafely('task started', () => {
      const { taskId } = data as { taskId: string; agentId: string };
      const task = this.taskHistory.get(taskId);
      if (task) {
        task.status = 'running';
        task.startedAt = new Date();
      }
    }));

  this.eventBus.on(SystemEvents.TASK_COMPLETED, (data: unknown) =>
    runSafely('task completed', async () => {
      const { taskId, result } = data as { taskId: string; result: unknown };
      const task = this.taskHistory.get(taskId);
      if (task) {
        task.status = 'completed';
        task.completedAt = new Date();
        if (result !== undefined) {
          task.output = result as Record<string, unknown>;
        }

        // Update metrics; duration is only known when the start was recorded.
        this.metrics.completedTasks++;
        if (task.startedAt) {
          this.metrics.totalTaskDuration += task.completedAt.getTime() - task.startedAt.getTime();
        }
      }

      // A finished task may have freed an agent for queued work.
      await this.processTaskQueue();
    }));

  this.eventBus.on(SystemEvents.TASK_FAILED, (data: unknown) =>
    runSafely('task failed', async () => {
      const { taskId, error } = data as { taskId: string; error: Error };
      const task = this.taskHistory.get(taskId);
      if (task) {
        task.status = 'failed';
        task.completedAt = new Date();
        task.error = error;

        // Update metrics
        this.metrics.failedTasks++;
      }

      // Retry or requeue based on configuration
      await this.handleTaskFailure(taskId, error);
    }));

  // Handle agent events
  this.eventBus.on(SystemEvents.AGENT_ERROR, (data: unknown) =>
    runSafely('agent error', async () => {
      const { agentId, error } = data as { agentId: string; error: Error };
      this.logger.error('Agent error', { agentId, error });

      // Implement agent recovery
      await this.handleAgentError(agentId, error);
    }));

  this.eventBus.on(SystemEvents.AGENT_IDLE, (data: unknown) =>
    runSafely('agent idle', async () => {
      const { agentId } = data as { agentId: string };

      // Update session status
      for (const session of this.sessionManager.getActiveSessions()) {
        if (session.agentId === agentId) {
          session.status = 'idle';
        }
      }

      // Try to assign queued tasks
      await this.processTaskQueue();
    }));

  // Handle system events
  this.eventBus.on(SystemEvents.SYSTEM_ERROR, (data: unknown) => {
    void runSafely('system error', () => {
      const { error, component } = data as { error: Error; component: string };
      this.logger.error('System error', { component, error });

      // Implement system-level error recovery
      return this.handleSystemError(component, error);
    });
  });

  // Handle resource events
  this.eventBus.on(SystemEvents.DEADLOCK_DETECTED, (data: unknown) => {
    void runSafely('deadlock detected', () => {
      const { agents, resources } = data as { agents: string[]; resources: string[] };
      this.logger.error('Deadlock detected', { agents, resources });

      // Implement deadlock resolution
      return this.resolveDeadlock(agents, resources);
    });
  });
}
|
|
773
|
+
|
|
774
|
+
/**
 * Schedules the recurring system health check.
 *
 * On every tick the current health status is fetched and broadcast as a
 * `SYSTEM_HEALTHCHECK` event. When the overall status is `'unhealthy'` a
 * warning is logged and component recovery is attempted. Any error raised
 * during a tick is logged and does not stop the timer.
 */
private startHealthChecks(): void {
  const runHealthCheck = async (): Promise<void> => {
    try {
      const health = await this.getHealthStatus();
      this.eventBus.emit(SystemEvents.SYSTEM_HEALTHCHECK, { status: health });

      if (health.status !== 'unhealthy') {
        return;
      }

      this.logger.warn('System health check failed', health);

      // Attempt recovery for unhealthy components
      await this.recoverUnhealthyComponents(health);
    } catch (error) {
      this.logger.error('Health check error', error);
    }
  };

  this.healthCheckInterval = setInterval(
    runHealthCheck,
    this.config.orchestrator.healthCheckInterval,
  );
}
|
|
791
|
+
|
|
792
|
+
/**
 * Schedules the recurring maintenance pass.
 *
 * The period comes from `config.orchestrator.maintenanceInterval`, falling
 * back to five minutes when that value is falsy. A failing maintenance pass
 * is logged instead of escaping the timer callback as an unhandled promise
 * rejection, matching the health-check and metrics timers.
 */
private startMaintenanceTasks(): void {
  this.maintenanceInterval = setInterval(async () => {
    try {
      await this.performMaintenance();
    } catch (error) {
      // A rejected promise inside a timer callback has no caller to catch it.
      this.logger.error('Maintenance error', error);
    }
  }, this.config.orchestrator.maintenanceInterval || 300000); // 5 minutes default
}
|
|
797
|
+
|
|
798
|
+
/**
 * Schedules periodic metrics collection.
 *
 * Each tick gathers the current metrics, logs them at debug level and
 * re-publishes them on the event bus as `'metrics:collected'`. The period is
 * `config.orchestrator.metricsInterval`, or one minute when that is falsy.
 * Errors are logged and never propagate out of the timer callback.
 */
private startMetricsCollection(): void {
  const collectOnce = async (): Promise<void> => {
    try {
      const snapshot = await this.getMetrics();
      this.logger.debug('Metrics collected', snapshot);

      // Emit metrics event for monitoring systems
      this.eventBus.emit('metrics:collected', snapshot);
    } catch (error) {
      this.logger.error('Metrics collection error', error);
    }
  };

  this.metricsInterval = setInterval(
    collectOnce,
    this.config.orchestrator.metricsInterval || 60000, // 1 minute default
  );
}
|
|
811
|
+
|
|
812
|
+
/**
 * Cancels the health-check, maintenance and metrics timers.
 *
 * Timers that were never started (falsy handles) are skipped.
 */
private stopBackgroundTasks(): void {
  const timers = [
    this.healthCheckInterval,
    this.maintenanceInterval,
    this.metricsInterval,
  ];

  for (const timer of timers) {
    if (timer) {
      clearInterval(timer);
    }
  }
}
|
|
823
|
+
|
|
824
|
+
/**
 * Shuts down the terminal manager, memory manager, coordination manager and
 * MCP server concurrently.
 *
 * Every shutdown is started before any of them is awaited. This method never
 * rejects because of a component failure; each failure is logged with the
 * component's display name.
 */
private async shutdownComponents(): Promise<void> {
  // Display name paired with the call that stops the component.
  const components: Array<[string, () => Promise<void>]> = [
    ['Terminal Manager', () => this.terminalManager.shutdown()],
    ['Memory Manager', () => this.memoryManager.shutdown()],
    ['Coordination Manager', () => this.coordinationManager.shutdown()],
    ['MCP Server', () => this.mcpServer.stop()],
  ];
  const labels = components.map(([label]) => label);

  const outcomes = await Promise.allSettled(
    components.map(([label, stop]) => this.shutdownComponent(label, stop)),
  );

  // Log any shutdown failures
  outcomes.forEach((outcome, position) => {
    if (outcome.status !== 'rejected') {
      return;
    }
    const componentName = labels[position];
    this.logger.error(`Failed to shutdown ${componentName}`, outcome.reason);
  });
}
|
|
842
|
+
|
|
843
|
+
/**
 * Best-effort shutdown of every component, ignoring individual failures.
 *
 * Rejections from the component shutdown promises are deliberately
 * discarded. Only an error thrown synchronously while starting the
 * shutdowns reaches the `catch` and is logged.
 */
private async emergencyShutdown(): Promise<void> {
  this.logger.warn('Performing emergency shutdown');

  // Intentionally empty: individual failures must not abort the emergency path.
  const ignoreFailure = (): void => {};

  try {
    // Force stop all components
    const pending = [
      this.terminalManager.shutdown().catch(ignoreFailure),
      this.memoryManager.shutdown().catch(ignoreFailure),
      this.coordinationManager.shutdown().catch(ignoreFailure),
      this.mcpServer.stop().catch(ignoreFailure),
    ];
    await Promise.allSettled(pending);
  } catch (error) {
    this.logger.error('Emergency shutdown error', error);
  }
}
|
|
858
|
+
|
|
859
|
+
/**
 * Assigns queued tasks to available agents, in queue order.
 *
 * The list of available agents is fetched once. Tasks are then taken from
 * the head of the queue and handed to the best-scoring agent. Each agent
 * receives at most one task per invocation. Processing stops when the queue
 * or the agent list is empty, when no agent is suitable for the head task,
 * or when an assignment fails.
 *
 * When an assignment fails the task goes back to the head of the queue with
 * its previous status restored and `assignedAgent` removed, so a queued task
 * never appears to be assigned to an agent that does not hold it.
 */
private async processTaskQueue(): Promise<void> {
  if (this.taskQueue.length === 0) {
    return;
  }

  const availableAgents = await this.getAvailableAgents();

  while (this.taskQueue.length > 0 && availableAgents.length > 0) {
    const task = this.taskQueue.shift();
    if (!task) {
      break;
    }

    const agent = this.selectAgentForTask(task, availableAgents);

    if (!agent) {
      // No suitable agent, put task back
      this.taskQueue.unshift(task);
      break;
    }

    const previousStatus = task.status;
    task.assignedAgent = agent.id;
    task.status = 'assigned';

    try {
      await this.coordinationManager.assignTask(task, agent.id);

      this.eventBus.emit(SystemEvents.TASK_ASSIGNED, {
        taskId: task.id,
        agentId: agent.id,
      });

      // Remove agent from available list
      const index = availableAgents.indexOf(agent);
      availableAgents.splice(index, 1);
    } catch (error) {
      // Undo the optimistic bookkeeping before putting the task back in queue
      task.status = previousStatus;
      delete task.assignedAgent;
      this.taskQueue.unshift(task);
      this.logger.error('Failed to assign task', { taskId: task.id, error });
      break;
    }
  }
}
|
|
898
|
+
|
|
899
|
+
/**
 * Lists the agents that can accept another task right now.
 *
 * An agent qualifies when it has at least one active session whose status is
 * `'idle'` or `'active'`, a registered profile, and a current task count
 * below `maxConcurrentTasks`. Each agent is considered only once even if it
 * owns several qualifying sessions; otherwise it would be listed repeatedly
 * and could be handed more tasks than its remaining capacity.
 *
 * A failing task-count lookup is logged and that agent is left out.
 *
 * @returns Available agent profiles, highest priority first.
 */
private async getAvailableAgents(): Promise<AgentProfile[]> {
  const available: AgentProfile[] = [];
  const seenAgentIds = new Set<string>();

  for (const session of this.sessionManager.getActiveSessions()) {
    const usable = session.status === 'idle' || session.status === 'active';
    if (!usable || seenAgentIds.has(session.agentId)) {
      continue;
    }
    seenAgentIds.add(session.agentId);

    const profile = this.agents.get(session.agentId);
    if (!profile) {
      continue;
    }

    try {
      const taskCount = await this.coordinationManager.getAgentTaskCount(profile.id);
      if (taskCount < profile.maxConcurrentTasks) {
        available.push(profile);
      }
    } catch (error) {
      this.logger.error('Failed to get agent task count', { agentId: profile.id, error });
    }
  }

  return available.sort((a, b) => b.priority - a.priority);
}
|
|
921
|
+
|
|
922
|
+
/**
 * Picks the best agent for a task from the given candidates.
 *
 * Scoring per agent:
 *  - `priority * 10`
 *  - `+5` for every required capability the agent has
 *  - `+20` when the agent type equals the task type
 *
 * An agent is excluded when the task lists required capabilities and the
 * agent has none of them, or when its score is not `>= 0`. Ties go to the
 * earliest candidate in `agents`.
 *
 * @param task - Task to place; `metadata.requiredCapabilities` is read when present.
 * @param agents - Candidate agents.
 * @returns The highest-scoring eligible agent, or `undefined` if none is eligible.
 */
private selectAgentForTask(task: Task, agents: AgentProfile[]): AgentProfile | undefined {
  let winner: AgentProfile | undefined;
  let winningScore = 0;

  for (const candidate of agents) {
    // Check capability match
    const required = (task.metadata?.requiredCapabilities as string[]) || [];
    const matched = required.filter(cap => candidate.capabilities.includes(cap)).length;

    if (required.length > 0 && matched === 0) {
      continue; // Can't handle task
    }

    let score = candidate.priority * 10;
    score += matched * 5;

    // Prefer agents with matching type
    if (task.type === candidate.type) {
      score += 20;
    }

    // Written as a negated `>=` so a NaN score is rejected as well.
    if (!(score >= 0)) {
      continue;
    }

    // Strict `>` keeps the first of several equally scored agents.
    if (winner === undefined || score > winningScore) {
      winner = candidate;
      winningScore = score;
    }
  }

  return winner;
}
|
|
958
|
+
|
|
959
|
+
/**
 * Runs one component's health probe with a five-second time limit.
 *
 * The probe is raced against `delay(5000)`. If the delay finishes first the
 * component is reported unhealthy with the error `'Health check timeout'`.
 * A probe that throws or rejects is reported unhealthy with the error's
 * message, or `'Unknown error'` for non-`Error` values.
 *
 * @param name - Component name copied into the report.
 * @param check - Probe returning the component's health, optional error and metrics.
 * @returns A report stamped with the time the result was produced.
 */
private async getComponentHealth(
  name: string,
  check: () => Promise<{ healthy: boolean; error?: string; metrics?: Record<string, number> }>,
): Promise<ComponentHealth> {
  try {
    const probe = check();
    const timeout = delay(5000).then(() => ({ healthy: false, error: 'Health check timeout' }));
    const outcome = await Promise.race([probe, timeout]);

    const report: ComponentHealth = {
      name,
      status: outcome.healthy ? 'healthy' : 'unhealthy',
      lastCheck: new Date(),
    };

    // Optional fields are only set when they carry a value.
    if (outcome.error !== undefined) {
      report.error = outcome.error;
    }
    if ('metrics' in outcome && outcome.metrics !== undefined) {
      report.metrics = outcome.metrics;
    }

    return report;
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Unknown error';
    return {
      name,
      status: 'unhealthy',
      lastCheck: new Date(),
      error: message,
    };
  }
}
|
|
990
|
+
|
|
991
|
+
/**
 * Converts a settled health-check promise into a `ComponentHealth` report.
 *
 * @param result - Settled outcome of a component health check.
 * @param componentName - Name used when the check rejected and produced no report.
 * @returns The fulfilled report, or an `'unhealthy'` report carrying the
 *   rejection's message (`'Health check failed'` when it has none).
 */
private processHealthResult(
  result: PromiseSettledResult<ComponentHealth>,
  componentName: string
): ComponentHealth {
  if (result.status !== 'fulfilled') {
    const message = result.reason?.message || 'Health check failed';
    return {
      name: componentName,
      status: 'unhealthy',
      lastCheck: new Date(),
      error: message,
    };
  }

  return result.value;
}
|
|
1006
|
+
|
|
1007
|
+
/**
 * Initializes a component through the `retry` helper.
 *
 * @param name - Component name used in log messages and in the thrown error.
 * @param init - Initialization routine handed to `retry`.
 * @throws InitializationError wrapping the underlying error when `retry` gives up.
 */
private async initializeComponent(name: string, init: () => Promise<void>): Promise<void> {
  const retryPolicy = { maxAttempts: 3, initialDelay: 2000 };

  try {
    await retry(init, retryPolicy);
    this.logger.info(`${name} initialized`);
  } catch (error) {
    this.logger.error(`Failed to initialize ${name}`, error);
    throw new InitializationError(name, { error });
  }
}
|
|
1016
|
+
|
|
1017
|
+
/**
 * Shuts down one component, allowing it at most ten seconds.
 *
 * A shutdown that does not finish in time is treated as a failure: the
 * method logs it and rejects, instead of reporting the component as cleanly
 * shut down. The timeout timer is always cancelled once the race settles so
 * it cannot keep the process alive after a fast shutdown.
 *
 * @param name - Component name used in log and error messages.
 * @param shutdown - Routine that stops the component.
 * @throws The error raised by `shutdown`, or a timeout `Error`.
 */
private async shutdownComponent(name: string, shutdown: () => Promise<void>): Promise<void> {
  const timeoutMs = 10000; // 10 second timeout per component
  let timer: ReturnType<typeof setTimeout> | undefined;

  const timeout = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(
      () => reject(new Error(`${name} shutdown timed out after ${timeoutMs}ms`)),
      timeoutMs,
    );
  });

  try {
    await Promise.race([shutdown(), timeout]);
    this.logger.info(`${name} shut down`);
  } catch (error) {
    this.logger.error(`Failed to shutdown ${name}`, error);
    throw error;
  } finally {
    clearTimeout(timer);
  }
}
|
|
1029
|
+
|
|
1030
|
+
/**
 * Validates an agent profile before it is registered.
 *
 * @param profile - Profile to check.
 * @throws Error when `id`, `name` or `type` is missing, when
 *   `maxConcurrentTasks` is not a number of at least 1, or when an agent
 *   with the same ID is already registered.
 */
private validateAgentProfile(profile: AgentProfile): void {
  if (!profile.id || !profile.name || !profile.type) {
    throw new Error('Invalid agent profile: missing required fields');
  }

  // Negated `>=` also rejects NaN and undefined, which `< 1` lets through.
  if (!(profile.maxConcurrentTasks >= 1)) {
    throw new Error('Invalid agent profile: maxConcurrentTasks must be at least 1');
  }

  if (this.agents.has(profile.id)) {
    throw new Error(`Agent with ID ${profile.id} already exists`);
  }
}
|
|
1043
|
+
|
|
1044
|
+
/**
 * Validates a task before it is accepted.
 *
 * @param task - Task to check.
 * @throws Error when `id`, `type` or `description` is missing, when
 *   `priority` is not a number within 0..100, or when a task with the same
 *   ID is already present in the task history.
 */
private validateTask(task: Task): void {
  if (!task.id || !task.type || !task.description) {
    throw new Error('Invalid task: missing required fields');
  }

  // Positive range test, negated: NaN and undefined fail it, whereas the
  // `< 0 || > 100` form silently accepts them.
  if (!(task.priority >= 0 && task.priority <= 100)) {
    throw new Error('Invalid task: priority must be between 0 and 100');
  }

  if (this.taskHistory.has(task.id)) {
    throw new Error(`Task with ID ${task.id} already exists`);
  }
}
|
|
1057
|
+
|
|
1058
|
+
/**
 * Reacts to an error reported for an agent.
 *
 * The agent's error count (kept in `metadata.errorCount`) is incremented.
 * While the count before this error is below 3, the agent is terminated and
 * respawned after a two-second pause. Otherwise it is terminated for good.
 *
 * The respawned profile keeps the incremented count. Resetting it to zero on
 * every restart would mean the threshold could never be reached and a
 * permanently failing agent would be restarted forever.
 *
 * Failures while restarting or terminating are logged and never rethrown.
 * Unknown agent IDs are ignored.
 *
 * @param agentId - ID of the agent that reported the error.
 * @param error - The reported error; only used for logging.
 */
private async handleAgentError(agentId: string, error: Error): Promise<void> {
  const profile = this.agents.get(agentId);
  if (!profile) {
    return;
  }

  // Log error details
  this.logger.error('Handling agent error', { agentId, error });

  // Check if agent should be restarted
  const errorCount = (profile.metadata?.errorCount as number) || 0;
  profile.metadata = { ...profile.metadata, errorCount: errorCount + 1 };

  if (errorCount < 3) {
    // Attempt to restart agent
    try {
      await this.terminateAgent(agentId);
      await delay(2000); // Wait before restart
      // Respawn from a copy that carries the incremented error count forward.
      await this.spawnAgent({ ...profile, metadata: { ...profile.metadata } });
      this.logger.info('Agent restarted after error', { agentId });
    } catch (restartError) {
      this.logger.error('Failed to restart agent', { agentId, error: restartError });
    }
  } else {
    // Too many errors, terminate agent
    this.logger.error('Agent exceeded error threshold, terminating', { agentId, errorCount });
    try {
      await this.terminateAgent(agentId);
    } catch (terminateError) {
      this.logger.error('Failed to terminate agent', { agentId, error: terminateError });
    }
  }
}
|
|
1087
|
+
|
|
1088
|
+
/**
 * Decides whether a failed task is retried.
 *
 * The retry counter lives in `task.metadata.retryCount`. While it is below
 * `config.orchestrator.taskMaxRetries` (3 when not configured; an explicit 0
 * disables retries) the task is reset to `'queued'`, detached from its agent
 * and re-enqueued after `2^retryCount` seconds. Otherwise the failure is
 * logged as final. Unknown task IDs are ignored.
 *
 * @param taskId - ID of the failed task.
 * @param error - The failure; not inspected by this method.
 */
private async handleTaskFailure(taskId: string, error: Error): Promise<void> {
  const task = this.taskHistory.get(taskId);
  if (!task) {
    return;
  }

  const retryCount = (task.metadata?.retryCount as number) || 0;
  // `??` rather than `||`: a configured value of 0 must mean "never retry".
  const maxRetries = this.config.orchestrator.taskMaxRetries ?? 3;

  if (retryCount < maxRetries) {
    // Retry task
    task.metadata = { ...task.metadata, retryCount: retryCount + 1 };
    task.status = 'queued';
    delete task.assignedAgent;

    // Add back to queue with delay
    setTimeout(() => {
      this.taskQueue.push(task);
      // Nothing awaits a timer callback, so the rejection is handled here.
      void this.processTaskQueue().catch((queueError: unknown) => {
        this.logger.error('Failed to process task queue after retry', {
          taskId,
          error: queueError,
        });
      });
    }, Math.pow(2, retryCount) * 1000); // Exponential backoff

    this.logger.info('Task queued for retry', { taskId, retryCount: retryCount + 1 });
  } else {
    this.logger.error('Task exceeded retry limit', { taskId, retryCount });
  }
}
|
|
1114
|
+
|
|
1115
|
+
/**
 * Handles a system-level error reported for a component.
 *
 * Currently this only logs the error; no recovery is performed.
 *
 * @param component - Name of the component that reported the error.
 * @param error - The reported error.
 */
private handleSystemError(component: string, error: Error): void {
  const details = { component, error };
  this.logger.error('Handling system error', details);

  // TODO: Implement specific recovery strategies based on component and error type
}
|
|
1121
|
+
|
|
1122
|
+
/**
 * Breaks a deadlock by cancelling every task of the lowest-priority agent
 * involved.
 *
 * Agent IDs without a registered profile are ignored; if none remain nothing
 * is cancelled. Among agents with equal priority the one listed first in
 * `agents` is chosen.
 *
 * @param agents - IDs of the agents taking part in the deadlock.
 * @param resources - Contended resources; only used for logging.
 */
private async resolveDeadlock(agents: string[], resources: string[]): Promise<void> {
  this.logger.warn('Resolving deadlock', { agents, resources });

  // Simple deadlock resolution: cancel lowest priority agent's tasks
  const agentProfiles = agents
    .map(id => this.agents.get(id))
    .filter(Boolean) as AgentProfile[];

  if (agentProfiles.length === 0) {
    return;
  }

  // Sort by priority (lowest first)
  agentProfiles.sort((a, b) => a.priority - b.priority);

  // Cancel tasks for lowest priority agent
  const targetAgent = agentProfiles[0];
  await this.cancelAgentTasks(targetAgent.id, 'Deadlock resolution');

  this.logger.info('Deadlock resolved by cancelling tasks', { agentId: targetAgent.id });
}

/**
 * Cancels every task currently held by an agent.
 *
 * Each successfully cancelled task that is present in the task history is
 * marked `'cancelled'` with a completion time, and a `TASK_CANCELLED` event
 * is emitted for it. A failure on one task is logged and the remaining tasks
 * are still processed. A failure to list the agent's tasks is logged and
 * ends the operation. This method never rejects.
 *
 * @param agentId - ID of the agent whose tasks are cancelled.
 * @param reason - Reason reported in the `TASK_CANCELLED` event.
 */
private async cancelAgentTasks(agentId: string, reason = 'Agent termination'): Promise<void> {
  try {
    const tasks = await this.coordinationManager.getAgentTasks(agentId);

    for (const task of tasks) {
      try {
        await this.coordinationManager.cancelTask(task.id);

        // Update task status
        const trackedTask = this.taskHistory.get(task.id);
        if (trackedTask) {
          trackedTask.status = 'cancelled';
          trackedTask.completedAt = new Date();
        }

        this.eventBus.emit(SystemEvents.TASK_CANCELLED, {
          taskId: task.id,
          reason,
        });
      } catch (taskError) {
        // Keep going: one stuck task must not leave the others running.
        this.logger.error('Failed to cancel task', { agentId, taskId: task.id, error: taskError });
      }
    }
  } catch (error) {
    this.logger.error('Failed to cancel agent tasks', { agentId, error });
  }
}
|
|
1167
|
+
|
|
1168
|
+
/**
 * Placeholder for per-agent health monitoring.
 *
 * Not implemented yet: calling it has no effect.
 *
 * @param agentId - ID of the agent to monitor (currently unused).
 */
private startAgentHealthMonitoring(agentId: string): void {
  // TODO: Implement periodic health checks for individual agents
}
|
|
1171
|
+
|
|
1172
|
+
/**
 * Entry point for recovering components reported as unhealthy.
 *
 * Recovery itself is not implemented yet: every unhealthy component is
 * logged with a warning and the per-component branches are placeholders.
 *
 * @param health - Health status whose `components` map is inspected.
 */
private async recoverUnhealthyComponents(health: HealthStatus): Promise<void> {
  const unhealthyNames = Object.entries(health.components)
    .filter(([, component]) => component.status === 'unhealthy')
    .map(([componentName]) => componentName);

  for (const name of unhealthyNames) {
    this.logger.warn('Attempting to recover unhealthy component', { name });

    // TODO: Implement component-specific recovery strategies
    switch (name) {
      case 'Terminal Manager':
        // Restart terminal pools, etc.
        break;
      case 'Memory Manager':
        // Clear cache, reconnect to backends, etc.
        break;
      case 'Coordination Manager':
        // Reset locks, clear message queues, etc.
        break;
      case 'MCP Server':
        // Restart server, reset connections, etc.
        break;
    }
  }
}
|
|
1195
|
+
|
|
1196
|
+
/**
 * Terminates sessions that ended longer ago than the retention window.
 *
 * A session is cleaned up when its status is `'terminated'` and its
 * `endTime` is a `Date` earlier than now minus
 * `config.orchestrator.sessionRetentionMs` (one hour when that is falsy).
 *
 * The `status` and `endTime` fields are read through runtime checks rather
 * than `any` casts, so a session lacking them is skipped.
 *
 * NOTE(review): the candidates come from `getActiveSessions()`. If that
 * call never returns terminated sessions this method is a no-op; confirm
 * against the session manager.
 */
private async cleanupTerminatedSessions(): Promise<void> {
  // Widened to `unknown` so each field is checked before use.
  const allSessions: readonly unknown[] = this.sessionManager.getActiveSessions();

  const cutoffTime = Date.now() - (this.config.orchestrator.sessionRetentionMs || 3600000); // 1 hour default

  for (const session of allSessions) {
    if (typeof session !== 'object' || session === null) {
      continue;
    }

    const { id, status, endTime } = session as {
      id?: unknown;
      status?: unknown;
      endTime?: unknown;
    };

    if (status !== 'terminated' || typeof id !== 'string') {
      continue;
    }

    // Negated `<` so an invalid Date (NaN timestamp) is never treated as expired.
    if (!(endTime instanceof Date) || !(endTime.getTime() < cutoffTime)) {
      continue;
    }

    await this.sessionManager.terminateSession(id);
    this.logger.debug('Cleaned up old session', { sessionId: id });
  }
}
|
|
1210
|
+
|
|
1211
|
+
/**
 * Drops finished tasks from the task history once they are older than the
 * retention window.
 *
 * A task is removed when it has a `completedAt` earlier than now minus
 * `config.orchestrator.taskHistoryRetentionMs` (24 hours when that is
 * falsy). Tasks without `completedAt` are kept.
 */
private async cleanupTaskHistory(): Promise<void> {
  const retentionMs = this.config.orchestrator.taskHistoryRetentionMs || 86400000; // 24 hours default
  const cutoffTime = Date.now() - retentionMs;

  // Deleting the entry currently being visited is safe for a Map.
  this.taskHistory.forEach((task, taskId) => {
    const finishedAt = task.completedAt;
    if (!finishedAt || !(finishedAt.getTime() < cutoffTime)) {
      return;
    }

    this.taskHistory.delete(taskId);
    this.logger.debug('Cleaned up old task', { taskId });
  });
}
|
|
1221
|
+
|
|
1222
|
+
/**
 * Hook for handling critical queued tasks before shutdown.
 *
 * A queued task counts as critical when its priority is at least 90 or its
 * metadata has `critical === true`. At present the critical tasks are only
 * counted and logged; processing them is not implemented.
 */
private async processShutdownTasks(): Promise<void> {
  // Process any critical tasks before shutdown
  let criticalCount = 0;
  for (const queued of this.taskQueue) {
    if (queued.priority >= 90 || queued.metadata?.critical === true) {
      criticalCount += 1;
    }
  }

  if (criticalCount === 0) {
    return;
  }

  this.logger.info('Processing critical tasks before shutdown', { count: criticalCount });

  // TODO: Implement critical task processing
}
|
|
1234
|
+
}
|