@weave_protocol/domere 1.0.17 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,568 @@
1
+ /**
2
+ * Dōmere - Agent Registry
3
+ *
4
+ * Agent lifecycle management, health monitoring, and capability tracking
5
+ * for multi-agent AI orchestration systems.
6
+ */
7
+
8
+ import * as crypto from 'crypto';
9
+
10
+ // =============================================================================
11
+ // Types
12
+ // =============================================================================
13
+
14
/**
 * Lifecycle state of a registered agent.
 *
 * - `initializing`: assigned at registration.
 * - `ready` / `busy` / `overloaded`: derived from the ratio of current tasks
 *   to `max_concurrent_tasks`.
 * - `draining`: set by `AgentRegistry.drain`; task assignment is refused.
 * - `offline`: set when the heartbeat timeout elapses.
 * - `failed`: treated like `offline` when deciding whether an agent is down.
 */
export type AgentStatus =
  | 'initializing'
  | 'ready'
  | 'busy'
  | 'overloaded'
  | 'draining'
  | 'offline'
  | 'failed';
15
+
16
/**
 * Full record the registry keeps for one agent.
 */
export interface Agent {
  /** Unique agent identifier (caller-supplied or generated at registration). */
  id: string;
  /** Optional human-readable name. */
  name?: string;

  // --- Status ---------------------------------------------------------------

  /** Current lifecycle state. */
  status: AgentStatus;
  /** When the agent was registered. */
  registered_at: Date;
  /** When the most recent heartbeat was processed. */
  last_heartbeat: Date;

  // --- Capabilities ---------------------------------------------------------

  /** Capability tags used when querying for agents. */
  capabilities: string[];
  /** Upper bound on simultaneously assigned tasks. */
  max_concurrent_tasks: number;
  /** IDs of the tasks currently assigned to the agent. */
  current_tasks: string[];

  // --- Performance ----------------------------------------------------------

  /** Rolling performance counters. */
  metrics: AgentMetrics;

  // --- Configuration --------------------------------------------------------

  /** Per-agent timing and recovery settings. */
  config: AgentConfig;

  // --- Metadata -------------------------------------------------------------

  /** Free-form data attached by the caller or by heartbeats. */
  metadata: Record<string, any>;
}
39
+
40
/**
 * Timing and recovery settings applied to an agent.
 */
export interface AgentConfig {
  /** Interval, in milliseconds, at which the agent is expected to heartbeat. */
  heartbeat_interval_ms: number;
  /** Silence, in milliseconds, after which the agent is marked `offline`. */
  heartbeat_timeout_ms: number;
  /** Time budget, in milliseconds, for draining in-flight tasks. */
  drain_timeout_ms: number;
  /** Whether a heartbeat from an offline/failed agent may raise a `recovered` event. */
  auto_recover: boolean;
}
46
+
47
/**
 * Performance counters tracked per agent.
 */
export interface AgentMetrics {
  /** Number of tasks reported as successful. */
  tasks_completed: number;
  /** Number of tasks reported as unsuccessful. */
  tasks_failed: number;
  /** Sum of the durations of all finished tasks, in milliseconds. */
  total_duration_ms: number;
  /** `total_duration_ms` divided by the number of finished tasks. */
  avg_duration_ms: number;
  /** `tasks_completed` divided by the number of finished tasks; starts at 1. */
  success_rate: number;
  /** Current task count divided by `max_concurrent_tasks` (0-1). */
  current_load: number;
  /** Milliseconds since registration, refreshed on each heartbeat. */
  uptime_ms: number;
  /** When the most recent task completion was recorded, if any. */
  last_task_completed_at?: Date;
}
57
+
58
/**
 * Parameters accepted when registering an agent.
 */
export interface AgentRegistration {
  /** Desired agent ID; one is generated when omitted. */
  agent_id?: string;
  /** Optional human-readable name. */
  name?: string;
  /** Capability tags the agent offers. */
  capabilities: string[];
  /** Maximum simultaneous tasks; the registry falls back to 5 when omitted. */
  max_concurrent_tasks?: number;
  /** Heartbeat interval override; the registry default is used when omitted. */
  heartbeat_interval_ms?: number;
  /** Free-form data to attach to the agent; defaults to an empty object. */
  metadata?: Record<string, any>;
}
66
+
67
/**
 * Data an agent sends with each heartbeat.
 */
export interface HeartbeatPayload {
  /** ID of the agent sending the heartbeat. */
  agent_id: string;
  /** Status the agent reports for itself, if any. */
  status?: AgentStatus;
  /** Complete list of task IDs the agent is working on; replaces the stored list. */
  current_tasks?: string[];
  /** Metric fields to shallow-merge into the stored metrics. */
  metrics_update?: Partial<AgentMetrics>;
  /** Metadata fields to shallow-merge into the stored metadata. */
  metadata_update?: Record<string, any>;
}
74
+
75
/**
 * Filter criteria for looking up agents. All supplied criteria must match.
 */
export interface AgentQuery {
  /** Capabilities the agent must offer (every entry is required). */
  capabilities?: string[];
  /** Acceptable statuses; when omitted or empty only `ready`/`busy` agents match. */
  status?: AgentStatus[];
  /** Minimum number of free task slots. */
  min_available_slots?: number;
  /** Maximum acceptable `metrics.current_load`. */
  max_load?: number;
  /** Agent IDs to leave out of the result. */
  exclude?: string[];
}
82
+
83
/**
 * Notification delivered to event subscribers.
 */
export interface AgentEvent {
  /** What happened to the agent. */
  type:
    | 'registered'
    | 'ready'
    | 'busy'
    | 'overloaded'
    | 'draining'
    | 'offline'
    | 'failed'
    | 'recovered'
    | 'deregistered';
  /** ID of the agent the event is about. */
  agent_id: string;
  /** When the event was produced. */
  timestamp: Date;
  /** Optional event-specific data (e.g. `last_heartbeat`, `orphaned_tasks`). */
  details?: Record<string, any>;
}
89
+
90
+ // =============================================================================
91
+ // Agent Registry
92
+ // =============================================================================
93
+
94
/**
 * In-memory registry of agents: registration, heartbeat-based health
 * monitoring, task-slot accounting and event notification.
 *
 * All state lives in this instance. The `async` methods contain no awaits;
 * they are declared `async` so callers get a Promise-based API.
 */
export class AgentRegistry {
  /** Registered agents keyed by agent ID. */
  private readonly agents: Map<string, Agent> = new Map();
  /** Pending heartbeat-timeout timer for each agent ID. */
  private readonly heartbeatTimers: Map<string, NodeJS.Timeout> = new Map();
  /** Listeners subscribed to a single agent, keyed by agent ID. */
  private readonly eventCallbacks: Map<string, ((event: AgentEvent) => void)[]> = new Map();
  /** Listeners subscribed to every agent. */
  private readonly globalEventCallbacks: ((event: AgentEvent) => void)[] = [];

  /** Configuration copied into every newly registered agent. */
  private defaultConfig: AgentConfig = {
    heartbeat_interval_ms: 5000,
    heartbeat_timeout_ms: 15000,
    drain_timeout_ms: 60000,
    auto_recover: true,
  };

  /**
   * @param defaultConfig Overrides merged over the built-in defaults.
   */
  constructor(defaultConfig?: Partial<AgentConfig>) {
    if (defaultConfig) {
      this.defaultConfig = { ...this.defaultConfig, ...defaultConfig };
    }
  }

  /**
   * Register a new agent and start monitoring its heartbeat.
   *
   * The agent starts in `initializing`; a `registered` event is emitted.
   *
   * @param params Registration parameters.
   * @returns The stored agent record.
   * @throws Error if `params.agent_id` is already registered.
   */
  async register(params: AgentRegistration): Promise<Agent> {
    const id = params.agent_id || this.generateAgentId();

    if (this.agents.has(id)) {
      throw new Error(`Agent ${id} already registered`);
    }

    const now = new Date();

    const agent: Agent = {
      id,
      name: params.name,

      status: 'initializing',
      registered_at: now,
      last_heartbeat: now,

      // Copy caller-owned collections so later mutation by the caller cannot
      // silently change registry state.
      capabilities: [...params.capabilities],
      // `||` rather than `??`: a limit of 0 also falls back to 5, which keeps
      // the divisor of the load ratio non-zero.
      max_concurrent_tasks: params.max_concurrent_tasks || 5,
      current_tasks: [],

      metrics: {
        tasks_completed: 0,
        tasks_failed: 0,
        total_duration_ms: 0,
        avg_duration_ms: 0,
        success_rate: 1,
        current_load: 0,
        uptime_ms: 0,
      },

      config: {
        ...this.defaultConfig,
        heartbeat_interval_ms: params.heartbeat_interval_ms || this.defaultConfig.heartbeat_interval_ms,
      },

      metadata: { ...params.metadata },
    };

    this.agents.set(id, agent);
    this.startHeartbeatMonitor(id);
    this.emitEvent({ type: 'registered', agent_id: id, timestamp: now });

    return agent;
  }

  /**
   * Mark an agent as ready and emit a `ready` event.
   *
   * @throws Error if the agent is not registered.
   */
  async setReady(agentId: string): Promise<void> {
    const agent = this.agents.get(agentId);
    if (!agent) throw new Error(`Agent ${agentId} not found`);

    agent.status = 'ready';
    this.emitEvent({ type: 'ready', agent_id: agentId, timestamp: new Date() });
  }

  /**
   * Process a heartbeat from an agent.
   *
   * Updates the last-heartbeat time, task list, metrics, metadata and uptime,
   * resolves the agent's new status, and re-arms the heartbeat timeout.
   *
   * Status resolution:
   * - a status reported in the payload is taken as the starting point;
   * - when `current_tasks` is supplied the status is derived from load,
   *   unless the agent is `draining` or explicitly reported itself as
   *   `offline`, `failed` or `initializing`;
   * - when a down agent (with `auto_recover`) resumes heartbeating without
   *   reporting any status, its status is derived from its current load.
   *
   * A status change emits the matching event. A down agent that comes back
   * additionally emits `recovered`.
   *
   * @returns `acknowledged: false` with a `re-register` instruction when the
   *   agent is unknown; otherwise `acknowledged: true`, plus `drain-tasks`
   *   when the agent is draining.
   */
  async heartbeat(payload: HeartbeatPayload): Promise<{ acknowledged: boolean; instructions?: string[] }> {
    const agent = this.agents.get(payload.agent_id);
    if (!agent) {
      return { acknowledged: false, instructions: ['re-register'] };
    }

    const now = new Date();
    const wasDown = AgentRegistry.isDown(agent.status);
    let nextStatus: AgentStatus = payload.status ?? agent.status;

    agent.last_heartbeat = now;

    // Update current tasks and the load derived from them.
    if (payload.current_tasks !== undefined) {
      agent.current_tasks = [...payload.current_tasks];
      agent.metrics.current_load = agent.current_tasks.length / agent.max_concurrent_tasks;

      // An agent that explicitly says it is not working must not be flipped
      // back to ready/busy just because it also sent its task list.
      const reportedNotWorking =
        payload.status === 'offline' || payload.status === 'failed' || payload.status === 'initializing';
      if (nextStatus !== 'draining' && !reportedNotWorking) {
        nextStatus = AgentRegistry.statusForLoad(agent.metrics.current_load);
      }
    }

    // Update metrics
    if (payload.metrics_update) {
      agent.metrics = { ...agent.metrics, ...payload.metrics_update };
    }

    // Update metadata
    if (payload.metadata_update) {
      agent.metadata = { ...agent.metadata, ...payload.metadata_update };
    }

    // Calculate uptime
    agent.metrics.uptime_ms = now.getTime() - agent.registered_at.getTime();

    // A down agent that resumes heartbeating without saying anything about
    // its state would otherwise stay offline/failed forever.
    const mayRecover = wasDown && agent.config.auto_recover;
    if (mayRecover && payload.status === undefined && AgentRegistry.isDown(nextStatus)) {
      nextStatus = AgentRegistry.statusForLoad(agent.current_tasks.length / agent.max_concurrent_tasks);
    }

    this.transitionTo(agent, nextStatus, now);

    // Recovery event, only when the agent is actually back.
    if (mayRecover && !AgentRegistry.isDown(agent.status)) {
      this.emitEvent({ type: 'recovered', agent_id: payload.agent_id, timestamp: now });
    }

    // Reset heartbeat timer
    this.resetHeartbeatTimer(payload.agent_id);

    const instructions: string[] = [];
    if (agent.status === 'draining') {
      instructions.push('drain-tasks');
    }

    return { acknowledged: true, instructions: instructions.length > 0 ? instructions : undefined };
  }

  /**
   * Get an agent by ID.
   *
   * @returns The live agent record, or `undefined` if not registered.
   */
  getAgent(agentId: string): Agent | undefined {
    return this.agents.get(agentId);
  }

  /**
   * Get all registered agents (new array, live records).
   */
  getAllAgents(): Agent[] {
    return Array.from(this.agents.values());
  }

  /**
   * Find agents matching every supplied criterion.
   *
   * When `query.status` is omitted or empty, only `ready` and `busy` agents
   * are considered.
   */
  findAgents(query: AgentQuery): Agent[] {
    const { capabilities, status, min_available_slots, max_load, exclude } = query;
    const acceptedStatuses: AgentStatus[] = status?.length ? status : ['ready', 'busy'];

    return Array.from(this.agents.values()).filter((agent) => {
      if (!acceptedStatuses.includes(agent.status)) return false;

      if (capabilities?.length && !capabilities.every((c) => agent.capabilities.includes(c))) {
        return false;
      }

      if (min_available_slots !== undefined) {
        const freeSlots = agent.max_concurrent_tasks - agent.current_tasks.length;
        if (!(freeSlots >= min_available_slots)) return false;
      }

      if (max_load !== undefined && !(agent.metrics.current_load <= max_load)) {
        return false;
      }

      if (exclude?.length && exclude.includes(agent.id)) return false;

      return true;
    });
  }

  /**
   * Get the best agent for a task.
   *
   * Candidates come from {@link findAgents}. `prefer_lowest_load` takes
   * precedence over `prefer_highest_success`; with neither set the first
   * candidate is returned.
   *
   * @returns The chosen agent, or `null` when nothing matches.
   */
  getBestAgent(query: AgentQuery & { prefer_lowest_load?: boolean; prefer_highest_success?: boolean }): Agent | null {
    // findAgents returns a fresh array, so sorting in place is safe.
    const candidates = this.findAgents(query);

    if (candidates.length === 0) return null;

    if (query.prefer_lowest_load) {
      candidates.sort((a, b) => a.metrics.current_load - b.metrics.current_load);
    } else if (query.prefer_highest_success) {
      candidates.sort((a, b) => b.metrics.success_rate - a.metrics.success_rate);
    }

    return candidates[0] ?? null;
  }

  /**
   * Assign a task to an agent.
   *
   * @returns `false` when the agent is unknown, has no free slot, or is
   *   `offline`, `failed` or `draining`; `true` once the task is recorded.
   */
  async assignTask(agentId: string, taskId: string): Promise<boolean> {
    const agent = this.agents.get(agentId);
    if (!agent) return false;

    if (agent.current_tasks.length >= agent.max_concurrent_tasks) {
      return false;
    }

    if (AgentRegistry.isDown(agent.status) || agent.status === 'draining') {
      return false;
    }

    agent.current_tasks.push(taskId);
    agent.metrics.current_load = agent.current_tasks.length / agent.max_concurrent_tasks;

    // Update status
    if (agent.metrics.current_load >= 1) {
      agent.status = 'overloaded';
      this.emitEvent({ type: 'overloaded', agent_id: agentId, timestamp: new Date() });
    } else if (agent.status === 'ready') {
      agent.status = 'busy';
      this.emitEvent({ type: 'busy', agent_id: agentId, timestamp: new Date() });
    }

    return true;
  }

  /**
   * Record the completion of a task.
   *
   * Removes the task from the agent's list (if present), updates the
   * counters and derived metrics, and relaxes the status to `busy`/`ready`
   * when load allows. Agents that are `draining`, `offline` or `failed` keep
   * their status. Unknown agents are ignored.
   *
   * @param success Whether the task succeeded.
   * @param durationMs Task duration in milliseconds.
   */
  async completeTask(agentId: string, taskId: string, success: boolean, durationMs: number): Promise<void> {
    const agent = this.agents.get(agentId);
    if (!agent) return;

    // Remove from current tasks
    const taskIndex = agent.current_tasks.indexOf(taskId);
    if (taskIndex !== -1) {
      agent.current_tasks.splice(taskIndex, 1);
    }

    // Update metrics
    const metrics = agent.metrics;
    if (success) {
      metrics.tasks_completed++;
    } else {
      metrics.tasks_failed++;
    }

    metrics.total_duration_ms += durationMs;
    const totalTasks = metrics.tasks_completed + metrics.tasks_failed;
    metrics.avg_duration_ms = metrics.total_duration_ms / totalTasks;
    metrics.success_rate = metrics.tasks_completed / totalTasks;
    metrics.current_load = agent.current_tasks.length / agent.max_concurrent_tasks;
    metrics.last_task_completed_at = new Date();

    // A finished task must not bring a down agent back online, nor cancel a
    // drain.
    if (agent.status === 'draining' || AgentRegistry.isDown(agent.status)) {
      return;
    }

    if (agent.current_tasks.length === 0) {
      this.transitionTo(agent, 'ready');
    } else if (metrics.current_load < 1) {
      this.transitionTo(agent, 'busy');
    }
  }

  /**
   * Start draining an agent so it stops accepting new tasks.
   *
   * Only flips the status and emits `draining`; it does not wait for tasks
   * to finish.
   *
   * @returns Whether the agent is already idle, and how many tasks remain.
   * @throws Error if the agent is not registered.
   */
  async drain(agentId: string): Promise<{ drained: boolean; remaining_tasks: number }> {
    const agent = this.agents.get(agentId);
    if (!agent) throw new Error(`Agent ${agentId} not found`);

    agent.status = 'draining';
    this.emitEvent({ type: 'draining', agent_id: agentId, timestamp: new Date() });

    const remaining = agent.current_tasks.length;
    return { drained: remaining === 0, remaining_tasks: remaining };
  }

  /**
   * Deregister an agent, stop its heartbeat monitoring and emit
   * `deregistered`.
   *
   * @param force Remove the agent even if it still has active tasks.
   * @returns The task IDs that were still assigned to the agent.
   * @throws Error if the agent is not registered, or has active tasks and
   *   `force` is false.
   */
  async deregister(agentId: string, force: boolean = false): Promise<{ success: boolean; orphaned_tasks: string[] }> {
    const agent = this.agents.get(agentId);
    if (!agent) throw new Error(`Agent ${agentId} not found`);

    const orphanedTasks = [...agent.current_tasks];

    if (!force && orphanedTasks.length > 0) {
      throw new Error(`Agent ${agentId} has ${orphanedTasks.length} active tasks. Use force=true or drain first.`);
    }

    // Clear heartbeat timer
    const timer = this.heartbeatTimers.get(agentId);
    if (timer) {
      clearTimeout(timer);
      this.heartbeatTimers.delete(agentId);
    }

    // Remove agent
    this.agents.delete(agentId);
    this.emitEvent({
      type: 'deregistered',
      agent_id: agentId,
      timestamp: new Date(),
      details: { orphaned_tasks: orphanedTasks },
    });

    return { success: true, orphaned_tasks: orphanedTasks };
  }

  /**
   * Get aggregate statistics over all registered agents.
   *
   * `avg_success_rate` averages only agents that have finished at least one
   * task and is 1 when there are none; `total_load` is 0 when there is no
   * capacity.
   */
  getStats(): {
    total_agents: number;
    by_status: Record<AgentStatus, number>;
    total_capacity: number;
    total_load: number;
    avg_success_rate: number;
  } {
    const byStatus: Record<AgentStatus, number> = {
      initializing: 0, ready: 0, busy: 0, overloaded: 0, draining: 0, offline: 0, failed: 0
    };

    let totalCapacity = 0;
    let totalCurrentTasks = 0;
    let totalSuccessRate = 0;
    let agentsWithTasks = 0;

    for (const agent of this.agents.values()) {
      byStatus[agent.status]++;
      totalCapacity += agent.max_concurrent_tasks;
      totalCurrentTasks += agent.current_tasks.length;

      if (agent.metrics.tasks_completed + agent.metrics.tasks_failed > 0) {
        totalSuccessRate += agent.metrics.success_rate;
        agentsWithTasks++;
      }
    }

    return {
      total_agents: this.agents.size,
      by_status: byStatus,
      total_capacity: totalCapacity,
      total_load: totalCapacity > 0 ? totalCurrentTasks / totalCapacity : 0,
      avg_success_rate: agentsWithTasks > 0 ? totalSuccessRate / agentsWithTasks : 1,
    };
  }

  /**
   * Subscribe to the events of one agent.
   *
   * @returns A function that removes the subscription.
   */
  onAgentEvent(agentId: string, callback: (event: AgentEvent) => void): () => void {
    const callbacks = this.eventCallbacks.get(agentId) || [];
    callbacks.push(callback);
    this.eventCallbacks.set(agentId, callbacks);

    return () => {
      const cbs = this.eventCallbacks.get(agentId);
      if (!cbs) return;

      const index = cbs.indexOf(callback);
      if (index !== -1) cbs.splice(index, 1);

      // Drop empty lists so the map does not grow with dead agent IDs.
      if (cbs.length === 0) this.eventCallbacks.delete(agentId);
    };
  }

  /**
   * Subscribe to the events of all agents.
   *
   * @returns A function that removes the subscription.
   */
  onAnyAgentEvent(callback: (event: AgentEvent) => void): () => void {
    this.globalEventCallbacks.push(callback);

    return () => {
      const index = this.globalEventCallbacks.indexOf(callback);
      if (index !== -1) this.globalEventCallbacks.splice(index, 1);
    };
  }

  /**
   * Subscribe to agents going `offline` or `failed`.
   *
   * The callback receives the agent and a copy of its active task IDs.
   *
   * @returns A function that removes the subscription.
   */
  onAgentDown(callback: (agent: Agent, activeTasks: string[]) => void): () => void {
    return this.onAnyAgentEvent((event) => {
      if (event.type !== 'offline' && event.type !== 'failed') return;

      const agent = this.agents.get(event.agent_id);
      if (agent) {
        callback(agent, [...agent.current_tasks]);
      }
    });
  }

  // ===========================================================================
  // Private Methods
  // ===========================================================================

  /** True for the statuses that mean the agent is not reachable/usable. */
  private static isDown(status: AgentStatus): boolean {
    return status === 'offline' || status === 'failed';
  }

  /** Map a load ratio to the matching working status. */
  private static statusForLoad(load: number): 'ready' | 'busy' | 'overloaded' {
    if (load >= 1) return 'overloaded';
    if (load > 0) return 'busy';
    return 'ready';
  }

  /** Generate an `agent_<8 hex chars>` ID that is not currently registered. */
  private generateAgentId(): string {
    let id: string;
    do {
      id = `agent_${crypto.randomUUID().split('-')[0]}`;
    } while (this.agents.has(id));
    return id;
  }

  /**
   * Set the agent's status and, if it actually changed, emit the event named
   * after the new status (`initializing` has no event type).
   */
  private transitionTo(agent: Agent, status: AgentStatus, timestamp: Date = new Date()): void {
    if (agent.status === status) return;

    agent.status = status;
    if (status !== 'initializing') {
      this.emitEvent({ type: status, agent_id: agent.id, timestamp });
    }
  }

  /** Arm (or re-arm) the heartbeat-timeout timer for a registered agent. */
  private startHeartbeatMonitor(agentId: string): void {
    const agent = this.agents.get(agentId);
    if (!agent) return;

    // Never leave a previous timer running alongside the new one.
    const existing = this.heartbeatTimers.get(agentId);
    if (existing) {
      clearTimeout(existing);
    }

    const timer = setTimeout(() => {
      this.handleHeartbeatTimeout(agentId);
    }, agent.config.heartbeat_timeout_ms);

    // The monitor re-arms itself indefinitely; it must not be the only thing
    // keeping the Node.js process alive.
    timer.unref();

    this.heartbeatTimers.set(agentId, timer);
  }

  /** Restart the heartbeat-timeout countdown after a heartbeat. */
  private resetHeartbeatTimer(agentId: string): void {
    this.startHeartbeatMonitor(agentId);
  }

  /** Mark a silent agent `offline` (once) and keep monitoring it. */
  private handleHeartbeatTimeout(agentId: string): void {
    const agent = this.agents.get(agentId);
    if (!agent) return;

    if (!AgentRegistry.isDown(agent.status)) {
      agent.status = 'offline';
      this.emitEvent({
        type: 'offline',
        agent_id: agentId,
        timestamp: new Date(),
        details: { last_heartbeat: agent.last_heartbeat },
      });
    }

    // Continue monitoring in case agent recovers
    this.startHeartbeatMonitor(agentId);
  }

  /**
   * Deliver an event to the agent-specific listeners, then to the global
   * ones. A throwing listener does not affect the others.
   */
  private emitEvent(event: AgentEvent): void {
    // Dispatch over a snapshot: listeners may unsubscribe (splice the
    // underlying arrays) while being notified, which would otherwise cause
    // the following listener to be skipped.
    const listeners = [
      ...(this.eventCallbacks.get(event.agent_id) ?? []),
      ...this.globalEventCallbacks,
    ];

    for (const listener of listeners) {
      try {
        listener(event);
      } catch {
        // Deliberately best-effort: a faulty listener must not break registry
        // operations or starve the remaining listeners.
      }
    }
  }
}
567
+
568
+ export default AgentRegistry;