@agenticmail/enterprise 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69):
  1. package/ARCHITECTURE.md +183 -0
  2. package/agenticmail-enterprise.db +0 -0
  3. package/dashboards/README.md +120 -0
  4. package/dashboards/dotnet/Program.cs +261 -0
  5. package/dashboards/express/app.js +146 -0
  6. package/dashboards/go/main.go +513 -0
  7. package/dashboards/html/index.html +535 -0
  8. package/dashboards/java/AgenticMailDashboard.java +376 -0
  9. package/dashboards/php/index.php +414 -0
  10. package/dashboards/python/app.py +273 -0
  11. package/dashboards/ruby/app.rb +195 -0
  12. package/dist/chunk-77IDQJL3.js +7 -0
  13. package/dist/chunk-7RGCCHIT.js +115 -0
  14. package/dist/chunk-DXNKR3TG.js +1355 -0
  15. package/dist/chunk-IQWA44WT.js +970 -0
  16. package/dist/chunk-LCUZGIDH.js +965 -0
  17. package/dist/chunk-N2JVTNNJ.js +2553 -0
  18. package/dist/chunk-O462UJBH.js +363 -0
  19. package/dist/chunk-PNKVD2UK.js +26 -0
  20. package/dist/cli.js +218 -0
  21. package/dist/dashboard/index.html +558 -0
  22. package/dist/db-adapter-DEWEFNIV.js +7 -0
  23. package/dist/dynamodb-CCGL2E77.js +426 -0
  24. package/dist/engine/index.js +1261 -0
  25. package/dist/index.js +522 -0
  26. package/dist/mongodb-ODTXIVPV.js +319 -0
  27. package/dist/mysql-RM3S2FV5.js +521 -0
  28. package/dist/postgres-LN7A6MGQ.js +518 -0
  29. package/dist/routes-2JEPIIKC.js +441 -0
  30. package/dist/routes-74ZLKJKP.js +399 -0
  31. package/dist/server.js +7 -0
  32. package/dist/sqlite-3K5YOZ4K.js +439 -0
  33. package/dist/turso-LDWODSDI.js +442 -0
  34. package/package.json +49 -0
  35. package/src/admin/routes.ts +331 -0
  36. package/src/auth/routes.ts +130 -0
  37. package/src/cli.ts +260 -0
  38. package/src/dashboard/index.html +558 -0
  39. package/src/db/adapter.ts +230 -0
  40. package/src/db/dynamodb.ts +456 -0
  41. package/src/db/factory.ts +51 -0
  42. package/src/db/mongodb.ts +360 -0
  43. package/src/db/mysql.ts +472 -0
  44. package/src/db/postgres.ts +479 -0
  45. package/src/db/sql-schema.ts +123 -0
  46. package/src/db/sqlite.ts +391 -0
  47. package/src/db/turso.ts +411 -0
  48. package/src/deploy/fly.ts +368 -0
  49. package/src/deploy/managed.ts +213 -0
  50. package/src/engine/activity.ts +474 -0
  51. package/src/engine/agent-config.ts +429 -0
  52. package/src/engine/agenticmail-bridge.ts +296 -0
  53. package/src/engine/approvals.ts +278 -0
  54. package/src/engine/db-adapter.ts +682 -0
  55. package/src/engine/db-schema.ts +335 -0
  56. package/src/engine/deployer.ts +595 -0
  57. package/src/engine/index.ts +134 -0
  58. package/src/engine/knowledge.ts +486 -0
  59. package/src/engine/lifecycle.ts +635 -0
  60. package/src/engine/openclaw-hook.ts +371 -0
  61. package/src/engine/routes.ts +528 -0
  62. package/src/engine/skills.ts +473 -0
  63. package/src/engine/tenant.ts +345 -0
  64. package/src/engine/tool-catalog.ts +189 -0
  65. package/src/index.ts +64 -0
  66. package/src/lib/resilience.ts +326 -0
  67. package/src/middleware/index.ts +286 -0
  68. package/src/server.ts +310 -0
  69. package/tsconfig.json +14 -0
@@ -0,0 +1,635 @@
1
+ /**
2
+ * Agent Lifecycle Manager
3
+ *
4
+ * Manages the full lifecycle of an autonomous AI agent employee:
5
+ * create → configure → deploy → running → monitor → update → stop
6
+ *
7
+ * This is the core state machine. Every agent goes through these states
8
+ * and the manager handles transitions, health checks, auto-recovery,
9
+ * and status tracking.
10
+ */
11
+
12
+ import type { DatabaseAdapter } from '../db/adapter.js';
13
+ import type { AgentConfig, DeploymentStatus } from './agent-config.js';
14
+ import { AgentConfigGenerator } from './agent-config.js';
15
+ import { DeploymentEngine } from './deployer.js';
16
+ import { PermissionEngine } from './skills.js';
17
+
18
+ // ─── Types ──────────────────────────────────────────────
19
+
20
/**
 * Every state a managed agent can occupy in the lifecycle state machine.
 */
export type AgentState =
  // Created but not configured
  | 'draft'
  // Skills/permissions/identity being set up
  | 'configuring'
  // Fully configured, waiting for deploy
  | 'ready'
  // Infrastructure being created
  | 'provisioning'
  // Code/config being pushed
  | 'deploying'
  // Container/process starting up
  | 'starting'
  // Active and healthy
  | 'running'
  // Running but with issues
  | 'degraded'
  // Intentionally stopped
  | 'stopped'
  // Failed — needs attention
  | 'error'
  // Config/code update in progress
  | 'updating'
  // Being torn down
  | 'destroying';
33
+
34
/**
 * An agent record as tracked by the lifecycle manager: its configuration,
 * current state, transition history, health snapshot and usage counters.
 */
export interface ManagedAgent {
  id: string;
  /** Which company owns this agent. */
  orgId: string;
  config: AgentConfig;
  state: AgentState;
  stateHistory: Array<StateTransition>;
  health: AgentHealth;
  usage: AgentUsage;
  createdAt: string;
  updatedAt: string;
  lastDeployedAt?: string;
  lastHealthCheckAt?: string;
  /** Config version for optimistic locking. */
  version: number;
}
48
+
49
/**
 * One recorded move of an agent from one lifecycle state to another.
 */
export interface StateTransition {
  from: AgentState;
  to: AgentState;
  reason: string;
  /** User ID or 'system'. */
  triggeredBy: string;
  timestamp: string;
  error?: string;
}
57
+
58
/**
 * Health snapshot kept for each managed agent.
 */
export interface AgentHealth {
  status: 'healthy' | 'degraded' | 'unhealthy' | 'unknown';
  lastCheck: string;
  /** Seconds since last start. */
  uptime: number;
  consecutiveFailures: number;
  checks: Array<HealthCheck>;
}
65
+
66
/**
 * Result of a single named health probe.
 */
export interface HealthCheck {
  name: string;
  status: 'pass' | 'fail' | 'warn';
  message?: string;
  timestamp: string;
  durationMs: number;
}
73
+
74
/**
 * Usage counters tracked per agent: tokens, tool calls, external actions,
 * estimated cost, sessions and errors.
 */
export interface AgentUsage {
  // ── Token usage ──
  tokensToday: number;
  tokensThisMonth: number;
  /** 0 = unlimited. */
  tokenBudgetMonthly: number;

  // ── Tool calls ──
  toolCallsToday: number;
  toolCallsThisMonth: number;

  // ── External actions (emails sent, messages, etc.) ──
  externalActionsToday: number;
  externalActionsThisMonth: number;

  // ── Cost estimate (USD) ──
  costToday: number;
  costThisMonth: number;
  /** 0 = unlimited. */
  costBudgetMonthly: number;

  // ── Sessions ──
  activeSessionCount: number;
  totalSessionsToday: number;

  // ── Errors ──
  errorsToday: number;
  /** Errors per hour in last hour. */
  errorRate1h: number;

  lastUpdated: string;
}
103
+
104
/**
 * Event delivered to lifecycle listeners; `data` carries the
 * event-specific payload.
 */
export interface LifecycleEvent {
  id: string;
  agentId: string;
  orgId: string;
  type: LifecycleEventType;
  data: Record<string, any>;
  timestamp: string;
}
112
+
113
/**
 * Discriminator for the kinds of lifecycle events that can be emitted.
 */
export type LifecycleEventType =
  // Agent record and deployment lifecycle
  | 'created'
  | 'configured'
  | 'deployed'
  | 'started'
  | 'stopped'
  | 'restarted'
  | 'updated'
  // Failures and monitoring
  | 'error'
  | 'health_check'
  | 'auto_recovered'
  // Budgets
  | 'budget_warning'
  | 'budget_exceeded'
  // Tool usage and approvals
  | 'tool_call'
  | 'approval_requested'
  | 'approval_decided'
  // Teardown
  | 'destroyed';
130
+
131
+ // ─── Lifecycle Manager ──────────────────────────────────
132
+
133
/**
 * In-memory state machine for managed agents.
 *
 * Holds every agent in a map keyed by agent id, delegates infrastructure work
 * to a `DeploymentEngine`, polls deployed agents on a timer, tracks usage
 * against monthly budgets, and notifies subscribed listeners of lifecycle
 * events. Persistence is currently in-memory only (see `persistAgent`).
 */
export class AgentLifecycleManager {
  /** Delay between background health probes unless overridden in the constructor. */
  private static readonly DEFAULT_HEALTH_CHECK_INTERVAL_MS = 30_000;
  /** Delay between status polls while waiting for an agent to come up. */
  private static readonly HEALTH_POLL_DELAY_MS = 3_000;
  /** Number of most-recent health checks kept per agent. */
  private static readonly HEALTH_HISTORY_LIMIT = 10;
  /** Number of most-recent state transitions kept per agent. */
  private static readonly STATE_HISTORY_LIMIT = 50;
  /** Consecutive failed checks after which a running agent is marked degraded. */
  private static readonly DEGRADE_AFTER_FAILURES = 2;
  /** Consecutive failed checks after which health is reported as unhealthy. */
  private static readonly UNHEALTHY_AFTER_FAILURES = 3;
  /** Consecutive failed checks after which an automatic restart is attempted. */
  private static readonly AUTO_RESTART_AFTER_FAILURES = 5;
  /** Fraction of a monthly budget at which a warning event is emitted. */
  private static readonly BUDGET_WARNING_RATIO = 0.8;
  /** States in which the agent is expected to have a live deployment. */
  private static readonly LIVE_STATES: readonly AgentState[] = ['running', 'degraded', 'starting'];

  private agents = new Map<string, ManagedAgent>();
  private healthCheckIntervals = new Map<string, ReturnType<typeof setInterval>>();
  /** Agent ids with a budget-triggered stop() still in progress. */
  private budgetStopsInFlight = new Set<string>();
  private deployer = new DeploymentEngine();
  private configGen = new AgentConfigGenerator();
  private permissions: PermissionEngine;
  private db?: DatabaseAdapter;
  private eventListeners: ((event: LifecycleEvent) => void)[] = [];
  private healthCheckIntervalMs: number;

  /**
   * @param opts.db Optional database adapter (stored, not yet used for persistence).
   * @param opts.permissions Permission engine; a fresh one is created when omitted.
   * @param opts.healthCheckIntervalMs Delay between background health probes.
   *   Non-positive or non-finite values fall back to the 30 second default.
   */
  constructor(opts?: { db?: DatabaseAdapter; permissions?: PermissionEngine; healthCheckIntervalMs?: number }) {
    this.db = opts?.db;
    this.permissions = opts?.permissions || new PermissionEngine();
    const requested = opts?.healthCheckIntervalMs;
    this.healthCheckIntervalMs =
      typeof requested === 'number' && Number.isFinite(requested) && requested > 0
        ? requested
        : AgentLifecycleManager.DEFAULT_HEALTH_CHECK_INTERVAL_MS;
  }

  // ─── Agent CRUD ─────────────────────────────────────

  /**
   * Create a new managed agent (starts in 'draft' state).
   *
   * @param orgId Organization that owns the agent.
   * @param config Initial configuration; `config.id` is used as the agent id
   *   when set, otherwise a random UUID is generated.
   * @param createdBy Identifier reported in the 'created' event.
   * @returns The newly registered agent.
   */
  async createAgent(orgId: string, config: AgentConfig, createdBy: string): Promise<ManagedAgent> {
    const now = new Date().toISOString();
    const agent: ManagedAgent = {
      id: config.id || crypto.randomUUID(),
      orgId,
      config,
      state: 'draft',
      stateHistory: [],
      health: {
        status: 'unknown',
        lastCheck: now,
        uptime: 0,
        consecutiveFailures: 0,
        checks: [],
      },
      usage: this.emptyUsage(),
      createdAt: now,
      updatedAt: now,
      version: 1,
    };

    this.agents.set(agent.id, agent);
    await this.persistAgent(agent);
    this.emitEvent(agent, 'created', { createdBy });

    return agent;
  }

  /**
   * Update agent configuration (must be in draft, ready, stopped, or error state).
   *
   * The updates are shallow-merged over the current config and the version is
   * bumped. If the merged config is complete the agent moves to 'ready';
   * if it is incomplete a non-draft agent is moved back to 'draft' so that
   * 'ready' always means "fully configured".
   *
   * @throws Error when the agent is unknown or not in a mutable state.
   */
  async updateConfig(agentId: string, updates: Partial<AgentConfig>, updatedBy: string): Promise<ManagedAgent> {
    const agent = this.requireAgent(agentId);

    const mutableStates: AgentState[] = ['draft', 'ready', 'stopped', 'error'];
    if (!mutableStates.includes(agent.state)) {
      throw new Error(`Cannot update config in state "${agent.state}". Stop the agent first.`);
    }

    // Merge updates
    const now = new Date().toISOString();
    agent.config = { ...agent.config, ...updates, updatedAt: now };
    agent.updatedAt = now;
    agent.version++;

    if (this.isConfigComplete(agent.config)) {
      const reason = agent.state === 'draft' ? 'Configuration complete' : 'Configuration updated';
      this.transition(agent, 'ready', reason, updatedBy);
    } else if (agent.state !== 'draft') {
      // An update removed a required field: the agent is no longer deployable.
      this.transition(agent, 'draft', 'Configuration incomplete', updatedBy);
    }

    await this.persistAgent(agent);
    this.emitEvent(agent, 'configured', { updatedBy, changes: Object.keys(updates) });

    return agent;
  }

  /**
   * Deploy an agent to its target environment.
   *
   * Walks the agent through provisioning → deploying → starting and ends in
   * 'running' (status poll succeeded within 60s), 'degraded' (it did not) or
   * 'error' (the deployer reported failure). Background health checks are
   * started for the 'running' and 'degraded' outcomes.
   *
   * @throws Error when the agent is unknown, not deployable from its current
   *   state, or its config is incomplete; rethrows deployer exceptions after
   *   moving the agent to 'error'.
   */
  async deploy(agentId: string, deployedBy: string): Promise<ManagedAgent> {
    const agent = this.requireAgent(agentId);

    if (!['ready', 'stopped', 'error'].includes(agent.state)) {
      throw new Error(`Cannot deploy from state "${agent.state}"`);
    }

    if (!this.isConfigComplete(agent.config)) {
      throw new Error('Agent configuration is incomplete');
    }

    // An agent that ended in 'error' via a failed auto-restart may still have
    // a monitor ticking; it must not interfere with the deployment states.
    this.stopHealthCheckLoop(agentId);

    this.transition(agent, 'provisioning', 'Deployment initiated', deployedBy);
    await this.persistAgent(agent);

    try {
      this.transition(agent, 'deploying', 'Pushing configuration', 'system');

      const result = await this.deployer.deploy(agent.config, (event) => {
        this.emitEvent(agent, 'deployed', { phase: event.phase, status: event.status, message: event.message });
      });

      if (result.success) {
        this.transition(agent, 'starting', 'Deployment successful, agent starting', 'system');
        agent.lastDeployedAt = new Date().toISOString();

        // Wait for agent to be healthy
        const healthy = await this.waitForHealthy(agent, 60_000);
        if (healthy) {
          this.transition(agent, 'running', 'Agent is healthy and running', 'system');
        } else {
          this.transition(agent, 'degraded', 'Agent started but health check failed', 'system');
        }
        this.startHealthCheckLoop(agent);
      } else {
        this.transition(
          agent,
          'error',
          `Deployment failed: ${result.error}`,
          'system',
          typeof result.error === 'string' ? result.error : undefined,
        );
      }

      await this.persistAgent(agent);
      return agent;
    } catch (error: unknown) {
      const message = AgentLifecycleManager.describeError(error);
      this.transition(agent, 'error', `Deployment error: ${message}`, 'system', message);
      await this.persistAgent(agent);
      throw error;
    }
  }

  /**
   * Stop a running agent.
   *
   * The agent always ends in 'stopped'; if the deployer fails, the failure is
   * recorded on the transition instead of being thrown.
   *
   * @throws Error when the agent is unknown or not in a stoppable state.
   */
  async stop(agentId: string, stoppedBy: string, reason?: string): Promise<ManagedAgent> {
    const agent = this.requireAgent(agentId);

    if (!['running', 'degraded', 'starting', 'error'].includes(agent.state)) {
      throw new Error(`Cannot stop from state "${agent.state}"`);
    }

    this.stopHealthCheckLoop(agentId);

    try {
      await this.deployer.stop(agent.config);
      this.transition(agent, 'stopped', reason || 'Stopped by user', stoppedBy);
    } catch (error: unknown) {
      const message = AgentLifecycleManager.describeError(error);
      this.transition(agent, 'stopped', `Stopped with error: ${message}`, stoppedBy, message);
    }

    await this.persistAgent(agent);
    this.emitEvent(agent, 'stopped', { stoppedBy, reason });
    return agent;
  }

  /**
   * Restart an agent.
   *
   * Ends in 'running' or 'degraded' depending on the post-restart status poll
   * (30s), or in 'error' when the deployer throws. After a successful restart
   * a health-check loop is started if none is active, so an agent restarted
   * from 'stopped' or 'error' is monitored again. The 'restarted' event carries
   * a `success` flag.
   *
   * @throws Error when the agent is unknown.
   */
  async restart(agentId: string, restartedBy: string): Promise<ManagedAgent> {
    const agent = this.requireAgent(agentId);

    this.transition(agent, 'updating', 'Restarting', restartedBy);

    let success = false;
    try {
      await this.deployer.restart(agent.config);
      const healthy = await this.waitForHealthy(agent, 30_000);
      this.transition(agent, healthy ? 'running' : 'degraded', 'Restarted', 'system');
      if (!this.healthCheckIntervals.has(agent.id)) {
        this.startHealthCheckLoop(agent);
      }
      success = true;
    } catch (error: unknown) {
      const message = AgentLifecycleManager.describeError(error);
      this.transition(agent, 'error', `Restart failed: ${message}`, 'system', message);
    }

    await this.persistAgent(agent);
    this.emitEvent(agent, 'restarted', { restartedBy, success });
    return agent;
  }

  /**
   * Hot-update config on a running agent (no full redeploy).
   *
   * The merged config is pushed to the deployer first and only committed to
   * the agent record (with a version bump) when the push succeeds. On failure
   * the previous config and version are kept and the agent is marked
   * 'degraded'. The 'updated' event carries a `success` flag.
   *
   * @throws Error when the agent is unknown or not running/degraded.
   */
  async hotUpdate(agentId: string, updates: Partial<AgentConfig>, updatedBy: string): Promise<ManagedAgent> {
    const agent = this.requireAgent(agentId);

    if (agent.state !== 'running' && agent.state !== 'degraded') {
      throw new Error(`Hot update only works on running agents (current: "${agent.state}")`);
    }

    const prevState = agent.state;
    const nextConfig = { ...agent.config, ...updates, updatedAt: new Date().toISOString() };
    this.transition(agent, 'updating', 'Hot config update', updatedBy);

    let success = false;
    try {
      await this.deployer.updateConfig(nextConfig);
      // Commit only what the deployment actually accepted.
      agent.config = nextConfig;
      agent.version++;
      this.transition(agent, prevState, 'Config updated successfully', 'system');
      success = true;
    } catch (error: unknown) {
      const message = AgentLifecycleManager.describeError(error);
      this.transition(agent, 'degraded', `Config update failed: ${message}`, 'system', message);
    }

    await this.persistAgent(agent);
    this.emitEvent(agent, 'updated', { updatedBy, hotUpdate: true, success });
    return agent;
  }

  /**
   * Destroy an agent completely (stop + remove from the manager).
   *
   * The deployment is stopped (best effort) when the agent was running,
   * degraded or starting at the time of the call.
   *
   * @throws Error when the agent is unknown.
   */
  async destroy(agentId: string, destroyedBy: string): Promise<void> {
    const agent = this.requireAgent(agentId);

    // Must be evaluated before the transition below overwrites agent.state.
    const wasLive = AgentLifecycleManager.LIVE_STATES.includes(agent.state);

    this.transition(agent, 'destroying', 'Agent being destroyed', destroyedBy);
    this.stopHealthCheckLoop(agentId);

    if (wasLive) {
      try { await this.deployer.stop(agent.config); } catch { /* best effort */ }
    }

    this.emitEvent(agent, 'destroyed', { destroyedBy });
    this.agents.delete(agentId);
    // DB cleanup would happen here
  }

  // ─── Monitoring ─────────────────────────────────────

  /**
   * Record a tool call for usage tracking.
   *
   * Updates the agent's daily/monthly counters, emits 'budget_warning' at 80%
   * and 'budget_exceeded' at 100% of each non-zero monthly budget (tokens and
   * cost), triggers a single background stop when any budget is exceeded, and
   * finally emits 'tool_call'. Unknown agent ids are ignored.
   */
  recordToolCall(agentId: string, toolId: string, opts?: {
    tokensUsed?: number;
    costUsd?: number;
    isExternalAction?: boolean;
    error?: boolean;
  }) {
    const agent = this.agents.get(agentId);
    if (!agent) return;

    const usage = agent.usage;
    usage.toolCallsToday++;
    usage.toolCallsThisMonth++;
    if (opts?.tokensUsed) {
      usage.tokensToday += opts.tokensUsed;
      usage.tokensThisMonth += opts.tokensUsed;
    }
    if (opts?.costUsd) {
      usage.costToday += opts.costUsd;
      usage.costThisMonth += opts.costUsd;
    }
    if (opts?.isExternalAction) {
      usage.externalActionsToday++;
      usage.externalActionsThisMonth++;
    }
    if (opts?.error) {
      usage.errorsToday++;
    }
    usage.lastUpdated = new Date().toISOString();

    // Budget checks
    const tokensExceeded = this.checkBudget(agent, 'tokens', usage.tokensThisMonth, usage.tokenBudgetMonthly);
    const costExceeded = this.checkBudget(agent, 'cost', usage.costThisMonth, usage.costBudgetMonthly);
    if (tokensExceeded) {
      this.autoStopForBudget(agent, 'Monthly token budget exceeded');
    } else if (costExceeded) {
      this.autoStopForBudget(agent, 'Monthly cost budget exceeded');
    }

    this.emitEvent(agent, 'tool_call', { toolId, ...opts });
  }

  /**
   * Get all agents for an org.
   */
  getAgentsByOrg(orgId: string): ManagedAgent[] {
    return Array.from(this.agents.values()).filter(a => a.orgId === orgId);
  }

  /**
   * Get a single agent, or undefined when the id is not registered.
   */
  getAgent(agentId: string): ManagedAgent | undefined {
    return this.agents.get(agentId);
  }

  /**
   * Get org-wide usage summary: totals across the org's agents plus a
   * per-agent breakdown. `runningAgents` counts agents in state 'running' only.
   */
  getOrgUsage(orgId: string): {
    totalAgents: number;
    runningAgents: number;
    totalTokensToday: number;
    totalCostToday: number;
    totalToolCallsToday: number;
    totalErrorsToday: number;
    agents: { id: string; name: string; state: AgentState; usage: AgentUsage }[];
  } {
    const agents = this.getAgentsByOrg(orgId);
    return {
      totalAgents: agents.length,
      runningAgents: agents.filter(a => a.state === 'running').length,
      totalTokensToday: agents.reduce((sum, a) => sum + a.usage.tokensToday, 0),
      totalCostToday: agents.reduce((sum, a) => sum + a.usage.costToday, 0),
      totalToolCallsToday: agents.reduce((sum, a) => sum + a.usage.toolCallsToday, 0),
      totalErrorsToday: agents.reduce((sum, a) => sum + a.usage.errorsToday, 0),
      agents: agents.map(a => ({ id: a.id, name: a.config.displayName, state: a.state, usage: a.usage })),
    };
  }

  /**
   * Subscribe to lifecycle events (for dashboard real-time updates).
   *
   * @returns A function that removes the listener again.
   */
  onEvent(listener: (event: LifecycleEvent) => void): () => void {
    this.eventListeners.push(listener);
    return () => { this.eventListeners = this.eventListeners.filter(l => l !== listener); };
  }

  /**
   * Reset daily counters (call at midnight via cron).
   */
  resetDailyCounters() {
    for (const agent of this.agents.values()) {
      agent.usage.tokensToday = 0;
      agent.usage.toolCallsToday = 0;
      agent.usage.externalActionsToday = 0;
      agent.usage.costToday = 0;
      agent.usage.errorsToday = 0;
      agent.usage.totalSessionsToday = 0;
    }
  }

  /**
   * Reset monthly counters (call on 1st of month).
   */
  resetMonthlyCounters() {
    for (const agent of this.agents.values()) {
      agent.usage.tokensThisMonth = 0;
      agent.usage.toolCallsThisMonth = 0;
      agent.usage.externalActionsThisMonth = 0;
      agent.usage.costThisMonth = 0;
    }
  }

  // ─── Health Check Loop ────────────────────────────────

  /**
   * (Re)start the periodic health check for an agent. Any existing loop for
   * the same agent is cleared first. A tick is skipped while the previous one
   * is still awaiting the deployer, so slow probes never overlap.
   */
  private startHealthCheckLoop(agent: ManagedAgent) {
    this.stopHealthCheckLoop(agent.id);

    let tickInFlight = false;
    const interval = setInterval(() => {
      if (tickInFlight) return;
      tickInFlight = true;
      void this.runHealthCheck(agent)
        .catch((error: unknown) => {
          // Surface unexpected failures to listeners; the next tick retries.
          this.emitEvent(agent, 'error', {
            source: 'health_check',
            error: AgentLifecycleManager.describeError(error),
          });
        })
        .finally(() => { tickInFlight = false; });
    }, this.healthCheckIntervalMs);

    this.healthCheckIntervals.set(agent.id, interval);
  }

  /** Cancel the periodic health check for an agent, if one is active. */
  private stopHealthCheckLoop(agentId: string) {
    const interval = this.healthCheckIntervals.get(agentId);
    if (interval) {
      clearInterval(interval);
      this.healthCheckIntervals.delete(agentId);
    }
  }

  /**
   * Run one health probe and fold the result into the agent's health, usage
   * and state. A probe that throws is treated as a failed check.
   */
  private async runHealthCheck(agent: ManagedAgent): Promise<void> {
    const startedAt = Date.now();
    const outcome = await this.deployer.getStatus(agent.config).then(
      (status) => ({ status, failure: undefined as string | undefined }),
      (error: unknown) => ({ status: undefined, failure: AgentLifecycleManager.describeError(error) }),
    );

    // stop()/destroy() cancel monitoring; if that happened while the probe was
    // in flight, do not touch (or re-persist) the agent any more.
    if (!this.healthCheckIntervals.has(agent.id)) return;

    const { status, failure } = outcome;
    const now = new Date().toISOString();
    agent.lastHealthCheckAt = now;
    agent.health.lastCheck = now;

    const check: HealthCheck = {
      name: 'deployment_status',
      status: status && status.status === 'running' ? 'pass' : 'fail',
      message: status
        ? `Status: ${status.status}, Health: ${status.healthStatus}`
        : `Status probe failed: ${failure}`,
      timestamp: now,
      durationMs: Date.now() - startedAt,
    };
    agent.health.checks = [check, ...agent.health.checks].slice(0, AgentLifecycleManager.HEALTH_HISTORY_LIMIT);

    if (status && status.status === 'running' && status.healthStatus === 'healthy') {
      agent.health.status = 'healthy';
      agent.health.consecutiveFailures = 0;
      if (status.uptime) agent.health.uptime = status.uptime;
      if (status.metrics) {
        agent.usage.activeSessionCount = status.metrics.activeSessionCount;
      }
      if (agent.state === 'degraded') {
        this.transition(agent, 'running', 'Health restored', 'system');
        this.emitEvent(agent, 'auto_recovered', {});
      } else if (agent.state === 'starting') {
        // Reached after an auto-restart; without this the agent would stay
        // in 'starting' indefinitely.
        this.transition(agent, 'running', 'Agent is healthy and running', 'system');
      }
    } else {
      await this.handleFailedCheck(agent, status === undefined);
    }

    await this.persistAgent(agent);
  }

  /**
   * Apply the escalation ladder for a failed health check: degrade a running
   * agent after 2 consecutive failures and attempt an automatic restart after
   * 5, but only while the agent is in a live state.
   *
   * @param probeFailed True when the status probe itself threw, in which case
   *   health is reported as 'unhealthy' immediately.
   */
  private async handleFailedCheck(agent: ManagedAgent, probeFailed: boolean): Promise<void> {
    const health = agent.health;
    health.consecutiveFailures++;
    health.status =
      probeFailed || health.consecutiveFailures >= AgentLifecycleManager.UNHEALTHY_AFTER_FAILURES
        ? 'unhealthy'
        : 'degraded';

    if (agent.state === 'running' && health.consecutiveFailures >= AgentLifecycleManager.DEGRADE_AFTER_FAILURES) {
      this.transition(agent, 'degraded', `Health degraded: ${health.consecutiveFailures} consecutive failures`, 'system');
    }

    const shouldRestart =
      health.consecutiveFailures >= AgentLifecycleManager.AUTO_RESTART_AFTER_FAILURES &&
      AgentLifecycleManager.LIVE_STATES.includes(agent.state);
    if (!shouldRestart) return;

    const failures = health.consecutiveFailures;
    health.consecutiveFailures = 0;
    try {
      await this.deployer.restart(agent.config);
      // Monitoring may have been cancelled (stop/destroy) during the restart.
      if (!this.healthCheckIntervals.has(agent.id)) return;
      this.transition(agent, 'starting', 'Auto-restarted after health failures', 'system');
      this.emitEvent(agent, 'auto_recovered', { action: 'restart', failures });
    } catch (error: unknown) {
      if (!this.healthCheckIntervals.has(agent.id)) return;
      const message = AgentLifecycleManager.describeError(error);
      this.transition(agent, 'error', 'Auto-restart failed', 'system', message);
      this.emitEvent(agent, 'error', { action: 'restart', failures, error: message });
    }
  }

  // ─── Private Helpers ──────────────────────────────────

  /** Look up an agent or throw the standard "not found" error. */
  private requireAgent(agentId: string): ManagedAgent {
    const agent = this.getAgent(agentId);
    if (!agent) throw new Error(`Agent ${agentId} not found`);
    return agent;
  }

  /** Turn any thrown value into a printable message. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Emit budget events for one budget dimension.
   *
   * @returns True when the budget is set (> 0) and has been reached.
   */
  private checkBudget(agent: ManagedAgent, type: 'tokens' | 'cost', used: number, budget: number): boolean {
    if (budget <= 0) return false; // 0 = unlimited
    if (used >= budget) {
      this.emitEvent(agent, 'budget_exceeded', { type, used, budget });
      return true;
    }
    if (used >= budget * AgentLifecycleManager.BUDGET_WARNING_RATIO) {
      const percent = Math.round(AgentLifecycleManager.BUDGET_WARNING_RATIO * 100);
      this.emitEvent(agent, 'budget_warning', { type, used, budget, percent });
    }
    return false;
  }

  /**
   * Fire-and-forget stop for a budget violation. At most one such stop is in
   * flight per agent; rejections (e.g. agent not in a stoppable state) are
   * intentionally ignored.
   */
  private autoStopForBudget(agent: ManagedAgent, reason: string) {
    if (this.budgetStopsInFlight.has(agent.id)) return;
    this.budgetStopsInFlight.add(agent.id);
    void this.stop(agent.id, 'system', reason)
      .catch(() => { /* best effort */ })
      .finally(() => { this.budgetStopsInFlight.delete(agent.id); });
  }

  /**
   * Record a state change on the agent and keep only the most recent 50
   * transitions.
   *
   * @param error Optional failure detail stored on the transition record.
   */
  private transition(agent: ManagedAgent, to: AgentState, reason: string, triggeredBy: string, error?: string) {
    const now = new Date().toISOString();
    const entry: StateTransition = { from: agent.state, to, reason, triggeredBy, timestamp: now };
    if (error !== undefined) entry.error = error;

    agent.stateHistory.push(entry);
    if (agent.stateHistory.length > AgentLifecycleManager.STATE_HISTORY_LIMIT) {
      agent.stateHistory = agent.stateHistory.slice(-AgentLifecycleManager.STATE_HISTORY_LIMIT);
    }
    agent.state = to;
    agent.updatedAt = now;
  }

  /** True when every field required for deployment is set (truthy). */
  private isConfigComplete(config: AgentConfig): boolean {
    return !!(
      config.name &&
      config.displayName &&
      config.identity?.role &&
      config.model?.modelId &&
      config.deployment?.target &&
      config.permissionProfileId
    );
  }

  /**
   * Poll the deployer until it reports status 'running' or the timeout
   * elapses. Probe errors are retried. The final sleep is clamped so the call
   * does not run past `timeoutMs`.
   */
  private async waitForHealthy(agent: ManagedAgent, timeoutMs: number): Promise<boolean> {
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
      try {
        const status = await this.deployer.getStatus(agent.config);
        if (status.status === 'running') return true;
      } catch { /* retry */ }

      const remaining = deadline - Date.now();
      if (remaining <= 0) break;
      const delay = Math.min(AgentLifecycleManager.HEALTH_POLL_DELAY_MS, remaining);
      await new Promise(resolve => setTimeout(resolve, delay));
    }
    return false;
  }

  private async persistAgent(agent: ManagedAgent) {
    // In production, this writes to the database
    // For now, just update the in-memory map
    this.agents.set(agent.id, agent);
    // TODO: this.db?.upsertManagedAgent(agent);
  }

  /**
   * Build a lifecycle event and deliver it to every listener. A listener that
   * throws does not prevent delivery to the others.
   */
  private emitEvent(agent: ManagedAgent, type: LifecycleEventType, data: Record<string, any>) {
    const event: LifecycleEvent = {
      id: crypto.randomUUID(),
      agentId: agent.id,
      orgId: agent.orgId,
      type,
      data,
      timestamp: new Date().toISOString(),
    };
    for (const listener of this.eventListeners) {
      try { listener(event); } catch { /* ignore */ }
    }
  }

  /** Fresh usage record with all counters and budgets at zero. */
  private emptyUsage(): AgentUsage {
    return {
      tokensToday: 0, tokensThisMonth: 0, tokenBudgetMonthly: 0,
      toolCallsToday: 0, toolCallsThisMonth: 0,
      externalActionsToday: 0, externalActionsThisMonth: 0,
      costToday: 0, costThisMonth: 0, costBudgetMonthly: 0,
      activeSessionCount: 0, totalSessionsToday: 0,
      errorsToday: 0, errorRate1h: 0,
      lastUpdated: new Date().toISOString(),
    };
  }

  /**
   * Cleanup: stop all health check loops.
   */
  shutdown() {
    for (const id of Array.from(this.healthCheckIntervals.keys())) {
      this.stopHealthCheckLoop(id);
    }
  }
}