@agenticmail/enterprise 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +183 -0
- package/agenticmail-enterprise.db +0 -0
- package/dashboards/README.md +120 -0
- package/dashboards/dotnet/Program.cs +261 -0
- package/dashboards/express/app.js +146 -0
- package/dashboards/go/main.go +513 -0
- package/dashboards/html/index.html +535 -0
- package/dashboards/java/AgenticMailDashboard.java +376 -0
- package/dashboards/php/index.php +414 -0
- package/dashboards/python/app.py +273 -0
- package/dashboards/ruby/app.rb +195 -0
- package/dist/chunk-77IDQJL3.js +7 -0
- package/dist/chunk-7RGCCHIT.js +115 -0
- package/dist/chunk-DXNKR3TG.js +1355 -0
- package/dist/chunk-IQWA44WT.js +970 -0
- package/dist/chunk-LCUZGIDH.js +965 -0
- package/dist/chunk-N2JVTNNJ.js +2553 -0
- package/dist/chunk-O462UJBH.js +363 -0
- package/dist/chunk-PNKVD2UK.js +26 -0
- package/dist/cli.js +218 -0
- package/dist/dashboard/index.html +558 -0
- package/dist/db-adapter-DEWEFNIV.js +7 -0
- package/dist/dynamodb-CCGL2E77.js +426 -0
- package/dist/engine/index.js +1261 -0
- package/dist/index.js +522 -0
- package/dist/mongodb-ODTXIVPV.js +319 -0
- package/dist/mysql-RM3S2FV5.js +521 -0
- package/dist/postgres-LN7A6MGQ.js +518 -0
- package/dist/routes-2JEPIIKC.js +441 -0
- package/dist/routes-74ZLKJKP.js +399 -0
- package/dist/server.js +7 -0
- package/dist/sqlite-3K5YOZ4K.js +439 -0
- package/dist/turso-LDWODSDI.js +442 -0
- package/package.json +49 -0
- package/src/admin/routes.ts +331 -0
- package/src/auth/routes.ts +130 -0
- package/src/cli.ts +260 -0
- package/src/dashboard/index.html +558 -0
- package/src/db/adapter.ts +230 -0
- package/src/db/dynamodb.ts +456 -0
- package/src/db/factory.ts +51 -0
- package/src/db/mongodb.ts +360 -0
- package/src/db/mysql.ts +472 -0
- package/src/db/postgres.ts +479 -0
- package/src/db/sql-schema.ts +123 -0
- package/src/db/sqlite.ts +391 -0
- package/src/db/turso.ts +411 -0
- package/src/deploy/fly.ts +368 -0
- package/src/deploy/managed.ts +213 -0
- package/src/engine/activity.ts +474 -0
- package/src/engine/agent-config.ts +429 -0
- package/src/engine/agenticmail-bridge.ts +296 -0
- package/src/engine/approvals.ts +278 -0
- package/src/engine/db-adapter.ts +682 -0
- package/src/engine/db-schema.ts +335 -0
- package/src/engine/deployer.ts +595 -0
- package/src/engine/index.ts +134 -0
- package/src/engine/knowledge.ts +486 -0
- package/src/engine/lifecycle.ts +635 -0
- package/src/engine/openclaw-hook.ts +371 -0
- package/src/engine/routes.ts +528 -0
- package/src/engine/skills.ts +473 -0
- package/src/engine/tenant.ts +345 -0
- package/src/engine/tool-catalog.ts +189 -0
- package/src/index.ts +64 -0
- package/src/lib/resilience.ts +326 -0
- package/src/middleware/index.ts +286 -0
- package/src/server.ts +310 -0
- package/tsconfig.json +14 -0
|
@@ -0,0 +1,635 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Lifecycle Manager
|
|
3
|
+
*
|
|
4
|
+
* Manages the full lifecycle of an autonomous AI agent employee:
|
|
5
|
+
* create → configure → deploy → running → monitor → update → stop
|
|
6
|
+
*
|
|
7
|
+
* This is the core state machine. Every agent goes through these states
|
|
8
|
+
* and the manager handles transitions, health checks, auto-recovery,
|
|
9
|
+
* and status tracking.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { randomUUID } from 'node:crypto';

import type { DatabaseAdapter } from '../db/adapter.js';
import type { AgentConfig, DeploymentStatus } from './agent-config.js';
import { AgentConfigGenerator } from './agent-config.js';
import { DeploymentEngine } from './deployer.js';
import { PermissionEngine } from './skills.js';
|
|
17
|
+
|
|
18
|
+
// ─── Types ──────────────────────────────────────────────
|
|
19
|
+
|
|
20
|
+
/**
 * Every state a managed agent can occupy in the lifecycle state machine.
 */
export type AgentState =
  /** Created but not configured. */
  | 'draft'
  /** Skills/permissions/identity being set up. */
  | 'configuring'
  /** Fully configured, waiting for deploy. */
  | 'ready'
  /** Infrastructure being created. */
  | 'provisioning'
  /** Code/config being pushed. */
  | 'deploying'
  /** Container/process starting up. */
  | 'starting'
  /** Active and healthy. */
  | 'running'
  /** Running but with issues. */
  | 'degraded'
  /** Intentionally stopped. */
  | 'stopped'
  /** Failed — needs attention. */
  | 'error'
  /** Config/code update in progress. */
  | 'updating'
  /** Being torn down. */
  | 'destroying';
|
|
33
|
+
|
|
34
|
+
/**
 * An agent record as tracked by the lifecycle manager: its configuration,
 * current state, recent state history, health snapshot and usage counters.
 */
export interface ManagedAgent {
  id: string;
  /** Which company owns this agent. */
  orgId: string;
  config: AgentConfig;
  state: AgentState;
  stateHistory: StateTransition[];
  health: AgentHealth;
  usage: AgentUsage;
  /** ISO-8601 timestamps. */
  createdAt: string;
  updatedAt: string;
  lastDeployedAt?: string;
  lastHealthCheckAt?: string;
  /** Config version for optimistic locking. */
  version: number;
}
|
|
48
|
+
|
|
49
|
+
/**
 * One entry in an agent's state history: which state it left, which state it
 * entered, why, and who triggered the change.
 */
export interface StateTransition {
  from: AgentState;
  to: AgentState;
  reason: string;
  /** User ID or 'system'. */
  triggeredBy: string;
  timestamp: string;
  /** Optional error detail associated with the transition. */
  error?: string;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Health snapshot of an agent, together with the most recent individual
 * check results.
 */
export interface AgentHealth {
  status: 'healthy' | 'degraded' | 'unhealthy' | 'unknown';
  lastCheck: string;
  /** Seconds since last start. */
  uptime: number;
  consecutiveFailures: number;
  checks: HealthCheck[];
}
|
|
65
|
+
|
|
66
|
+
/**
 * Result of a single named health probe.
 */
export interface HealthCheck {
  name: string;
  status: 'pass' | 'fail' | 'warn';
  message?: string;
  timestamp: string;
  /** How long the probe took, in milliseconds. */
  durationMs: number;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Usage counters tracked per agent. "Today" and "ThisMonth" counters are
 * plain running totals that are zeroed by the manager's reset methods.
 */
export interface AgentUsage {
  // ── Token usage ──
  tokensToday: number;
  tokensThisMonth: number;
  /** 0 = unlimited. */
  tokenBudgetMonthly: number;

  // ── Tool calls ──
  toolCallsToday: number;
  toolCallsThisMonth: number;

  // ── External actions (emails sent, messages, etc.) ──
  externalActionsToday: number;
  externalActionsThisMonth: number;

  // ── Cost estimate (USD) ──
  costToday: number;
  costThisMonth: number;
  /** 0 = unlimited. */
  costBudgetMonthly: number;

  // ── Sessions ──
  activeSessionCount: number;
  totalSessionsToday: number;

  // ── Errors ──
  errorsToday: number;
  /** Errors per hour in last hour. */
  errorRate1h: number;

  lastUpdated: string;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Event delivered to lifecycle listeners. `data` is a free-form payload whose
 * keys depend on the event `type`.
 */
export interface LifecycleEvent {
  id: string;
  agentId: string;
  orgId: string;
  type: LifecycleEventType;
  data: Record<string, any>;
  timestamp: string;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Discriminator for {@link LifecycleEvent}: the kinds of events a lifecycle
 * listener can receive.
 */
export type LifecycleEventType =
  // Lifecycle milestones
  | 'created'
  | 'configured'
  | 'deployed'
  | 'started'
  | 'stopped'
  | 'restarted'
  | 'updated'
  // Failures and monitoring
  | 'error'
  | 'health_check'
  | 'auto_recovered'
  // Budgets
  | 'budget_warning'
  | 'budget_exceeded'
  // Activity
  | 'tool_call'
  | 'approval_requested'
  | 'approval_decided'
  // Teardown
  | 'destroyed';
|
|
130
|
+
|
|
131
|
+
// ─── Lifecycle Manager ──────────────────────────────────
|
|
132
|
+
|
|
133
|
+
/**
 * In-memory registry and state machine for managed agents.
 *
 * The manager keeps every agent in a `Map` keyed by agent id, delegates the
 * actual infrastructure work to a `DeploymentEngine`, records each state
 * change in the agent's bounded `stateHistory`, and notifies subscribers via
 * {@link AgentLifecycleManager.onEvent}. While an agent is deployed, a
 * periodic health check polls the deployer and degrades / auto-restarts the
 * agent after repeated failures.
 *
 * Persistence is not wired up yet (see `persistAgent`); all state lives in
 * this process.
 */
export class AgentLifecycleManager {
  /** States in which `updateConfig` is allowed. */
  private static readonly CONFIG_MUTABLE_STATES: readonly AgentState[] = ['draft', 'ready', 'stopped', 'error'];
  /** States from which `deploy` is allowed. */
  private static readonly DEPLOYABLE_STATES: readonly AgentState[] = ['ready', 'stopped', 'error'];
  /** States from which `stop` is allowed. */
  private static readonly STOPPABLE_STATES: readonly AgentState[] = ['running', 'degraded', 'starting', 'error'];
  /** States from which `restart` is allowed. */
  private static readonly RESTARTABLE_STATES: readonly AgentState[] = ['running', 'degraded', 'starting', 'stopped', 'error'];
  /** States in which a deployment is considered live (needs stopping on destroy, eligible for auto-restart). */
  private static readonly LIVE_STATES: readonly AgentState[] = ['running', 'degraded', 'starting'];

  /** Period of the background health check. */
  private static readonly HEALTH_CHECK_INTERVAL_MS = 30_000;
  /** Default delay between probes in `waitForHealthy`. */
  private static readonly HEALTH_POLL_INTERVAL_MS = 3_000;
  /** Number of most recent health checks kept per agent. */
  private static readonly MAX_HEALTH_CHECKS_KEPT = 10;
  /** Number of most recent state transitions kept per agent. */
  private static readonly MAX_TRANSITIONS_KEPT = 50;
  /** Consecutive failures after which a running agent is marked degraded. */
  private static readonly DEGRADE_AFTER_FAILURES = 2;
  /** Consecutive failures after which health status becomes 'unhealthy'. */
  private static readonly UNHEALTHY_AFTER_FAILURES = 3;
  /** Consecutive failures after which an automatic restart is attempted. */
  private static readonly AUTO_RESTART_AFTER_FAILURES = 5;
  /** Fraction of the monthly token budget at which a warning is emitted. */
  private static readonly BUDGET_WARNING_RATIO = 0.8;

  private agents = new Map<string, ManagedAgent>();
  private healthCheckIntervals = new Map<string, ReturnType<typeof setInterval>>();
  /** Agent ids whose periodic health check is currently awaiting the deployer. */
  private healthChecksInFlight = new Set<string>();
  /** Agent ids for which a budget-triggered stop is already running. */
  private budgetStopsInFlight = new Set<string>();
  private deployer = new DeploymentEngine();
  private configGen = new AgentConfigGenerator();
  private permissions: PermissionEngine;
  private db?: DatabaseAdapter;
  private eventListeners: ((event: LifecycleEvent) => void)[] = [];

  /**
   * @param opts Optional collaborators. When `permissions` is omitted a new
   *   `PermissionEngine` is created.
   */
  constructor(opts?: { db?: DatabaseAdapter; permissions?: PermissionEngine }) {
    this.db = opts?.db;
    this.permissions = opts?.permissions ?? new PermissionEngine();
  }

  // ─── Agent CRUD ─────────────────────────────────────

  /**
   * Create a new managed agent (starts in 'draft' state).
   *
   * The agent id is `config.id` when non-empty, otherwise a fresh UUID. The
   * stored config always carries the same id as the agent record.
   *
   * @param orgId Owning organisation.
   * @param config Initial configuration.
   * @param createdBy Reported in the 'created' event.
   * @returns The newly registered agent.
   * @throws Error if an agent with the same id is already registered.
   */
  async createAgent(orgId: string, config: AgentConfig, createdBy: string): Promise<ManagedAgent> {
    const id = config.id || randomUUID();
    // Overwriting would orphan the existing agent's deployment and health loop.
    if (this.agents.has(id)) {
      throw new Error(`Agent ${id} already exists`);
    }

    const now = new Date().toISOString();
    const agent: ManagedAgent = {
      id,
      orgId,
      config: { ...config, id },
      state: 'draft',
      stateHistory: [],
      health: {
        status: 'unknown',
        lastCheck: now,
        uptime: 0,
        consecutiveFailures: 0,
        checks: [],
      },
      usage: this.emptyUsage(),
      createdAt: now,
      updatedAt: now,
      version: 1,
    };

    this.agents.set(agent.id, agent);
    await this.persistAgent(agent);
    this.emitEvent(agent, 'created', { createdBy });

    return agent;
  }

  /**
   * Update agent configuration (must be in draft, ready, stopped, or error state).
   *
   * A draft agent moves to 'ready' once its config is complete; an agent in
   * any other allowed state is moved (back) to 'ready'.
   *
   * @throws Error if the agent is unknown or its state does not allow config changes.
   */
  async updateConfig(agentId: string, updates: Partial<AgentConfig>, updatedBy: string): Promise<ManagedAgent> {
    const agent = this.requireAgent(agentId);

    if (!AgentLifecycleManager.CONFIG_MUTABLE_STATES.includes(agent.state)) {
      throw new Error(`Cannot update config in state "${agent.state}". Stop the agent first.`);
    }

    this.applyConfigUpdates(agent, updates);

    if (agent.state !== 'draft') {
      this.transition(agent, 'ready', 'Configuration updated', updatedBy);
    } else if (this.isConfigComplete(agent.config)) {
      this.transition(agent, 'ready', 'Configuration complete', updatedBy);
    }

    await this.persistAgent(agent);
    this.emitEvent(agent, 'configured', { updatedBy, changes: Object.keys(updates) });

    return agent;
  }

  /**
   * Deploy an agent to its target environment.
   *
   * Walks the agent through provisioning → deploying → starting and ends in
   * 'running' (deployer reports running within 60 s), 'degraded' (it does
   * not) or 'error' (deployer reports failure). Periodic health checks are
   * started for running and degraded agents.
   *
   * @throws Error if the agent is unknown, not in a deployable state, or its
   *   config is incomplete. Errors thrown by the deployer are rethrown after
   *   the agent has been moved to 'error'.
   */
  async deploy(agentId: string, deployedBy: string): Promise<ManagedAgent> {
    const agent = this.requireAgent(agentId);

    if (!AgentLifecycleManager.DEPLOYABLE_STATES.includes(agent.state)) {
      throw new Error(`Cannot deploy from state "${agent.state}"`);
    }
    if (!this.isConfigComplete(agent.config)) {
      throw new Error('Agent configuration is incomplete');
    }

    this.transition(agent, 'provisioning', 'Deployment initiated', deployedBy);
    await this.persistAgent(agent);

    try {
      this.transition(agent, 'deploying', 'Pushing configuration', 'system');

      // Each progress callback from the deployer is forwarded as a 'deployed' event.
      const result = await this.deployer.deploy(agent.config, (event) => {
        this.emitEvent(agent, 'deployed', { phase: event.phase, status: event.status, message: event.message });
      });

      if (result.success) {
        this.transition(agent, 'starting', 'Deployment successful, agent starting', 'system');
        agent.lastDeployedAt = new Date().toISOString();

        const healthy = await this.waitForHealthy(agent, 60_000);
        if (healthy) {
          this.transition(agent, 'running', 'Agent is healthy and running', 'system');
        } else {
          this.transition(agent, 'degraded', 'Agent started but health check failed', 'system');
        }
        this.startHealthCheckLoop(agent);
      } else {
        this.transition(agent, 'error', `Deployment failed: ${result.error}`, 'system');
      }

      await this.persistAgent(agent);
      return agent;
    } catch (error: unknown) {
      const message = AgentLifecycleManager.describeError(error);
      this.transition(agent, 'error', `Deployment error: ${message}`, 'system', message);
      await this.persistAgent(agent);
      throw error;
    }
  }

  /**
   * Stop a running agent.
   *
   * The agent ends in 'stopped' even when the deployer fails to stop it; the
   * failure is recorded in the transition's reason and `error` field.
   *
   * @throws Error if the agent is unknown or not in a stoppable state.
   */
  async stop(agentId: string, stoppedBy: string, reason?: string): Promise<ManagedAgent> {
    const agent = this.requireAgent(agentId);

    if (!AgentLifecycleManager.STOPPABLE_STATES.includes(agent.state)) {
      throw new Error(`Cannot stop from state "${agent.state}"`);
    }

    this.stopHealthCheckLoop(agentId);

    try {
      await this.deployer.stop(agent.config);
      this.transition(agent, 'stopped', reason || 'Stopped by user', stoppedBy);
    } catch (error: unknown) {
      const message = AgentLifecycleManager.describeError(error);
      this.transition(agent, 'stopped', `Stopped with error: ${message}`, stoppedBy, message);
    }

    await this.persistAgent(agent);
    this.emitEvent(agent, 'stopped', { stoppedBy, reason });
    return agent;
  }

  /**
   * Restart a deployed agent.
   *
   * Health monitoring is paused for the duration of the restart so a periodic
   * check cannot change the state mid-flight, and resumed once the agent is
   * running or degraded again. A 'restarted' event is emitted whether or not
   * the restart succeeded; on failure the agent ends in 'error'.
   *
   * @throws Error if the agent is unknown or in a state that cannot be
   *   restarted (e.g. 'draft', or while a deploy/update is in progress).
   */
  async restart(agentId: string, restartedBy: string): Promise<ManagedAgent> {
    const agent = this.requireAgent(agentId);

    if (!AgentLifecycleManager.RESTARTABLE_STATES.includes(agent.state)) {
      throw new Error(`Cannot restart from state "${agent.state}"`);
    }

    this.stopHealthCheckLoop(agentId);
    this.transition(agent, 'updating', 'Restarting', restartedBy);

    try {
      await this.deployer.restart(agent.config);
      const healthy = await this.waitForHealthy(agent, 30_000);
      this.transition(agent, healthy ? 'running' : 'degraded', 'Restarted', 'system');
      this.startHealthCheckLoop(agent);
    } catch (error: unknown) {
      const message = AgentLifecycleManager.describeError(error);
      this.transition(agent, 'error', `Restart failed: ${message}`, 'system', message);
    }

    await this.persistAgent(agent);
    this.emitEvent(agent, 'restarted', { restartedBy });
    return agent;
  }

  /**
   * Hot-update config on a running agent (no full redeploy).
   *
   * On success the agent returns to the state it was in before the update;
   * on failure it is marked 'degraded'.
   * NOTE(review): on failure the merged config and bumped version are kept
   * even though the deployer rejected them — confirm whether a rollback is
   * wanted.
   *
   * @throws Error if the agent is unknown or not running/degraded.
   */
  async hotUpdate(agentId: string, updates: Partial<AgentConfig>, updatedBy: string): Promise<ManagedAgent> {
    const agent = this.requireAgent(agentId);

    if (agent.state !== 'running' && agent.state !== 'degraded') {
      throw new Error(`Hot update only works on running agents (current: "${agent.state}")`);
    }

    const previousState = agent.state;
    this.transition(agent, 'updating', 'Hot config update', updatedBy);
    this.applyConfigUpdates(agent, updates);

    try {
      await this.deployer.updateConfig(agent.config);
      this.transition(agent, previousState, 'Config updated successfully', 'system');
    } catch (error: unknown) {
      const message = AgentLifecycleManager.describeError(error);
      this.transition(agent, 'degraded', `Config update failed: ${message}`, 'system', message);
    }

    await this.persistAgent(agent);
    this.emitEvent(agent, 'updated', { updatedBy, hotUpdate: true });
    return agent;
  }

  /**
   * Destroy an agent completely (stop + delete all resources).
   *
   * Stopping the deployment is best effort: a deployer failure does not
   * prevent the agent from being removed from the registry.
   *
   * @throws Error if the agent is unknown.
   */
  async destroy(agentId: string, destroyedBy: string): Promise<void> {
    const agent = this.requireAgent(agentId);

    // Must be read BEFORE the transition below overwrites agent.state,
    // otherwise the deployment would never be stopped.
    const wasLive = AgentLifecycleManager.LIVE_STATES.includes(agent.state);

    this.transition(agent, 'destroying', 'Agent being destroyed', destroyedBy);
    this.stopHealthCheckLoop(agentId);

    if (wasLive) {
      try {
        await this.deployer.stop(agent.config);
      } catch {
        /* best effort */
      }
    }

    this.emitEvent(agent, 'destroyed', { destroyedBy });
    this.agents.delete(agentId);
    // DB cleanup would happen here
  }

  // ─── Monitoring ─────────────────────────────────────

  /**
   * Record a tool call for usage tracking.
   *
   * Updates the daily/monthly counters, emits budget events, and — once a
   * monthly token or cost budget (a value > 0) is reached — requests a single
   * automatic stop of the agent. Unknown agent ids are ignored.
   *
   * @param agentId Agent that made the call.
   * @param toolId Identifier reported in the 'tool_call' event.
   * @param opts Optional token/cost accounting and flags for this call.
   */
  recordToolCall(agentId: string, toolId: string, opts?: {
    tokensUsed?: number;
    costUsd?: number;
    isExternalAction?: boolean;
    error?: boolean;
  }) {
    const agent = this.agents.get(agentId);
    if (!agent) return;

    const usage = agent.usage;
    usage.toolCallsToday++;
    usage.toolCallsThisMonth++;
    if (opts?.tokensUsed) {
      usage.tokensToday += opts.tokensUsed;
      usage.tokensThisMonth += opts.tokensUsed;
    }
    if (opts?.costUsd) {
      usage.costToday += opts.costUsd;
      usage.costThisMonth += opts.costUsd;
    }
    if (opts?.isExternalAction) {
      usage.externalActionsToday++;
      usage.externalActionsThisMonth++;
    }
    if (opts?.error) {
      usage.errorsToday++;
    }
    usage.lastUpdated = new Date().toISOString();

    // Budget checks (a budget of 0 means unlimited)
    const hasTokenBudget = usage.tokenBudgetMonthly > 0;
    const tokensExceeded = hasTokenBudget && usage.tokensThisMonth >= usage.tokenBudgetMonthly;
    if (tokensExceeded) {
      this.emitEvent(agent, 'budget_exceeded', { type: 'tokens', used: usage.tokensThisMonth, budget: usage.tokenBudgetMonthly });
    } else if (hasTokenBudget && usage.tokensThisMonth >= usage.tokenBudgetMonthly * AgentLifecycleManager.BUDGET_WARNING_RATIO) {
      this.emitEvent(agent, 'budget_warning', { type: 'tokens', used: usage.tokensThisMonth, budget: usage.tokenBudgetMonthly, percent: 80 });
    }

    const costExceeded = usage.costBudgetMonthly > 0 && usage.costThisMonth >= usage.costBudgetMonthly;
    if (costExceeded) {
      this.emitEvent(agent, 'budget_exceeded', { type: 'cost', used: usage.costThisMonth, budget: usage.costBudgetMonthly });
    }

    // Auto-stop on budget exceeded — at most one stop, even if both budgets are blown.
    if (tokensExceeded) {
      this.requestBudgetStop(agent, 'Monthly token budget exceeded');
    } else if (costExceeded) {
      this.requestBudgetStop(agent, 'Monthly cost budget exceeded');
    }

    this.emitEvent(agent, 'tool_call', { toolId, ...opts });
  }

  /**
   * Get all agents for an org.
   */
  getAgentsByOrg(orgId: string): ManagedAgent[] {
    return Array.from(this.agents.values()).filter((a) => a.orgId === orgId);
  }

  /**
   * Get a single agent, or `undefined` when the id is not registered.
   */
  getAgent(agentId: string): ManagedAgent | undefined {
    return this.agents.get(agentId);
  }

  /**
   * Get org-wide usage summary: today's totals across the org's agents plus
   * a per-agent breakdown. `runningAgents` counts agents in state 'running' only.
   */
  getOrgUsage(orgId: string): {
    totalAgents: number;
    runningAgents: number;
    totalTokensToday: number;
    totalCostToday: number;
    totalToolCallsToday: number;
    totalErrorsToday: number;
    agents: { id: string; name: string; state: AgentState; usage: AgentUsage }[];
  } {
    const agents = this.getAgentsByOrg(orgId);

    let runningAgents = 0;
    let totalTokensToday = 0;
    let totalCostToday = 0;
    let totalToolCallsToday = 0;
    let totalErrorsToday = 0;
    for (const a of agents) {
      if (a.state === 'running') runningAgents++;
      totalTokensToday += a.usage.tokensToday;
      totalCostToday += a.usage.costToday;
      totalToolCallsToday += a.usage.toolCallsToday;
      totalErrorsToday += a.usage.errorsToday;
    }

    return {
      totalAgents: agents.length,
      runningAgents,
      totalTokensToday,
      totalCostToday,
      totalToolCallsToday,
      totalErrorsToday,
      agents: agents.map((a) => ({ id: a.id, name: a.config.displayName, state: a.state, usage: a.usage })),
    };
  }

  /**
   * Subscribe to lifecycle events (for dashboard real-time updates).
   *
   * @returns A function that removes the listener again.
   */
  onEvent(listener: (event: LifecycleEvent) => void): () => void {
    this.eventListeners.push(listener);
    return () => {
      this.eventListeners = this.eventListeners.filter((l) => l !== listener);
    };
  }

  /**
   * Reset daily counters (call at midnight via cron).
   */
  resetDailyCounters() {
    for (const { usage } of this.agents.values()) {
      usage.tokensToday = 0;
      usage.toolCallsToday = 0;
      usage.externalActionsToday = 0;
      usage.costToday = 0;
      usage.errorsToday = 0;
      usage.totalSessionsToday = 0;
    }
  }

  /**
   * Reset monthly counters (call on 1st of month).
   */
  resetMonthlyCounters() {
    for (const { usage } of this.agents.values()) {
      usage.tokensThisMonth = 0;
      usage.toolCallsThisMonth = 0;
      usage.externalActionsThisMonth = 0;
      usage.costThisMonth = 0;
    }
  }

  /**
   * Cleanup: stop all health check loops.
   */
  shutdown() {
    for (const id of [...this.healthCheckIntervals.keys()]) {
      this.stopHealthCheckLoop(id);
    }
  }

  // ─── Health Check Loop ────────────────────────────────

  /** (Re)start the periodic health check for a registered agent. */
  private startHealthCheckLoop(agent: ManagedAgent) {
    this.stopHealthCheckLoop(agent.id);

    // The agent may have been destroyed while the caller was awaiting the deployer.
    if (this.agents.get(agent.id) !== agent) return;

    const interval = setInterval(() => {
      void this.runHealthCheck(agent);
    }, AgentLifecycleManager.HEALTH_CHECK_INTERVAL_MS);

    this.healthCheckIntervals.set(agent.id, interval);
  }

  /** Cancel the periodic health check for an agent, if one is scheduled. */
  private stopHealthCheckLoop(agentId: string) {
    const interval = this.healthCheckIntervals.get(agentId);
    if (interval) {
      clearInterval(interval);
      this.healthCheckIntervals.delete(agentId);
    }
  }

  /** True while the agent is registered and its health loop is scheduled. */
  private isMonitored(agent: ManagedAgent): boolean {
    return this.agents.get(agent.id) === agent && this.healthCheckIntervals.has(agent.id);
  }

  /**
   * One iteration of the health check loop: probe the deployer, record the
   * result, and either restore the agent to 'running' or escalate the failure.
   * Never rejects for deployer errors — a failed probe counts as a failed check.
   */
  private async runHealthCheck(agent: ManagedAgent): Promise<void> {
    if (!this.isMonitored(agent)) return;
    // A slow probe must not pile up behind the next timer tick.
    if (this.healthChecksInFlight.has(agent.id)) return;

    this.healthChecksInFlight.add(agent.id);
    try {
      const probe = await this.probeHealth(agent);

      // stop()/destroy() may have run while the probe was in flight; discard the result.
      if (!this.isMonitored(agent)) return;

      const now = new Date().toISOString();
      agent.lastHealthCheckAt = now;
      agent.health.lastCheck = now;
      agent.health.checks = [probe.check, ...agent.health.checks]
        .slice(0, AgentLifecycleManager.MAX_HEALTH_CHECKS_KEPT);

      if (probe.healthy) {
        agent.health.status = 'healthy';
        agent.health.consecutiveFailures = 0;
        if (probe.uptime) agent.health.uptime = probe.uptime;
        if (probe.activeSessionCount !== undefined) {
          agent.usage.activeSessionCount = probe.activeSessionCount;
        }

        if (agent.state === 'degraded') {
          this.transition(agent, 'running', 'Health restored', 'system');
          this.emitEvent(agent, 'auto_recovered', {});
        } else if (agent.state === 'starting') {
          // Reached after an auto-restart; without this the agent would stay in 'starting'.
          this.transition(agent, 'running', 'Agent is healthy and running', 'system');
        }
      } else {
        await this.handleHealthFailure(agent);
      }

      await this.persistAgent(agent);
    } finally {
      this.healthChecksInFlight.delete(agent.id);
    }
  }

  /**
   * Ask the deployer for the agent's status and convert the answer — or the
   * thrown error — into a `HealthCheck` plus the few values the loop needs.
   */
  private async probeHealth(agent: ManagedAgent): Promise<{
    healthy: boolean;
    check: HealthCheck;
    uptime?: number;
    activeSessionCount?: number;
  }> {
    const startedAt = Date.now();
    try {
      const status = await this.deployer.getStatus(agent.config);
      const running = status.status === 'running';
      return {
        healthy: running && status.healthStatus === 'healthy',
        uptime: status.uptime,
        activeSessionCount: status.metrics?.activeSessionCount,
        check: {
          name: 'deployment_status',
          status: running ? 'pass' : 'fail',
          message: `Status: ${status.status}, Health: ${status.healthStatus}`,
          timestamp: new Date().toISOString(),
          durationMs: Date.now() - startedAt,
        },
      };
    } catch (error: unknown) {
      return {
        healthy: false,
        check: {
          name: 'deployment_status',
          status: 'fail',
          message: `Status check failed: ${AgentLifecycleManager.describeError(error)}`,
          timestamp: new Date().toISOString(),
          durationMs: Date.now() - startedAt,
        },
      };
    }
  }

  /**
   * Account for one failed health check: bump the failure counter, degrade a
   * running agent after repeated failures, and attempt an automatic restart
   * once the restart threshold is reached.
   */
  private async handleHealthFailure(agent: ManagedAgent): Promise<void> {
    const health = agent.health;
    health.consecutiveFailures++;
    health.status =
      health.consecutiveFailures >= AgentLifecycleManager.UNHEALTHY_AFTER_FAILURES ? 'unhealthy' : 'degraded';

    if (agent.state === 'running' && health.consecutiveFailures >= AgentLifecycleManager.DEGRADE_AFTER_FAILURES) {
      this.transition(agent, 'degraded', `Health degraded: ${health.consecutiveFailures} consecutive failures`, 'system');
    }

    const shouldRestart =
      health.consecutiveFailures >= AgentLifecycleManager.AUTO_RESTART_AFTER_FAILURES &&
      AgentLifecycleManager.LIVE_STATES.includes(agent.state);
    if (!shouldRestart) return;

    const failures = health.consecutiveFailures;
    health.consecutiveFailures = 0;
    try {
      await this.deployer.restart(agent.config);
      // Someone stopped or destroyed the agent while the restart was running; leave their state alone.
      if (!this.isMonitored(agent)) return;
      this.transition(agent, 'starting', 'Auto-restarted after health failures', 'system');
      this.emitEvent(agent, 'auto_recovered', { action: 'restart', failures });
    } catch (error: unknown) {
      if (!this.isMonitored(agent)) return;
      this.transition(agent, 'error', 'Auto-restart failed', 'system', AgentLifecycleManager.describeError(error));
    }
  }

  // ─── Private Helpers ──────────────────────────────────

  /** Look up an agent or throw the standard "not found" error. */
  private requireAgent(agentId: string): ManagedAgent {
    const agent = this.getAgent(agentId);
    if (!agent) throw new Error(`Agent ${agentId} not found`);
    return agent;
  }

  /** Human-readable message for a caught value of unknown type. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Merge `updates` into the agent's config, stamp `updatedAt`, and bump the
   * version. The config id is pinned to the agent id so an update cannot
   * detach the config from its registry entry.
   */
  private applyConfigUpdates(agent: ManagedAgent, updates: Partial<AgentConfig>): void {
    const now = new Date().toISOString();
    agent.config = { ...agent.config, ...updates, id: agent.id, updatedAt: now };
    agent.updatedAt = now;
    agent.version++;
  }

  /**
   * Fire-and-forget stop triggered by a blown budget. Skipped when the agent
   * cannot be stopped from its current state or a budget stop is already running.
   */
  private requestBudgetStop(agent: ManagedAgent, reason: string): void {
    if (this.budgetStopsInFlight.has(agent.id)) return;
    if (!AgentLifecycleManager.STOPPABLE_STATES.includes(agent.state)) return;

    this.budgetStopsInFlight.add(agent.id);
    void this.stop(agent.id, 'system', reason)
      .catch(() => {
        /* best effort: stop() records deployer failures in the state history itself */
      })
      .finally(() => {
        this.budgetStopsInFlight.delete(agent.id);
      });
  }

  /**
   * Move the agent to state `to` and append the change to its history
   * (bounded to the most recent transitions).
   *
   * @param error Optional error detail stored on the history entry.
   */
  private transition(agent: ManagedAgent, to: AgentState, reason: string, triggeredBy: string, error?: string) {
    const timestamp = new Date().toISOString();
    const entry: StateTransition = { from: agent.state, to, reason, triggeredBy, timestamp };
    if (error !== undefined) entry.error = error;

    agent.stateHistory.push(entry);
    if (agent.stateHistory.length > AgentLifecycleManager.MAX_TRANSITIONS_KEPT) {
      agent.stateHistory = agent.stateHistory.slice(-AgentLifecycleManager.MAX_TRANSITIONS_KEPT);
    }
    agent.state = to;
    agent.updatedAt = timestamp;
  }

  /** True when every field required for deployment is set (truthy). */
  private isConfigComplete(config: AgentConfig): boolean {
    return !!(
      config.name &&
      config.displayName &&
      config.identity?.role &&
      config.model?.modelId &&
      config.deployment?.target &&
      config.permissionProfileId
    );
  }

  /**
   * Poll the deployer until it reports status 'running' or the timeout
   * elapses. Errors from the deployer are treated as "not yet" and retried.
   *
   * @param timeoutMs Maximum total time to wait.
   * @param pollIntervalMs Delay between probes; the last delay is shortened
   *   so the method does not sleep past the deadline.
   * @returns `true` as soon as the deployer reports 'running', else `false`.
   */
  private async waitForHealthy(
    agent: ManagedAgent,
    timeoutMs: number,
    pollIntervalMs: number = AgentLifecycleManager.HEALTH_POLL_INTERVAL_MS,
  ): Promise<boolean> {
    const deadline = Date.now() + timeoutMs;
    for (;;) {
      try {
        const status = await this.deployer.getStatus(agent.config);
        if (status.status === 'running') return true;
      } catch {
        /* retry */
      }

      const remaining = deadline - Date.now();
      if (remaining <= 0) return false;
      await new Promise<void>((resolve) => setTimeout(resolve, Math.min(pollIntervalMs, remaining)));
    }
  }

  /**
   * Persist the agent. Currently a placeholder: the in-memory map already
   * holds the agent, and database persistence is still to be wired up.
   */
  private async persistAgent(agent: ManagedAgent) {
    // Never re-register an agent that destroy() removed while an async
    // operation on it was still in flight.
    if (this.agents.get(agent.id) !== agent) return;
    // TODO: this.db?.upsertManagedAgent(agent);
  }

  /** Build a lifecycle event and deliver it to every subscribed listener. */
  private emitEvent(agent: ManagedAgent, type: LifecycleEventType, data: Record<string, any>) {
    const event: LifecycleEvent = {
      id: randomUUID(),
      agentId: agent.id,
      orgId: agent.orgId,
      type,
      data,
      timestamp: new Date().toISOString(),
    };
    for (const listener of this.eventListeners) {
      // A faulty subscriber must not break lifecycle operations or other listeners.
      try {
        listener(event);
      } catch {
        /* ignore */
      }
    }
  }

  /** Fresh usage record with every counter and budget at zero. */
  private emptyUsage(): AgentUsage {
    return {
      tokensToday: 0, tokensThisMonth: 0, tokenBudgetMonthly: 0,
      toolCallsToday: 0, toolCallsThisMonth: 0,
      externalActionsToday: 0, externalActionsThisMonth: 0,
      costToday: 0, costThisMonth: 0, costBudgetMonthly: 0,
      activeSessionCount: 0, totalSessionsToday: 0,
      errorsToday: 0, errorRate1h: 0,
      lastUpdated: new Date().toISOString(),
    };
  }
}
|