@agenticmail/enterprise 0.5.322 → 0.5.324

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,265 @@
1
+ /**
2
+ * Cluster Manager — Multi-Instance Agent Coordination
3
+ *
4
+ * Enables a single enterprise dashboard to manage agents running across
5
+ * multiple machines (Mac Minis, VPS, cloud instances).
6
+ *
7
+ * Architecture:
8
+ * - Enterprise server = "Control Plane" (single dashboard, DB, SSE hub)
9
+ * - Agent instances = "Worker Nodes" (run on any machine, phone home to control plane)
10
+ *
11
+ * Worker nodes register via POST /cluster/register with their capabilities.
12
+ * Control plane tracks health via heartbeats (every 30s, stale after 90s).
13
+ * Agents deployed to remote workers communicate via ENTERPRISE_URL.
14
+ *
15
+ * Flow:
16
+ * 1. Worker starts, calls POST /cluster/register { nodeId, host, port, capabilities }
17
+ * 2. Control plane stores worker in memory + DB
18
+ * 3. Dashboard shows all workers and their agents
19
+ * 4. When user deploys an agent, they can pick a target worker
20
+ * 5. Worker receives deploy command via its API
21
+ * 6. Agent process starts on worker, reports status back to control plane
22
+ */
23
+
24
/**
 * A worker machine known to the control plane, as tracked in memory and
 * mirrored to the `cluster_nodes` table.
 */
export interface WorkerNode {
  /** Identifier the worker registers under; used as the key for lookups. */
  nodeId: string;
  /** Display name of the worker. */
  name: string;
  /** IP or hostname reachable from the control plane. */
  host: string;
  /** Worker API port. */
  port: number;
  /** Full base URL (e.g., http://192.168.1.50:3101). */
  url: string;
  /** Operating system platform: darwin, linux, win32. */
  platform: string;
  /** CPU architecture: arm64, x64. */
  arch: string;
  /** Number of CPUs reported by the worker. */
  cpuCount: number;
  /** Memory reported by the worker, in megabytes. */
  memoryMb: number;
  /** @agenticmail/enterprise version running on the worker. */
  version: string;
  /** Agent IDs running on this worker. */
  agents: string[];
  /** Capability tags, e.g., ['gpu', 'browser', 'voice', 'docker']. */
  capabilities: string[];
  /** Health state of the worker as seen by the control plane. */
  status: 'online' | 'degraded' | 'offline';
  /** Timestamp string recorded when the worker first registered. */
  registeredAt: string;
  /** Timestamp string of the most recent heartbeat; may be empty if none was stored. */
  lastHeartbeat: string;
  /** Free-form extra data attached to the worker. */
  metadata?: Record<string, any>;
}
42
+
43
/** Aggregate figures describing the whole cluster at one point in time. */
export interface ClusterStats {
  /** Count of every known node, regardless of status. */
  totalNodes: number;
  /** Count of nodes whose status is 'online'. */
  onlineNodes: number;
  /** Sum of agent counts across all known nodes. */
  totalAgents: number;
  /** Sum of CPU counts across online nodes. */
  totalCpus: number;
  /** Sum of memory (MB) across online nodes. */
  totalMemoryMb: number;
}
50
+
51
/**
 * Callback invoked for every cluster event.
 * Receives the affected node's ID, the node record, and the kind of event.
 */
type NodeListener = (
  nodeId: string,
  node: WorkerNode,
  event: 'register' | 'heartbeat' | 'offline' | 'update',
) => void;
52
+
53
/**
 * In-memory registry of worker nodes with optional DB persistence.
 *
 * Workers are added via {@link ClusterManager.register}, kept alive via
 * {@link ClusterManager.heartbeat}, and flipped to 'offline' by a periodic
 * sweep when no heartbeat has been seen for `staleThresholdMs`.
 */
export class ClusterManager {
  private nodes = new Map<string, WorkerNode>();
  private listeners = new Set<NodeListener>();
  private staleTimer: NodeJS.Timeout | null = null;
  private staleThresholdMs = 90_000; // 90s without heartbeat = offline
  private db: any = null;

  constructor() {
    this.staleTimer = setInterval(() => this.checkStale(), 30_000);
    // Do not let the sweep timer keep the process alive on its own.
    if (this.staleTimer.unref) this.staleTimer.unref();
  }

  /** Attach the database handle used for persistence. */
  setDb(db: any) { this.db = db; }

  /**
   * Load persisted workers from DB on startup.
   *
   * Creates the `cluster_nodes` table if missing. Loaded nodes start as
   * 'offline'; a later register/heartbeat sets them online. Rows whose node
   * is already present in memory are skipped so that a worker which
   * registered while the load was in flight is not overwritten with stale,
   * offline data. Failures are logged and otherwise ignored.
   */
  async loadFromDb(): Promise<void> {
    if (!this.db) return;
    try {
      await this.db.execute(`CREATE TABLE IF NOT EXISTS cluster_nodes (
        node_id TEXT PRIMARY KEY,
        name TEXT NOT NULL,
        host TEXT NOT NULL,
        port INTEGER NOT NULL,
        url TEXT NOT NULL,
        platform TEXT,
        arch TEXT,
        cpu_count INTEGER DEFAULT 0,
        memory_mb INTEGER DEFAULT 0,
        version TEXT,
        agents TEXT DEFAULT '[]',
        capabilities TEXT DEFAULT '[]',
        status TEXT DEFAULT 'offline',
        registered_at TEXT,
        last_heartbeat TEXT,
        metadata TEXT
      )`);
      const rows = await this.db.query('SELECT * FROM cluster_nodes');
      for (const row of rows) {
        // A live registration is fresher than anything stored in the DB.
        if (this.nodes.has(row.node_id)) continue;
        const node: WorkerNode = {
          nodeId: row.node_id,
          name: row.name || row.node_id,
          host: row.host,
          port: row.port,
          url: row.url,
          platform: row.platform || 'unknown',
          arch: row.arch || 'unknown',
          cpuCount: row.cpu_count || 0,
          memoryMb: row.memory_mb || 0,
          version: row.version || 'unknown',
          agents: safeParse(row.agents, []),
          capabilities: safeParse(row.capabilities, []),
          status: 'offline', // Start as offline; heartbeat will set online
          registeredAt: row.registered_at || new Date().toISOString(),
          lastHeartbeat: row.last_heartbeat || '',
          metadata: safeParse(row.metadata, {}),
        };
        this.nodes.set(node.nodeId, node);
      }
    } catch (e: unknown) {
      console.warn('[cluster] Failed to load nodes from DB:', ClusterManager.describeError(e));
    }
  }

  /**
   * Register or re-register a worker node.
   *
   * The node is stored in memory and listeners are notified with a
   * 'register' event before the DB upsert is attempted; a persistence
   * failure is logged but does not reject.
   *
   * @param data Registration payload. On re-registration, `agents`,
   *   `metadata` and `registeredAt` fall back to the previously known values
   *   when omitted.
   * @returns The stored node record (status 'online').
   */
  async register(data: {
    nodeId: string;
    name?: string;
    host: string;
    port: number;
    platform?: string;
    arch?: string;
    cpuCount?: number;
    memoryMb?: number;
    version?: string;
    agents?: string[];
    capabilities?: string[];
    metadata?: Record<string, any>;
  }): Promise<WorkerNode> {
    const now = new Date().toISOString();
    const existing = this.nodes.get(data.nodeId);
    const node: WorkerNode = {
      nodeId: data.nodeId,
      name: data.name || data.nodeId,
      host: data.host,
      port: data.port,
      url: `http://${ClusterManager.hostForUrl(data.host)}:${data.port}`,
      platform: data.platform || 'unknown',
      arch: data.arch || 'unknown',
      cpuCount: data.cpuCount || 0,
      memoryMb: data.memoryMb || 0,
      version: data.version || 'unknown',
      agents: data.agents || existing?.agents || [],
      capabilities: data.capabilities || [],
      status: 'online',
      registeredAt: existing?.registeredAt || now,
      lastHeartbeat: now,
      metadata: data.metadata || existing?.metadata || {},
    };
    this.nodes.set(node.nodeId, node);
    this.emit(node.nodeId, node, 'register');

    // Persist (upsert; registered_at is intentionally left untouched on conflict)
    if (this.db) {
      try {
        await this.db.execute(
          `INSERT INTO cluster_nodes (node_id, name, host, port, url, platform, arch, cpu_count, memory_mb, version, agents, capabilities, status, registered_at, last_heartbeat, metadata)
           VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16)
           ON CONFLICT (node_id) DO UPDATE SET
             name=$2, host=$3, port=$4, url=$5, platform=$6, arch=$7, cpu_count=$8, memory_mb=$9, version=$10,
             agents=$11, capabilities=$12, status=$13, last_heartbeat=$15, metadata=$16`,
          [node.nodeId, node.name, node.host, node.port, node.url, node.platform, node.arch,
            node.cpuCount, node.memoryMb, node.version, JSON.stringify(node.agents),
            JSON.stringify(node.capabilities), node.status, node.registeredAt, node.lastHeartbeat,
            JSON.stringify(node.metadata)]
        );
      } catch (e: unknown) {
        console.warn('[cluster] DB persist error:', ClusterManager.describeError(e));
      }
    }

    return node;
  }

  /**
   * Worker sends heartbeat (every 30s).
   *
   * Marks the node online and refreshes its heartbeat timestamp. Unknown
   * node IDs are ignored. `agents` is applied only when it is an array, and
   * usage figures are stored in `metadata` (created on demand, since the
   * field is optional on the node).
   */
  heartbeat(nodeId: string, data?: { agents?: string[]; cpuUsage?: number; memoryUsage?: number }): void {
    const node = this.nodes.get(nodeId);
    if (!node) return;
    node.lastHeartbeat = new Date().toISOString();
    node.status = 'online';
    // The payload may come straight from a request body; only accept real arrays.
    if (Array.isArray(data?.agents)) node.agents = data.agents;
    if (data?.cpuUsage != null || data?.memoryUsage != null) {
      const metadata = node.metadata ?? {};
      if (data?.cpuUsage != null) metadata.cpuUsage = data.cpuUsage;
      if (data?.memoryUsage != null) metadata.memoryUsage = data.memoryUsage;
      node.metadata = metadata;
    }
    this.emit(nodeId, node, 'heartbeat');
  }

  /**
   * Remove a worker node from memory and from the DB.
   * Listeners receive an 'offline' event with a copy of the removed node.
   * A DB failure is logged and does not reject.
   */
  async remove(nodeId: string): Promise<void> {
    const node = this.nodes.get(nodeId);
    this.nodes.delete(nodeId);
    if (node) this.emit(nodeId, { ...node, status: 'offline' }, 'offline');
    if (this.db) {
      try {
        await this.db.execute('DELETE FROM cluster_nodes WHERE node_id = $1', [nodeId]);
      } catch (e: unknown) {
        console.warn('[cluster] DB delete error:', ClusterManager.describeError(e));
      }
    }
  }

  /** Get a specific node */
  getNode(nodeId: string): WorkerNode | undefined { return this.nodes.get(nodeId); }

  /** Get all nodes */
  getAllNodes(): WorkerNode[] { return Array.from(this.nodes.values()); }

  /** Get online nodes */
  getOnlineNodes(): WorkerNode[] { return this.getAllNodes().filter(n => n.status === 'online'); }

  /**
   * Get cluster-wide stats.
   * Agent totals cover every node; CPU and memory totals cover online nodes only.
   */
  getStats(): ClusterStats {
    const nodes = this.getAllNodes();
    const online = nodes.filter(n => n.status === 'online');
    return {
      totalNodes: nodes.length,
      onlineNodes: online.length,
      totalAgents: nodes.reduce((s, n) => s + n.agents.length, 0),
      totalCpus: online.reduce((s, n) => s + n.cpuCount, 0),
      totalMemoryMb: online.reduce((s, n) => s + n.memoryMb, 0),
    };
  }

  /**
   * Find best node for a new agent (simple: least agents on online node).
   *
   * @param capabilities When non-empty, only nodes advertising every listed
   *   capability are considered.
   * @returns The chosen node, or null when no online node qualifies.
   */
  findBestNode(capabilities?: string[]): WorkerNode | null {
    let candidates = this.getOnlineNodes();
    if (capabilities?.length) {
      candidates = candidates.filter(n => capabilities.every(c => n.capabilities.includes(c)));
    }
    if (candidates.length === 0) return null;
    // `candidates` is a fresh array, so sorting in place does not touch shared state.
    return candidates.sort((a, b) => a.agents.length - b.agents.length)[0] ?? null;
  }

  /**
   * Subscribe to cluster events (for SSE).
   * @returns A function that removes the listener again.
   */
  subscribe(listener: NodeListener): () => void {
    this.listeners.add(listener);
    return () => this.listeners.delete(listener);
  }

  /** Notify every listener; a throwing listener must not affect the others. */
  private emit(nodeId: string, node: WorkerNode, event: 'register' | 'heartbeat' | 'offline' | 'update'): void {
    for (const listener of this.listeners) {
      try { listener(nodeId, node, event); } catch { /* best-effort delivery */ }
    }
  }

  /** Mark nodes offline (and clear their agent list) once their heartbeat is too old. */
  private checkStale(): void {
    const now = Date.now();
    for (const [nodeId, node] of this.nodes) {
      if (node.status === 'offline') continue;
      if (node.lastHeartbeat) {
        const elapsed = now - new Date(node.lastHeartbeat).getTime();
        if (elapsed > this.staleThresholdMs) {
          node.status = 'offline';
          node.agents = [];
          this.emit(nodeId, node, 'offline');
        }
      }
    }
  }

  /** Stop the stale-node sweep and drop all listeners. Safe to call more than once. */
  destroy(): void {
    if (this.staleTimer) {
      clearInterval(this.staleTimer);
      this.staleTimer = null;
    }
    this.listeners.clear();
  }

  /** Wrap IPv6 literals in brackets so `host:port` forms a valid URL authority. */
  private static hostForUrl(host: string): string {
    return host.includes(':') && !host.startsWith('[') ? `[${host}]` : host;
  }

  /** Extract a loggable message from an unknown thrown value. */
  private static describeError(e: unknown): string {
    return e instanceof Error ? e.message : String(e);
  }
}
260
+
261
/**
 * Decode a JSON column value, falling back when it is missing, malformed,
 * or of the wrong shape.
 *
 * Values that are already objects (e.g. pre-parsed by the DB driver) are
 * used as-is; anything else is passed through `JSON.parse`. When `fallback`
 * is an array the result must be an array, and when `fallback` is a plain
 * object the result must be a non-array object — otherwise `fallback` is
 * returned. This keeps callers that immediately use `.length` / `.includes`
 * safe from stored values such as `null` or `5`.
 *
 * @param val Raw value read from storage.
 * @param fallback Value returned when `val` cannot be used.
 * @returns The decoded value or `fallback`.
 */
function safeParse(val: any, fallback: any): any {
  if (!val) return fallback;

  let parsed: unknown = val;
  if (typeof val !== 'object') {
    try { parsed = JSON.parse(val); } catch { return fallback; }
  }

  if (Array.isArray(fallback)) {
    return Array.isArray(parsed) ? parsed : fallback;
  }
  if (fallback !== null && typeof fallback === 'object') {
    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
    return isPlainObject ? parsed : fallback;
  }
  return parsed;
}
@@ -116,6 +116,8 @@ const tenants = new TenantManager();
116
116
  const activity = new ActivityTracker();
117
117
  import { AgentStatusTracker } from './agent-status.js';
118
118
  const agentStatus = new AgentStatusTracker();
119
+ import { ClusterManager } from './cluster.js';
120
+ const cluster = new ClusterManager();
119
121
  const dlp = new DLPEngine();
120
122
  const commBus = new AgentCommunicationBus();
121
123
  const guardrails = new GuardrailEngine({
@@ -315,6 +317,47 @@ engine.get('/agent-status-stream', (c) => {
315
317
  });
316
318
  });
317
319
 
320
// ─── Cluster Management ─────────────────────────────────

// List every known node together with cluster-wide totals.
engine.get('/cluster/nodes', (c) => c.json({ nodes: cluster.getAllNodes(), stats: cluster.getStats() }));

// Fetch a single node by ID.
engine.get('/cluster/nodes/:nodeId', (c) => {
  const node = cluster.getNode(c.req.param('nodeId'));
  return node ? c.json(node) : c.json({ error: 'Node not found' }, 404);
});

// Register (or re-register) a worker. The body is untrusted, so malformed
// JSON and non-numeric / out-of-range ports are rejected with 400 instead of
// throwing or being stored.
engine.post('/cluster/register', async (c) => {
  const body = await c.req.json().catch(() => null);
  const port = Number(body?.port);
  const valid =
    body != null &&
    typeof body.nodeId === 'string' && body.nodeId.length > 0 &&
    typeof body.host === 'string' && body.host.length > 0 &&
    Number.isInteger(port) && port > 0 && port <= 65535;
  if (!valid) return c.json({ error: 'nodeId, host, port required' }, 400);
  const node = await cluster.register({ ...body, port });
  return c.json(node);
});

// Record a heartbeat; a missing or invalid body is treated as empty.
engine.post('/cluster/heartbeat/:nodeId', async (c) => {
  const body = await c.req.json().catch(() => ({}));
  cluster.heartbeat(c.req.param('nodeId'), body);
  return c.json({ ok: true });
});

// Remove a node from the cluster.
engine.delete('/cluster/nodes/:nodeId', async (c) => {
  await cluster.remove(c.req.param('nodeId'));
  return c.json({ removed: true });
});

// Pick a node for a new agent, optionally filtered by ?capabilities=a,b,c.
engine.get('/cluster/best-node', (c) => {
  const caps = c.req.query('capabilities')?.split(',').filter(Boolean);
  const node = cluster.findBestNode(caps);
  return node ? c.json(node) : c.json({ error: 'No suitable node available' }, 404);
});

// Server-sent event stream: one snapshot event per node, then live cluster
// events, plus a keep-alive message every 30s.
engine.get('/cluster/stream', (c) => {
  // Assigned inside start(); shared with cancel() so either path can tear down.
  let cleanup: () => void = () => {};
  const stream = new ReadableStream({
    start(controller) {
      const encoder = new TextEncoder();
      let closed = false;
      // Declared before `send` so a failing enqueue during the snapshot can
      // run cleanup without touching uninitialised bindings.
      let unsub: () => void = () => {};
      let hb: ReturnType<typeof setInterval> | undefined;
      cleanup = () => {
        if (closed) return;
        closed = true;
        unsub();
        if (hb !== undefined) clearInterval(hb);
      };
      const send = (d: string) => {
        if (closed) return;
        try { controller.enqueue(encoder.encode(`data: ${d}\n\n`)); } catch { cleanup(); }
      };
      // Send current state
      for (const n of cluster.getAllNodes()) send(JSON.stringify({ type: 'node', event: 'snapshot', ...n }));
      // The stream already failed during the snapshot; do not attach anything.
      if (closed) return;
      unsub = cluster.subscribe((_nodeId, node, event) => send(JSON.stringify({ type: 'node', event, ...node })));
      hb = setInterval(() => send(JSON.stringify({ type: 'heartbeat' })), 30_000);
      c.req.raw.signal.addEventListener('abort', cleanup, { once: true });
    },
    cancel() {
      cleanup();
    },
  });
  return new Response(stream, { headers: { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive' } });
});
360
+
318
361
  engine.route('/community', createCommunityRoutes(communityRegistry));
319
362
  engine.route('/workforce', createWorkforceRoutes(workforce, { lifecycle }));
320
363
  engine.route('/policies', createPolicyRoutes(policyEngine));
@@ -783,6 +826,7 @@ export async function setEngineDb(
783
826
  (async () => { knowledgeImport.setDb((db as any)?.db || db); knowledgeImport.setKnowledgeEngine(knowledgeBase); await knowledgeImport.loadJobs(); })(),
784
827
  workforce.setDb(db),
785
828
  policyEngine.setDb(db),
829
+ (async () => { cluster.setDb(db); await cluster.loadFromDb(); })(),
786
830
  memoryManager.setDb(db),
787
831
  onboarding.setDb(db),
788
832
  vault.setDb(db),
@@ -1071,4 +1115,4 @@ export function setRuntime(runtime: any): void {
1071
1115
  }
1072
1116
 
1073
1117
  export { engine as engineRoutes };
1074
- export { permissionEngine, configGen, deployer, approvals, lifecycle, knowledgeBase, tenants, activity, dlp, commBus, guardrails, journal, compliance, communityRegistry, workforce, policyEngine, memoryManager, onboarding, vault, storageManager, policyImporter, knowledgeContribution, skillUpdater, agentStatus, hierarchyManager, databaseManager, orgIntegrations };
1118
+ export { permissionEngine, configGen, deployer, approvals, lifecycle, knowledgeBase, tenants, activity, dlp, commBus, guardrails, journal, compliance, communityRegistry, workforce, policyEngine, memoryManager, onboarding, vault, storageManager, policyImporter, knowledgeContribution, skillUpdater, agentStatus, hierarchyManager, databaseManager, orgIntegrations, cluster };