@agenticmail/enterprise 0.5.322 → 0.5.324
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-heartbeat-BBINFNL4.js +510 -0
- package/dist/chunk-CQYLRIQ3.js +25938 -0
- package/dist/chunk-GYB2WHMN.js +5101 -0
- package/dist/chunk-KN3T3CTD.js +4929 -0
- package/dist/chunk-SVSLIQYN.js +1519 -0
- package/dist/cli-agent-USMKX7WN.js +2473 -0
- package/dist/cli-serve-7JQ4FVUQ.js +260 -0
- package/dist/cli.js +3 -3
- package/dist/dashboard/app.js +4 -1
- package/dist/dashboard/components/icons.js +1 -0
- package/dist/dashboard/pages/agent-detail/index.js +25 -3
- package/dist/dashboard/pages/agents.js +30 -1
- package/dist/dashboard/pages/cluster.js +181 -0
- package/dist/index.js +4 -4
- package/dist/routes-XYR2RNEC.js +92 -0
- package/dist/runtime-ZOC337DD.js +45 -0
- package/dist/server-7NT4LMSQ.js +28 -0
- package/dist/setup-6NUSB4XO.js +20 -0
- package/logs/cloudflared-error.log +10 -0
- package/logs/enterprise-out.log +4 -0
- package/package.json +1 -1
- package/src/cli-agent.ts +33 -1
- package/src/dashboard/app.js +4 -1
- package/src/dashboard/components/icons.js +1 -0
- package/src/dashboard/pages/agent-detail/index.js +25 -3
- package/src/dashboard/pages/agents.js +30 -1
- package/src/dashboard/pages/cluster.js +181 -0
- package/src/engine/cluster.ts +265 -0
- package/src/engine/routes.ts +45 -1
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cluster Manager — Multi-Instance Agent Coordination
|
|
3
|
+
*
|
|
4
|
+
* Enables a single enterprise dashboard to manage agents running across
|
|
5
|
+
* multiple machines (Mac Minis, VPS, cloud instances).
|
|
6
|
+
*
|
|
7
|
+
* Architecture:
|
|
8
|
+
* - Enterprise server = "Control Plane" (single dashboard, DB, SSE hub)
|
|
9
|
+
* - Agent instances = "Worker Nodes" (run on any machine, phone home to control plane)
|
|
10
|
+
*
|
|
11
|
+
* Worker nodes register via POST /cluster/register with their capabilities.
|
|
12
|
+
* Control plane tracks health via heartbeats (every 30s, stale after 90s).
|
|
13
|
+
* Agents deployed to remote workers communicate via ENTERPRISE_URL.
|
|
14
|
+
*
|
|
15
|
+
* Flow:
|
|
16
|
+
* 1. Worker starts, calls POST /cluster/register { nodeId, host, port, capabilities }
|
|
17
|
+
* 2. Control plane stores worker in memory + DB
|
|
18
|
+
* 3. Dashboard shows all workers and their agents
|
|
19
|
+
* 4. When user deploys an agent, they can pick a target worker
|
|
20
|
+
* 5. Worker receives deploy command via its API
|
|
21
|
+
* 6. Agent process starts on worker, reports status back to control plane
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
/**
 * A worker machine registered with the control plane.
 * Created/refreshed by ClusterManager.register(), kept alive via heartbeats,
 * and flipped to 'offline' when no heartbeat arrives within the stale threshold.
 */
export interface WorkerNode {
  nodeId: string;            // Stable unique ID supplied by the worker at registration
  name: string;              // Display name (defaults to nodeId when not provided)
  host: string; // IP or hostname reachable from control plane
  port: number; // Worker API port
  url: string; // Full base URL (e.g., http://192.168.1.50:3101)
  platform: string; // darwin, linux, win32
  arch: string; // arm64, x64
  cpuCount: number;          // Logical CPU count reported by the worker
  memoryMb: number;          // Total memory (MB) reported by the worker
  version: string; // @agenticmail/enterprise version
  agents: string[]; // Agent IDs running on this worker
  capabilities: string[];// e.g., ['gpu', 'browser', 'voice', 'docker']
  status: 'online' | 'degraded' | 'offline'; // NOTE(review): 'degraded' is never assigned in this file — confirm who sets it
  registeredAt: string;      // ISO timestamp of first registration (preserved on re-register)
  lastHeartbeat: string;     // ISO timestamp of most recent heartbeat ('' if never seen)
  metadata?: Record<string, any>; // Free-form extras; heartbeat() stores cpuUsage/memoryUsage samples here
}
|
|
42
|
+
|
|
43
|
+
/**
 * Aggregate cluster snapshot (see ClusterManager.getStats()).
 * CPU/memory totals count only online nodes; node/agent totals count all nodes.
 */
export interface ClusterStats {
  totalNodes: number;    // All known nodes, regardless of status
  onlineNodes: number;   // Nodes whose status is 'online'
  totalAgents: number;   // Sum of agents[].length across every node
  totalCpus: number;     // Sum of cpuCount across online nodes only
  totalMemoryMb: number; // Sum of memoryMb across online nodes only
}
|
|
50
|
+
|
|
51
|
+
/** Callback invoked on every cluster event: a node registering, heartbeating, going offline, or being updated. */
type NodeListener = (nodeId: string, node: WorkerNode, event: 'register' | 'heartbeat' | 'offline' | 'update') => void;
|
|
52
|
+
|
|
53
|
+
export class ClusterManager {
|
|
54
|
+
private nodes = new Map<string, WorkerNode>();
|
|
55
|
+
private listeners = new Set<NodeListener>();
|
|
56
|
+
private staleTimer: NodeJS.Timeout | null = null;
|
|
57
|
+
private staleThresholdMs = 90_000; // 90s without heartbeat = offline
|
|
58
|
+
private db: any = null;
|
|
59
|
+
|
|
60
|
+
constructor() {
|
|
61
|
+
this.staleTimer = setInterval(() => this.checkStale(), 30_000);
|
|
62
|
+
if (this.staleTimer.unref) this.staleTimer.unref();
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
setDb(db: any) { this.db = db; }
|
|
66
|
+
|
|
67
|
+
/** Load persisted workers from DB on startup */
|
|
68
|
+
async loadFromDb(): Promise<void> {
|
|
69
|
+
if (!this.db) return;
|
|
70
|
+
try {
|
|
71
|
+
await this.db.execute(`CREATE TABLE IF NOT EXISTS cluster_nodes (
|
|
72
|
+
node_id TEXT PRIMARY KEY,
|
|
73
|
+
name TEXT NOT NULL,
|
|
74
|
+
host TEXT NOT NULL,
|
|
75
|
+
port INTEGER NOT NULL,
|
|
76
|
+
url TEXT NOT NULL,
|
|
77
|
+
platform TEXT,
|
|
78
|
+
arch TEXT,
|
|
79
|
+
cpu_count INTEGER DEFAULT 0,
|
|
80
|
+
memory_mb INTEGER DEFAULT 0,
|
|
81
|
+
version TEXT,
|
|
82
|
+
agents TEXT DEFAULT '[]',
|
|
83
|
+
capabilities TEXT DEFAULT '[]',
|
|
84
|
+
status TEXT DEFAULT 'offline',
|
|
85
|
+
registered_at TEXT,
|
|
86
|
+
last_heartbeat TEXT,
|
|
87
|
+
metadata TEXT
|
|
88
|
+
)`);
|
|
89
|
+
const rows = await this.db.query('SELECT * FROM cluster_nodes');
|
|
90
|
+
for (const row of rows) {
|
|
91
|
+
const node: WorkerNode = {
|
|
92
|
+
nodeId: row.node_id,
|
|
93
|
+
name: row.name || row.node_id,
|
|
94
|
+
host: row.host,
|
|
95
|
+
port: row.port,
|
|
96
|
+
url: row.url,
|
|
97
|
+
platform: row.platform || 'unknown',
|
|
98
|
+
arch: row.arch || 'unknown',
|
|
99
|
+
cpuCount: row.cpu_count || 0,
|
|
100
|
+
memoryMb: row.memory_mb || 0,
|
|
101
|
+
version: row.version || 'unknown',
|
|
102
|
+
agents: safeParse(row.agents, []),
|
|
103
|
+
capabilities: safeParse(row.capabilities, []),
|
|
104
|
+
status: 'offline', // Start as offline; heartbeat will set online
|
|
105
|
+
registeredAt: row.registered_at || new Date().toISOString(),
|
|
106
|
+
lastHeartbeat: row.last_heartbeat || '',
|
|
107
|
+
metadata: safeParse(row.metadata, {}),
|
|
108
|
+
};
|
|
109
|
+
this.nodes.set(node.nodeId, node);
|
|
110
|
+
}
|
|
111
|
+
} catch (e: any) {
|
|
112
|
+
console.warn('[cluster] Failed to load nodes from DB:', e.message);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/** Register or re-register a worker node */
|
|
117
|
+
async register(data: {
|
|
118
|
+
nodeId: string;
|
|
119
|
+
name?: string;
|
|
120
|
+
host: string;
|
|
121
|
+
port: number;
|
|
122
|
+
platform?: string;
|
|
123
|
+
arch?: string;
|
|
124
|
+
cpuCount?: number;
|
|
125
|
+
memoryMb?: number;
|
|
126
|
+
version?: string;
|
|
127
|
+
agents?: string[];
|
|
128
|
+
capabilities?: string[];
|
|
129
|
+
metadata?: Record<string, any>;
|
|
130
|
+
}): Promise<WorkerNode> {
|
|
131
|
+
const now = new Date().toISOString();
|
|
132
|
+
const existing = this.nodes.get(data.nodeId);
|
|
133
|
+
const node: WorkerNode = {
|
|
134
|
+
nodeId: data.nodeId,
|
|
135
|
+
name: data.name || data.nodeId,
|
|
136
|
+
host: data.host,
|
|
137
|
+
port: data.port,
|
|
138
|
+
url: `http://${data.host}:${data.port}`,
|
|
139
|
+
platform: data.platform || 'unknown',
|
|
140
|
+
arch: data.arch || 'unknown',
|
|
141
|
+
cpuCount: data.cpuCount || 0,
|
|
142
|
+
memoryMb: data.memoryMb || 0,
|
|
143
|
+
version: data.version || 'unknown',
|
|
144
|
+
agents: data.agents || existing?.agents || [],
|
|
145
|
+
capabilities: data.capabilities || [],
|
|
146
|
+
status: 'online',
|
|
147
|
+
registeredAt: existing?.registeredAt || now,
|
|
148
|
+
lastHeartbeat: now,
|
|
149
|
+
metadata: data.metadata || existing?.metadata || {},
|
|
150
|
+
};
|
|
151
|
+
this.nodes.set(node.nodeId, node);
|
|
152
|
+
this.emit(node.nodeId, node, 'register');
|
|
153
|
+
|
|
154
|
+
// Persist
|
|
155
|
+
if (this.db) {
|
|
156
|
+
try {
|
|
157
|
+
await this.db.execute(
|
|
158
|
+
`INSERT INTO cluster_nodes (node_id, name, host, port, url, platform, arch, cpu_count, memory_mb, version, agents, capabilities, status, registered_at, last_heartbeat, metadata)
|
|
159
|
+
VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16)
|
|
160
|
+
ON CONFLICT (node_id) DO UPDATE SET
|
|
161
|
+
name=$2, host=$3, port=$4, url=$5, platform=$6, arch=$7, cpu_count=$8, memory_mb=$9, version=$10,
|
|
162
|
+
agents=$11, capabilities=$12, status=$13, last_heartbeat=$15, metadata=$16`,
|
|
163
|
+
[node.nodeId, node.name, node.host, node.port, node.url, node.platform, node.arch,
|
|
164
|
+
node.cpuCount, node.memoryMb, node.version, JSON.stringify(node.agents),
|
|
165
|
+
JSON.stringify(node.capabilities), node.status, node.registeredAt, node.lastHeartbeat,
|
|
166
|
+
JSON.stringify(node.metadata)]
|
|
167
|
+
);
|
|
168
|
+
} catch (e: any) { console.warn('[cluster] DB persist error:', e.message); }
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
return node;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
/** Worker sends heartbeat (every 30s) */
|
|
175
|
+
heartbeat(nodeId: string, data?: { agents?: string[]; cpuUsage?: number; memoryUsage?: number }): void {
|
|
176
|
+
const node = this.nodes.get(nodeId);
|
|
177
|
+
if (!node) return;
|
|
178
|
+
node.lastHeartbeat = new Date().toISOString();
|
|
179
|
+
node.status = 'online';
|
|
180
|
+
if (data?.agents) node.agents = data.agents;
|
|
181
|
+
if (data?.cpuUsage != null && node.metadata) node.metadata.cpuUsage = data.cpuUsage;
|
|
182
|
+
if (data?.memoryUsage != null && node.metadata) node.metadata.memoryUsage = data.memoryUsage;
|
|
183
|
+
this.emit(nodeId, node, 'heartbeat');
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
/** Remove a worker node */
|
|
187
|
+
async remove(nodeId: string): Promise<void> {
|
|
188
|
+
const node = this.nodes.get(nodeId);
|
|
189
|
+
this.nodes.delete(nodeId);
|
|
190
|
+
if (node) this.emit(nodeId, { ...node, status: 'offline' }, 'offline');
|
|
191
|
+
if (this.db) {
|
|
192
|
+
try { await this.db.execute('DELETE FROM cluster_nodes WHERE node_id = $1', [nodeId]); } catch {}
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
/** Get a specific node */
|
|
197
|
+
getNode(nodeId: string): WorkerNode | undefined { return this.nodes.get(nodeId); }
|
|
198
|
+
|
|
199
|
+
/** Get all nodes */
|
|
200
|
+
getAllNodes(): WorkerNode[] { return Array.from(this.nodes.values()); }
|
|
201
|
+
|
|
202
|
+
/** Get online nodes */
|
|
203
|
+
getOnlineNodes(): WorkerNode[] { return this.getAllNodes().filter(n => n.status === 'online'); }
|
|
204
|
+
|
|
205
|
+
/** Get cluster-wide stats */
|
|
206
|
+
getStats(): ClusterStats {
|
|
207
|
+
const nodes = this.getAllNodes();
|
|
208
|
+
const online = nodes.filter(n => n.status === 'online');
|
|
209
|
+
return {
|
|
210
|
+
totalNodes: nodes.length,
|
|
211
|
+
onlineNodes: online.length,
|
|
212
|
+
totalAgents: nodes.reduce((s, n) => s + n.agents.length, 0),
|
|
213
|
+
totalCpus: online.reduce((s, n) => s + n.cpuCount, 0),
|
|
214
|
+
totalMemoryMb: online.reduce((s, n) => s + n.memoryMb, 0),
|
|
215
|
+
};
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
/** Find best node for a new agent (simple: least agents on online node) */
|
|
219
|
+
findBestNode(capabilities?: string[]): WorkerNode | null {
|
|
220
|
+
let candidates = this.getOnlineNodes();
|
|
221
|
+
if (capabilities?.length) {
|
|
222
|
+
candidates = candidates.filter(n => capabilities.every(c => n.capabilities.includes(c)));
|
|
223
|
+
}
|
|
224
|
+
if (candidates.length === 0) return null;
|
|
225
|
+
return candidates.sort((a, b) => a.agents.length - b.agents.length)[0];
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/** Subscribe to cluster events (for SSE) */
|
|
229
|
+
subscribe(listener: NodeListener): () => void {
|
|
230
|
+
this.listeners.add(listener);
|
|
231
|
+
return () => this.listeners.delete(listener);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
private emit(nodeId: string, node: WorkerNode, event: 'register' | 'heartbeat' | 'offline' | 'update'): void {
|
|
235
|
+
for (const listener of this.listeners) {
|
|
236
|
+
try { listener(nodeId, node, event); } catch {}
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
private checkStale(): void {
|
|
241
|
+
const now = Date.now();
|
|
242
|
+
for (const [nodeId, node] of this.nodes) {
|
|
243
|
+
if (node.status === 'offline') continue;
|
|
244
|
+
if (node.lastHeartbeat) {
|
|
245
|
+
const elapsed = now - new Date(node.lastHeartbeat).getTime();
|
|
246
|
+
if (elapsed > this.staleThresholdMs) {
|
|
247
|
+
node.status = 'offline';
|
|
248
|
+
node.agents = [];
|
|
249
|
+
this.emit(nodeId, node, 'offline');
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
destroy(): void {
|
|
256
|
+
if (this.staleTimer) clearInterval(this.staleTimer);
|
|
257
|
+
this.listeners.clear();
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
function safeParse(val: any, fallback: any): any {
|
|
262
|
+
if (!val) return fallback;
|
|
263
|
+
if (typeof val === 'object') return val;
|
|
264
|
+
try { return JSON.parse(val); } catch { return fallback; }
|
|
265
|
+
}
|
package/src/engine/routes.ts
CHANGED
|
@@ -116,6 +116,8 @@ const tenants = new TenantManager();
|
|
|
116
116
|
const activity = new ActivityTracker();
|
|
117
117
|
import { AgentStatusTracker } from './agent-status.js';
|
|
118
118
|
const agentStatus = new AgentStatusTracker();
|
|
119
|
+
import { ClusterManager } from './cluster.js';
|
|
120
|
+
const cluster = new ClusterManager();
|
|
119
121
|
const dlp = new DLPEngine();
|
|
120
122
|
const commBus = new AgentCommunicationBus();
|
|
121
123
|
const guardrails = new GuardrailEngine({
|
|
@@ -315,6 +317,47 @@ engine.get('/agent-status-stream', (c) => {
|
|
|
315
317
|
});
|
|
316
318
|
});
|
|
317
319
|
|
|
320
|
+
// ─── Cluster Management ─────────────────────────────────
// Control-plane HTTP API for worker nodes (backed by the ClusterManager in engine/cluster.ts).

// List every known worker plus aggregate stats (dashboard cluster page).
engine.get('/cluster/nodes', (c) => c.json({ nodes: cluster.getAllNodes(), stats: cluster.getStats() }));
// Fetch a single worker by its nodeId.
engine.get('/cluster/nodes/:nodeId', (c) => {
  const node = cluster.getNode(c.req.param('nodeId'));
  return node ? c.json(node) : c.json({ error: 'Node not found' }, 404);
});
// Worker registration / re-registration (upsert keyed by nodeId).
engine.post('/cluster/register', async (c) => {
  const body = await c.req.json();
  if (!body.nodeId || !body.host || !body.port) return c.json({ error: 'nodeId, host, port required' }, 400);
  const node = await cluster.register(body);
  return c.json(node);
});
// Liveness ping; body may carry the worker's agent list and usage samples.
// NOTE(review): replies { ok: true } even for nodeIds the manager does not know,
// so a worker forgotten by the control plane is never told to re-register — confirm intended.
engine.post('/cluster/heartbeat/:nodeId', async (c) => {
  const body = await c.req.json().catch(() => ({}));
  cluster.heartbeat(c.req.param('nodeId'), body);
  return c.json({ ok: true });
});
// Remove a worker from the cluster (memory + DB).
engine.delete('/cluster/nodes/:nodeId', async (c) => {
  await cluster.remove(c.req.param('nodeId'));
  return c.json({ removed: true });
});
// Placement hint: least-loaded online node matching ?capabilities=a,b (comma-separated).
engine.get('/cluster/best-node', (c) => {
  const caps = c.req.query('capabilities')?.split(',').filter(Boolean);
  const node = cluster.findBestNode(caps);
  return node ? c.json(node) : c.json({ error: 'No suitable node available' }, 404);
});
// SSE stream of cluster events: a snapshot of all nodes on connect, then live
// register/heartbeat/offline events, plus a keep-alive ping every 30s.
engine.get('/cluster/stream', (c) => {
  const stream = new ReadableStream({
    start(controller) {
      const encoder = new TextEncoder();
      // NOTE(review): `send` closes over `unsub` before it is assigned; if enqueue
      // throws during the snapshot loop below, the catch hits the temporal dead
      // zone and raises a ReferenceError instead of unsubscribing — confirm.
      const send = (d: string) => { try { controller.enqueue(encoder.encode(`data: ${d}\n\n`)); } catch { unsub(); } };
      // Send current state
      for (const n of cluster.getAllNodes()) send(JSON.stringify({ type: 'node', event: 'snapshot', ...n }));
      const unsub = cluster.subscribe((nodeId, node, event) => send(JSON.stringify({ type: 'node', event, ...node })));
      const hb = setInterval(() => send(JSON.stringify({ type: 'heartbeat' })), 30_000);
      // Clean up subscription and keep-alive timer when the client disconnects.
      c.req.raw.signal.addEventListener('abort', () => { unsub(); clearInterval(hb); });
    },
  });
  return new Response(stream, { headers: { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive' } });
});
|
|
360
|
+
|
|
318
361
|
engine.route('/community', createCommunityRoutes(communityRegistry));
|
|
319
362
|
engine.route('/workforce', createWorkforceRoutes(workforce, { lifecycle }));
|
|
320
363
|
engine.route('/policies', createPolicyRoutes(policyEngine));
|
|
@@ -783,6 +826,7 @@ export async function setEngineDb(
|
|
|
783
826
|
(async () => { knowledgeImport.setDb((db as any)?.db || db); knowledgeImport.setKnowledgeEngine(knowledgeBase); await knowledgeImport.loadJobs(); })(),
|
|
784
827
|
workforce.setDb(db),
|
|
785
828
|
policyEngine.setDb(db),
|
|
829
|
+
(async () => { cluster.setDb(db); await cluster.loadFromDb(); })(),
|
|
786
830
|
memoryManager.setDb(db),
|
|
787
831
|
onboarding.setDb(db),
|
|
788
832
|
vault.setDb(db),
|
|
@@ -1071,4 +1115,4 @@ export function setRuntime(runtime: any): void {
|
|
|
1071
1115
|
}
|
|
1072
1116
|
|
|
1073
1117
|
export { engine as engineRoutes };
|
|
1074
|
-
export { permissionEngine, configGen, deployer, approvals, lifecycle, knowledgeBase, tenants, activity, dlp, commBus, guardrails, journal, compliance, communityRegistry, workforce, policyEngine, memoryManager, onboarding, vault, storageManager, policyImporter, knowledgeContribution, skillUpdater, agentStatus, hierarchyManager, databaseManager, orgIntegrations };
|
|
1118
|
+
export { permissionEngine, configGen, deployer, approvals, lifecycle, knowledgeBase, tenants, activity, dlp, commBus, guardrails, journal, compliance, communityRegistry, workforce, policyEngine, memoryManager, onboarding, vault, storageManager, policyImporter, knowledgeContribution, skillUpdater, agentStatus, hierarchyManager, databaseManager, orgIntegrations, cluster };
|