@amd-gaia/agent-ui 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +101 -0
- package/app.config.json +37 -0
- package/assets/icon.ico +0 -0
- package/assets/icon.png +0 -0
- package/assets/tray-icon-active.png +0 -0
- package/assets/tray-icon-active@2x.png +0 -0
- package/assets/tray-icon.ico +0 -0
- package/assets/tray-icon.png +0 -0
- package/assets/tray-icon@2x.png +0 -0
- package/assets/tray-iconTemplate.png +0 -0
- package/assets/tray-iconTemplate@2x.png +0 -0
- package/bin/gaia-ui.mjs +572 -0
- package/dist/assets/browser-CTB2jwNe.js +8 -0
- package/dist/assets/dm-sans-latin-Xz1IZZA0.woff2 +0 -0
- package/dist/assets/gaia-robot-NKaQnEIp.png +0 -0
- package/dist/assets/index-C7oO2M6Q.js +432 -0
- package/dist/assets/index-TyWv9Ej0.css +1 -0
- package/dist/assets/jetbrains-mono-latin-6fWv1k7M.woff2 +0 -0
- package/dist/assets/space-mono-400-Co7bH5Hm.woff2 +0 -0
- package/dist/favicon.png +0 -0
- package/dist/index.html +14 -0
- package/main.cjs +511 -0
- package/package.json +83 -0
- package/preload.cjs +61 -0
- package/services/agent-process-manager.cjs +818 -0
- package/services/notification-service.cjs +419 -0
- package/services/tray-manager.cjs +239 -0
|
@@ -0,0 +1,818 @@
|
|
|
1
|
+
// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* GAIA Agent UI — Agent Process Manager (T2)
|
|
6
|
+
*
|
|
7
|
+
* Manages OS agent subprocesses (C++ MCP servers, .NET agents, Python agents).
|
|
8
|
+
* Each agent communicates via JSON-RPC 2.0 over stdio:
|
|
9
|
+
* - stdout → JSON-RPC messages (MCP protocol + GAIA extensions)
|
|
10
|
+
* - stderr → Structured log lines → piped to terminal view
|
|
11
|
+
* - stdin → JSON-RPC requests from the tray app
|
|
12
|
+
*
|
|
13
|
+
* Cross-platform shutdown protocol (C3 fix):
|
|
14
|
+
* 1. Send JSON-RPC {"method": "shutdown"} via stdin
|
|
15
|
+
* 2. Wait up to 5s for clean exit
|
|
16
|
+
* 3. Force kill as last resort
|
|
17
|
+
*
|
|
18
|
+
* Health checking uses {"method": "ping"} (S1 fix), NOT "initialize".
|
|
19
|
+
*
|
|
20
|
+
* Config stored in ~/.gaia/tray-config.json (S2 fix).
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
const { spawn } = require("child_process");
|
|
24
|
+
const { ipcMain } = require("electron");
|
|
25
|
+
const path = require("path");
|
|
26
|
+
const fs = require("fs");
|
|
27
|
+
const os = require("os");
|
|
28
|
+
const { EventEmitter } = require("events");
|
|
29
|
+
|
|
30
|
+
// ── Constants ────────────────────────────────────────────────────────────
|
|
31
|
+
|
|
32
|
+
// Per-user GAIA data directory and the files/directories kept inside it.
const GAIA_DIR = path.join(os.homedir(), ".gaia");
const AGENTS_DIR = path.join(GAIA_DIR, "agents");
const CONFIG_PATH = path.join(GAIA_DIR, "tray-config.json");
const CRASH_LOG_PATH = path.join(GAIA_DIR, "crash-log.json");
const MANIFEST_FILENAME = "agent-manifest.json";

// How long stopAgent() waits for a clean exit before force killing (ms).
const SHUTDOWN_TIMEOUT = 5 * 1000;

// Period of the per-agent "ping" health check (ms).
const HEALTH_CHECK_INTERVAL = 30 * 1000;

// Pause inserted after each sequential auto-start (ms).
const AUTO_START_DELAY = 100;

// Number of crash restarts tolerated inside one crash window.
const MAX_CRASH_RESTARTS = 3;

// Length of the sliding window used to count crashes (ms).
const CRASH_WINDOW = 60 * 1000;

// Delay between a crash and the automatic restart attempt (ms).
const CRASH_RESTART_DELAY = 2 * 1000;

// Upper bound on retained stderr lines per agent.
const STDERR_BUFFER_MAX = 10 * 1000;

// Upper bound on the pending stdout buffer per agent (1 MB); guards against
// output that never contains a newline.
const STDOUT_BUFFER_MAX = 1024 * 1024;
|
|
61
|
+
|
|
62
|
+
// ── AgentProcessManager ──────────────────────────────────────────────────
|
|
63
|
+
|
|
64
|
+
class AgentProcessManager extends EventEmitter {
|
|
65
|
+
/**
|
|
66
|
+
* @param {Electron.BrowserWindow} mainWindow — for sending IPC events to renderer
|
|
67
|
+
*/
|
|
68
|
+
constructor(mainWindow) {
|
|
69
|
+
super();
|
|
70
|
+
|
|
71
|
+
/** @type {Electron.BrowserWindow} */
|
|
72
|
+
this.mainWindow = mainWindow;
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Running processes keyed by agentId.
|
|
76
|
+
* @type {Record<string, {
|
|
77
|
+
* process: import('child_process').ChildProcess,
|
|
78
|
+
* startedAt: number,
|
|
79
|
+
* stderrBuffer: string[],
|
|
80
|
+
* stdoutBuffer: string,
|
|
81
|
+
* rpcIdCounter: number,
|
|
82
|
+
* pendingRpc: Record<string, { resolve: Function, reject: Function, timer: NodeJS.Timeout }>,
|
|
83
|
+
* healthTimer: NodeJS.Timeout | null,
|
|
84
|
+
* stopping: boolean,
|
|
85
|
+
* }>}
|
|
86
|
+
*/
|
|
87
|
+
this.processes = {};
|
|
88
|
+
|
|
89
|
+
/** Crash timestamps per agent for rate-limiting restart attempts */
|
|
90
|
+
this._crashTimes = {};
|
|
91
|
+
|
|
92
|
+
/** Agent manifest (loaded from disk or fetched) */
|
|
93
|
+
this.manifest = this._loadManifest();
|
|
94
|
+
|
|
95
|
+
/** Tray config (for auto-start and crash-restart settings) */
|
|
96
|
+
this.config = this._loadConfig();
|
|
97
|
+
|
|
98
|
+
this._registerIpcHandlers();
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// ── Public API: Lifecycle ────────────────────────────────────────────
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Start an agent subprocess.
|
|
105
|
+
* @param {string} agentId
|
|
106
|
+
* @returns {Promise<{ pid: number }>}
|
|
107
|
+
*/
|
|
108
|
+
async startAgent(agentId) {
|
|
109
|
+
if (this.processes[agentId]) {
|
|
110
|
+
console.log(`[agent-mgr] Agent ${agentId} is already running (PID ${this.processes[agentId].process.pid})`);
|
|
111
|
+
return { pid: this.processes[agentId].process.pid };
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
const agentInfo = this._getAgentInfo(agentId);
|
|
115
|
+
if (!agentInfo) {
|
|
116
|
+
throw new Error(`Agent "${agentId}" not found in manifest`);
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
const binaryPath = this._resolveBinaryPath(agentInfo);
|
|
120
|
+
if (!binaryPath || !fs.existsSync(binaryPath)) {
|
|
121
|
+
throw new Error(
|
|
122
|
+
`Agent binary not found: ${binaryPath || "(no binary for this platform)"}`
|
|
123
|
+
);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
console.log(`[agent-mgr] Starting agent ${agentId}: ${binaryPath}`);
|
|
127
|
+
|
|
128
|
+
const child = spawn(binaryPath, ["--stdio"], {
|
|
129
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
130
|
+
env: { ...process.env },
|
|
131
|
+
detached: false,
|
|
132
|
+
// On Windows, don't create a console window for the subprocess
|
|
133
|
+
windowsHide: true,
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
const entry = {
|
|
137
|
+
process: child,
|
|
138
|
+
startedAt: Date.now(),
|
|
139
|
+
stderrBuffer: [],
|
|
140
|
+
stdoutBuffer: "",
|
|
141
|
+
rpcIdCounter: 1,
|
|
142
|
+
pendingRpc: {},
|
|
143
|
+
healthTimer: null,
|
|
144
|
+
stopping: false, // Set to true during intentional shutdown to suppress crash recovery
|
|
145
|
+
};
|
|
146
|
+
|
|
147
|
+
this.processes[agentId] = entry;
|
|
148
|
+
|
|
149
|
+
// ── stdout: JSON-RPC message stream ──
|
|
150
|
+
child.stdout.on("data", (data) => {
|
|
151
|
+
this._handleStdout(agentId, data);
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
// ── stderr: log lines ──
|
|
155
|
+
child.stderr.on("data", (data) => {
|
|
156
|
+
this._handleStderr(agentId, data);
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
// ── Process lifecycle events ──
|
|
160
|
+
child.on("error", (err) => {
|
|
161
|
+
console.error(`[agent-mgr] Agent ${agentId} spawn error:`, err.message);
|
|
162
|
+
this._emitStatusChange(agentId, "error", err.message);
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
child.on("exit", (code, signal) => {
|
|
166
|
+
console.log(
|
|
167
|
+
`[agent-mgr] Agent ${agentId} exited (code=${code}, signal=${signal})`
|
|
168
|
+
);
|
|
169
|
+
this._handleProcessExit(agentId, code, signal);
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
// Start health check timer
|
|
173
|
+
entry.healthTimer = setInterval(() => {
|
|
174
|
+
this._healthCheck(agentId);
|
|
175
|
+
}, HEALTH_CHECK_INTERVAL);
|
|
176
|
+
|
|
177
|
+
this._emitStatusChange(agentId, "running");
|
|
178
|
+
console.log(`[agent-mgr] Agent ${agentId} started (PID ${child.pid})`);
|
|
179
|
+
|
|
180
|
+
return { pid: child.pid };
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* Stop an agent gracefully using JSON-RPC shutdown protocol.
|
|
185
|
+
* Cross-platform (works on Windows where SIGTERM = TerminateProcess).
|
|
186
|
+
* @param {string} agentId
|
|
187
|
+
* @returns {Promise<void>}
|
|
188
|
+
*/
|
|
189
|
+
async stopAgent(agentId) {
|
|
190
|
+
const entry = this.processes[agentId];
|
|
191
|
+
if (!entry) {
|
|
192
|
+
console.log(`[agent-mgr] Agent ${agentId} is not running`);
|
|
193
|
+
return;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// Guard against concurrent stopAgent() calls for the same agent
|
|
197
|
+
if (entry.stopping) {
|
|
198
|
+
console.log(`[agent-mgr] Agent ${agentId} is already being stopped — skipping duplicate`);
|
|
199
|
+
return;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
console.log(`[agent-mgr] Stopping agent ${agentId} (PID ${entry.process.pid})...`);
|
|
203
|
+
|
|
204
|
+
// Mark as intentionally stopping — suppresses crash recovery in _handleProcessExit
|
|
205
|
+
entry.stopping = true;
|
|
206
|
+
|
|
207
|
+
// Clear health check timer
|
|
208
|
+
if (entry.healthTimer) {
|
|
209
|
+
clearInterval(entry.healthTimer);
|
|
210
|
+
entry.healthTimer = null;
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
// Step 1: Send JSON-RPC shutdown request via stdin
|
|
214
|
+
try {
|
|
215
|
+
this._sendJsonRpcRaw(agentId, "shutdown", {});
|
|
216
|
+
} catch (err) {
|
|
217
|
+
console.warn(
|
|
218
|
+
`[agent-mgr] Could not send shutdown to ${agentId}:`,
|
|
219
|
+
err.message
|
|
220
|
+
);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// Step 2: Wait up to SHUTDOWN_TIMEOUT for clean exit
|
|
224
|
+
const exited = await this._waitForExit(agentId, SHUTDOWN_TIMEOUT);
|
|
225
|
+
|
|
226
|
+
// Step 3: Force kill if still running
|
|
227
|
+
if (!exited && this.processes[agentId]) {
|
|
228
|
+
console.warn(
|
|
229
|
+
`[agent-mgr] Agent ${agentId} did not exit within ${SHUTDOWN_TIMEOUT}ms, force killing...`
|
|
230
|
+
);
|
|
231
|
+
try {
|
|
232
|
+
entry.process.kill(); // SIGKILL on Unix, TerminateProcess on Windows
|
|
233
|
+
} catch {
|
|
234
|
+
// Already dead
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
// Note: _handleProcessExit may have already cleaned up if the process exited.
|
|
239
|
+
// _cleanupProcess is idempotent, so calling it again is safe.
|
|
240
|
+
this._cleanupProcess(agentId);
|
|
241
|
+
this._emitStatusChange(agentId, "stopped");
|
|
242
|
+
console.log(`[agent-mgr] Agent ${agentId} stopped`);
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
/**
|
|
246
|
+
* Restart an agent (stop + start).
|
|
247
|
+
* @param {string} agentId
|
|
248
|
+
*/
|
|
249
|
+
async restartAgent(agentId) {
|
|
250
|
+
await this.stopAgent(agentId);
|
|
251
|
+
return this.startAgent(agentId);
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
// ── Public API: Monitoring ───────────────────────────────────────────
|
|
255
|
+
|
|
256
|
+
/**
|
|
257
|
+
* Get the status of a single agent.
|
|
258
|
+
* @param {string} agentId
|
|
259
|
+
* @returns {{ installed: boolean, running: boolean, pid?: number, uptime?: number, memoryMB?: number }}
|
|
260
|
+
*/
|
|
261
|
+
getAgentStatus(agentId) {
|
|
262
|
+
const entry = this.processes[agentId];
|
|
263
|
+
const agentInfo = this._getAgentInfo(agentId);
|
|
264
|
+
const binaryPath = agentInfo
|
|
265
|
+
? this._resolveBinaryPath(agentInfo)
|
|
266
|
+
: null;
|
|
267
|
+
const installed = binaryPath ? fs.existsSync(binaryPath) : false;
|
|
268
|
+
|
|
269
|
+
if (!entry) {
|
|
270
|
+
return { installed, running: false };
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
const uptime = Math.floor((Date.now() - entry.startedAt) / 1000);
|
|
274
|
+
let memoryMB = undefined;
|
|
275
|
+
|
|
276
|
+
// Try to read memory usage (not available on all platforms)
|
|
277
|
+
try {
|
|
278
|
+
if (entry.process.pid) {
|
|
279
|
+
// Node doesn't expose child memory directly, but we track it via health checks
|
|
280
|
+
memoryMB = entry._lastMemoryMB || undefined;
|
|
281
|
+
}
|
|
282
|
+
} catch {
|
|
283
|
+
// Ignore
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
return {
|
|
287
|
+
installed,
|
|
288
|
+
running: true,
|
|
289
|
+
pid: entry.process.pid,
|
|
290
|
+
uptime,
|
|
291
|
+
memoryMB,
|
|
292
|
+
};
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
/**
|
|
296
|
+
* Get statuses for all known agents.
|
|
297
|
+
* @returns {Record<string, object>}
|
|
298
|
+
*/
|
|
299
|
+
getAllAgentStatuses() {
|
|
300
|
+
const result = {};
|
|
301
|
+
|
|
302
|
+
// Include agents from manifest
|
|
303
|
+
if (this.manifest && this.manifest.agents) {
|
|
304
|
+
for (const agent of this.manifest.agents) {
|
|
305
|
+
result[agent.id] = this.getAgentStatus(agent.id);
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
// Include any running agents not in manifest (shouldn't happen, but safety)
|
|
310
|
+
for (const agentId of Object.keys(this.processes)) {
|
|
311
|
+
if (!result[agentId]) {
|
|
312
|
+
result[agentId] = this.getAgentStatus(agentId);
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
return result;
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
// ── Public API: I/O ──────────────────────────────────────────────────
|
|
320
|
+
|
|
321
|
+
/**
|
|
322
|
+
* Send a JSON-RPC request to an agent and wait for the response.
|
|
323
|
+
* @param {string} agentId
|
|
324
|
+
* @param {string} method
|
|
325
|
+
* @param {object} params
|
|
326
|
+
* @param {number} [timeoutMs=30000]
|
|
327
|
+
* @returns {Promise<any>}
|
|
328
|
+
*/
|
|
329
|
+
sendJsonRpc(agentId, method, params = {}, timeoutMs = 30000) {
|
|
330
|
+
return new Promise((resolve, reject) => {
|
|
331
|
+
const entry = this.processes[agentId];
|
|
332
|
+
if (!entry) {
|
|
333
|
+
reject(new Error(`Agent "${agentId}" is not running`));
|
|
334
|
+
return;
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
const id = `rpc-${entry.rpcIdCounter++}`;
|
|
338
|
+
|
|
339
|
+
const timer = setTimeout(() => {
|
|
340
|
+
delete entry.pendingRpc[id];
|
|
341
|
+
reject(new Error(`JSON-RPC timeout for ${method} (${timeoutMs}ms)`));
|
|
342
|
+
}, timeoutMs);
|
|
343
|
+
|
|
344
|
+
entry.pendingRpc[id] = { resolve, reject, timer };
|
|
345
|
+
|
|
346
|
+
this._sendJsonRpcRaw(agentId, method, params, id);
|
|
347
|
+
});
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
// ── Public API: Bulk operations ──────────────────────────────────────
|
|
351
|
+
|
|
352
|
+
/**
|
|
353
|
+
* Start all agents marked as auto-start in config.
|
|
354
|
+
*/
|
|
355
|
+
async startAllEnabled() {
|
|
356
|
+
const agentConfigs = this.config.agents || {};
|
|
357
|
+
|
|
358
|
+
for (const [agentId, agentCfg] of Object.entries(agentConfigs)) {
|
|
359
|
+
if (agentCfg.autoStart && !this.processes[agentId]) {
|
|
360
|
+
try {
|
|
361
|
+
await this.startAgent(agentId);
|
|
362
|
+
// Stagger starts to avoid resource spike
|
|
363
|
+
await new Promise((r) => setTimeout(r, AUTO_START_DELAY));
|
|
364
|
+
} catch (err) {
|
|
365
|
+
console.error(
|
|
366
|
+
`[agent-mgr] Failed to auto-start ${agentId}:`,
|
|
367
|
+
err.message
|
|
368
|
+
);
|
|
369
|
+
this.emit("agent-start-failed", agentId, err.message);
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
/**
|
|
376
|
+
* Stop all running agents gracefully.
|
|
377
|
+
*/
|
|
378
|
+
async stopAll() {
|
|
379
|
+
const agentIds = Object.keys(this.processes);
|
|
380
|
+
console.log(
|
|
381
|
+
`[agent-mgr] Stopping all agents: ${agentIds.join(", ") || "(none)"}`
|
|
382
|
+
);
|
|
383
|
+
await Promise.all(agentIds.map((id) => this.stopAgent(id)));
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
// ── Public API: Manifest ─────────────────────────────────────────────
|
|
387
|
+
|
|
388
|
+
/** @returns {object | null} The agent manifest */
|
|
389
|
+
getManifest() {
|
|
390
|
+
return this.manifest;
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
/** Reload the manifest from disk. */
|
|
394
|
+
reloadManifest() {
|
|
395
|
+
this.manifest = this._loadManifest();
|
|
396
|
+
return this.manifest;
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
// ── Private: stdout handling (JSON-RPC) ──────────────────────────────
|
|
400
|
+
|
|
401
|
+
_handleStdout(agentId, data) {
|
|
402
|
+
const entry = this.processes[agentId];
|
|
403
|
+
if (!entry) return;
|
|
404
|
+
|
|
405
|
+
// Buffer incoming data and split on newlines (JSON-RPC uses newline-delimited JSON)
|
|
406
|
+
entry.stdoutBuffer += data.toString();
|
|
407
|
+
|
|
408
|
+
// Safety: cap buffer size to prevent memory leak from malformed output without newlines
|
|
409
|
+
if (entry.stdoutBuffer.length > STDOUT_BUFFER_MAX) {
|
|
410
|
+
console.warn(
|
|
411
|
+
`[agent-mgr] stdout buffer for ${agentId} exceeded ${STDOUT_BUFFER_MAX} bytes — discarding`
|
|
412
|
+
);
|
|
413
|
+
entry.stdoutBuffer = "";
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
let newlineIdx;
|
|
417
|
+
while ((newlineIdx = entry.stdoutBuffer.indexOf("\n")) !== -1) {
|
|
418
|
+
const line = entry.stdoutBuffer.slice(0, newlineIdx).trim();
|
|
419
|
+
entry.stdoutBuffer = entry.stdoutBuffer.slice(newlineIdx + 1);
|
|
420
|
+
|
|
421
|
+
if (!line) continue;
|
|
422
|
+
|
|
423
|
+
try {
|
|
424
|
+
const msg = JSON.parse(line);
|
|
425
|
+
this._handleJsonRpcMessage(agentId, msg);
|
|
426
|
+
} catch (err) {
|
|
427
|
+
console.warn(
|
|
428
|
+
`[agent-mgr] Non-JSON stdout from ${agentId}: ${line.slice(0, 200)}`
|
|
429
|
+
);
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
_handleJsonRpcMessage(agentId, msg) {
|
|
435
|
+
// Check if this is a response to a pending RPC call
|
|
436
|
+
if (msg.id && this.processes[agentId]) {
|
|
437
|
+
const pending = this.processes[agentId].pendingRpc[msg.id];
|
|
438
|
+
if (pending) {
|
|
439
|
+
clearTimeout(pending.timer);
|
|
440
|
+
delete this.processes[agentId].pendingRpc[msg.id];
|
|
441
|
+
|
|
442
|
+
if (msg.error) {
|
|
443
|
+
pending.reject(
|
|
444
|
+
new Error(msg.error.message || JSON.stringify(msg.error))
|
|
445
|
+
);
|
|
446
|
+
} else {
|
|
447
|
+
pending.resolve(msg.result);
|
|
448
|
+
}
|
|
449
|
+
return;
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
// It's a notification or unsolicited message — forward to renderer
|
|
454
|
+
this._sendToRenderer("agent:stdout", {
|
|
455
|
+
agentId,
|
|
456
|
+
message: msg,
|
|
457
|
+
timestamp: Date.now(),
|
|
458
|
+
});
|
|
459
|
+
|
|
460
|
+
// Handle specific notification methods
|
|
461
|
+
if (msg.method === "notification/send") {
|
|
462
|
+
this.emit("agent-notification", agentId, msg.params);
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
// ── Private: stderr handling (log lines) ─────────────────────────────
|
|
467
|
+
|
|
468
|
+
_handleStderr(agentId, data) {
|
|
469
|
+
const entry = this.processes[agentId];
|
|
470
|
+
if (!entry) return;
|
|
471
|
+
|
|
472
|
+
const lines = data.toString().split("\n");
|
|
473
|
+
for (const rawLine of lines) {
|
|
474
|
+
const line = rawLine.trimEnd();
|
|
475
|
+
if (!line) continue;
|
|
476
|
+
|
|
477
|
+
// Add to circular buffer
|
|
478
|
+
entry.stderrBuffer.push(line);
|
|
479
|
+
if (entry.stderrBuffer.length > STDERR_BUFFER_MAX) {
|
|
480
|
+
entry.stderrBuffer.shift();
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
// Forward to renderer
|
|
484
|
+
this._sendToRenderer("agent:stderr", {
|
|
485
|
+
agentId,
|
|
486
|
+
line,
|
|
487
|
+
timestamp: Date.now(),
|
|
488
|
+
});
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
// ── Private: Process exit & crash recovery ───────────────────────────
|
|
493
|
+
|
|
494
|
+
_handleProcessExit(agentId, code, signal) {
|
|
495
|
+
const entry = this.processes[agentId];
|
|
496
|
+
|
|
497
|
+
// If the agent was intentionally stopped via stopAgent(), skip crash handling.
|
|
498
|
+
// stopAgent() will handle cleanup and status-change emission itself.
|
|
499
|
+
if (entry && entry.stopping) {
|
|
500
|
+
console.log(`[agent-mgr] Agent ${agentId} exited during intentional stop — skipping crash handler`);
|
|
501
|
+
return;
|
|
502
|
+
}
|
|
503
|
+
|
|
504
|
+
// Unexpected exit — log and handle crash recovery
|
|
505
|
+
if (code !== 0 && code !== null) {
|
|
506
|
+
this._logCrash(agentId, code, signal);
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
// Notify renderer of unexpected exit
|
|
510
|
+
this._sendToRenderer("agent:crashed", {
|
|
511
|
+
agentId,
|
|
512
|
+
exitCode: code,
|
|
513
|
+
signal,
|
|
514
|
+
timestamp: Date.now(),
|
|
515
|
+
});
|
|
516
|
+
|
|
517
|
+
this._cleanupProcess(agentId);
|
|
518
|
+
this._emitStatusChange(agentId, "stopped");
|
|
519
|
+
|
|
520
|
+
// Check if crash recovery is enabled (only for non-zero exits)
|
|
521
|
+
const agentConfig = (this.config.agents || {})[agentId] || {};
|
|
522
|
+
if (agentConfig.restartOnCrash && code !== 0) {
|
|
523
|
+
this._attemptCrashRestart(agentId);
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
_attemptCrashRestart(agentId) {
|
|
528
|
+
// Track crash times for rate limiting
|
|
529
|
+
const now = Date.now();
|
|
530
|
+
const recentCrashes = this._crashTimes[agentId] || [];
|
|
531
|
+
|
|
532
|
+
// Filter to crashes within the window
|
|
533
|
+
const windowCrashes = recentCrashes.filter(
|
|
534
|
+
(t) => now - t < CRASH_WINDOW
|
|
535
|
+
);
|
|
536
|
+
windowCrashes.push(now);
|
|
537
|
+
this._crashTimes[agentId] = windowCrashes;
|
|
538
|
+
|
|
539
|
+
if (windowCrashes.length > MAX_CRASH_RESTARTS) {
|
|
540
|
+
console.warn(
|
|
541
|
+
`[agent-mgr] Agent ${agentId} crashed ${windowCrashes.length} times in ${CRASH_WINDOW / 1000}s — NOT restarting`
|
|
542
|
+
);
|
|
543
|
+
this.emit("agent-crash-limit", agentId, windowCrashes.length);
|
|
544
|
+
return;
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
console.log(
|
|
548
|
+
`[agent-mgr] Agent ${agentId} crashed — restarting in ${CRASH_RESTART_DELAY}ms (attempt ${windowCrashes.length}/${MAX_CRASH_RESTARTS})`
|
|
549
|
+
);
|
|
550
|
+
|
|
551
|
+
setTimeout(async () => {
|
|
552
|
+
try {
|
|
553
|
+
await this.startAgent(agentId);
|
|
554
|
+
console.log(`[agent-mgr] Agent ${agentId} restarted after crash`);
|
|
555
|
+
} catch (err) {
|
|
556
|
+
console.error(
|
|
557
|
+
`[agent-mgr] Failed to restart ${agentId} after crash:`,
|
|
558
|
+
err.message
|
|
559
|
+
);
|
|
560
|
+
}
|
|
561
|
+
}, CRASH_RESTART_DELAY);
|
|
562
|
+
}
|
|
563
|
+
|
|
564
|
+
// ── Private: JSON-RPC wire protocol ──────────────────────────────────
|
|
565
|
+
|
|
566
|
+
/**
|
|
567
|
+
* Send a raw JSON-RPC message via stdin.
|
|
568
|
+
* @param {string} agentId
|
|
569
|
+
* @param {string} method
|
|
570
|
+
* @param {object} params
|
|
571
|
+
* @param {string} [id] — if provided, it's a request; if omitted, a notification
|
|
572
|
+
*/
|
|
573
|
+
_sendJsonRpcRaw(agentId, method, params, id) {
|
|
574
|
+
const entry = this.processes[agentId];
|
|
575
|
+
if (!entry || !entry.process.stdin || entry.process.stdin.destroyed) {
|
|
576
|
+
throw new Error(`Cannot write to stdin of agent "${agentId}"`);
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
const msg = {
|
|
580
|
+
jsonrpc: "2.0",
|
|
581
|
+
method,
|
|
582
|
+
params: params || {},
|
|
583
|
+
};
|
|
584
|
+
if (id) msg.id = id;
|
|
585
|
+
|
|
586
|
+
const payload = JSON.stringify(msg) + "\n";
|
|
587
|
+
entry.process.stdin.write(payload);
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
// ── Private: Health check ────────────────────────────────────────────
|
|
591
|
+
|
|
592
|
+
async _healthCheck(agentId) {
|
|
593
|
+
if (!this.processes[agentId]) return;
|
|
594
|
+
|
|
595
|
+
try {
|
|
596
|
+
const result = await this.sendJsonRpc(agentId, "ping", {}, 10000);
|
|
597
|
+
// Agent is healthy
|
|
598
|
+
if (result && typeof result.memoryMB === "number") {
|
|
599
|
+
this.processes[agentId]._lastMemoryMB = result.memoryMB;
|
|
600
|
+
}
|
|
601
|
+
} catch (err) {
|
|
602
|
+
console.warn(
|
|
603
|
+
`[agent-mgr] Health check failed for ${agentId}:`,
|
|
604
|
+
err.message
|
|
605
|
+
);
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
// ── Private: Process cleanup ─────────────────────────────────────────
|
|
610
|
+
|
|
611
|
+
_cleanupProcess(agentId) {
|
|
612
|
+
const entry = this.processes[agentId];
|
|
613
|
+
if (!entry) return;
|
|
614
|
+
|
|
615
|
+
// Clear health check timer
|
|
616
|
+
if (entry.healthTimer) {
|
|
617
|
+
clearInterval(entry.healthTimer);
|
|
618
|
+
entry.healthTimer = null;
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
// Reject any pending RPC calls
|
|
622
|
+
for (const [rpcId, pending] of Object.entries(entry.pendingRpc)) {
|
|
623
|
+
clearTimeout(pending.timer);
|
|
624
|
+
pending.reject(new Error(`Agent "${agentId}" process exited`));
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
delete this.processes[agentId];
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
/**
|
|
631
|
+
* Wait for an agent process to exit within a timeout.
|
|
632
|
+
* @returns {Promise<boolean>} true if exited, false if timed out
|
|
633
|
+
*/
|
|
634
|
+
_waitForExit(agentId, timeoutMs) {
|
|
635
|
+
return new Promise((resolve) => {
|
|
636
|
+
const entry = this.processes[agentId];
|
|
637
|
+
if (!entry) {
|
|
638
|
+
resolve(true);
|
|
639
|
+
return;
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
// Check if process already exited (exitCode is set once the process exits)
|
|
643
|
+
if (entry.process.exitCode !== null) {
|
|
644
|
+
resolve(true);
|
|
645
|
+
return;
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
const timer = setTimeout(() => {
|
|
649
|
+
entry.process.removeListener("exit", onExit);
|
|
650
|
+
resolve(false);
|
|
651
|
+
}, timeoutMs);
|
|
652
|
+
|
|
653
|
+
const onExit = () => {
|
|
654
|
+
clearTimeout(timer);
|
|
655
|
+
resolve(true);
|
|
656
|
+
};
|
|
657
|
+
|
|
658
|
+
entry.process.once("exit", onExit);
|
|
659
|
+
});
|
|
660
|
+
}
|
|
661
|
+
|
|
662
|
+
// ── Private: Events ──────────────────────────────────────────────────
|
|
663
|
+
|
|
664
|
+
_emitStatusChange(agentId, status, detail) {
|
|
665
|
+
const payload = {
|
|
666
|
+
agentId,
|
|
667
|
+
status,
|
|
668
|
+
detail,
|
|
669
|
+
timestamp: Date.now(),
|
|
670
|
+
};
|
|
671
|
+
|
|
672
|
+
this._sendToRenderer("agent:status-change", payload);
|
|
673
|
+
this.emit("status-change", payload);
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
_sendToRenderer(channel, data) {
|
|
677
|
+
try {
|
|
678
|
+
if (this.mainWindow && !this.mainWindow.isDestroyed()) {
|
|
679
|
+
this.mainWindow.webContents.send(channel, data);
|
|
680
|
+
}
|
|
681
|
+
} catch (err) {
|
|
682
|
+
// Window may be closing
|
|
683
|
+
console.warn(`[agent-mgr] Could not send to renderer:`, err.message);
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
// ── Private: Manifest & config ───────────────────────────────────────
|
|
688
|
+
|
|
689
|
+
_loadManifest() {
|
|
690
|
+
// Try multiple locations
|
|
691
|
+
const candidates = [
|
|
692
|
+
path.join(__dirname, "..", MANIFEST_FILENAME), // alongside main.cjs
|
|
693
|
+
path.join(GAIA_DIR, MANIFEST_FILENAME), // ~/.gaia/
|
|
694
|
+
path.join(AGENTS_DIR, MANIFEST_FILENAME), // ~/.gaia/agents/
|
|
695
|
+
];
|
|
696
|
+
|
|
697
|
+
for (const candidate of candidates) {
|
|
698
|
+
try {
|
|
699
|
+
if (fs.existsSync(candidate)) {
|
|
700
|
+
const raw = fs.readFileSync(candidate, "utf8");
|
|
701
|
+
const manifest = JSON.parse(raw);
|
|
702
|
+
console.log(`[agent-mgr] Loaded manifest from ${candidate}`);
|
|
703
|
+
return manifest;
|
|
704
|
+
}
|
|
705
|
+
} catch (err) {
|
|
706
|
+
console.warn(
|
|
707
|
+
`[agent-mgr] Error reading manifest from ${candidate}:`,
|
|
708
|
+
err.message
|
|
709
|
+
);
|
|
710
|
+
}
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
console.log("[agent-mgr] No agent manifest found — starting with empty manifest");
|
|
714
|
+
return { manifest_version: 1, agents: [] };
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
_loadConfig() {
|
|
718
|
+
try {
|
|
719
|
+
if (fs.existsSync(CONFIG_PATH)) {
|
|
720
|
+
const raw = fs.readFileSync(CONFIG_PATH, "utf8");
|
|
721
|
+
return JSON.parse(raw);
|
|
722
|
+
}
|
|
723
|
+
} catch (err) {
|
|
724
|
+
console.warn("[agent-mgr] Could not load config:", err.message);
|
|
725
|
+
}
|
|
726
|
+
return { agents: {}, tray: {} };
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
_getAgentInfo(agentId) {
|
|
730
|
+
if (!this.manifest || !this.manifest.agents) return null;
|
|
731
|
+
return this.manifest.agents.find((a) => a.id === agentId) || null;
|
|
732
|
+
}
|
|
733
|
+
|
|
734
|
+
_resolveBinaryPath(agentInfo) {
|
|
735
|
+
const platform = process.platform; // "win32", "darwin", "linux"
|
|
736
|
+
const binaryName =
|
|
737
|
+
agentInfo.binaries && agentInfo.binaries[platform];
|
|
738
|
+
|
|
739
|
+
if (!binaryName) return null;
|
|
740
|
+
|
|
741
|
+
// Check in ~/.gaia/agents/{agentId}/
|
|
742
|
+
return path.join(AGENTS_DIR, agentInfo.id, binaryName);
|
|
743
|
+
}
|
|
744
|
+
|
|
745
|
+
// ── Private: Crash logging ───────────────────────────────────────────
|
|
746
|
+
|
|
747
|
+
_logCrash(agentId, code, signal) {
|
|
748
|
+
try {
|
|
749
|
+
let crashLog = [];
|
|
750
|
+
if (fs.existsSync(CRASH_LOG_PATH)) {
|
|
751
|
+
crashLog = JSON.parse(fs.readFileSync(CRASH_LOG_PATH, "utf8"));
|
|
752
|
+
}
|
|
753
|
+
|
|
754
|
+
crashLog.push({
|
|
755
|
+
agentId,
|
|
756
|
+
exitCode: code,
|
|
757
|
+
signal,
|
|
758
|
+
timestamp: new Date().toISOString(),
|
|
759
|
+
});
|
|
760
|
+
|
|
761
|
+
// Keep last 100 entries
|
|
762
|
+
if (crashLog.length > 100) {
|
|
763
|
+
crashLog = crashLog.slice(-100);
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
if (!fs.existsSync(GAIA_DIR)) {
|
|
767
|
+
fs.mkdirSync(GAIA_DIR, { recursive: true });
|
|
768
|
+
}
|
|
769
|
+
fs.writeFileSync(CRASH_LOG_PATH, JSON.stringify(crashLog, null, 2), "utf8");
|
|
770
|
+
} catch (err) {
|
|
771
|
+
console.warn("[agent-mgr] Could not write crash log:", err.message);
|
|
772
|
+
}
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
// ── Private: IPC handlers ────────────────────────────────────────────
|
|
776
|
+
|
|
777
|
+
_registerIpcHandlers() {
|
|
778
|
+
ipcMain.handle("agent:start", async (_event, agentId) => {
|
|
779
|
+
return this.startAgent(agentId);
|
|
780
|
+
});
|
|
781
|
+
|
|
782
|
+
ipcMain.handle("agent:stop", async (_event, agentId) => {
|
|
783
|
+
return this.stopAgent(agentId);
|
|
784
|
+
});
|
|
785
|
+
|
|
786
|
+
ipcMain.handle("agent:restart", async (_event, agentId) => {
|
|
787
|
+
return this.restartAgent(agentId);
|
|
788
|
+
});
|
|
789
|
+
|
|
790
|
+
ipcMain.handle("agent:status", (_event, agentId) => {
|
|
791
|
+
return this.getAgentStatus(agentId);
|
|
792
|
+
});
|
|
793
|
+
|
|
794
|
+
ipcMain.handle("agent:status-all", () => {
|
|
795
|
+
return this.getAllAgentStatuses();
|
|
796
|
+
});
|
|
797
|
+
|
|
798
|
+
ipcMain.handle("agent:send-rpc", async (_event, agentId, method, params) => {
|
|
799
|
+
return this.sendJsonRpc(agentId, method, params);
|
|
800
|
+
});
|
|
801
|
+
|
|
802
|
+
ipcMain.handle("agent:get-manifest", () => {
|
|
803
|
+
return this.getManifest();
|
|
804
|
+
});
|
|
805
|
+
|
|
806
|
+
ipcMain.handle("agent:install", async (_event, agentId) => {
|
|
807
|
+
// TODO: T7 — agent installer integration
|
|
808
|
+
throw new Error("Agent installation not yet implemented");
|
|
809
|
+
});
|
|
810
|
+
|
|
811
|
+
ipcMain.handle("agent:uninstall", async (_event, agentId) => {
|
|
812
|
+
// TODO: T7 — agent uninstaller integration
|
|
813
|
+
throw new Error("Agent uninstallation not yet implemented");
|
|
814
|
+
});
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
module.exports = AgentProcessManager;
|