@agentmeshhq/agent 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/loader.test.js +44 -1
- package/dist/__tests__/loader.test.js.map +1 -1
- package/dist/__tests__/runner.test.js.map +1 -1
- package/dist/__tests__/sandbox.test.js.map +1 -1
- package/dist/__tests__/watchdog.test.d.ts +1 -0
- package/dist/__tests__/watchdog.test.js +290 -0
- package/dist/__tests__/watchdog.test.js.map +1 -0
- package/dist/cli/attach.js +20 -1
- package/dist/cli/attach.js.map +1 -1
- package/dist/cli/build.js +8 -2
- package/dist/cli/build.js.map +1 -1
- package/dist/cli/context.js.map +1 -1
- package/dist/cli/deploy.js +1 -1
- package/dist/cli/deploy.js.map +1 -1
- package/dist/cli/init.js +1 -1
- package/dist/cli/init.js.map +1 -1
- package/dist/cli/list.js +3 -3
- package/dist/cli/list.js.map +1 -1
- package/dist/cli/local.js +5 -3
- package/dist/cli/local.js.map +1 -1
- package/dist/cli/migrate.js +1 -1
- package/dist/cli/migrate.js.map +1 -1
- package/dist/cli/nudge.js +16 -3
- package/dist/cli/nudge.js.map +1 -1
- package/dist/cli/restart.js.map +1 -1
- package/dist/cli/slack.js +1 -1
- package/dist/cli/slack.js.map +1 -1
- package/dist/cli/stop.js +13 -5
- package/dist/cli/stop.js.map +1 -1
- package/dist/cli/test.js +1 -1
- package/dist/cli/test.js.map +1 -1
- package/dist/cli/token.js +2 -2
- package/dist/cli/token.js.map +1 -1
- package/dist/config/loader.d.ts +5 -1
- package/dist/config/loader.js +27 -2
- package/dist/config/loader.js.map +1 -1
- package/dist/config/schema.d.ts +13 -0
- package/dist/core/daemon.d.ts +32 -1
- package/dist/core/daemon.js +395 -19
- package/dist/core/daemon.js.map +1 -1
- package/dist/core/injector.d.ts +2 -2
- package/dist/core/injector.js +23 -4
- package/dist/core/injector.js.map +1 -1
- package/dist/core/runner.d.ts +1 -1
- package/dist/core/runner.js +44 -1
- package/dist/core/runner.js.map +1 -1
- package/dist/core/sandbox.d.ts +11 -0
- package/dist/core/sandbox.js +34 -2
- package/dist/core/sandbox.js.map +1 -1
- package/dist/core/tmux.d.ts +9 -0
- package/dist/core/tmux.js +105 -11
- package/dist/core/tmux.js.map +1 -1
- package/dist/core/watchdog.d.ts +41 -0
- package/dist/core/watchdog.js +198 -0
- package/dist/core/watchdog.js.map +1 -0
- package/dist/core/websocket.js +1 -1
- package/dist/core/websocket.js.map +1 -1
- package/dist/index.d.ts +5 -5
- package/dist/index.js +5 -5
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/__tests__/loader.test.ts +52 -4
- package/src/__tests__/runner.test.ts +1 -2
- package/src/__tests__/sandbox.test.ts +1 -1
- package/src/__tests__/watchdog.test.ts +368 -0
- package/src/cli/attach.ts +22 -1
- package/src/cli/build.ts +12 -4
- package/src/cli/context.ts +0 -1
- package/src/cli/deploy.ts +7 -5
- package/src/cli/init.ts +7 -19
- package/src/cli/list.ts +6 -10
- package/src/cli/local.ts +21 -12
- package/src/cli/migrate.ts +6 -4
- package/src/cli/nudge.ts +29 -14
- package/src/cli/restart.ts +1 -1
- package/src/cli/slack.ts +16 -15
- package/src/cli/stop.ts +14 -5
- package/src/cli/test.ts +5 -3
- package/src/cli/token.ts +4 -4
- package/src/config/loader.ts +29 -2
- package/src/config/schema.ts +14 -0
- package/src/core/daemon.ts +474 -24
- package/src/core/injector.ts +27 -4
- package/src/core/runner.ts +49 -1
- package/src/core/sandbox.ts +47 -2
- package/src/core/tmux.ts +135 -12
- package/src/core/watchdog.ts +238 -0
- package/src/core/websocket.ts +2 -2
- package/src/index.ts +6 -5
package/src/core/daemon.ts
CHANGED
|
@@ -6,31 +6,50 @@ import {
|
|
|
6
6
|
addAgentToState,
|
|
7
7
|
getAgentState,
|
|
8
8
|
loadConfig,
|
|
9
|
-
|
|
9
|
+
resetAgentRestartCount,
|
|
10
10
|
updateAgentInState,
|
|
11
11
|
} from "../config/loader.js";
|
|
12
|
-
import type { AgentConfig, Config } from "../config/schema.js";
|
|
12
|
+
import type { AgentConfig, AgentStatus, Config } from "../config/schema.js";
|
|
13
13
|
import { loadContext, loadOrCreateContext, saveContext } from "../context/index.js";
|
|
14
14
|
import { Heartbeat } from "./heartbeat.js";
|
|
15
15
|
import { handleWebSocketEvent, injectRestoredContext, injectStartupMessage } from "./injector.js";
|
|
16
16
|
import { checkInbox, fetchAssignments, registerAgent, type ServerContext } from "./registry.js";
|
|
17
|
-
import {
|
|
18
|
-
buildRunnerConfig,
|
|
19
|
-
detectRunner,
|
|
20
|
-
getRunnerDisplayName,
|
|
21
|
-
type RunnerConfig,
|
|
22
|
-
} from "./runner.js";
|
|
17
|
+
import { buildRunnerConfig, getRunnerDisplayName, type RunnerConfig } from "./runner.js";
|
|
23
18
|
import { DockerSandbox } from "./sandbox.js";
|
|
24
19
|
import {
|
|
25
20
|
captureSessionContext,
|
|
21
|
+
captureSessionOutput,
|
|
26
22
|
createSession,
|
|
27
23
|
destroySession,
|
|
28
24
|
getSessionName,
|
|
25
|
+
isSessionHealthy,
|
|
29
26
|
sessionExists,
|
|
30
27
|
updateSessionEnvironment,
|
|
31
28
|
} from "./tmux.js";
|
|
29
|
+
import {
|
|
30
|
+
checkAgentProgress,
|
|
31
|
+
cleanupOrphanContainers,
|
|
32
|
+
isProcessRunning,
|
|
33
|
+
sendNudge,
|
|
34
|
+
} from "./watchdog.js";
|
|
32
35
|
import { AgentWebSocket } from "./websocket.js";
|
|
33
36
|
|
|
37
|
+
// Maximum number of auto-restart attempts
|
|
38
|
+
const MAX_RESTART_ATTEMPTS = 3;
|
|
39
|
+
// Time after which restart count resets (30 minutes of stable operation)
|
|
40
|
+
const RESTART_COUNT_RESET_MS = 30 * 60 * 1000;
|
|
41
|
+
// Time to wait after nudging before restarting (2 minutes)
|
|
42
|
+
const NUDGE_WAIT_MS = 2 * 60 * 1000;
|
|
43
|
+
|
|
44
|
+
// Path to the sandbox OpenCode config (permissive permissions)
|
|
45
|
+
const SANDBOX_OPENCODE_CONFIG_PATH = path.join(os.homedir(), ".agentmesh", "opencode-sandbox.json");
|
|
46
|
+
|
|
47
|
+
// Sandbox OpenCode config content - allow everything since container is sandboxed
|
|
48
|
+
const SANDBOX_OPENCODE_CONFIG = {
|
|
49
|
+
$schema: "https://opencode.ai/config.json",
|
|
50
|
+
permission: "allow",
|
|
51
|
+
};
|
|
52
|
+
|
|
34
53
|
export interface DaemonOptions {
|
|
35
54
|
name: string;
|
|
36
55
|
command?: string;
|
|
@@ -71,12 +90,18 @@ export class AgentDaemon {
|
|
|
71
90
|
private serveMode: boolean;
|
|
72
91
|
private servePort: number;
|
|
73
92
|
private serveProcess: ChildProcess | null = null;
|
|
74
|
-
private serverContext: ServerContext | undefined;
|
|
75
93
|
private sandboxMode: boolean;
|
|
76
94
|
private sandboxImage: string;
|
|
77
95
|
private sandboxCpu: string;
|
|
78
96
|
private sandboxMemory: string;
|
|
79
97
|
private sandbox: DockerSandbox | null = null;
|
|
98
|
+
private healthCheckInterval: ReturnType<typeof setInterval> | null = null;
|
|
99
|
+
private serverContext: ServerContext | undefined;
|
|
100
|
+
// Auto-restart tracking
|
|
101
|
+
private restartCount = 0;
|
|
102
|
+
private lastStableTime: Date | null = null;
|
|
103
|
+
private stuckSince: Date | null = null;
|
|
104
|
+
private nudgeSentAt: Date | null = null;
|
|
80
105
|
|
|
81
106
|
constructor(options: DaemonOptions) {
|
|
82
107
|
const config = loadConfig();
|
|
@@ -84,6 +109,10 @@ export class AgentDaemon {
|
|
|
84
109
|
throw new Error("No config found. Run 'agentmesh init' first.");
|
|
85
110
|
}
|
|
86
111
|
|
|
112
|
+
// Ensure config has required fields with defaults
|
|
113
|
+
if (!config.agents) config.agents = [];
|
|
114
|
+
if (!config.defaults) config.defaults = { command: "opencode", model: "claude-sonnet-4" };
|
|
115
|
+
|
|
87
116
|
this.config = config;
|
|
88
117
|
this.agentName = options.name;
|
|
89
118
|
this.shouldRestoreContext = options.restoreContext !== false;
|
|
@@ -135,9 +164,33 @@ export class AgentDaemon {
|
|
|
135
164
|
|
|
136
165
|
console.log(`Starting agent: ${this.agentName}`);
|
|
137
166
|
|
|
167
|
+
// Check for duplicate process
|
|
168
|
+
const existingState = getAgentState(this.agentName);
|
|
169
|
+
if (existingState && existingState.pid > 0) {
|
|
170
|
+
if (isProcessRunning(existingState.pid)) {
|
|
171
|
+
throw new Error(
|
|
172
|
+
`Agent "${this.agentName}" is already running (PID: ${existingState.pid}). ` +
|
|
173
|
+
`Use 'agentmesh stop ${this.agentName}' first.`,
|
|
174
|
+
);
|
|
175
|
+
}
|
|
176
|
+
// Process not running, clean up stale state
|
|
177
|
+
console.log(`Cleaning up stale state for PID ${existingState.pid}`);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
// Clean up orphan containers in sandbox mode
|
|
181
|
+
if (this.sandboxMode) {
|
|
182
|
+
const cleaned = cleanupOrphanContainers(this.agentName);
|
|
183
|
+
if (cleaned > 0) {
|
|
184
|
+
console.log(`Cleaned up ${cleaned} orphan container(s)`);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
// Reset restart count on manual start
|
|
189
|
+
this.restartCount = 0;
|
|
190
|
+
this.lastStableTime = new Date();
|
|
191
|
+
|
|
138
192
|
// Register with hub first (needed for assignment check)
|
|
139
193
|
console.log("Registering with AgentMesh hub...");
|
|
140
|
-
const existingState = getAgentState(this.agentName);
|
|
141
194
|
console.log(`Existing state: ${existingState ? `agentId=${existingState.agentId}` : "none"}`);
|
|
142
195
|
|
|
143
196
|
const registration = await registerAgent({
|
|
@@ -218,6 +271,7 @@ export class AgentDaemon {
|
|
|
218
271
|
assignedProject: this.assignedProject,
|
|
219
272
|
runtimeModel: this.runnerConfig.model,
|
|
220
273
|
runnerType: this.runnerConfig.type,
|
|
274
|
+
sandboxContainer: this.sandbox?.getContainerName(),
|
|
221
275
|
});
|
|
222
276
|
|
|
223
277
|
// Start heartbeat with auto-refresh
|
|
@@ -302,11 +356,11 @@ export class AgentDaemon {
|
|
|
302
356
|
});
|
|
303
357
|
this.ws.connect();
|
|
304
358
|
|
|
305
|
-
// Check inbox and auto-nudge
|
|
359
|
+
// Check inbox and auto-nudge with full handoff details
|
|
306
360
|
console.log("Checking inbox...");
|
|
307
361
|
try {
|
|
308
362
|
const inboxItems = await checkInbox(this.config.hubUrl, this.config.workspace, this.token);
|
|
309
|
-
injectStartupMessage(this.agentName, inboxItems.length);
|
|
363
|
+
injectStartupMessage(this.agentName, inboxItems.length, inboxItems);
|
|
310
364
|
} catch (error) {
|
|
311
365
|
console.error("Failed to check inbox:", error);
|
|
312
366
|
injectStartupMessage(this.agentName, 0);
|
|
@@ -326,6 +380,9 @@ export class AgentDaemon {
|
|
|
326
380
|
|
|
327
381
|
this.isRunning = true;
|
|
328
382
|
|
|
383
|
+
// Start session health monitoring (every 60 seconds)
|
|
384
|
+
this.startHealthMonitor();
|
|
385
|
+
|
|
329
386
|
console.log(`
|
|
330
387
|
Agent "${this.agentName}" is running.
|
|
331
388
|
|
|
@@ -344,11 +401,278 @@ Nudge agent:
|
|
|
344
401
|
process.on("SIGTERM", () => this.stop());
|
|
345
402
|
}
|
|
346
403
|
|
|
404
|
+
/**
|
|
405
|
+
* Starts periodic health monitoring for the tmux session
|
|
406
|
+
* Includes auto-restart logic and progress watchdog
|
|
407
|
+
*/
|
|
408
|
+
private startHealthMonitor(): void {
|
|
409
|
+
// Skip health monitoring for serve mode (no tmux session)
|
|
410
|
+
if (this.serveMode) return;
|
|
411
|
+
|
|
412
|
+
const logDir = path.join(os.homedir(), ".agentmesh", "logs");
|
|
413
|
+
if (!fs.existsSync(logDir)) {
|
|
414
|
+
fs.mkdirSync(logDir, { recursive: true });
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
this.healthCheckInterval = setInterval(async () => {
|
|
418
|
+
if (!this.isRunning) return;
|
|
419
|
+
|
|
420
|
+
// Reset restart count after stable operation
|
|
421
|
+
if (this.lastStableTime && this.restartCount > 0) {
|
|
422
|
+
const stableTime = Date.now() - this.lastStableTime.getTime();
|
|
423
|
+
if (stableTime > RESTART_COUNT_RESET_MS) {
|
|
424
|
+
console.log(`[HEALTH] Agent stable for 30+ minutes, resetting restart count`);
|
|
425
|
+
this.restartCount = 0;
|
|
426
|
+
resetAgentRestartCount(this.agentName);
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
// For sandbox mode, pass container name so health check looks inside container
|
|
431
|
+
const containerName = this.sandboxMode ? this.sandbox?.getContainerName() : undefined;
|
|
432
|
+
const health = isSessionHealthy(this.agentName, containerName);
|
|
433
|
+
|
|
434
|
+
if (!health.healthy) {
|
|
435
|
+
// Session died - attempt restart
|
|
436
|
+
await this.handleSessionDeath(health.reason || "unknown", logDir);
|
|
437
|
+
return;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
// Session is alive - check progress watchdog
|
|
441
|
+
const progress = checkAgentProgress(this.agentName, containerName);
|
|
442
|
+
|
|
443
|
+
if (progress.status === "permission_blocked" || progress.status === "stuck") {
|
|
444
|
+
await this.handleStuckAgent(progress);
|
|
445
|
+
} else if (progress.status === "active") {
|
|
446
|
+
// Agent is working - reset stuck tracking
|
|
447
|
+
if (this.stuckSince) {
|
|
448
|
+
console.log(`[HEALTH] Agent resumed activity`);
|
|
449
|
+
this.stuckSince = null;
|
|
450
|
+
this.nudgeSentAt = null;
|
|
451
|
+
updateAgentInState(this.agentName, { stuckSince: undefined, status: "running" });
|
|
452
|
+
}
|
|
453
|
+
this.lastStableTime = new Date();
|
|
454
|
+
}
|
|
455
|
+
}, 60000); // Check every 60 seconds
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
/**
|
|
459
|
+
* Handles session death - logs crash and attempts auto-restart
|
|
460
|
+
*/
|
|
461
|
+
private async handleSessionDeath(reason: string, logDir: string): Promise<void> {
|
|
462
|
+
const timestamp = new Date().toISOString();
|
|
463
|
+
const logFile = path.join(logDir, `crash-${this.agentName}.log`);
|
|
464
|
+
|
|
465
|
+
// Capture last session output before it's gone
|
|
466
|
+
let lastOutput = "";
|
|
467
|
+
try {
|
|
468
|
+
lastOutput = captureSessionOutput(this.agentName, 200) || "Unable to capture output";
|
|
469
|
+
} catch {
|
|
470
|
+
lastOutput = "Failed to capture session output";
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
const crashLog = `
|
|
474
|
+
================================================================================
|
|
475
|
+
AGENT CRASH DETECTED
|
|
476
|
+
================================================================================
|
|
477
|
+
Timestamp: ${timestamp}
|
|
478
|
+
Agent: ${this.agentName}
|
|
479
|
+
Agent ID: ${this.agentId}
|
|
480
|
+
Reason: ${reason}
|
|
481
|
+
Restart Count: ${this.restartCount}/${MAX_RESTART_ATTEMPTS}
|
|
482
|
+
Sandbox: ${this.sandboxMode ? this.sandbox?.getContainerName() : "none"}
|
|
483
|
+
Workdir: ${this.agentConfig.workdir}
|
|
484
|
+
Model: ${this.runnerConfig.model}
|
|
485
|
+
|
|
486
|
+
--- Last Session Output ---
|
|
487
|
+
${lastOutput}
|
|
488
|
+
================================================================================
|
|
489
|
+
|
|
490
|
+
`;
|
|
491
|
+
|
|
492
|
+
fs.appendFileSync(logFile, crashLog);
|
|
493
|
+
|
|
494
|
+
// Check if we can restart
|
|
495
|
+
if (this.restartCount < MAX_RESTART_ATTEMPTS) {
|
|
496
|
+
this.restartCount++;
|
|
497
|
+
console.error(
|
|
498
|
+
`[CRASH] Session died: ${reason}. Attempting restart (${this.restartCount}/${MAX_RESTART_ATTEMPTS})...`,
|
|
499
|
+
);
|
|
500
|
+
|
|
501
|
+
updateAgentInState(this.agentName, {
|
|
502
|
+
restartCount: this.restartCount,
|
|
503
|
+
lastRestartAt: timestamp,
|
|
504
|
+
status: "running",
|
|
505
|
+
});
|
|
506
|
+
|
|
507
|
+
try {
|
|
508
|
+
await this.restartSession();
|
|
509
|
+
console.log(`[RESTART] Agent restarted successfully`);
|
|
510
|
+
this.lastStableTime = new Date();
|
|
511
|
+
} catch (error) {
|
|
512
|
+
console.error(`[RESTART] Failed to restart: ${(error as Error).message}`);
|
|
513
|
+
}
|
|
514
|
+
} else {
|
|
515
|
+
// Exceeded restart limit - mark as failed
|
|
516
|
+
console.error(
|
|
517
|
+
`[FAILED] Agent exceeded restart limit (${MAX_RESTART_ATTEMPTS}). Manual intervention required.`,
|
|
518
|
+
);
|
|
519
|
+
|
|
520
|
+
// Terminal bell to alert user
|
|
521
|
+
process.stdout.write("\x07");
|
|
522
|
+
|
|
523
|
+
updateAgentInState(this.agentName, {
|
|
524
|
+
status: "failed",
|
|
525
|
+
restartCount: this.restartCount,
|
|
526
|
+
});
|
|
527
|
+
|
|
528
|
+
// Stop monitoring
|
|
529
|
+
this.isRunning = false;
|
|
530
|
+
if (this.healthCheckInterval) {
|
|
531
|
+
clearInterval(this.healthCheckInterval);
|
|
532
|
+
this.healthCheckInterval = null;
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
/**
|
|
538
|
+
* Handles stuck agent - sends nudge first, then restarts if still stuck
|
|
539
|
+
*/
|
|
540
|
+
private async handleStuckAgent(progress: {
|
|
541
|
+
status: string;
|
|
542
|
+
blockedOn?: string;
|
|
543
|
+
details?: string;
|
|
544
|
+
}): Promise<void> {
|
|
545
|
+
const now = new Date();
|
|
546
|
+
|
|
547
|
+
if (!this.stuckSince) {
|
|
548
|
+
// First detection of stuck state
|
|
549
|
+
this.stuckSince = now;
|
|
550
|
+
console.log(
|
|
551
|
+
`[HEALTH] Agent appears stuck: ${progress.details || progress.blockedOn || "no activity"}`,
|
|
552
|
+
);
|
|
553
|
+
|
|
554
|
+
updateAgentInState(this.agentName, {
|
|
555
|
+
stuckSince: now.toISOString(),
|
|
556
|
+
status: "stuck",
|
|
557
|
+
});
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
// If we haven't sent a nudge yet, send one
|
|
561
|
+
if (!this.nudgeSentAt) {
|
|
562
|
+
console.log(`[HEALTH] Sending nudge to unstick agent...`);
|
|
563
|
+
|
|
564
|
+
const nudgeMessage =
|
|
565
|
+
progress.status === "permission_blocked"
|
|
566
|
+
? "Please continue with your task. If you see a permission prompt, try an alternative approach that doesn't require that permission."
|
|
567
|
+
: "Please continue with your current task.";
|
|
568
|
+
|
|
569
|
+
const sent = sendNudge(this.agentName, nudgeMessage);
|
|
570
|
+
if (sent) {
|
|
571
|
+
this.nudgeSentAt = now;
|
|
572
|
+
console.log(`[HEALTH] Nudge sent successfully`);
|
|
573
|
+
} else {
|
|
574
|
+
console.log(`[HEALTH] Failed to send nudge`);
|
|
575
|
+
}
|
|
576
|
+
return;
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
// Check if enough time has passed since nudge
|
|
580
|
+
const timeSinceNudge = now.getTime() - this.nudgeSentAt.getTime();
|
|
581
|
+
if (timeSinceNudge < NUDGE_WAIT_MS) {
|
|
582
|
+
// Still waiting for agent to respond to nudge
|
|
583
|
+
return;
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
// Agent still stuck after nudge - trigger restart
|
|
587
|
+
console.log(`[HEALTH] Agent still stuck after nudge, triggering restart...`);
|
|
588
|
+
this.stuckSince = null;
|
|
589
|
+
this.nudgeSentAt = null;
|
|
590
|
+
|
|
591
|
+
await this.handleSessionDeath(
|
|
592
|
+
"stuck_after_nudge",
|
|
593
|
+
path.join(os.homedir(), ".agentmesh", "logs"),
|
|
594
|
+
);
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
/**
|
|
598
|
+
* Restarts the agent session (sandbox or non-sandbox)
|
|
599
|
+
*/
|
|
600
|
+
private async restartSession(): Promise<void> {
|
|
601
|
+
// Destroy existing session
|
|
602
|
+
destroySession(this.agentName);
|
|
603
|
+
|
|
604
|
+
if (this.sandboxMode && this.sandbox) {
|
|
605
|
+
// Restart sandbox container
|
|
606
|
+
const newContainerId = await this.sandbox.restart();
|
|
607
|
+
console.log(`[RESTART] New container: ${newContainerId.substring(0, 12)}`);
|
|
608
|
+
|
|
609
|
+
// Recreate tmux session for sandbox
|
|
610
|
+
const containerName = this.sandbox.getContainerName();
|
|
611
|
+
const sessionName = getSessionName(this.agentName);
|
|
612
|
+
|
|
613
|
+
// Build environment args for docker exec
|
|
614
|
+
const envArgs: string[] = [];
|
|
615
|
+
const allEnv = {
|
|
616
|
+
...this.runnerConfig.env,
|
|
617
|
+
AGENT_TOKEN: this.token!,
|
|
618
|
+
AGENTMESH_AGENT_ID: this.agentId!,
|
|
619
|
+
};
|
|
620
|
+
for (const [key, value] of Object.entries(allEnv)) {
|
|
621
|
+
if (value !== undefined && value !== "") {
|
|
622
|
+
envArgs.push(`-e "${key}=${value}"`);
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
const envString = envArgs.join(" ");
|
|
626
|
+
const modelArg = this.runnerConfig.env?.OPENCODE_MODEL
|
|
627
|
+
? ` --model ${this.runnerConfig.env.OPENCODE_MODEL}`
|
|
628
|
+
: "";
|
|
629
|
+
const dockerExecCommand = `docker exec -it ${envString} ${containerName} opencode${modelArg}`;
|
|
630
|
+
|
|
631
|
+
const created = createSession(this.agentName, dockerExecCommand, undefined, undefined);
|
|
632
|
+
if (!created) {
|
|
633
|
+
throw new Error("Failed to create tmux session for restarted sandbox");
|
|
634
|
+
}
|
|
635
|
+
|
|
636
|
+
// Update state with new container name
|
|
637
|
+
updateAgentInState(this.agentName, {
|
|
638
|
+
sandboxContainer: containerName,
|
|
639
|
+
});
|
|
640
|
+
} else {
|
|
641
|
+
// Non-sandbox restart - just recreate tmux session
|
|
642
|
+
const created = createSession(
|
|
643
|
+
this.agentName,
|
|
644
|
+
this.agentConfig.command,
|
|
645
|
+
this.agentConfig.workdir,
|
|
646
|
+
this.runnerConfig.env,
|
|
647
|
+
);
|
|
648
|
+
|
|
649
|
+
if (!created) {
|
|
650
|
+
throw new Error("Failed to create tmux session");
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
// Re-inject environment
|
|
654
|
+
updateSessionEnvironment(this.agentName, {
|
|
655
|
+
AGENT_TOKEN: this.token!,
|
|
656
|
+
AGENTMESH_AGENT_ID: this.agentId!,
|
|
657
|
+
...this.runnerConfig.env,
|
|
658
|
+
});
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
// Wait for session to be ready
|
|
662
|
+
await new Promise((resolve) => setTimeout(resolve, 2000));
|
|
663
|
+
}
|
|
664
|
+
|
|
347
665
|
async stop(): Promise<void> {
|
|
348
666
|
console.log(`\nStopping agent: ${this.agentName}`);
|
|
349
667
|
|
|
350
668
|
this.isRunning = false;
|
|
351
669
|
|
|
670
|
+
// Stop health monitor
|
|
671
|
+
if (this.healthCheckInterval) {
|
|
672
|
+
clearInterval(this.healthCheckInterval);
|
|
673
|
+
this.healthCheckInterval = null;
|
|
674
|
+
}
|
|
675
|
+
|
|
352
676
|
// Save context before stopping
|
|
353
677
|
if (this.agentId) {
|
|
354
678
|
console.log("Saving agent context...");
|
|
@@ -369,7 +693,11 @@ Nudge agent:
|
|
|
369
693
|
|
|
370
694
|
// Stop sandbox, serve process, or destroy tmux session
|
|
371
695
|
if (this.sandboxMode && this.sandbox) {
|
|
372
|
-
console.log("Stopping sandbox
|
|
696
|
+
console.log("Stopping sandbox...");
|
|
697
|
+
// In sandbox mode, we have both a tmux session (on host) and a Docker container
|
|
698
|
+
// Destroy tmux session first (this stops docker exec)
|
|
699
|
+
destroySession(this.agentName);
|
|
700
|
+
// Then destroy the container
|
|
373
701
|
await this.sandbox.destroy();
|
|
374
702
|
this.sandbox = null;
|
|
375
703
|
} else if (this.serveMode && this.serveProcess) {
|
|
@@ -401,12 +729,35 @@ Nudge agent:
|
|
|
401
729
|
|
|
402
730
|
const workdir = this.agentConfig.workdir || process.cwd();
|
|
403
731
|
|
|
732
|
+
// Isolate OpenCode's SQLite database per agent to prevent WAL corruption.
|
|
733
|
+
// See docs/RCA-OPENCODE-SQLITE-CORRUPTION.md for details.
|
|
734
|
+
const agentDataDir = path.join(os.homedir(), ".agentmesh", "opencode-data", this.agentName);
|
|
735
|
+
const agentOpencodeDir = path.join(agentDataDir, "opencode");
|
|
736
|
+
if (!fs.existsSync(agentOpencodeDir)) {
|
|
737
|
+
fs.mkdirSync(agentOpencodeDir, { recursive: true });
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
// Copy auth.json from default OpenCode data dir so agents inherit API keys.
|
|
741
|
+
// Strips xAI provider to prevent OpenCode from defaulting to non-Anthropic models.
|
|
742
|
+
const agentAuthPath = path.join(agentOpencodeDir, "auth.json");
|
|
743
|
+
const sourceAuthPath = path.join(os.homedir(), ".local", "share", "opencode", "auth.json");
|
|
744
|
+
if (!fs.existsSync(agentAuthPath) && fs.existsSync(sourceAuthPath)) {
|
|
745
|
+
try {
|
|
746
|
+
const auth = JSON.parse(fs.readFileSync(sourceAuthPath, "utf-8"));
|
|
747
|
+
delete auth.xai;
|
|
748
|
+
fs.writeFileSync(agentAuthPath, JSON.stringify(auth, null, 2));
|
|
749
|
+
} catch {
|
|
750
|
+
// Non-fatal — agent will just need manual auth
|
|
751
|
+
}
|
|
752
|
+
}
|
|
753
|
+
|
|
404
754
|
// Build environment for opencode serve
|
|
405
755
|
const env: Record<string, string> = {
|
|
406
756
|
...process.env,
|
|
407
757
|
...this.runnerConfig.env,
|
|
408
758
|
AGENT_TOKEN: this.token!,
|
|
409
759
|
AGENTMESH_AGENT_ID: this.agentId!,
|
|
760
|
+
XDG_DATA_HOME: agentDataDir,
|
|
410
761
|
} as Record<string, string>;
|
|
411
762
|
|
|
412
763
|
// Spawn opencode serve as a child process
|
|
@@ -446,6 +797,10 @@ Nudge agent:
|
|
|
446
797
|
/**
|
|
447
798
|
* Starts agent in Docker sandbox mode
|
|
448
799
|
* Provides filesystem isolation with only workspace mounted
|
|
800
|
+
*
|
|
801
|
+
* Strategy: Start Docker container with tail -f /dev/null, then create
|
|
802
|
+
* a tmux session on the HOST that runs `docker exec -it <container> opencode`.
|
|
803
|
+
* This way tmux provides the TTY that docker exec needs.
|
|
449
804
|
*/
|
|
450
805
|
private async startSandboxMode(): Promise<void> {
|
|
451
806
|
console.log("Starting in Docker sandbox mode...");
|
|
@@ -463,11 +818,64 @@ Nudge agent:
|
|
|
463
818
|
const existingContainer = DockerSandbox.findExisting(this.agentName);
|
|
464
819
|
if (existingContainer) {
|
|
465
820
|
console.log(`Found existing sandbox container: ${existingContainer}`);
|
|
466
|
-
console.log(
|
|
821
|
+
console.log(`Stop it with: agentmesh stop ${this.agentName}`);
|
|
467
822
|
throw new Error("Sandbox container already exists");
|
|
468
823
|
}
|
|
469
824
|
|
|
825
|
+
// Build additional mounts for credentials and config
|
|
826
|
+
// The entrypoint script copies these from /tmp/ to the correct locations
|
|
827
|
+
const additionalMounts: string[] = [];
|
|
828
|
+
|
|
829
|
+
// Mount git credentials
|
|
830
|
+
const gitCredentialsPath = path.join(os.homedir(), ".git-credentials");
|
|
831
|
+
if (fs.existsSync(gitCredentialsPath)) {
|
|
832
|
+
additionalMounts.push(`${gitCredentialsPath}:/tmp/.git-credentials-host:ro`);
|
|
833
|
+
}
|
|
834
|
+
|
|
835
|
+
// Mount OpenCode auth.json for API provider tokens (Anthropic, OpenAI, etc.)
|
|
836
|
+
const opencodeAuthPath = path.join(os.homedir(), ".local", "share", "opencode", "auth.json");
|
|
837
|
+
if (fs.existsSync(opencodeAuthPath)) {
|
|
838
|
+
additionalMounts.push(`${opencodeAuthPath}:/tmp/.opencode-auth-host:ro`);
|
|
839
|
+
}
|
|
840
|
+
|
|
841
|
+
// Mount AgentMesh config for hub URL, API key, workspace
|
|
842
|
+
const agentmeshConfigPath = path.join(os.homedir(), ".agentmesh", "config.json");
|
|
843
|
+
if (fs.existsSync(agentmeshConfigPath)) {
|
|
844
|
+
additionalMounts.push(`${agentmeshConfigPath}:/tmp/.agentmesh-config-host:ro`);
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
// Create and mount permissive OpenCode config for sandbox
|
|
848
|
+
// This allows all permissions since the container is already sandboxed
|
|
849
|
+
this.ensureSandboxOpencodeConfig();
|
|
850
|
+
additionalMounts.push(`${SANDBOX_OPENCODE_CONFIG_PATH}:/workspace/opencode.json:ro`);
|
|
851
|
+
|
|
852
|
+
// Pass GitHub token as environment variable for git operations
|
|
853
|
+
const gitCredentials = fs.existsSync(gitCredentialsPath)
|
|
854
|
+
? fs.readFileSync(gitCredentialsPath, "utf-8").trim()
|
|
855
|
+
: "";
|
|
856
|
+
const gitHubToken = gitCredentials.match(/github_pat_[^\s@]+/)?.[0] || "";
|
|
857
|
+
|
|
858
|
+
// Build the command to run inside the container
|
|
859
|
+
// The agentmesh CLI inside the container will create tmux + opencode
|
|
860
|
+
const model =
|
|
861
|
+
this.runnerConfig.env?.OPENCODE_MODEL || this.runnerConfig.model || "claude-sonnet-4";
|
|
862
|
+
const containerCommand = [
|
|
863
|
+
"agentmesh",
|
|
864
|
+
"start",
|
|
865
|
+
"--name",
|
|
866
|
+
this.agentName,
|
|
867
|
+
"--model",
|
|
868
|
+
model,
|
|
869
|
+
"--foreground",
|
|
870
|
+
];
|
|
871
|
+
|
|
470
872
|
// Create sandbox configuration
|
|
873
|
+
// Isolate OpenCode's SQLite database per agent to prevent WAL corruption.
|
|
874
|
+
const hostDataDir = path.join(os.homedir(), ".agentmesh", "opencode-data", this.agentName);
|
|
875
|
+
if (!fs.existsSync(hostDataDir)) {
|
|
876
|
+
fs.mkdirSync(hostDataDir, { recursive: true });
|
|
877
|
+
}
|
|
878
|
+
|
|
471
879
|
this.sandbox = new DockerSandbox({
|
|
472
880
|
agentName: this.agentName,
|
|
473
881
|
image: this.sandboxImage,
|
|
@@ -478,10 +886,19 @@ Nudge agent:
|
|
|
478
886
|
...this.runnerConfig.env,
|
|
479
887
|
AGENT_TOKEN: this.token!,
|
|
480
888
|
AGENTMESH_AGENT_ID: this.agentId!,
|
|
889
|
+
AGENT_NAME: this.agentName,
|
|
890
|
+
// XDG_DATA_HOME set by entrypoint based on AGENT_NAME
|
|
891
|
+
// Git credentials for pushing to GitHub
|
|
892
|
+
...(gitHubToken && { GH_TOKEN: gitHubToken, GITHUB_TOKEN: gitHubToken }),
|
|
481
893
|
},
|
|
482
894
|
serveMode: this.serveMode,
|
|
483
895
|
servePort: this.servePort,
|
|
484
896
|
networkMode: "bridge",
|
|
897
|
+
additionalMounts: [
|
|
898
|
+
...additionalMounts,
|
|
899
|
+
`${hostDataDir}:/home/node/.agentmesh/opencode-data/${this.agentName}:rw`,
|
|
900
|
+
],
|
|
901
|
+
command: this.serveMode ? undefined : containerCommand,
|
|
485
902
|
});
|
|
486
903
|
|
|
487
904
|
// Validate mount policy (will throw if denied)
|
|
@@ -490,31 +907,37 @@ Nudge agent:
|
|
|
490
907
|
// Pull image if needed
|
|
491
908
|
await this.sandbox.pullImage();
|
|
492
909
|
|
|
493
|
-
// Start container
|
|
910
|
+
// Start container with agentmesh running inside
|
|
911
|
+
// The entrypoint script sets up credentials before agentmesh starts
|
|
494
912
|
await this.sandbox.start();
|
|
495
913
|
|
|
914
|
+
const containerName = this.sandbox.getContainerName();
|
|
915
|
+
|
|
496
916
|
console.log(`
|
|
497
917
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
498
|
-
|
|
918
|
+
SANDBOX MODE ACTIVE
|
|
499
919
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
500
920
|
|
|
501
|
-
Container: ${
|
|
921
|
+
Container: ${containerName}
|
|
502
922
|
Image: ${this.sandboxImage}
|
|
503
923
|
Workspace: ${workdir} -> /workspace
|
|
504
924
|
CPU: ${this.sandboxCpu} core(s)
|
|
505
925
|
Memory: ${this.sandboxMemory}
|
|
926
|
+
Model: ${model}
|
|
927
|
+
|
|
928
|
+
The agent daemon is running INSIDE the Docker container.
|
|
929
|
+
tmux session and OpenCode are managed inside the container.
|
|
506
930
|
|
|
507
|
-
|
|
508
|
-
|
|
931
|
+
Attach: agentmesh attach ${this.agentName}
|
|
932
|
+
Nudge: agentmesh nudge ${this.agentName} "message"
|
|
933
|
+
Stop: agentmesh stop ${this.agentName}
|
|
934
|
+
Logs: docker logs ${containerName}
|
|
509
935
|
|
|
510
936
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
511
937
|
`);
|
|
512
938
|
|
|
513
|
-
//
|
|
514
|
-
|
|
515
|
-
console.log("Starting opencode in sandbox container...");
|
|
516
|
-
await this.sandbox.spawnOpencode();
|
|
517
|
-
}
|
|
939
|
+
// No host tmux session needed - the container runs agentmesh which creates its own tmux
|
|
940
|
+
// Heartbeats are sent by the daemon running inside the container
|
|
518
941
|
}
|
|
519
942
|
|
|
520
943
|
/**
|
|
@@ -713,4 +1136,31 @@ Option 3: Use --auto-setup to automatically clone the repository:
|
|
|
713
1136
|
console.log(`✓ Workspace ready: ${workspacePath}\n`);
|
|
714
1137
|
return workspacePath;
|
|
715
1138
|
}
|
|
1139
|
+
|
|
1140
|
+
/**
|
|
1141
|
+
* Ensures the sandbox OpenCode config exists
|
|
1142
|
+
* Creates ~/.agentmesh/opencode-sandbox.json with permissive permissions and model
|
|
1143
|
+
*/
|
|
1144
|
+
private ensureSandboxOpencodeConfig(): void {
|
|
1145
|
+
const configDir = path.dirname(SANDBOX_OPENCODE_CONFIG_PATH);
|
|
1146
|
+
|
|
1147
|
+
if (!fs.existsSync(configDir)) {
|
|
1148
|
+
fs.mkdirSync(configDir, { recursive: true });
|
|
1149
|
+
}
|
|
1150
|
+
|
|
1151
|
+
// Build config with model if available
|
|
1152
|
+
const config: Record<string, unknown> = {
|
|
1153
|
+
...SANDBOX_OPENCODE_CONFIG,
|
|
1154
|
+
};
|
|
1155
|
+
|
|
1156
|
+
// Include model from runner config
|
|
1157
|
+
const model = this.runnerConfig.env?.OPENCODE_MODEL;
|
|
1158
|
+
if (model) {
|
|
1159
|
+
config.model = model;
|
|
1160
|
+
}
|
|
1161
|
+
|
|
1162
|
+
// Always write to ensure model is up to date
|
|
1163
|
+
fs.writeFileSync(SANDBOX_OPENCODE_CONFIG_PATH, JSON.stringify(config, null, 2));
|
|
1164
|
+
console.log(`Updated sandbox OpenCode config: ${SANDBOX_OPENCODE_CONFIG_PATH}`);
|
|
1165
|
+
}
|
|
716
1166
|
}
|