@agentmeshhq/agent 0.1.12 → 0.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/loader.test.js +44 -1
- package/dist/__tests__/loader.test.js.map +1 -1
- package/dist/__tests__/runner.test.js.map +1 -1
- package/dist/__tests__/sandbox.test.d.ts +1 -0
- package/dist/__tests__/sandbox.test.js +362 -0
- package/dist/__tests__/sandbox.test.js.map +1 -0
- package/dist/__tests__/watchdog.test.d.ts +1 -0
- package/dist/__tests__/watchdog.test.js +290 -0
- package/dist/__tests__/watchdog.test.js.map +1 -0
- package/dist/cli/attach.js +20 -1
- package/dist/cli/attach.js.map +1 -1
- package/dist/cli/build.js +8 -2
- package/dist/cli/build.js.map +1 -1
- package/dist/cli/context.js.map +1 -1
- package/dist/cli/deploy.js +1 -1
- package/dist/cli/deploy.js.map +1 -1
- package/dist/cli/inbox.d.ts +5 -0
- package/dist/cli/inbox.js +123 -0
- package/dist/cli/inbox.js.map +1 -0
- package/dist/cli/index.js +5 -1
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.js +1 -1
- package/dist/cli/init.js.map +1 -1
- package/dist/cli/issue.d.ts +42 -0
- package/dist/cli/issue.js +297 -0
- package/dist/cli/issue.js.map +1 -0
- package/dist/cli/list.js +3 -3
- package/dist/cli/list.js.map +1 -1
- package/dist/cli/local.js +5 -3
- package/dist/cli/local.js.map +1 -1
- package/dist/cli/migrate.js +1 -1
- package/dist/cli/migrate.js.map +1 -1
- package/dist/cli/nudge.js +16 -3
- package/dist/cli/nudge.js.map +1 -1
- package/dist/cli/ready.d.ts +5 -0
- package/dist/cli/ready.js +131 -0
- package/dist/cli/ready.js.map +1 -0
- package/dist/cli/restart.js.map +1 -1
- package/dist/cli/slack.js +1 -1
- package/dist/cli/slack.js.map +1 -1
- package/dist/cli/start.d.ts +8 -0
- package/dist/cli/start.js +9 -0
- package/dist/cli/start.js.map +1 -1
- package/dist/cli/stop.js +13 -5
- package/dist/cli/stop.js.map +1 -1
- package/dist/cli/sync.d.ts +8 -0
- package/dist/cli/sync.js +154 -0
- package/dist/cli/sync.js.map +1 -0
- package/dist/cli/test.js +1 -1
- package/dist/cli/test.js.map +1 -1
- package/dist/cli/token.js +2 -2
- package/dist/cli/token.js.map +1 -1
- package/dist/config/loader.d.ts +5 -1
- package/dist/config/loader.js +27 -2
- package/dist/config/loader.js.map +1 -1
- package/dist/config/schema.d.ts +13 -0
- package/dist/core/daemon.d.ts +50 -0
- package/dist/core/daemon.js +445 -11
- package/dist/core/daemon.js.map +1 -1
- package/dist/core/injector.d.ts +2 -2
- package/dist/core/injector.js +23 -4
- package/dist/core/injector.js.map +1 -1
- package/dist/core/issue-cache.d.ts +44 -0
- package/dist/core/issue-cache.js +75 -0
- package/dist/core/issue-cache.js.map +1 -0
- package/dist/core/registry.d.ts +5 -0
- package/dist/core/registry.js +8 -1
- package/dist/core/registry.js.map +1 -1
- package/dist/core/runner.d.ts +1 -1
- package/dist/core/runner.js +23 -1
- package/dist/core/runner.js.map +1 -1
- package/dist/core/sandbox.d.ts +138 -0
- package/dist/core/sandbox.js +409 -0
- package/dist/core/sandbox.js.map +1 -0
- package/dist/core/tmux.d.ts +8 -0
- package/dist/core/tmux.js +28 -1
- package/dist/core/tmux.js.map +1 -1
- package/dist/core/watchdog.d.ts +41 -0
- package/dist/core/watchdog.js +198 -0
- package/dist/core/watchdog.js.map +1 -0
- package/dist/core/websocket.js +1 -1
- package/dist/core/websocket.js.map +1 -1
- package/dist/index.d.ts +5 -5
- package/dist/index.js +5 -5
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/__tests__/loader.test.ts +52 -4
- package/src/__tests__/runner.test.ts +1 -2
- package/src/__tests__/sandbox.test.ts +435 -0
- package/src/__tests__/watchdog.test.ts +368 -0
- package/src/cli/attach.ts +22 -1
- package/src/cli/build.ts +12 -4
- package/src/cli/context.ts +0 -1
- package/src/cli/deploy.ts +7 -5
- package/src/cli/index.ts +8 -1
- package/src/cli/init.ts +7 -19
- package/src/cli/list.ts +6 -10
- package/src/cli/local.ts +21 -12
- package/src/cli/migrate.ts +6 -4
- package/src/cli/nudge.ts +29 -14
- package/src/cli/restart.ts +1 -1
- package/src/cli/slack.ts +16 -15
- package/src/cli/start.ts +14 -0
- package/src/cli/stop.ts +14 -5
- package/src/cli/test.ts +5 -3
- package/src/cli/token.ts +4 -4
- package/src/config/loader.ts +29 -2
- package/src/config/schema.ts +14 -0
- package/src/core/daemon.ts +540 -17
- package/src/core/injector.ts +27 -4
- package/src/core/registry.ts +14 -1
- package/src/core/runner.ts +26 -1
- package/src/core/sandbox.ts +550 -0
- package/src/core/tmux.ts +35 -2
- package/src/core/watchdog.ts +238 -0
- package/src/core/websocket.ts +2 -2
- package/src/index.ts +6 -5
package/src/core/daemon.ts
CHANGED
|
@@ -6,30 +6,50 @@ import {
|
|
|
6
6
|
addAgentToState,
|
|
7
7
|
getAgentState,
|
|
8
8
|
loadConfig,
|
|
9
|
-
|
|
9
|
+
resetAgentRestartCount,
|
|
10
10
|
updateAgentInState,
|
|
11
11
|
} from "../config/loader.js";
|
|
12
|
-
import type { AgentConfig, Config } from "../config/schema.js";
|
|
12
|
+
import type { AgentConfig, AgentStatus, Config } from "../config/schema.js";
|
|
13
13
|
import { loadContext, loadOrCreateContext, saveContext } from "../context/index.js";
|
|
14
14
|
import { Heartbeat } from "./heartbeat.js";
|
|
15
15
|
import { handleWebSocketEvent, injectRestoredContext, injectStartupMessage } from "./injector.js";
|
|
16
|
-
import { checkInbox, fetchAssignments, registerAgent } from "./registry.js";
|
|
17
|
-
import {
|
|
18
|
-
|
|
19
|
-
detectRunner,
|
|
20
|
-
getRunnerDisplayName,
|
|
21
|
-
type RunnerConfig,
|
|
22
|
-
} from "./runner.js";
|
|
16
|
+
import { checkInbox, fetchAssignments, registerAgent, type ServerContext } from "./registry.js";
|
|
17
|
+
import { buildRunnerConfig, getRunnerDisplayName, type RunnerConfig } from "./runner.js";
|
|
18
|
+
import { DockerSandbox } from "./sandbox.js";
|
|
23
19
|
import {
|
|
24
20
|
captureSessionContext,
|
|
21
|
+
captureSessionOutput,
|
|
25
22
|
createSession,
|
|
26
23
|
destroySession,
|
|
27
24
|
getSessionName,
|
|
25
|
+
isSessionHealthy,
|
|
28
26
|
sessionExists,
|
|
29
27
|
updateSessionEnvironment,
|
|
30
28
|
} from "./tmux.js";
|
|
29
|
+
import {
|
|
30
|
+
checkAgentProgress,
|
|
31
|
+
cleanupOrphanContainers,
|
|
32
|
+
isProcessRunning,
|
|
33
|
+
sendNudge,
|
|
34
|
+
} from "./watchdog.js";
|
|
31
35
|
import { AgentWebSocket } from "./websocket.js";
|
|
32
36
|
|
|
37
|
+
// Maximum number of auto-restart attempts
|
|
38
|
+
const MAX_RESTART_ATTEMPTS = 3;
|
|
39
|
+
// Time after which restart count resets (30 minutes of stable operation)
|
|
40
|
+
const RESTART_COUNT_RESET_MS = 30 * 60 * 1000;
|
|
41
|
+
// Time to wait after nudging before restarting (2 minutes)
|
|
42
|
+
const NUDGE_WAIT_MS = 2 * 60 * 1000;
|
|
43
|
+
|
|
44
|
+
// Path to the sandbox OpenCode config (permissive permissions)
|
|
45
|
+
const SANDBOX_OPENCODE_CONFIG_PATH = path.join(os.homedir(), ".agentmesh", "opencode-sandbox.json");
|
|
46
|
+
|
|
47
|
+
// Sandbox OpenCode config content - allow everything since container is sandboxed
|
|
48
|
+
const SANDBOX_OPENCODE_CONFIG = {
|
|
49
|
+
$schema: "https://opencode.ai/config.json",
|
|
50
|
+
permission: "allow",
|
|
51
|
+
};
|
|
52
|
+
|
|
33
53
|
export interface DaemonOptions {
|
|
34
54
|
name: string;
|
|
35
55
|
command?: string;
|
|
@@ -44,6 +64,14 @@ export interface DaemonOptions {
|
|
|
44
64
|
serve?: boolean;
|
|
45
65
|
/** Port for opencode serve (default: 3001) */
|
|
46
66
|
servePort?: number;
|
|
67
|
+
/** Run agent in Docker sandbox container */
|
|
68
|
+
sandbox?: boolean;
|
|
69
|
+
/** Docker image for sandbox (default: agentmesh/agent-sandbox:latest) */
|
|
70
|
+
sandboxImage?: string;
|
|
71
|
+
/** CPU limit for sandbox (default: 1) */
|
|
72
|
+
sandboxCpu?: string;
|
|
73
|
+
/** Memory limit for sandbox (default: 2g) */
|
|
74
|
+
sandboxMemory?: string;
|
|
47
75
|
}
|
|
48
76
|
|
|
49
77
|
export class AgentDaemon {
|
|
@@ -62,6 +90,18 @@ export class AgentDaemon {
|
|
|
62
90
|
private serveMode: boolean;
|
|
63
91
|
private servePort: number;
|
|
64
92
|
private serveProcess: ChildProcess | null = null;
|
|
93
|
+
private sandboxMode: boolean;
|
|
94
|
+
private sandboxImage: string;
|
|
95
|
+
private sandboxCpu: string;
|
|
96
|
+
private sandboxMemory: string;
|
|
97
|
+
private sandbox: DockerSandbox | null = null;
|
|
98
|
+
private healthCheckInterval: ReturnType<typeof setInterval> | null = null;
|
|
99
|
+
private serverContext: ServerContext | undefined;
|
|
100
|
+
// Auto-restart tracking
|
|
101
|
+
private restartCount = 0;
|
|
102
|
+
private lastStableTime: Date | null = null;
|
|
103
|
+
private stuckSince: Date | null = null;
|
|
104
|
+
private nudgeSentAt: Date | null = null;
|
|
65
105
|
|
|
66
106
|
constructor(options: DaemonOptions) {
|
|
67
107
|
const config = loadConfig();
|
|
@@ -69,6 +109,10 @@ export class AgentDaemon {
|
|
|
69
109
|
throw new Error("No config found. Run 'agentmesh init' first.");
|
|
70
110
|
}
|
|
71
111
|
|
|
112
|
+
// Ensure config has required fields with defaults
|
|
113
|
+
if (!config.agents) config.agents = [];
|
|
114
|
+
if (!config.defaults) config.defaults = { command: "opencode", model: "claude-sonnet-4" };
|
|
115
|
+
|
|
72
116
|
this.config = config;
|
|
73
117
|
this.agentName = options.name;
|
|
74
118
|
this.shouldRestoreContext = options.restoreContext !== false;
|
|
@@ -94,6 +138,10 @@ export class AgentDaemon {
|
|
|
94
138
|
this.agentConfig = agentConfig;
|
|
95
139
|
this.serveMode = options.serve === true;
|
|
96
140
|
this.servePort = options.servePort || 3001;
|
|
141
|
+
this.sandboxMode = options.sandbox === true;
|
|
142
|
+
this.sandboxImage = options.sandboxImage || "agentmesh/agent-sandbox:latest";
|
|
143
|
+
this.sandboxCpu = options.sandboxCpu || "1";
|
|
144
|
+
this.sandboxMemory = options.sandboxMemory || "2g";
|
|
97
145
|
|
|
98
146
|
// Build runner configuration with model resolution
|
|
99
147
|
this.runnerConfig = buildRunnerConfig({
|
|
@@ -116,9 +164,33 @@ export class AgentDaemon {
|
|
|
116
164
|
|
|
117
165
|
console.log(`Starting agent: ${this.agentName}`);
|
|
118
166
|
|
|
167
|
+
// Check for duplicate process
|
|
168
|
+
const existingState = getAgentState(this.agentName);
|
|
169
|
+
if (existingState && existingState.pid > 0) {
|
|
170
|
+
if (isProcessRunning(existingState.pid)) {
|
|
171
|
+
throw new Error(
|
|
172
|
+
`Agent "${this.agentName}" is already running (PID: ${existingState.pid}). ` +
|
|
173
|
+
`Use 'agentmesh stop ${this.agentName}' first.`,
|
|
174
|
+
);
|
|
175
|
+
}
|
|
176
|
+
// Process not running, clean up stale state
|
|
177
|
+
console.log(`Cleaning up stale state for PID ${existingState.pid}`);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
// Clean up orphan containers in sandbox mode
|
|
181
|
+
if (this.sandboxMode) {
|
|
182
|
+
const cleaned = cleanupOrphanContainers(this.agentName);
|
|
183
|
+
if (cleaned > 0) {
|
|
184
|
+
console.log(`Cleaned up ${cleaned} orphan container(s)`);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
// Reset restart count on manual start
|
|
189
|
+
this.restartCount = 0;
|
|
190
|
+
this.lastStableTime = new Date();
|
|
191
|
+
|
|
119
192
|
// Register with hub first (needed for assignment check)
|
|
120
193
|
console.log("Registering with AgentMesh hub...");
|
|
121
|
-
const existingState = getAgentState(this.agentName);
|
|
122
194
|
console.log(`Existing state: ${existingState ? `agentId=${existingState.agentId}` : "none"}`);
|
|
123
195
|
|
|
124
196
|
const registration = await registerAgent({
|
|
@@ -128,18 +200,29 @@ export class AgentDaemon {
|
|
|
128
200
|
agentId: existingState?.agentId || this.agentConfig.agentId,
|
|
129
201
|
agentName: this.agentName,
|
|
130
202
|
model: this.agentConfig.model || this.config.defaults.model,
|
|
203
|
+
restoreContext: this.shouldRestoreContext,
|
|
131
204
|
});
|
|
132
205
|
|
|
133
206
|
this.agentId = registration.agentId;
|
|
134
207
|
this.token = registration.token;
|
|
135
208
|
|
|
136
|
-
|
|
209
|
+
if (registration.status === "re-registered") {
|
|
210
|
+
console.log(`Re-registered as: ${this.agentId}`);
|
|
211
|
+
if (registration.context && Object.keys(registration.context).length > 0) {
|
|
212
|
+
this.serverContext = registration.context;
|
|
213
|
+
console.log(`Server context restored: ${Object.keys(registration.context).join(", ")}`);
|
|
214
|
+
}
|
|
215
|
+
} else {
|
|
216
|
+
console.log(`Registered as: ${this.agentId}`);
|
|
217
|
+
}
|
|
137
218
|
|
|
138
219
|
// Check assignments and auto-setup workdir if needed (before creating tmux session)
|
|
139
220
|
await this.checkAssignments();
|
|
140
221
|
|
|
141
|
-
//
|
|
142
|
-
if (this.
|
|
222
|
+
// Choose runtime mode: sandbox > serve > tmux
|
|
223
|
+
if (this.sandboxMode) {
|
|
224
|
+
await this.startSandboxMode();
|
|
225
|
+
} else if (this.serveMode) {
|
|
143
226
|
await this.startServeMode();
|
|
144
227
|
} else {
|
|
145
228
|
// Check if session already exists
|
|
@@ -188,6 +271,7 @@ export class AgentDaemon {
|
|
|
188
271
|
assignedProject: this.assignedProject,
|
|
189
272
|
runtimeModel: this.runnerConfig.model,
|
|
190
273
|
runnerType: this.runnerConfig.type,
|
|
274
|
+
sandboxContainer: this.sandbox?.getContainerName(),
|
|
191
275
|
});
|
|
192
276
|
|
|
193
277
|
// Start heartbeat with auto-refresh
|
|
@@ -272,11 +356,11 @@ export class AgentDaemon {
|
|
|
272
356
|
});
|
|
273
357
|
this.ws.connect();
|
|
274
358
|
|
|
275
|
-
// Check inbox and auto-nudge
|
|
359
|
+
// Check inbox and auto-nudge with full handoff details
|
|
276
360
|
console.log("Checking inbox...");
|
|
277
361
|
try {
|
|
278
362
|
const inboxItems = await checkInbox(this.config.hubUrl, this.config.workspace, this.token);
|
|
279
|
-
injectStartupMessage(this.agentName, inboxItems.length);
|
|
363
|
+
injectStartupMessage(this.agentName, inboxItems.length, inboxItems);
|
|
280
364
|
} catch (error) {
|
|
281
365
|
console.error("Failed to check inbox:", error);
|
|
282
366
|
injectStartupMessage(this.agentName, 0);
|
|
@@ -296,6 +380,9 @@ export class AgentDaemon {
|
|
|
296
380
|
|
|
297
381
|
this.isRunning = true;
|
|
298
382
|
|
|
383
|
+
// Start session health monitoring (every 60 seconds)
|
|
384
|
+
this.startHealthMonitor();
|
|
385
|
+
|
|
299
386
|
console.log(`
|
|
300
387
|
Agent "${this.agentName}" is running.
|
|
301
388
|
|
|
@@ -314,11 +401,277 @@ Nudge agent:
|
|
|
314
401
|
process.on("SIGTERM", () => this.stop());
|
|
315
402
|
}
|
|
316
403
|
|
|
404
|
+
/**
|
|
405
|
+
* Starts periodic health monitoring for the tmux session
|
|
406
|
+
* Includes auto-restart logic and progress watchdog
|
|
407
|
+
*/
|
|
408
|
+
private startHealthMonitor(): void {
|
|
409
|
+
// Skip health monitoring for serve mode (no tmux session)
|
|
410
|
+
if (this.serveMode) return;
|
|
411
|
+
|
|
412
|
+
const logDir = path.join(os.homedir(), ".agentmesh", "logs");
|
|
413
|
+
if (!fs.existsSync(logDir)) {
|
|
414
|
+
fs.mkdirSync(logDir, { recursive: true });
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
this.healthCheckInterval = setInterval(async () => {
|
|
418
|
+
if (!this.isRunning) return;
|
|
419
|
+
|
|
420
|
+
// Reset restart count after stable operation
|
|
421
|
+
if (this.lastStableTime && this.restartCount > 0) {
|
|
422
|
+
const stableTime = Date.now() - this.lastStableTime.getTime();
|
|
423
|
+
if (stableTime > RESTART_COUNT_RESET_MS) {
|
|
424
|
+
console.log(`[HEALTH] Agent stable for 30+ minutes, resetting restart count`);
|
|
425
|
+
this.restartCount = 0;
|
|
426
|
+
resetAgentRestartCount(this.agentName);
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
const health = isSessionHealthy(this.agentName);
|
|
431
|
+
|
|
432
|
+
if (!health.healthy) {
|
|
433
|
+
// Session died - attempt restart
|
|
434
|
+
await this.handleSessionDeath(health.reason || "unknown", logDir);
|
|
435
|
+
return;
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
// Session is alive - check progress watchdog
|
|
439
|
+
const containerName = this.sandboxMode ? this.sandbox?.getContainerName() : undefined;
|
|
440
|
+
const progress = checkAgentProgress(this.agentName, containerName);
|
|
441
|
+
|
|
442
|
+
if (progress.status === "permission_blocked" || progress.status === "stuck") {
|
|
443
|
+
await this.handleStuckAgent(progress);
|
|
444
|
+
} else if (progress.status === "active") {
|
|
445
|
+
// Agent is working - reset stuck tracking
|
|
446
|
+
if (this.stuckSince) {
|
|
447
|
+
console.log(`[HEALTH] Agent resumed activity`);
|
|
448
|
+
this.stuckSince = null;
|
|
449
|
+
this.nudgeSentAt = null;
|
|
450
|
+
updateAgentInState(this.agentName, { stuckSince: undefined, status: "running" });
|
|
451
|
+
}
|
|
452
|
+
this.lastStableTime = new Date();
|
|
453
|
+
}
|
|
454
|
+
}, 60000); // Check every 60 seconds
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
/**
|
|
458
|
+
* Handles session death - logs crash and attempts auto-restart
|
|
459
|
+
*/
|
|
460
|
+
private async handleSessionDeath(reason: string, logDir: string): Promise<void> {
|
|
461
|
+
const timestamp = new Date().toISOString();
|
|
462
|
+
const logFile = path.join(logDir, `crash-${this.agentName}.log`);
|
|
463
|
+
|
|
464
|
+
// Capture last session output before it's gone
|
|
465
|
+
let lastOutput = "";
|
|
466
|
+
try {
|
|
467
|
+
lastOutput = captureSessionOutput(this.agentName, 200) || "Unable to capture output";
|
|
468
|
+
} catch {
|
|
469
|
+
lastOutput = "Failed to capture session output";
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
const crashLog = `
|
|
473
|
+
================================================================================
|
|
474
|
+
AGENT CRASH DETECTED
|
|
475
|
+
================================================================================
|
|
476
|
+
Timestamp: ${timestamp}
|
|
477
|
+
Agent: ${this.agentName}
|
|
478
|
+
Agent ID: ${this.agentId}
|
|
479
|
+
Reason: ${reason}
|
|
480
|
+
Restart Count: ${this.restartCount}/${MAX_RESTART_ATTEMPTS}
|
|
481
|
+
Sandbox: ${this.sandboxMode ? this.sandbox?.getContainerName() : "none"}
|
|
482
|
+
Workdir: ${this.agentConfig.workdir}
|
|
483
|
+
Model: ${this.runnerConfig.model}
|
|
484
|
+
|
|
485
|
+
--- Last Session Output ---
|
|
486
|
+
${lastOutput}
|
|
487
|
+
================================================================================
|
|
488
|
+
|
|
489
|
+
`;
|
|
490
|
+
|
|
491
|
+
fs.appendFileSync(logFile, crashLog);
|
|
492
|
+
|
|
493
|
+
// Check if we can restart
|
|
494
|
+
if (this.restartCount < MAX_RESTART_ATTEMPTS) {
|
|
495
|
+
this.restartCount++;
|
|
496
|
+
console.error(
|
|
497
|
+
`[CRASH] Session died: ${reason}. Attempting restart (${this.restartCount}/${MAX_RESTART_ATTEMPTS})...`,
|
|
498
|
+
);
|
|
499
|
+
|
|
500
|
+
updateAgentInState(this.agentName, {
|
|
501
|
+
restartCount: this.restartCount,
|
|
502
|
+
lastRestartAt: timestamp,
|
|
503
|
+
status: "running",
|
|
504
|
+
});
|
|
505
|
+
|
|
506
|
+
try {
|
|
507
|
+
await this.restartSession();
|
|
508
|
+
console.log(`[RESTART] Agent restarted successfully`);
|
|
509
|
+
this.lastStableTime = new Date();
|
|
510
|
+
} catch (error) {
|
|
511
|
+
console.error(`[RESTART] Failed to restart: ${(error as Error).message}`);
|
|
512
|
+
}
|
|
513
|
+
} else {
|
|
514
|
+
// Exceeded restart limit - mark as failed
|
|
515
|
+
console.error(
|
|
516
|
+
`[FAILED] Agent exceeded restart limit (${MAX_RESTART_ATTEMPTS}). Manual intervention required.`,
|
|
517
|
+
);
|
|
518
|
+
|
|
519
|
+
// Terminal bell to alert user
|
|
520
|
+
process.stdout.write("\x07");
|
|
521
|
+
|
|
522
|
+
updateAgentInState(this.agentName, {
|
|
523
|
+
status: "failed",
|
|
524
|
+
restartCount: this.restartCount,
|
|
525
|
+
});
|
|
526
|
+
|
|
527
|
+
// Stop monitoring
|
|
528
|
+
this.isRunning = false;
|
|
529
|
+
if (this.healthCheckInterval) {
|
|
530
|
+
clearInterval(this.healthCheckInterval);
|
|
531
|
+
this.healthCheckInterval = null;
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
|
|
536
|
+
/**
|
|
537
|
+
* Handles stuck agent - sends nudge first, then restarts if still stuck
|
|
538
|
+
*/
|
|
539
|
+
private async handleStuckAgent(progress: {
|
|
540
|
+
status: string;
|
|
541
|
+
blockedOn?: string;
|
|
542
|
+
details?: string;
|
|
543
|
+
}): Promise<void> {
|
|
544
|
+
const now = new Date();
|
|
545
|
+
|
|
546
|
+
if (!this.stuckSince) {
|
|
547
|
+
// First detection of stuck state
|
|
548
|
+
this.stuckSince = now;
|
|
549
|
+
console.log(
|
|
550
|
+
`[HEALTH] Agent appears stuck: ${progress.details || progress.blockedOn || "no activity"}`,
|
|
551
|
+
);
|
|
552
|
+
|
|
553
|
+
updateAgentInState(this.agentName, {
|
|
554
|
+
stuckSince: now.toISOString(),
|
|
555
|
+
status: "stuck",
|
|
556
|
+
});
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
// If we haven't sent a nudge yet, send one
|
|
560
|
+
if (!this.nudgeSentAt) {
|
|
561
|
+
console.log(`[HEALTH] Sending nudge to unstick agent...`);
|
|
562
|
+
|
|
563
|
+
const nudgeMessage =
|
|
564
|
+
progress.status === "permission_blocked"
|
|
565
|
+
? "Please continue with your task. If you see a permission prompt, try an alternative approach that doesn't require that permission."
|
|
566
|
+
: "Please continue with your current task.";
|
|
567
|
+
|
|
568
|
+
const sent = sendNudge(this.agentName, nudgeMessage);
|
|
569
|
+
if (sent) {
|
|
570
|
+
this.nudgeSentAt = now;
|
|
571
|
+
console.log(`[HEALTH] Nudge sent successfully`);
|
|
572
|
+
} else {
|
|
573
|
+
console.log(`[HEALTH] Failed to send nudge`);
|
|
574
|
+
}
|
|
575
|
+
return;
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
// Check if enough time has passed since nudge
|
|
579
|
+
const timeSinceNudge = now.getTime() - this.nudgeSentAt.getTime();
|
|
580
|
+
if (timeSinceNudge < NUDGE_WAIT_MS) {
|
|
581
|
+
// Still waiting for agent to respond to nudge
|
|
582
|
+
return;
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
// Agent still stuck after nudge - trigger restart
|
|
586
|
+
console.log(`[HEALTH] Agent still stuck after nudge, triggering restart...`);
|
|
587
|
+
this.stuckSince = null;
|
|
588
|
+
this.nudgeSentAt = null;
|
|
589
|
+
|
|
590
|
+
await this.handleSessionDeath(
|
|
591
|
+
"stuck_after_nudge",
|
|
592
|
+
path.join(os.homedir(), ".agentmesh", "logs"),
|
|
593
|
+
);
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
/**
|
|
597
|
+
* Restarts the agent session (sandbox or non-sandbox)
|
|
598
|
+
*/
|
|
599
|
+
private async restartSession(): Promise<void> {
|
|
600
|
+
// Destroy existing session
|
|
601
|
+
destroySession(this.agentName);
|
|
602
|
+
|
|
603
|
+
if (this.sandboxMode && this.sandbox) {
|
|
604
|
+
// Restart sandbox container
|
|
605
|
+
const newContainerId = await this.sandbox.restart();
|
|
606
|
+
console.log(`[RESTART] New container: ${newContainerId.substring(0, 12)}`);
|
|
607
|
+
|
|
608
|
+
// Recreate tmux session for sandbox
|
|
609
|
+
const containerName = this.sandbox.getContainerName();
|
|
610
|
+
const sessionName = getSessionName(this.agentName);
|
|
611
|
+
|
|
612
|
+
// Build environment args for docker exec
|
|
613
|
+
const envArgs: string[] = [];
|
|
614
|
+
const allEnv = {
|
|
615
|
+
...this.runnerConfig.env,
|
|
616
|
+
AGENT_TOKEN: this.token!,
|
|
617
|
+
AGENTMESH_AGENT_ID: this.agentId!,
|
|
618
|
+
};
|
|
619
|
+
for (const [key, value] of Object.entries(allEnv)) {
|
|
620
|
+
if (value !== undefined && value !== "") {
|
|
621
|
+
envArgs.push(`-e "${key}=${value}"`);
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
const envString = envArgs.join(" ");
|
|
625
|
+
const modelArg = this.runnerConfig.env?.OPENCODE_MODEL
|
|
626
|
+
? ` --model ${this.runnerConfig.env.OPENCODE_MODEL}`
|
|
627
|
+
: "";
|
|
628
|
+
const dockerExecCommand = `docker exec -it ${envString} ${containerName} opencode${modelArg}`;
|
|
629
|
+
|
|
630
|
+
const created = createSession(this.agentName, dockerExecCommand, undefined, undefined);
|
|
631
|
+
if (!created) {
|
|
632
|
+
throw new Error("Failed to create tmux session for restarted sandbox");
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
// Update state with new container name
|
|
636
|
+
updateAgentInState(this.agentName, {
|
|
637
|
+
sandboxContainer: containerName,
|
|
638
|
+
});
|
|
639
|
+
} else {
|
|
640
|
+
// Non-sandbox restart - just recreate tmux session
|
|
641
|
+
const created = createSession(
|
|
642
|
+
this.agentName,
|
|
643
|
+
this.agentConfig.command,
|
|
644
|
+
this.agentConfig.workdir,
|
|
645
|
+
this.runnerConfig.env,
|
|
646
|
+
);
|
|
647
|
+
|
|
648
|
+
if (!created) {
|
|
649
|
+
throw new Error("Failed to create tmux session");
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
// Re-inject environment
|
|
653
|
+
updateSessionEnvironment(this.agentName, {
|
|
654
|
+
AGENT_TOKEN: this.token!,
|
|
655
|
+
AGENTMESH_AGENT_ID: this.agentId!,
|
|
656
|
+
...this.runnerConfig.env,
|
|
657
|
+
});
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
// Wait for session to be ready
|
|
661
|
+
await new Promise((resolve) => setTimeout(resolve, 2000));
|
|
662
|
+
}
|
|
663
|
+
|
|
317
664
|
async stop(): Promise<void> {
|
|
318
665
|
console.log(`\nStopping agent: ${this.agentName}`);
|
|
319
666
|
|
|
320
667
|
this.isRunning = false;
|
|
321
668
|
|
|
669
|
+
// Stop health monitor
|
|
670
|
+
if (this.healthCheckInterval) {
|
|
671
|
+
clearInterval(this.healthCheckInterval);
|
|
672
|
+
this.healthCheckInterval = null;
|
|
673
|
+
}
|
|
674
|
+
|
|
322
675
|
// Save context before stopping
|
|
323
676
|
if (this.agentId) {
|
|
324
677
|
console.log("Saving agent context...");
|
|
@@ -337,8 +690,16 @@ Nudge agent:
|
|
|
337
690
|
this.ws = null;
|
|
338
691
|
}
|
|
339
692
|
|
|
340
|
-
// Stop serve process or destroy tmux session
|
|
341
|
-
if (this.
|
|
693
|
+
// Stop sandbox, serve process, or destroy tmux session
|
|
694
|
+
if (this.sandboxMode && this.sandbox) {
|
|
695
|
+
console.log("Stopping sandbox...");
|
|
696
|
+
// In sandbox mode, we have both a tmux session (on host) and a Docker container
|
|
697
|
+
// Destroy tmux session first (this stops docker exec)
|
|
698
|
+
destroySession(this.agentName);
|
|
699
|
+
// Then destroy the container
|
|
700
|
+
await this.sandbox.destroy();
|
|
701
|
+
this.sandbox = null;
|
|
702
|
+
} else if (this.serveMode && this.serveProcess) {
|
|
342
703
|
console.log("Stopping opencode serve...");
|
|
343
704
|
this.serveProcess.kill("SIGTERM");
|
|
344
705
|
this.serveProcess = null;
|
|
@@ -409,6 +770,141 @@ Nudge agent:
|
|
|
409
770
|
console.log(`opencode serve started on http://0.0.0.0:${this.servePort}`);
|
|
410
771
|
}
|
|
411
772
|
|
|
773
|
+
/**
|
|
774
|
+
* Starts agent in Docker sandbox mode
|
|
775
|
+
* Provides filesystem isolation with only workspace mounted
|
|
776
|
+
*
|
|
777
|
+
* Strategy: Start Docker container with tail -f /dev/null, then create
|
|
778
|
+
* a tmux session on the HOST that runs `docker exec -it <container> opencode`.
|
|
779
|
+
* This way tmux provides the TTY that docker exec needs.
|
|
780
|
+
*/
|
|
781
|
+
private async startSandboxMode(): Promise<void> {
|
|
782
|
+
console.log("Starting in Docker sandbox mode...");
|
|
783
|
+
|
|
784
|
+
// Check Docker availability
|
|
785
|
+
if (!DockerSandbox.checkDockerAvailable()) {
|
|
786
|
+
throw new Error(
|
|
787
|
+
"Docker is not available. Install Docker or use --sandbox host to run on host.",
|
|
788
|
+
);
|
|
789
|
+
}
|
|
790
|
+
|
|
791
|
+
const workdir = this.agentConfig.workdir || process.cwd();
|
|
792
|
+
|
|
793
|
+
// Check for existing sandbox container
|
|
794
|
+
const existingContainer = DockerSandbox.findExisting(this.agentName);
|
|
795
|
+
if (existingContainer) {
|
|
796
|
+
console.log(`Found existing sandbox container: ${existingContainer}`);
|
|
797
|
+
console.log(`Stop it with: agentmesh stop ${this.agentName}`);
|
|
798
|
+
throw new Error("Sandbox container already exists");
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
// Build additional mounts for credentials and config
|
|
802
|
+
// The entrypoint script copies these from /tmp/ to the correct locations
|
|
803
|
+
const additionalMounts: string[] = [];
|
|
804
|
+
|
|
805
|
+
// Mount git credentials
|
|
806
|
+
const gitCredentialsPath = path.join(os.homedir(), ".git-credentials");
|
|
807
|
+
if (fs.existsSync(gitCredentialsPath)) {
|
|
808
|
+
additionalMounts.push(`${gitCredentialsPath}:/tmp/.git-credentials-host:ro`);
|
|
809
|
+
}
|
|
810
|
+
|
|
811
|
+
// Mount OpenCode auth.json for API provider tokens (Anthropic, OpenAI, etc.)
|
|
812
|
+
const opencodeAuthPath = path.join(os.homedir(), ".local", "share", "opencode", "auth.json");
|
|
813
|
+
if (fs.existsSync(opencodeAuthPath)) {
|
|
814
|
+
additionalMounts.push(`${opencodeAuthPath}:/tmp/.opencode-auth-host:ro`);
|
|
815
|
+
}
|
|
816
|
+
|
|
817
|
+
// Mount AgentMesh config for hub URL, API key, workspace
|
|
818
|
+
const agentmeshConfigPath = path.join(os.homedir(), ".agentmesh", "config.json");
|
|
819
|
+
if (fs.existsSync(agentmeshConfigPath)) {
|
|
820
|
+
additionalMounts.push(`${agentmeshConfigPath}:/tmp/.agentmesh-config-host:ro`);
|
|
821
|
+
}
|
|
822
|
+
|
|
823
|
+
// Create and mount permissive OpenCode config for sandbox
|
|
824
|
+
// This allows all permissions since the container is already sandboxed
|
|
825
|
+
this.ensureSandboxOpencodeConfig();
|
|
826
|
+
additionalMounts.push(`${SANDBOX_OPENCODE_CONFIG_PATH}:/workspace/opencode.json:ro`);
|
|
827
|
+
|
|
828
|
+
// Pass GitHub token as environment variable for git operations
|
|
829
|
+
const gitCredentials = fs.existsSync(gitCredentialsPath)
|
|
830
|
+
? fs.readFileSync(gitCredentialsPath, "utf-8").trim()
|
|
831
|
+
: "";
|
|
832
|
+
const gitHubToken = gitCredentials.match(/github_pat_[^\s@]+/)?.[0] || "";
|
|
833
|
+
|
|
834
|
+
// Build the command to run inside the container
|
|
835
|
+
// The agentmesh CLI inside the container will create tmux + opencode
|
|
836
|
+
const model =
|
|
837
|
+
this.runnerConfig.env?.OPENCODE_MODEL || this.runnerConfig.model || "claude-sonnet-4";
|
|
838
|
+
const containerCommand = [
|
|
839
|
+
"agentmesh",
|
|
840
|
+
"start",
|
|
841
|
+
"--name",
|
|
842
|
+
this.agentName,
|
|
843
|
+
"--model",
|
|
844
|
+
model,
|
|
845
|
+
"--foreground",
|
|
846
|
+
];
|
|
847
|
+
|
|
848
|
+
// Create sandbox configuration
|
|
849
|
+
this.sandbox = new DockerSandbox({
|
|
850
|
+
agentName: this.agentName,
|
|
851
|
+
image: this.sandboxImage,
|
|
852
|
+
workspacePath: workdir,
|
|
853
|
+
cpuLimit: this.sandboxCpu,
|
|
854
|
+
memoryLimit: this.sandboxMemory,
|
|
855
|
+
env: {
|
|
856
|
+
...this.runnerConfig.env,
|
|
857
|
+
AGENT_TOKEN: this.token!,
|
|
858
|
+
AGENTMESH_AGENT_ID: this.agentId!,
|
|
859
|
+
// Git credentials for pushing to GitHub
|
|
860
|
+
...(gitHubToken && { GH_TOKEN: gitHubToken, GITHUB_TOKEN: gitHubToken }),
|
|
861
|
+
},
|
|
862
|
+
serveMode: this.serveMode,
|
|
863
|
+
servePort: this.servePort,
|
|
864
|
+
networkMode: "bridge",
|
|
865
|
+
additionalMounts: additionalMounts.length > 0 ? additionalMounts : undefined,
|
|
866
|
+
command: this.serveMode ? undefined : containerCommand,
|
|
867
|
+
});
|
|
868
|
+
|
|
869
|
+
// Validate mount policy (will throw if denied)
|
|
870
|
+
this.sandbox.validateMountPolicy();
|
|
871
|
+
|
|
872
|
+
// Pull image if needed
|
|
873
|
+
await this.sandbox.pullImage();
|
|
874
|
+
|
|
875
|
+
// Start container with agentmesh running inside
|
|
876
|
+
// The entrypoint script sets up credentials before agentmesh starts
|
|
877
|
+
await this.sandbox.start();
|
|
878
|
+
|
|
879
|
+
const containerName = this.sandbox.getContainerName();
|
|
880
|
+
|
|
881
|
+
console.log(`
|
|
882
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
883
|
+
SANDBOX MODE ACTIVE
|
|
884
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
885
|
+
|
|
886
|
+
Container: ${containerName}
|
|
887
|
+
Image: ${this.sandboxImage}
|
|
888
|
+
Workspace: ${workdir} -> /workspace
|
|
889
|
+
CPU: ${this.sandboxCpu} core(s)
|
|
890
|
+
Memory: ${this.sandboxMemory}
|
|
891
|
+
Model: ${model}
|
|
892
|
+
|
|
893
|
+
The agent daemon is running INSIDE the Docker container.
|
|
894
|
+
tmux session and OpenCode are managed inside the container.
|
|
895
|
+
|
|
896
|
+
Attach: agentmesh attach ${this.agentName}
|
|
897
|
+
Nudge: agentmesh nudge ${this.agentName} "message"
|
|
898
|
+
Stop: agentmesh stop ${this.agentName}
|
|
899
|
+
Logs: docker logs ${containerName}
|
|
900
|
+
|
|
901
|
+
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
902
|
+
`);
|
|
903
|
+
|
|
904
|
+
// No host tmux session needed - the container runs agentmesh which creates its own tmux
|
|
905
|
+
// Heartbeats are sent by the daemon running inside the container
|
|
906
|
+
}
|
|
907
|
+
|
|
412
908
|
/**
|
|
413
909
|
* Saves the current agent context to disk
|
|
414
910
|
*/
|
|
@@ -605,4 +1101,31 @@ Option 3: Use --auto-setup to automatically clone the repository:
|
|
|
605
1101
|
console.log(`✓ Workspace ready: ${workspacePath}\n`);
|
|
606
1102
|
return workspacePath;
|
|
607
1103
|
}
|
|
1104
|
+
|
|
1105
|
+
/**
|
|
1106
|
+
* Ensures the sandbox OpenCode config exists
|
|
1107
|
+
* Creates ~/.agentmesh/opencode-sandbox.json with permissive permissions and model
|
|
1108
|
+
*/
|
|
1109
|
+
private ensureSandboxOpencodeConfig(): void {
|
|
1110
|
+
const configDir = path.dirname(SANDBOX_OPENCODE_CONFIG_PATH);
|
|
1111
|
+
|
|
1112
|
+
if (!fs.existsSync(configDir)) {
|
|
1113
|
+
fs.mkdirSync(configDir, { recursive: true });
|
|
1114
|
+
}
|
|
1115
|
+
|
|
1116
|
+
// Build config with model if available
|
|
1117
|
+
const config: Record<string, unknown> = {
|
|
1118
|
+
...SANDBOX_OPENCODE_CONFIG,
|
|
1119
|
+
};
|
|
1120
|
+
|
|
1121
|
+
// Include model from runner config
|
|
1122
|
+
const model = this.runnerConfig.env?.OPENCODE_MODEL;
|
|
1123
|
+
if (model) {
|
|
1124
|
+
config.model = model;
|
|
1125
|
+
}
|
|
1126
|
+
|
|
1127
|
+
// Always write to ensure model is up to date
|
|
1128
|
+
fs.writeFileSync(SANDBOX_OPENCODE_CONFIG_PATH, JSON.stringify(config, null, 2));
|
|
1129
|
+
console.log(`Updated sandbox OpenCode config: ${SANDBOX_OPENCODE_CONFIG_PATH}`);
|
|
1130
|
+
}
|
|
608
1131
|
}
|