@agentmeshhq/agent 0.1.17 → 0.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (143)
  1. package/LICENSE +21 -0
  2. package/README.md +39 -0
  3. package/dist/__tests__/orphan-process.test.d.ts +11 -0
  4. package/dist/__tests__/orphan-process.test.js +286 -0
  5. package/dist/__tests__/orphan-process.test.js.map +1 -0
  6. package/dist/__tests__/runner.test.js +16 -0
  7. package/dist/__tests__/runner.test.js.map +1 -1
  8. package/dist/__tests__/watchdog.test.js +138 -12
  9. package/dist/__tests__/watchdog.test.js.map +1 -1
  10. package/dist/cli/index.js +2 -1
  11. package/dist/cli/index.js.map +1 -1
  12. package/dist/cli/start.d.ts +2 -1
  13. package/dist/cli/start.js +6 -3
  14. package/dist/cli/start.js.map +1 -1
  15. package/dist/cli/status.js +11 -0
  16. package/dist/cli/status.js.map +1 -1
  17. package/dist/cli/stop.js +7 -2
  18. package/dist/cli/stop.js.map +1 -1
  19. package/dist/config/schema.d.ts +4 -2
  20. package/dist/core/daemon/assignment-message.d.ts +12 -0
  21. package/dist/core/daemon/assignment-message.js +36 -0
  22. package/dist/core/daemon/assignment-message.js.map +1 -0
  23. package/dist/core/daemon/bootstrap.d.ts +35 -0
  24. package/dist/core/daemon/bootstrap.js +52 -0
  25. package/dist/core/daemon/bootstrap.js.map +1 -0
  26. package/dist/core/daemon/crash-log.d.ts +16 -0
  27. package/dist/core/daemon/crash-log.js +24 -0
  28. package/dist/core/daemon/crash-log.js.map +1 -0
  29. package/dist/core/daemon/health-policy.d.ts +21 -0
  30. package/dist/core/daemon/health-policy.js +32 -0
  31. package/dist/core/daemon/health-policy.js.map +1 -0
  32. package/dist/core/daemon/sandbox-config.d.ts +9 -0
  33. package/dist/core/daemon/sandbox-config.js +17 -0
  34. package/dist/core/daemon/sandbox-config.js.map +1 -0
  35. package/dist/core/daemon/state.d.ts +33 -0
  36. package/dist/core/daemon/state.js +77 -0
  37. package/dist/core/daemon/state.js.map +1 -0
  38. package/dist/core/daemon/tmux-session.d.ts +17 -0
  39. package/dist/core/daemon/tmux-session.js +34 -0
  40. package/dist/core/daemon/tmux-session.js.map +1 -0
  41. package/dist/core/daemon/workspace.d.ts +10 -0
  42. package/dist/core/daemon/workspace.js +51 -0
  43. package/dist/core/daemon/workspace.js.map +1 -0
  44. package/dist/core/daemon.d.ts +4 -7
  45. package/dist/core/daemon.js +143 -259
  46. package/dist/core/daemon.js.map +1 -1
  47. package/dist/core/injector.js +6 -0
  48. package/dist/core/injector.js.map +1 -1
  49. package/dist/core/registry.js +1 -1
  50. package/dist/core/registry.js.map +1 -1
  51. package/dist/core/runner/build.d.ts +9 -0
  52. package/dist/core/runner/build.js +53 -0
  53. package/dist/core/runner/build.js.map +1 -0
  54. package/dist/core/runner/detect.d.ts +5 -0
  55. package/dist/core/runner/detect.js +14 -0
  56. package/dist/core/runner/detect.js.map +1 -0
  57. package/dist/core/runner/index.d.ts +5 -0
  58. package/dist/core/runner/index.js +5 -0
  59. package/dist/core/runner/index.js.map +1 -0
  60. package/dist/core/runner/model.d.ts +5 -0
  61. package/dist/core/runner/model.js +7 -0
  62. package/dist/core/runner/model.js.map +1 -0
  63. package/dist/core/runner/opencode-models.d.ts +15 -0
  64. package/dist/core/runner/opencode-models.js +70 -0
  65. package/dist/core/runner/opencode-models.js.map +1 -0
  66. package/dist/core/runner/types.d.ts +19 -0
  67. package/dist/core/runner/types.js +8 -0
  68. package/dist/core/runner/types.js.map +1 -0
  69. package/dist/core/runner.d.ts +5 -47
  70. package/dist/core/runner.js +5 -167
  71. package/dist/core/runner.js.map +1 -1
  72. package/dist/core/tmux-runtime.d.ts +13 -0
  73. package/dist/core/tmux-runtime.js +72 -0
  74. package/dist/core/tmux-runtime.js.map +1 -0
  75. package/dist/core/tmux.d.ts +7 -1
  76. package/dist/core/tmux.js +75 -45
  77. package/dist/core/tmux.js.map +1 -1
  78. package/dist/core/watchdog.d.ts +18 -1
  79. package/dist/core/watchdog.js +78 -29
  80. package/dist/core/watchdog.js.map +1 -1
  81. package/package.json +30 -11
  82. package/dist/cli/inbox.d.ts +0 -5
  83. package/dist/cli/inbox.js +0 -123
  84. package/dist/cli/inbox.js.map +0 -1
  85. package/dist/cli/issue.d.ts +0 -42
  86. package/dist/cli/issue.js +0 -297
  87. package/dist/cli/issue.js.map +0 -1
  88. package/dist/cli/ready.d.ts +0 -5
  89. package/dist/cli/ready.js +0 -131
  90. package/dist/cli/ready.js.map +0 -1
  91. package/dist/cli/sync.d.ts +0 -8
  92. package/dist/cli/sync.js +0 -154
  93. package/dist/cli/sync.js.map +0 -1
  94. package/dist/core/issue-cache.d.ts +0 -44
  95. package/dist/core/issue-cache.js +0 -75
  96. package/dist/core/issue-cache.js.map +0 -1
  97. package/src/__tests__/context.test.ts +0 -464
  98. package/src/__tests__/injector.test.ts +0 -29
  99. package/src/__tests__/jwt.test.ts +0 -112
  100. package/src/__tests__/loader.test.ts +0 -239
  101. package/src/__tests__/runner.test.ts +0 -104
  102. package/src/__tests__/sandbox.test.ts +0 -435
  103. package/src/__tests__/watchdog.test.ts +0 -368
  104. package/src/cli/attach.ts +0 -22
  105. package/src/cli/build.ts +0 -145
  106. package/src/cli/config.ts +0 -148
  107. package/src/cli/context.ts +0 -231
  108. package/src/cli/deploy.ts +0 -155
  109. package/src/cli/index.ts +0 -375
  110. package/src/cli/init.ts +0 -75
  111. package/src/cli/list.ts +0 -70
  112. package/src/cli/local.ts +0 -183
  113. package/src/cli/logs.ts +0 -64
  114. package/src/cli/migrate.ts +0 -212
  115. package/src/cli/nudge.ts +0 -81
  116. package/src/cli/restart.ts +0 -59
  117. package/src/cli/slack.ts +0 -70
  118. package/src/cli/start.ts +0 -115
  119. package/src/cli/status.ts +0 -91
  120. package/src/cli/stop.ts +0 -48
  121. package/src/cli/test.ts +0 -143
  122. package/src/cli/token.ts +0 -188
  123. package/src/cli/whoami.ts +0 -142
  124. package/src/config/loader.ts +0 -121
  125. package/src/config/schema.ts +0 -68
  126. package/src/context/handoff.ts +0 -122
  127. package/src/context/index.ts +0 -8
  128. package/src/context/schema.ts +0 -111
  129. package/src/context/storage.ts +0 -197
  130. package/src/core/daemon.ts +0 -1308
  131. package/src/core/heartbeat.ts +0 -129
  132. package/src/core/injector.ts +0 -292
  133. package/src/core/registry.ts +0 -159
  134. package/src/core/runner.ts +0 -225
  135. package/src/core/sandbox.ts +0 -547
  136. package/src/core/session-id.ts +0 -111
  137. package/src/core/tmux.ts +0 -405
  138. package/src/core/watchdog.ts +0 -238
  139. package/src/core/websocket.ts +0 -94
  140. package/src/index.ts +0 -10
  141. package/src/utils/jwt.ts +0 -87
  142. package/tsconfig.json +0 -8
  143. package/vitest.config.ts +0 -12
@@ -1,16 +1,25 @@
1
- import { execSync, spawn } from "node:child_process";
1
+ import { spawn } from "node:child_process";
2
2
  import fs from "node:fs";
3
3
  import os from "node:os";
4
4
  import path from "node:path";
5
- import { addAgentToState, getAgentState, loadConfig, resetAgentRestartCount, updateAgentInState, } from "../config/loader.js";
5
+ import { getAgentState, resetAgentRestartCount, updateAgentInState } from "../config/loader.js";
6
6
  import { loadContext, loadOrCreateContext, saveContext } from "../context/index.js";
7
+ import { renderMissingWorkdirMessage } from "./daemon/assignment-message.js";
8
+ import { bootstrapDaemon } from "./daemon/bootstrap.js";
9
+ import { formatCrashLog } from "./daemon/crash-log.js";
10
+ import { getNudgeMessage, getStuckDetail, isWithinNudgeWaitWindow, shouldResetRestartCount, } from "./daemon/health-policy.js";
11
+ import { writeSandboxOpencodeConfig } from "./daemon/sandbox-config.js";
12
+ import { captureAgentChildPids, persistRunningState } from "./daemon/state.js";
13
+ import { startTmuxRuntimeSession } from "./daemon/tmux-session.js";
14
+ import { setupWorkspace } from "./daemon/workspace.js";
7
15
  import { Heartbeat } from "./heartbeat.js";
8
16
  import { handleWebSocketEvent, injectRestoredContext, injectStartupMessage } from "./injector.js";
9
17
  import { checkInbox, fetchAssignments, registerAgent } from "./registry.js";
10
- import { buildRunnerConfig, getRunnerDisplayName } from "./runner.js";
18
+ import { getRunnerDisplayName } from "./runner.js";
11
19
  import { DockerSandbox } from "./sandbox.js";
12
20
  import { getLatestSessionId, snapshotSessionId, waitForNewSessionId } from "./session-id.js";
13
- import { captureSessionContext, captureSessionOutput, createSession, destroySession, getSessionName, isSessionHealthy, sessionExists, updateSessionEnvironment, } from "./tmux.js";
21
+ import { captureSessionContext, captureSessionOutput, createSession, destroySession, isSessionHealthy, killProcessTree, updateSessionEnvironment, } from "./tmux.js";
22
+ import { prepareOpenCodeRuntime } from "./tmux-runtime.js";
14
23
  import { checkAgentProgress, cleanupOrphanContainers, isProcessRunning, sendNudge, } from "./watchdog.js";
15
24
  import { AgentWebSocket } from "./websocket.js";
16
25
  // Maximum number of auto-restart attempts
@@ -38,6 +47,7 @@ export class AgentDaemon {
38
47
  isRunning = false;
39
48
  assignedProject;
40
49
  shouldRestoreContext;
50
+ isWorkerAgent;
41
51
  autoSetup;
42
52
  serveMode;
43
53
  servePort;
@@ -48,7 +58,6 @@ export class AgentDaemon {
48
58
  sandboxMemory;
49
59
  sandbox = null;
50
60
  healthCheckInterval = null;
51
- serverContext;
52
61
  // Session resume tracking
53
62
  _preStartSessionId;
54
63
  _attemptedResumeSessionId;
@@ -58,50 +67,20 @@ export class AgentDaemon {
58
67
  stuckSince = null;
59
68
  nudgeSentAt = null;
60
69
  constructor(options) {
61
- const config = loadConfig();
62
- if (!config) {
63
- throw new Error("No config found. Run 'agentmesh init' first.");
64
- }
65
- // Ensure config has required fields with defaults
66
- if (!config.agents)
67
- config.agents = [];
68
- if (!config.defaults)
69
- config.defaults = { command: "opencode", model: "claude-sonnet-4" };
70
- this.config = config;
70
+ const boot = bootstrapDaemon(options);
71
+ this.config = boot.config;
71
72
  this.agentName = options.name;
72
- this.shouldRestoreContext = options.restoreContext !== false;
73
- this.autoSetup = options.autoSetup === true;
74
- // Find or create agent config
75
- let agentConfig = config.agents.find((a) => a.name === options.name);
76
- if (!agentConfig) {
77
- agentConfig = {
78
- name: options.name,
79
- command: options.command || config.defaults.command,
80
- workdir: options.workdir,
81
- model: options.model || config.defaults.model,
82
- };
83
- }
84
- // Override with provided options
85
- if (options.command)
86
- agentConfig.command = options.command;
87
- if (options.workdir)
88
- agentConfig.workdir = options.workdir;
89
- if (options.model)
90
- agentConfig.model = options.model;
91
- this.agentConfig = agentConfig;
92
- this.serveMode = options.serve === true;
93
- this.servePort = options.servePort || 3001;
94
- this.sandboxMode = options.sandbox === true;
95
- this.sandboxImage = options.sandboxImage || "agentmesh/agent-sandbox:latest";
96
- this.sandboxCpu = options.sandboxCpu || "1";
97
- this.sandboxMemory = options.sandboxMemory || "2g";
98
- // Build runner configuration with model resolution
99
- this.runnerConfig = buildRunnerConfig({
100
- cliModel: options.model,
101
- agentModel: agentConfig.model,
102
- defaultModel: config.defaults.model,
103
- command: agentConfig.command,
104
- });
73
+ this.shouldRestoreContext = boot.shouldRestoreContext;
74
+ this.isWorkerAgent = boot.isWorkerAgent;
75
+ this.autoSetup = boot.autoSetup;
76
+ this.agentConfig = boot.agentConfig;
77
+ this.serveMode = boot.serveMode;
78
+ this.servePort = boot.servePort;
79
+ this.sandboxMode = boot.sandboxMode;
80
+ this.sandboxImage = boot.sandboxImage;
81
+ this.sandboxCpu = boot.sandboxCpu;
82
+ this.sandboxMemory = boot.sandboxMemory;
83
+ this.runnerConfig = boot.runnerConfig;
105
84
  const runnerName = getRunnerDisplayName(this.runnerConfig.type);
106
85
  console.log(`Runner: ${runnerName}`);
107
86
  console.log(`Effective model: ${this.runnerConfig.model}`);
@@ -119,8 +98,13 @@ export class AgentDaemon {
119
98
  throw new Error(`Agent "${this.agentName}" is already running (PID: ${existingState.pid}). ` +
120
99
  `Use 'agentmesh stop ${this.agentName}' first.`);
121
100
  }
122
- // Process not running, clean up stale state
101
+ // Process not running, clean up stale state and any orphaned child processes
123
102
  console.log(`Cleaning up stale state for PID ${existingState.pid}`);
103
+ const orphanPids = existingState.childPids ?? [];
104
+ if (orphanPids.length > 0) {
105
+ console.log(`[STARTUP] Found ${orphanPids.length} orphaned child PIDs from previous run — killing...`);
106
+ killProcessTree(orphanPids);
107
+ }
124
108
  }
125
109
  // Clean up orphan containers in sandbox mode
126
110
  if (this.sandboxMode) {
@@ -149,7 +133,6 @@ export class AgentDaemon {
149
133
  if (registration.status === "re-registered") {
150
134
  console.log(`Re-registered as: ${this.agentId}`);
151
135
  if (registration.context && Object.keys(registration.context).length > 0) {
152
- this.serverContext = registration.context;
153
136
  console.log(`Server context restored: ${Object.keys(registration.context).join(", ")}`);
154
137
  }
155
138
  }
@@ -166,39 +149,16 @@ export class AgentDaemon {
166
149
  await this.startServeMode();
167
150
  }
168
151
  else {
169
- // Check if session already exists
170
- const sessionName = getSessionName(this.agentName);
171
- const sessionAlreadyExists = sessionExists(sessionName);
172
- // Create tmux session if it doesn't exist
173
- if (!sessionAlreadyExists) {
174
- // Load saved context to check for OpenCode session ID (for native resume)
175
- let savedSessionId;
176
- if (this.shouldRestoreContext && this.agentId) {
177
- const savedContext = loadContext(this.agentId);
178
- savedSessionId = savedContext?.custom?.opencodeSessionId;
179
- if (savedSessionId) {
180
- console.log(`[CONTEXT] Found saved OpenCode session ID: ${savedSessionId}`);
181
- }
182
- }
183
- // Snapshot the latest session ID in logs BEFORE starting OpenCode.
184
- // This lets us detect whether OpenCode actually resumed vs created a new session.
185
- const preStartSessionId = snapshotSessionId(this.agentName);
186
- console.log(`Creating tmux session: ${sessionName}`);
187
- // Include runner env vars (e.g., OPENCODE_MODEL) at session creation
188
- const created = createSession(this.agentName, this.agentConfig.command, this.agentConfig.workdir, this.runnerConfig.env, // Apply model env at process start
189
- savedSessionId);
190
- if (!created) {
191
- throw new Error("Failed to create tmux session");
192
- }
193
- // Store pre-start snapshot for fallback detection later
194
- this._preStartSessionId = preStartSessionId;
195
- this._attemptedResumeSessionId = savedSessionId;
196
- }
197
- else {
198
- console.log(`Reconnecting to existing session: ${sessionName}`);
199
- // Update environment for existing session
200
- updateSessionEnvironment(this.agentName, this.runnerConfig.env);
201
- }
152
+ const sessionStart = startTmuxRuntimeSession({
153
+ agentName: this.agentName,
154
+ agentId: this.agentId,
155
+ command: this.agentConfig.command,
156
+ workdir: this.agentConfig.workdir,
157
+ runnerEnv: this.runnerConfig.env,
158
+ shouldRestoreContext: this.shouldRestoreContext,
159
+ });
160
+ this._preStartSessionId = sessionStart.preStartSessionId;
161
+ this._attemptedResumeSessionId = sessionStart.attemptedResumeSessionId;
202
162
  // Inject environment variables into tmux session
203
163
  console.log("Injecting environment variables...");
204
164
  updateSessionEnvironment(this.agentName, {
@@ -207,20 +167,27 @@ export class AgentDaemon {
207
167
  });
208
168
  }
209
169
  // Save state including runtime model info
210
- const sessionName = this.serveMode ? `serve:${this.servePort}` : getSessionName(this.agentName);
211
- addAgentToState({
212
- name: this.agentName,
170
+ persistRunningState({
171
+ agentName: this.agentName,
213
172
  agentId: this.agentId,
214
173
  pid: process.pid,
215
- tmuxSession: sessionName,
216
- startedAt: new Date().toISOString(),
217
174
  token: this.token,
218
175
  workdir: this.agentConfig.workdir,
219
176
  assignedProject: this.assignedProject,
220
177
  runtimeModel: this.runnerConfig.model,
221
178
  runnerType: this.runnerConfig.type,
222
179
  sandboxContainer: this.sandbox?.getContainerName(),
180
+ serveMode: this.serveMode,
181
+ servePort: this.servePort,
223
182
  });
183
+ // Track child PIDs for cleanup on restart/stop (tmux mode only — sandbox/serve manage their own)
184
+ if (!this.sandboxMode && !this.serveMode) {
185
+ const childPids = captureAgentChildPids(this.agentName);
186
+ if (childPids.length > 0) {
187
+ updateAgentInState(this.agentName, { childPids });
188
+ console.log(`[STARTUP] Tracking ${childPids.length} child PIDs: ${childPids.join(", ")}`);
189
+ }
190
+ }
224
191
  // Start heartbeat with auto-refresh
225
192
  console.log("Starting heartbeat...");
226
193
  this.heartbeat = new Heartbeat({
@@ -402,13 +369,10 @@ Nudge agent:
402
369
  if (!this.isRunning)
403
370
  return;
404
371
  // Reset restart count after stable operation
405
- if (this.lastStableTime && this.restartCount > 0) {
406
- const stableTime = Date.now() - this.lastStableTime.getTime();
407
- if (stableTime > RESTART_COUNT_RESET_MS) {
408
- console.log(`[HEALTH] Agent stable for 30+ minutes, resetting restart count`);
409
- this.restartCount = 0;
410
- resetAgentRestartCount(this.agentName);
411
- }
372
+ if (shouldResetRestartCount(this.restartCount, this.lastStableTime, RESTART_COUNT_RESET_MS)) {
373
+ console.log(`[HEALTH] Agent stable for 30+ minutes, resetting restart count`);
374
+ this.restartCount = 0;
375
+ resetAgentRestartCount(this.agentName);
412
376
  }
413
377
  // For sandbox mode, pass container name so health check looks inside container
414
378
  const containerName = this.sandboxMode ? this.sandbox?.getContainerName() : undefined;
@@ -420,7 +384,17 @@ Nudge agent:
420
384
  }
421
385
  // Session is alive - check progress watchdog
422
386
  const progress = checkAgentProgress(this.agentName, containerName);
423
- if (progress.status === "permission_blocked" || progress.status === "stuck") {
387
+ if (progress.status === "waiting_for_human") {
388
+ // Agent is intentionally waiting for human input - do not classify as stuck
389
+ if (this.stuckSince) {
390
+ // Clear any prior stuck tracking since the agent signalled a legitimate wait
391
+ this.stuckSince = null;
392
+ this.nudgeSentAt = null;
393
+ updateAgentInState(this.agentName, { stuckSince: undefined, status: "waiting" });
394
+ }
395
+ console.log(`[HEALTH] Agent is waiting for human input: ${progress.details}`);
396
+ }
397
+ else if (progress.status === "permission_blocked" || progress.status === "stuck") {
424
398
  await this.handleStuckAgent(progress);
425
399
  }
426
400
  else if (progress.status === "active") {
@@ -449,24 +423,18 @@ Nudge agent:
449
423
  catch {
450
424
  lastOutput = "Failed to capture session output";
451
425
  }
452
- const crashLog = `
453
- ================================================================================
454
- AGENT CRASH DETECTED
455
- ================================================================================
456
- Timestamp: ${timestamp}
457
- Agent: ${this.agentName}
458
- Agent ID: ${this.agentId}
459
- Reason: ${reason}
460
- Restart Count: ${this.restartCount}/${MAX_RESTART_ATTEMPTS}
461
- Sandbox: ${this.sandboxMode ? this.sandbox?.getContainerName() : "none"}
462
- Workdir: ${this.agentConfig.workdir}
463
- Model: ${this.runnerConfig.model}
464
-
465
- --- Last Session Output ---
466
- ${lastOutput}
467
- ================================================================================
468
-
469
- `;
426
+ const crashLog = formatCrashLog({
427
+ timestamp,
428
+ agentName: this.agentName,
429
+ agentId: this.agentId,
430
+ reason,
431
+ restartCount: this.restartCount,
432
+ maxRestartAttempts: MAX_RESTART_ATTEMPTS,
433
+ sandboxLabel: this.sandboxMode ? this.sandbox?.getContainerName() || "sandbox" : "none",
434
+ workdir: this.agentConfig.workdir,
435
+ model: this.runnerConfig.model,
436
+ lastOutput,
437
+ });
470
438
  fs.appendFileSync(logFile, crashLog);
471
439
  // Save context (including session ID) before restart attempt
472
440
  if (this.agentId) {
@@ -515,36 +483,36 @@ ${lastOutput}
515
483
  if (!this.stuckSince) {
516
484
  // First detection of stuck state
517
485
  this.stuckSince = now;
518
- console.log(`[HEALTH] Agent appears stuck: ${progress.details || progress.blockedOn || "no activity"}`);
486
+ console.log(`[HEALTH] Agent appears stuck: ${getStuckDetail(progress)}`);
519
487
  updateAgentInState(this.agentName, {
520
488
  stuckSince: now.toISOString(),
521
489
  status: "stuck",
522
490
  });
523
491
  }
524
- // If we haven't sent a nudge yet, send one
525
- if (!this.nudgeSentAt) {
526
- console.log(`[HEALTH] Sending nudge to unstick agent...`);
527
- const nudgeMessage = progress.status === "permission_blocked"
528
- ? "Please continue with your task. If you see a permission prompt, try an alternative approach that doesn't require that permission."
529
- : "Please continue with your current task.";
530
- const sent = sendNudge(this.agentName, nudgeMessage);
531
- if (sent) {
532
- this.nudgeSentAt = now;
533
- console.log(`[HEALTH] Nudge sent successfully`);
492
+ // Only nudge worker agents - others restart immediately
493
+ if (this.isWorkerAgent) {
494
+ // If we haven't sent a nudge yet, send one
495
+ if (!this.nudgeSentAt) {
496
+ console.log(`[HEALTH] Sending nudge to worker agent...`);
497
+ const nudgeMessage = getNudgeMessage(progress);
498
+ const sent = sendNudge(this.agentName, nudgeMessage);
499
+ if (sent) {
500
+ this.nudgeSentAt = now;
501
+ console.log(`[HEALTH] Nudge sent successfully`);
502
+ }
503
+ else {
504
+ console.log(`[HEALTH] Failed to send nudge`);
505
+ }
506
+ return;
534
507
  }
535
- else {
536
- console.log(`[HEALTH] Failed to send nudge`);
508
+ // Check if enough time has passed since nudge
509
+ if (isWithinNudgeWaitWindow(this.nudgeSentAt, NUDGE_WAIT_MS, now)) {
510
+ // Still waiting for agent to respond to nudge
511
+ return;
537
512
  }
538
- return;
539
513
  }
540
- // Check if enough time has passed since nudge
541
- const timeSinceNudge = now.getTime() - this.nudgeSentAt.getTime();
542
- if (timeSinceNudge < NUDGE_WAIT_MS) {
543
- // Still waiting for agent to respond to nudge
544
- return;
545
- }
546
- // Agent still stuck after nudge - trigger restart
547
- console.log(`[HEALTH] Agent still stuck after nudge, triggering restart...`);
514
+ // Agent still stuck - trigger restart (or restart immediately if not a worker)
515
+ console.log(`[HEALTH] Agent still stuck${this.isWorkerAgent ? " after nudge" : ""}, triggering restart...`);
548
516
  this.stuckSince = null;
549
517
  this.nudgeSentAt = null;
550
518
  await this.handleSessionDeath("stuck_after_nudge", path.join(os.homedir(), ".agentmesh", "logs"));
@@ -553,15 +521,19 @@ ${lastOutput}
553
521
  * Restarts the agent session (sandbox or non-sandbox)
554
522
  */
555
523
  async restartSession() {
556
- // Destroy existing session
557
- destroySession(this.agentName);
524
+ // Retrieve tracked child PIDs before destroying the session
525
+ const currentState = getAgentState(this.agentName);
526
+ const childPids = currentState?.childPids ?? [];
527
+ // Destroy existing session AND kill all tracked child processes
528
+ destroySession(this.agentName, childPids);
529
+ // Allow cleanup to settle before spawning a new session
530
+ await new Promise((resolve) => setTimeout(resolve, 1000));
558
531
  if (this.sandboxMode && this.sandbox) {
559
532
  // Restart sandbox container
560
533
  const newContainerId = await this.sandbox.restart();
561
534
  console.log(`[RESTART] New container: ${newContainerId.substring(0, 12)}`);
562
535
  // Recreate tmux session for sandbox
563
536
  const containerName = this.sandbox.getContainerName();
564
- const sessionName = getSessionName(this.agentName);
565
537
  // Build environment args for docker exec
566
538
  const envArgs = [];
567
539
  const allEnv = {
@@ -583,10 +555,15 @@ ${lastOutput}
583
555
  if (!created) {
584
556
  throw new Error("Failed to create tmux session for restarted sandbox");
585
557
  }
586
- // Update state with new container name
558
+ // Track new child PIDs and update state
559
+ const newChildPids = captureAgentChildPids(this.agentName);
587
560
  updateAgentInState(this.agentName, {
588
561
  sandboxContainer: containerName,
562
+ childPids: newChildPids,
589
563
  });
564
+ if (newChildPids.length > 0) {
565
+ console.log(`[RESTART] Tracking ${newChildPids.length} child PIDs: ${newChildPids.join(", ")}`);
566
+ }
590
567
  }
591
568
  else {
592
569
  // Non-sandbox restart — load saved session ID for native resume
@@ -610,6 +587,12 @@ ${lastOutput}
610
587
  AGENTMESH_AGENT_ID: this.agentId,
611
588
  ...this.runnerConfig.env,
612
589
  });
590
+ // Track new child PIDs
591
+ const newChildPids = captureAgentChildPids(this.agentName);
592
+ updateAgentInState(this.agentName, { childPids: newChildPids });
593
+ if (newChildPids.length > 0) {
594
+ console.log(`[RESTART] Tracking ${newChildPids.length} child PIDs: ${newChildPids.join(", ")}`);
595
+ }
613
596
  // Verify native resume and fallback if needed
614
597
  if (savedSessionId && savedContext) {
615
598
  const newSessionId = await waitForNewSessionId(this.agentName, preRestartSessionId, 15000);
@@ -704,27 +687,7 @@ ${lastOutput}
704
687
  async startServeMode() {
705
688
  console.log(`Starting opencode serve mode on port ${this.servePort}...`);
706
689
  const workdir = this.agentConfig.workdir || process.cwd();
707
- // Isolate OpenCode's SQLite database per agent to prevent WAL corruption.
708
- // See docs/RCA-OPENCODE-SQLITE-CORRUPTION.md for details.
709
- const agentDataDir = path.join(os.homedir(), ".agentmesh", "opencode-data", this.agentName);
710
- const agentOpencodeDir = path.join(agentDataDir, "opencode");
711
- if (!fs.existsSync(agentOpencodeDir)) {
712
- fs.mkdirSync(agentOpencodeDir, { recursive: true });
713
- }
714
- // Copy auth.json from default OpenCode data dir so agents inherit API keys.
715
- // Strips xAI provider to prevent OpenCode from defaulting to non-Anthropic models.
716
- const agentAuthPath = path.join(agentOpencodeDir, "auth.json");
717
- const sourceAuthPath = path.join(os.homedir(), ".local", "share", "opencode", "auth.json");
718
- if (!fs.existsSync(agentAuthPath) && fs.existsSync(sourceAuthPath)) {
719
- try {
720
- const auth = JSON.parse(fs.readFileSync(sourceAuthPath, "utf-8"));
721
- delete auth.xai;
722
- fs.writeFileSync(agentAuthPath, JSON.stringify(auth, null, 2));
723
- }
724
- catch {
725
- // Non-fatal — agent will just need manual auth
726
- }
727
- }
690
+ const agentDataDir = prepareOpenCodeRuntime(this.agentName);
728
691
  // Build environment for opencode serve
729
692
  const env = {
730
693
  ...process.env,
@@ -961,39 +924,22 @@ Logs: docker logs ${containerName}
961
924
  const suggestedPath = `~/.agentmesh/workspaces/${this.config.workspace}/${repoAssignment.project.code.toLowerCase()}/${this.agentName}`;
962
925
  // If --auto-setup is enabled, automatically clone the repo
963
926
  if (this.autoSetup) {
964
- this.agentConfig.workdir = this.setupWorkspace(expandedPath, repo.url, repo.default_branch, repoAssignment.project.name);
927
+ this.agentConfig.workdir = setupWorkspace({
928
+ workspacePath: expandedPath,
929
+ repoUrl: repo.url,
930
+ defaultBranch: repo.default_branch,
931
+ projectName: repoAssignment.project.name,
932
+ });
965
933
  return;
966
934
  }
967
- console.error(`
968
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
969
- ⚠️ WORKDIR REQUIRED
970
-
971
- You have a project assignment with a repository, but no workdir is configured.
972
-
973
- Project: ${repoAssignment.project.name}
974
- Repo: ${repo.full_name}
975
- Branch: ${repo.default_branch}
976
-
977
- Option 1: Set workdir in project settings (recommended)
978
- - Go to AgentMesh HQ → Projects → ${repoAssignment.project.name} → Settings
979
- - Set the workdir field to the local path
980
-
981
- Option 2: Set up workspace manually and pass --workdir:
982
-
983
- mkdir -p ${suggestedPath}
984
- git clone ${repo.url} ${suggestedPath}
985
- cd ${suggestedPath} && git checkout ${repo.default_branch}
986
-
987
- Then start the agent with:
988
-
989
- agentmesh start -n ${this.agentName} --workdir ${suggestedPath}
990
-
991
- Option 3: Use --auto-setup to automatically clone the repository:
992
-
993
- agentmesh start -n ${this.agentName} --auto-setup
994
-
995
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
996
- `);
935
+ console.error(renderMissingWorkdirMessage({
936
+ projectName: repoAssignment.project.name,
937
+ repoFullName: repo.full_name,
938
+ repoUrl: repo.url,
939
+ defaultBranch: repo.default_branch,
940
+ suggestedPath,
941
+ agentName: this.agentName,
942
+ }));
997
943
  // No session to clean up - we haven't created it yet
998
944
  process.exit(1);
999
945
  }
@@ -1004,78 +950,16 @@ Option 3: Use --auto-setup to automatically clone the repository:
1004
950
  console.log("Could not fetch assignments:", error.message);
1005
951
  }
1006
952
  }
1007
- /**
1008
- * Sets up workspace by cloning repository or using existing clone
1009
- * Returns the absolute path to the workspace
1010
- */
1011
- setupWorkspace(workspacePath, repoUrl, defaultBranch, projectName) {
1012
- console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
1013
- console.log(`🔧 AUTO-SETUP: Setting up workspace for ${projectName}`);
1014
- console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n`);
1015
- // Check if directory already exists and is a git repo
1016
- const gitDir = path.join(workspacePath, ".git");
1017
- if (fs.existsSync(gitDir)) {
1018
- console.log(`✓ Workspace already exists: ${workspacePath}`);
1019
- console.log(` Updating from remote...`);
1020
- try {
1021
- // Fetch and checkout the branch
1022
- execSync(`git fetch origin`, { cwd: workspacePath, stdio: "inherit" });
1023
- execSync(`git checkout ${defaultBranch}`, { cwd: workspacePath, stdio: "inherit" });
1024
- execSync(`git pull origin ${defaultBranch}`, { cwd: workspacePath, stdio: "inherit" });
1025
- console.log(`✓ Workspace updated successfully\n`);
1026
- }
1027
- catch (error) {
1028
- console.warn(`⚠ Could not update workspace: ${error.message}`);
1029
- console.log(` Continuing with existing state...\n`);
1030
- }
1031
- return workspacePath;
1032
- }
1033
- // Create parent directories
1034
- const parentDir = path.dirname(workspacePath);
1035
- if (!fs.existsSync(parentDir)) {
1036
- console.log(`Creating directory: ${parentDir}`);
1037
- fs.mkdirSync(parentDir, { recursive: true });
1038
- }
1039
- // Clone the repository
1040
- console.log(`Cloning repository...`);
1041
- console.log(` URL: ${repoUrl}`);
1042
- console.log(` Path: ${workspacePath}`);
1043
- console.log(` Branch: ${defaultBranch}\n`);
1044
- try {
1045
- execSync(`git clone --branch ${defaultBranch} "${repoUrl}" "${workspacePath}"`, {
1046
- stdio: "inherit",
1047
- });
1048
- console.log(`\n✓ Repository cloned successfully`);
1049
- }
1050
- catch (error) {
1051
- console.error(`\n✗ Failed to clone repository: ${error.message}`);
1052
- console.error(`\nMake sure you have access to the repository and SSH keys are configured.`);
1053
- // No session to clean up - we haven't created it yet
1054
- process.exit(1);
1055
- }
1056
- console.log(`✓ Workspace ready: ${workspacePath}\n`);
1057
- return workspacePath;
1058
- }
1059
953
  /**
1060
954
  * Ensures the sandbox OpenCode config exists
1061
955
  * Creates ~/.agentmesh/opencode-sandbox.json with permissive permissions and model
1062
956
  */
1063
957
  ensureSandboxOpencodeConfig() {
1064
- const configDir = path.dirname(SANDBOX_OPENCODE_CONFIG_PATH);
1065
- if (!fs.existsSync(configDir)) {
1066
- fs.mkdirSync(configDir, { recursive: true });
1067
- }
1068
- // Build config with model if available
1069
- const config = {
1070
- ...SANDBOX_OPENCODE_CONFIG,
1071
- };
1072
- // Include model from runner config
1073
- const model = this.runnerConfig.env?.OPENCODE_MODEL;
1074
- if (model) {
1075
- config.model = model;
1076
- }
1077
- // Always write to ensure model is up to date
1078
- fs.writeFileSync(SANDBOX_OPENCODE_CONFIG_PATH, JSON.stringify(config, null, 2));
958
+ writeSandboxOpencodeConfig({
959
+ configPath: SANDBOX_OPENCODE_CONFIG_PATH,
960
+ baseConfig: SANDBOX_OPENCODE_CONFIG,
961
+ model: this.runnerConfig.env?.OPENCODE_MODEL,
962
+ });
1079
963
  console.log(`Updated sandbox OpenCode config: ${SANDBOX_OPENCODE_CONFIG_PATH}`);
1080
964
  }
1081
965
  }