npm - @agentmeshhq/agent - Versions diffs - 0.4.5 → 0.4.10 - Mend

@agentmeshhq/agent 0.4.5 → 0.4.10

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (71) hide show

package/dist/__tests__/auth-doctor-integration.test.d.ts +14 -0
package/dist/__tests__/auth-doctor-integration.test.js +130 -0
package/dist/__tests__/auth-doctor-integration.test.js.map +1 -0
package/dist/__tests__/auth-guard.integration.test.d.ts +12 -0
package/dist/__tests__/auth-guard.integration.test.js +132 -0
package/dist/__tests__/auth-guard.integration.test.js.map +1 -0
package/dist/__tests__/auth-guard.test.d.ts +17 -0
package/dist/__tests__/auth-guard.test.js +483 -0
package/dist/__tests__/auth-guard.test.js.map +1 -0
package/dist/__tests__/done-state-guard.integration.test.d.ts +1 -0
package/dist/__tests__/done-state-guard.integration.test.js +281 -0
package/dist/__tests__/done-state-guard.integration.test.js.map +1 -0
package/dist/__tests__/done-state-guard.test.d.ts +1 -0
package/dist/__tests__/done-state-guard.test.js +327 -0
package/dist/__tests__/done-state-guard.test.js.map +1 -0
package/dist/__tests__/session-recovery.test.d.ts +1 -0
package/dist/__tests__/session-recovery.test.js +16 -0
package/dist/__tests__/session-recovery.test.js.map +1 -0
package/dist/__tests__/tmux-runtime.test.d.ts +1 -0
package/dist/__tests__/tmux-runtime.test.js +113 -0
package/dist/__tests__/tmux-runtime.test.js.map +1 -0
package/dist/cli/auth.d.ts +11 -0
package/dist/cli/auth.js +92 -0
package/dist/cli/auth.js.map +1 -0
package/dist/cli/index.js +45 -1
package/dist/cli/index.js.map +1 -1
package/dist/cli/local.d.ts +4 -2
package/dist/cli/local.js +257 -108
package/dist/cli/local.js.map +1 -1
package/dist/cli/migrate.d.ts +1 -0
package/dist/cli/migrate.js +14 -10
package/dist/cli/migrate.js.map +1 -1
package/dist/cli/start.d.ts +2 -0
package/dist/cli/start.js +3 -0
package/dist/cli/start.js.map +1 -1
package/dist/cli/test.d.ts +1 -0
package/dist/cli/test.js +15 -9
package/dist/cli/test.js.map +1 -1
package/dist/config/schema.d.ts +11 -0
package/dist/config/schema.js.map +1 -1
package/dist/core/auth-guard.d.ts +155 -0
package/dist/core/auth-guard.js +498 -0
package/dist/core/auth-guard.js.map +1 -0
package/dist/core/auth-sync.d.ts +105 -0
package/dist/core/auth-sync.js +263 -0
package/dist/core/auth-sync.js.map +1 -0
package/dist/core/daemon/context-template.js +65 -0
package/dist/core/daemon/context-template.js.map +1 -1
package/dist/core/daemon/done-state-guard.d.ts +63 -0
package/dist/core/daemon/done-state-guard.js +102 -0
package/dist/core/daemon/done-state-guard.js.map +1 -0
package/dist/core/daemon/session-recovery.d.ts +1 -0
package/dist/core/daemon/session-recovery.js +7 -0
package/dist/core/daemon/session-recovery.js.map +1 -0
package/dist/core/daemon/tmux-session.d.ts +1 -0
package/dist/core/daemon/tmux-session.js +1 -1
package/dist/core/daemon/tmux-session.js.map +1 -1
package/dist/core/daemon.d.ts +18 -1
package/dist/core/daemon.js +220 -35
package/dist/core/daemon.js.map +1 -1
package/dist/core/registry.d.ts +9 -1
package/dist/core/registry.js +28 -1
package/dist/core/registry.js.map +1 -1
package/dist/core/tmux-runtime.d.ts +11 -2
package/dist/core/tmux-runtime.js +45 -19
package/dist/core/tmux-runtime.js.map +1 -1
package/dist/core/tmux.d.ts +1 -1
package/dist/core/tmux.js +7 -3
package/dist/core/tmux.js.map +1 -1
package/package.json +12 -11
package/LICENSE +0 -21

package/dist/core/daemon.d.ts CHANGED Viewed

@@ -28,6 +28,8 @@ export interface DaemonOptions {
     role?: string;
     /** Auto-accept pending handoffs in worker mode (default: enabled for --worker) */
     autoAcceptHandoffs?: boolean;
+    /** Run agent in fully autonomous mode — injects runtime-specific non-interactive flags */
+    autonomous?: boolean;
 }
 export declare class AgentDaemon {
     private agentName;
@@ -55,16 +57,20 @@ export declare class AgentDaemon {
     private projectCode;
     private projectRole;
     private autoAcceptHandoffs;
+    private autonomous;
     private healthCheckInterval;
     private stopCleanupScheduler;
+    private authHealthWatcher;
     private _preStartSessionId;
     private _attemptedResumeSessionId;
     private stuckSince;
-    private nudgeSentAt;
     private lastPendingHandoffAlertAt;
     private remoteAutomationPaused;
     private lastAutonomyPolicyFetchAt;
     private pendingClaimCreations;
+    private sessionRecoveryAttempts;
+    private lastSessionRecoveryAt;
+    private initialInboxCheckComplete;
     constructor(options: DaemonOptions);
     start(): Promise<void>;
     /**
@@ -74,6 +80,7 @@ export declare class AgentDaemon {
     private autoAcceptPendingHandoffs;
     private autoAcceptHandoffFromEvent;
     private isAutomationPaused;
+    private sweepInboxOnWebSocketConnect;
     private refreshRemoteAutonomyPolicy;
     private acceptHandoffWithRetry;
     private checkPendingHandoffSla;
@@ -88,6 +95,7 @@ export declare class AgentDaemon {
      * Handles session death - logs crash and attempts auto-restart
      */
     private handleSessionDeath;
+    private tryRecoverSession;
     /**
      * Handles stuck agent - sends nudge first, then restarts if still stuck
      */
@@ -115,6 +123,15 @@ export declare class AgentDaemon {
      * Resolves workdir from --project flag: looks up project by code, clones repo, self-assigns.
      */
     private resolveProjectWorkdir;
+    /**
+     * Evaluates whether this restart should resume in-flight work or come up idle.
+     *
+     * Pulls claims, inbox, and recent handoff history from HQ, then delegates to
+     * the pure `evaluateRestartState` function for the actual decision.
+     *
+     * Failures are non-fatal — defaults to `idle` so we fail safe.
+     */
+    private evaluateDoneStateGuard;
     /**
      * Fetches assignments from HQ and validates workdir setup
      * Uses project.workdir from HQ as source of truth, falls back to helpful instructions

package/dist/core/daemon.js CHANGED Viewed

@@ -4,21 +4,24 @@ import os from "node:os";
 import path from "node:path";
 import { getAgentState, loadState, updateAgentInState } from "../config/loader.js";
 import { loadContext, loadOrCreateContext, saveContext } from "../context/index.js";
+import { preflightAgentAuth, startAuthHealthWatcher, } from "./auth-guard.js";
 import { startCleanupScheduler } from "./cleanup/scheduler.js";
 import { renderMissingWorkdirMessage } from "./daemon/assignment-message.js";
 import { bootstrapDaemon } from "./daemon/bootstrap.js";
 import { removeClaudeMd, writeClaudeMd } from "./daemon/context-template.js";
 import { formatCrashLog } from "./daemon/crash-log.js";
+import { evaluateRestartState, filterActiveClaimsForAgent, filterCompletedHandoffsForAgent, formatRestartLifecycleLog, } from "./daemon/done-state-guard.js";
 import { cleanupGitAuth, setupGitAuth } from "./daemon/git-auth.js";
-import { getNudgeMessage, getStuckDetail, isWithinNudgeWaitWindow, } from "./daemon/health-policy.js";
+import { getStuckDetail } from "./daemon/health-policy.js";
 import { writeSandboxOpencodeConfig } from "./daemon/sandbox-config.js";
+import { isRecoverableSessionFailure } from "./daemon/session-recovery.js";
 import { captureAgentChildPids, persistRunningState } from "./daemon/state.js";
 import { startTmuxRuntimeSession } from "./daemon/tmux-session.js";
 import { configureGitIdentity, setupWorkspace, updateWorkspaceFromRemote, validatePushAccess, } from "./daemon/workspace.js";
 import { findPendingHandoffBreaches } from "./handoff-sla.js";
 import { Heartbeat } from "./heartbeat.js";
-import { handleWebSocketEvent, injectOnboardMessage, injectRestoredContext, injectStartupMessage, } from "./injector.js";
-import { checkInbox, createClaim, createSelfAssignment, fetchAssignments, fetchOnboard, fetchProjectByCode, getAgentAutonomyState, getHandoff, listClaims, registerAgent, releaseClaim, updateHandoffStatusWithRetry, } from "./registry.js";
+import { handleWebSocketEvent, injectInboxItems, injectOnboardMessage, injectRestoredContext, injectStartupMessage, } from "./injector.js";
+import { checkInbox, createClaim, createSelfAssignment, fetchAssignments, fetchHandoffsForAgent, fetchOnboard, fetchProjectByCode, getAgentAutonomyState, getHandoff, listClaims, registerAgent, releaseClaim, updateHandoffStatusWithRetry, } from "./registry.js";
 import { getRunnerDisplayName } from "./runner.js";
 import { DockerSandbox } from "./sandbox.js";
 import { getLatestSessionId, waitForNewSessionId } from "./session-id.js";
@@ -26,10 +29,11 @@ import { captureSessionContext, captureSessionOutput, destroySession, isSessionH
 import { prepareOpenCodeRuntime } from "./tmux-runtime.js";
 import { checkAgentProgress, cleanupOrphanContainers, isProcessRunning, sendNudge, } from "./watchdog.js";
 import { AgentWebSocket } from "./websocket.js";
-// Time to wait after nudging before marking as stuck (2 minutes)
-const NUDGE_WAIT_MS = 2 * 60 * 1000;
-const PENDING_HANDOFF_SLA_MINUTES = 5;
-const PENDING_HANDOFF_ALERT_COOLDOWN_MS = 5 * 60 * 1000;
+// SLA breach alert thresholds — configurable via env vars
+// AGENTMESH_HANDOFF_SLA_MINUTES: minutes before a pending handoff is considered a breach (default 5)
+// AGENTMESH_HANDOFF_SLA_COOLDOWN_MS: ms between repeated SLA alerts for the same breach (default 5 min)
+const PENDING_HANDOFF_SLA_MINUTES = Number(process.env.AGENTMESH_HANDOFF_SLA_MINUTES ?? 5);
+const PENDING_HANDOFF_ALERT_COOLDOWN_MS = Number(process.env.AGENTMESH_HANDOFF_SLA_COOLDOWN_MS ?? 5 * 60 * 1000);
 const AUTO_CLAIM_SCOPE_PREFIX = "handoff:";
 const AUTO_CLAIM_TTL_SECONDS = 1800;
 // Path to the sandbox OpenCode config (permissive permissions)
@@ -65,18 +69,22 @@ export class AgentDaemon {
     projectCode;
     projectRole;
     autoAcceptHandoffs;
+    autonomous;
     healthCheckInterval = null;
     stopCleanupScheduler = null;
+    authHealthWatcher = null;
     // Session resume tracking
     _preStartSessionId;
     _attemptedResumeSessionId;
     // Stuck detection tracking
     stuckSince = null;
-    nudgeSentAt = null;
     lastPendingHandoffAlertAt = null;
     remoteAutomationPaused = false;
     lastAutonomyPolicyFetchAt = null;
     pendingClaimCreations = new Set();
+    sessionRecoveryAttempts = 0;
+    lastSessionRecoveryAt = null;
+    initialInboxCheckComplete = false;
     constructor(options) {
         const boot = bootstrapDaemon(options);
         this.config = boot.config;
@@ -94,6 +102,7 @@ export class AgentDaemon {
         this.projectCode = boot.projectCode;
         this.projectRole = boot.projectRole;
         this.autoAcceptHandoffs = boot.autoAcceptHandoffs;
+        this.autonomous = options.autonomous ?? false;
         this.runnerConfig = boot.runnerConfig;
         const runnerName = getRunnerDisplayName(this.runnerConfig.type);
         console.log(`Runner: ${runnerName}`);
@@ -130,6 +139,13 @@ export class AgentDaemon {
         // Register with hub first (needed for assignment check)
         console.log("Registering with AgentMesh hub...");
         console.log(`Existing state: ${existingState ? `agentId=${existingState.agentId}` : "none"}`);
+        // Derive agent_type from runtime flags when not explicitly set in config.
+        // - explicit agentConfig.agentType always wins
+        // - --worker → "worker" (requires team_id on hub)
+        // - --autonomous (no --worker) → "autonomous" (standalone, visible, no team needed)
+        // - neither → "system" (hidden background agent)
+        const effectiveAgentType = this.agentConfig.agentType ??
+            (this.isWorkerAgent ? "worker" : this.autonomous ? "autonomous" : "system");
         const registration = await registerAgent({
             url: this.config.hubUrl,
             apiKey: this.config.apiKey,
@@ -138,6 +154,7 @@ export class AgentDaemon {
             agentName: this.agentName,
             model: this.agentConfig.model || this.config.defaults.model,
             restoreContext: this.shouldRestoreContext,
+            agentType: effectiveAgentType,
         });
         this.agentId = registration.agentId;
         this.token = registration.token;
@@ -214,6 +231,17 @@ export class AgentDaemon {
                     `Use --serve-port to specify a different port.`);
             }
         }
+        // Preflight: ensure per-agent auth symlink is valid before launching runner (Epic #470)
+        if (this.runnerConfig.type === "opencode") {
+            const { ok, result } = preflightAgentAuth(this.agentName);
+            if (!ok) {
+                console.warn(`[AUTH] Startup preflight failed for ${this.agentName}: ${result.message}`);
+                console.warn("[AUTH] Agent may fail provider calls. Run: agentmesh auth doctor --repair");
+            }
+            else if (result.status === "repaired") {
+                console.log(`[AUTH] Auth repaired at startup: ${result.message}`);
+            }
+        }
         // Choose runtime mode: sandbox > serve > tmux
         if (this.sandboxMode) {
             await this.startSandboxMode();
@@ -229,6 +257,7 @@ export class AgentDaemon {
                 workdir: this.agentConfig.workdir,
                 runnerEnv: this.runnerConfig.env,
                 shouldRestoreContext: this.shouldRestoreContext,
+                autonomous: this.autonomous,
             });
             this._preStartSessionId = sessionStart.preStartSessionId;
             this._attemptedResumeSessionId = sessionStart.attemptedResumeSessionId;
@@ -304,6 +333,7 @@ export class AgentDaemon {
                         },
                         onConnect: () => {
                             console.log("WebSocket reconnected with new token");
+                            void this.sweepInboxOnWebSocketConnect();
                         },
                         onDisconnect: () => {
                             console.log("WebSocket disconnected");
@@ -333,6 +363,7 @@ export class AgentDaemon {
             },
             onConnect: () => {
                 console.log("WebSocket connected");
+                void this.sweepInboxOnWebSocketConnect();
             },
             onDisconnect: () => {
                 console.log("WebSocket disconnected");
@@ -345,17 +376,40 @@ export class AgentDaemon {
         // Wait for TUI to initialize before injecting messages
         await new Promise((resolve) => setTimeout(resolve, 3000));
         await this.refreshRemoteAutonomyPolicy(true);
+        // -----------------------------------------------------------------------
+        // Done-state guard (Epic #497): determine restart state before injecting
+        // any work. If prior cycle is done, come up idle and skip auto-accept.
+        // -----------------------------------------------------------------------
+        const restartDecision = await this.evaluateDoneStateGuard();
+        console.log(formatRestartLifecycleLog(restartDecision));
+        updateAgentInState(this.agentName, {
+            lastRestartState: restartDecision.state,
+            lastRestartReason: restartDecision.reason,
+            lastRestartDecisionAt: new Date().toISOString(),
+        });
         // Check inbox and auto-nudge with full handoff details
         console.log("Checking inbox...");
         try {
             const inboxItems = await checkInbox(this.config.hubUrl, this.config.workspace, this.token);
-            const remainingItems = await this.autoAcceptPendingHandoffs(inboxItems);
-            injectStartupMessage(this.agentName, remainingItems.length, remainingItems);
+            // If the done-state guard says prior work is done, do NOT auto-accept inbox
+            // items from the stale cycle — come up idle and wait for a fresh handoff.
+            if (restartDecision.state === "idle" || restartDecision.state === "blocked") {
+                console.log(`[RESTART] Skipping auto-accept: agent is ${restartDecision.state}. ` +
+                    "Any inbox items will be surfaced but not auto-claimed.");
+                injectStartupMessage(this.agentName, inboxItems.length, inboxItems);
+            }
+            else {
+                const remainingItems = await this.autoAcceptPendingHandoffs(inboxItems);
+                injectStartupMessage(this.agentName, remainingItems.length, remainingItems);
+            }
         }
         catch (error) {
             console.error("Failed to check inbox:", error);
             injectStartupMessage(this.agentName, 0);
         }
+        finally {
+            this.initialInboxCheckComplete = true;
+        }
         // Inject onboard project context
         if (this.onboardData?.project) {
             await new Promise((resolve) => setTimeout(resolve, 1000));
@@ -548,6 +602,21 @@ Nudge agent:
         const state = getAgentState(this.agentName);
         return state?.automationPaused === true || this.remoteAutomationPaused;
     }
+    async sweepInboxOnWebSocketConnect() {
+        if (!this.token || !this.initialInboxCheckComplete) {
+            return;
+        }
+        try {
+            const inboxItems = await checkInbox(this.config.hubUrl, this.config.workspace, this.token);
+            const remainingItems = await this.autoAcceptPendingHandoffs(inboxItems);
+            if (remainingItems.length > 0) {
+                injectInboxItems(this.agentName, remainingItems);
+            }
+        }
+        catch (error) {
+            console.warn(`[WS] Failed inbox sweep on connect: ${error.message}`);
+        }
+    }
     async refreshRemoteAutonomyPolicy(force = false) {
         if (!this.token || !this.agentId) {
             return;
@@ -714,6 +783,19 @@ Nudge agent:
         // Skip health monitoring for serve mode (no tmux session)
         if (this.serveMode)
             return;
+        // Start periodic auth healthcheck for opencode runners (Epic #470)
+        if (this.runnerConfig.type === "opencode") {
+            this.authHealthWatcher = startAuthHealthWatcher(this.agentName, (event) => {
+                if (event.type === "auth-health-degraded") {
+                    console.warn(`[AUTH] ${event.message}`);
+                    console.warn("[AUTH] Run: agentmesh auth doctor --repair");
+                }
+                else if (event.type === "auth-health-repaired") {
+                    console.log(`[AUTH] ${event.message}`);
+                }
+                // auth-health-ok is silent to avoid log noise
+            });
+        }
         const logDir = path.join(os.homedir(), ".agentmesh", "logs");
         if (!fs.existsSync(logDir)) {
             fs.mkdirSync(logDir, { recursive: true });
@@ -730,6 +812,11 @@ Nudge agent:
                 await this.handleSessionDeath(health.reason || "unknown", logDir);
                 return;
             }
+            // Healthy again - clear recovery counters
+            if (this.sessionRecoveryAttempts > 0) {
+                this.sessionRecoveryAttempts = 0;
+                this.lastSessionRecoveryAt = null;
+            }
             // Session is alive - check progress watchdog
             const progress = checkAgentProgress(this.agentName, containerName);
             if (progress.status === "waiting_for_human") {
@@ -737,7 +824,6 @@ Nudge agent:
                 if (this.stuckSince) {
                     // Clear any prior stuck tracking since the agent signalled a legitimate wait
                     this.stuckSince = null;
-                    this.nudgeSentAt = null;
                     updateAgentInState(this.agentName, { stuckSince: undefined, status: "waiting" });
                 }
                 console.log(`[HEALTH] Agent is waiting for human input: ${progress.details}`);
@@ -750,7 +836,6 @@ Nudge agent:
                 if (this.stuckSince) {
                     console.log(`[HEALTH] Agent resumed activity`);
                     this.stuckSince = null;
-                    this.nudgeSentAt = null;
                     updateAgentInState(this.agentName, { stuckSince: undefined, status: "running" });
                 }
             }
@@ -781,6 +866,15 @@ Nudge agent:
             lastOutput,
         });
         fs.appendFileSync(logFile, crashLog);
+        // Recoverable local tmux failures should self-heal in worker mode.
+        const recovered = await this.tryRecoverSession(reason);
+        if (recovered) {
+            console.warn(`[RECOVERY] Session recovered after "${reason}"`);
+            updateAgentInState(this.agentName, {
+                status: "running",
+            });
+            return;
+        }
         // Save context before marking as failed
         if (this.agentId) {
             this.saveAgentContext();
@@ -799,6 +893,71 @@ Nudge agent:
             this.healthCheckInterval = null;
         }
     }
+    async tryRecoverSession(reason) {
+        if (!this.isWorkerAgent || this.serveMode || this.sandboxMode) {
+            return false;
+        }
+        if (!isRecoverableSessionFailure(reason)) {
+            return false;
+        }
+        const now = Date.now();
+        if (this.lastSessionRecoveryAt &&
+            now - this.lastSessionRecoveryAt.getTime() < 15_000 &&
+            this.sessionRecoveryAttempts >= 2) {
+            return false;
+        }
+        this.sessionRecoveryAttempts += 1;
+        this.lastSessionRecoveryAt = new Date(now);
+        try {
+            console.warn(`[RECOVERY] Attempt ${this.sessionRecoveryAttempts}: recreating session for ${this.agentName}`);
+            const sessionStart = startTmuxRuntimeSession({
+                agentName: this.agentName,
+                agentId: this.agentId,
+                command: this.agentConfig.command,
+                workdir: this.agentConfig.workdir,
+                runnerEnv: this.runnerConfig.env,
+                shouldRestoreContext: false,
+                autonomous: this.autonomous,
+            });
+            this._preStartSessionId = sessionStart.preStartSessionId;
+            this._attemptedResumeSessionId = sessionStart.attemptedResumeSessionId;
+            if (this.token && this.agentId) {
+                updateSessionEnvironment(this.agentName, {
+                    AGENT_TOKEN: this.token,
+                    AGENTMESH_AGENT_ID: this.agentId,
+                });
+            }
+            await new Promise((resolve) => setTimeout(resolve, 1500));
+            const health = isSessionHealthy(this.agentName);
+            if (!health.healthy) {
+                return false;
+            }
+            if (this.token) {
+                // Re-evaluate done-state guard on session recovery (Epic #497)
+                const recoveryDecision = await this.evaluateDoneStateGuard();
+                console.log(`[RECOVERY] ${formatRestartLifecycleLog(recoveryDecision)}`);
+                updateAgentInState(this.agentName, {
+                    lastRestartState: recoveryDecision.state,
+                    lastRestartReason: recoveryDecision.reason,
+                    lastRestartDecisionAt: new Date().toISOString(),
+                });
+                const inboxItems = await checkInbox(this.config.hubUrl, this.config.workspace, this.token);
+                if (recoveryDecision.state === "idle" || recoveryDecision.state === "blocked") {
+                    console.log(`[RECOVERY] Prior work done — coming up ${recoveryDecision.state}, not auto-resuming.`);
+                    injectStartupMessage(this.agentName, inboxItems.length, inboxItems);
+                }
+                else {
+                    const remainingItems = await this.autoAcceptPendingHandoffs(inboxItems);
+                    injectStartupMessage(this.agentName, remainingItems.length, remainingItems);
+                }
+            }
+            return true;
+        }
+        catch (error) {
+            console.warn(`[RECOVERY] Session recovery failed: ${error.message}`);
+            return false;
+        }
+    }
     /**
      * Handles stuck agent - sends nudge first, then restarts if still stuck
      */
@@ -813,34 +972,15 @@ Nudge agent:
                 status: "stuck",
             });
         }
-        // Nudge worker agents — don't escalate to restart
+        // Worker agents: log the stuck state but do not auto-nudge.
+        // Auto-nudging interrupts agents mid-task and causes more harm than good.
+        // Operators can nudge manually via CLI or the hub API if needed.
         if (this.isWorkerAgent) {
-            // If we haven't sent a nudge yet, send one
-            if (!this.nudgeSentAt) {
-                console.log(`[HEALTH] Sending nudge to worker agent...`);
-                const nudgeMessage = getNudgeMessage(progress);
-                const sent = sendNudge(this.agentName, nudgeMessage);
-                if (sent) {
-                    this.nudgeSentAt = now;
-                    console.log(`[HEALTH] Nudge sent successfully`);
-                }
-                else {
-                    console.log(`[HEALTH] Failed to send nudge`);
-                }
-                return;
-            }
-            // Check if enough time has passed since nudge
-            if (isWithinNudgeWaitWindow(this.nudgeSentAt, NUDGE_WAIT_MS, now)) {
-                // Still waiting for agent to respond to nudge
-                return;
-            }
-            // Nudge grace period expired — log warning but do NOT restart
-            console.log(`[HEALTH] Agent still stuck after nudge. Manual intervention required.`);
+            console.log(`[HEALTH] Worker agent stuck — manual intervention required if needed.`);
             updateAgentInState(this.agentName, {
                 status: "waiting",
             });
             void this.releaseAllAutoClaims("worker waiting for human intervention");
-            sendNudge(this.agentName, "[AgentMesh] Worker still blocked after nudge. Please request human intervention or resume once approvals are available.");
         }
     }
     async stop() {
@@ -856,6 +996,11 @@ Nudge agent:
             this.stopCleanupScheduler();
             this.stopCleanupScheduler = null;
         }
+        // Stop auth health watcher
+        if (this.authHealthWatcher) {
+            this.authHealthWatcher.stop();
+            this.authHealthWatcher = null;
+        }
         // Save context before stopping
         if (this.agentId) {
             console.log("Saving agent context...");
@@ -1187,6 +1332,46 @@ Logs:    docker logs ${containerName}
             console.warn(`Could not auto-assign to project: ${error.message}`);
         }
     }
+    // ---------------------------------------------------------------------------
+    // Done-state guard (Epic #497)
+    // ---------------------------------------------------------------------------
+    /**
+     * Evaluates whether this restart should resume in-flight work or come up idle.
+     *
+     * Pulls claims, inbox, and recent handoff history from HQ, then delegates to
+     * the pure `evaluateRestartState` function for the actual decision.
+     *
+     * Failures are non-fatal — defaults to `idle` so we fail safe.
+     */
+    async evaluateDoneStateGuard() {
+        const safeIdle = (reason) => ({
+            state: "idle",
+            reason,
+        });
+        if (!this.token || !this.agentId) {
+            return safeIdle("no token or agentId — cannot evaluate done-state");
+        }
+        try {
+            const [claimsRaw, inboxRaw, handoffsRaw] = await Promise.all([
+                listClaims(this.config.hubUrl, this.config.workspace, this.token).catch(() => []),
+                checkInbox(this.config.hubUrl, this.config.workspace, this.token).catch(() => []),
+                fetchHandoffsForAgent(this.config.hubUrl, this.config.workspace, this.token, this.agentId).catch(() => []),
+            ]);
+            const activeClaims = filterActiveClaimsForAgent(claimsRaw, this.agentId);
+            const completedHandoffs = filterCompletedHandoffsForAgent(handoffsRaw, this.agentId);
+            return evaluateRestartState({
+                activeClaims,
+                inboxItems: inboxRaw,
+                completedHandoffs,
+                automationPaused: this.isAutomationPaused(),
+            });
+        }
+        catch (error) {
+            // Fail safe to idle — do not speculatively resume on error
+            console.warn(`[RESTART] Done-state guard error (defaulting to idle): ${error.message}`);
+            return safeIdle(`guard evaluation failed: ${error.message}`);
+        }
+    }
     /**
      * Fetches assignments from HQ and validates workdir setup
      * Uses project.workdir from HQ as source of truth, falls back to helpful instructions