neohive 6.0.2 → 6.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -19,9 +19,12 @@ const _agents = require('./lib/agents');
19
19
  const _messaging = require('./lib/messaging');
20
20
  const _compact = require('./lib/compact');
21
21
 
22
- // --- Structured logging ---
23
- const LOG_LEVEL = (process.env.NEOHIVE_LOG_LEVEL || 'warn').toLowerCase();
22
+ const DATA_DIR = _config.DATA_DIR;
23
+
24
+ const _envLog = process.env.NEOHIVE_LOG_LEVEL;
25
+ const LOG_LEVEL = (_envLog != null && String(_envLog).trim() !== '' ? String(_envLog).trim() : 'warn').toLowerCase();
24
26
  const LOG_LEVELS = { error: 0, warn: 1, info: 2, debug: 3 };
27
+
25
28
  const log = {
26
29
  error: (...args) => { if (LOG_LEVELS[LOG_LEVEL] >= 0) process.stderr.write('[NEOHIVE:ERROR] ' + args.map(String).join(' ') + '\n'); },
27
30
  warn: (...args) => { if (LOG_LEVELS[LOG_LEVEL] >= 1) process.stderr.write('[NEOHIVE:WARN] ' + args.map(String).join(' ') + '\n'); },
@@ -29,8 +32,17 @@ const log = {
29
32
  debug: (...args) => { if (LOG_LEVELS[LOG_LEVEL] >= 3) process.stderr.write('[NEOHIVE:DEBUG] ' + args.map(String).join(' ') + '\n'); },
30
33
  };
31
34
 
32
- // Data dir lives in the project where Claude Code runs, not where the package is installed
33
- const DATA_DIR = process.env.NEOHIVE_DATA_DIR || path.join(process.cwd(), '.neohive');
35
+ const _rawNeohiveEnv = String(process.env.NEOHIVE_DATA_DIR || '');
36
+ if (_rawNeohiveEnv && /\$\{|\$\s*workspaceFolder/i.test(_rawNeohiveEnv)) {
37
+ log.warn('[neohive] NEOHIVE_DATA_DIR looks unexpanded (' + _rawNeohiveEnv.substring(0, 60) + '…). Node will not substitute ${workspaceFolder}. Use an absolute path (re-run npx neohive init --cursor) or set env in Cursor. Effective DATA_DIR=' + DATA_DIR);
38
+ }
39
+
40
+ // Auto-migrate from .agent-bridge/ to .neohive/ (v5 → v6 rename)
41
+ const _legacyDir = path.join(path.dirname(DATA_DIR), '.agent-bridge');
42
+ if (!fs.existsSync(DATA_DIR) && fs.existsSync(_legacyDir)) {
43
+ try { fs.renameSync(_legacyDir, DATA_DIR); } catch {}
44
+ }
45
+
34
46
  const MESSAGES_FILE = path.join(DATA_DIR, 'messages.jsonl');
35
47
  const HISTORY_FILE = path.join(DATA_DIR, 'history.jsonl');
36
48
  const AGENTS_FILE = path.join(DATA_DIR, 'agents.json');
@@ -46,11 +58,12 @@ const LOCKS_FILE = path.join(DATA_DIR, 'locks.json');
46
58
  const PROGRESS_FILE = path.join(DATA_DIR, 'progress.json');
47
59
  const VOTES_FILE = path.join(DATA_DIR, 'votes.json');
48
60
  const REVIEWS_FILE = path.join(DATA_DIR, 'reviews.json');
61
+ const NOTIFICATIONS_FILE = path.join(DATA_DIR, 'notifications.json');
49
62
  const DEPS_FILE = path.join(DATA_DIR, 'dependencies.json');
50
63
  const REPUTATION_FILE = path.join(DATA_DIR, 'reputation.json');
51
64
  const COMPRESSED_FILE = path.join(DATA_DIR, 'compressed.json');
52
65
  const RULES_FILE = path.join(DATA_DIR, 'rules.json');
53
- // Plugins removed in v3.4.3 — unnecessary attack surface, CLIs have their own extension systems
66
+ const AGENT_CARDS_FILE = path.join(DATA_DIR, 'agent-cards.json');
54
67
 
55
68
  // In-memory state for this process
56
69
  let registeredName = null;
@@ -94,7 +107,7 @@ function lockConfigFile() {
94
107
  while (Date.now() - start < maxWait) {
95
108
  try { fs.writeFileSync(CONFIG_LOCK, String(process.pid), { flag: 'wx' }); return true; }
96
109
  catch { /* lock exists, wait */ }
97
- const wait = Date.now(); while (Date.now() - wait < 50) {} // busy-wait 50ms
110
+ try { Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 50); } catch {} // non-blocking 50ms wait
98
111
  }
99
112
  try { fs.unlinkSync(CONFIG_LOCK); } catch {}
100
113
  try { fs.writeFileSync(CONFIG_LOCK, String(process.pid), { flag: 'wx' }); return true; } catch {}
@@ -242,7 +255,7 @@ function migrateIfNeeded() {
242
255
  if (fs.existsSync(DATA_VERSION_FILE)) {
243
256
  dataVersion = parseInt(fs.readFileSync(DATA_VERSION_FILE, 'utf8').trim()) || 0;
244
257
  }
245
- } catch {}
258
+ } catch (e) { log.debug("data version read failed:", e.message); }
246
259
  if (dataVersion >= CURRENT_DATA_VERSION) return;
247
260
 
248
261
  // Run migrations in order
@@ -254,7 +267,7 @@ function migrateIfNeeded() {
254
267
  try { fs.writeFileSync(DATA_VERSION_FILE, String(CURRENT_DATA_VERSION)); } catch {}
255
268
  }
256
269
 
257
- const RESERVED_NAMES = ['__system__', '__all__', '__open__', '__close__', 'system', 'dashboard', 'Dashboard'];
270
+ const RESERVED_NAMES = ['__system__', '__all__', '__open__', '__close__', '__user__', 'system', 'dashboard', 'Dashboard'];
258
271
 
259
272
  function sanitizeName(name) {
260
273
  if (typeof name !== 'string' || !/^[a-zA-Z0-9_-]{1,20}$/.test(name)) {
@@ -307,7 +320,7 @@ function trimConsumedIds(agentName, ids) {
307
320
  for (const id of ids) {
308
321
  if (!currentIds.has(id)) ids.delete(id);
309
322
  }
310
- } catch {}
323
+ } catch (e) { log.debug("consumed ID trim failed:", e.message); }
311
324
  }
312
325
 
313
326
  function readJsonl(file) {
@@ -368,7 +381,7 @@ function lockAgentsFile() {
368
381
  while (Date.now() - start < maxWait) {
369
382
  try { fs.writeFileSync(AGENTS_LOCK, String(process.pid), { flag: 'wx' }); return true; }
370
383
  catch { /* lock exists, wait with exponential backoff */ }
371
- const wait = Date.now(); while (Date.now() - wait < backoff) {}
384
+ try { Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, backoff); } catch {}
372
385
  backoff = Math.min(backoff * 2, 500);
373
386
  }
374
387
  // Force-break stale lock after timeout
@@ -386,7 +399,7 @@ function withFileLock(filePath, fn) {
386
399
  while (Date.now() - start < maxWait) {
387
400
  try { fs.writeFileSync(lockPath, String(process.pid), { flag: 'wx' }); break; }
388
401
  catch { /* lock exists, wait with exponential backoff */ }
389
- const wait = Date.now(); while (Date.now() - wait < backoff) {}
402
+ try { Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, backoff); } catch {}
390
403
  backoff = Math.min(backoff * 2, 500);
391
404
  if (Date.now() - start >= maxWait) {
392
405
  // Force-break stale lock — only if holding PID is dead
@@ -395,7 +408,7 @@ function withFileLock(filePath, fn) {
395
408
  if (lockPid && lockPid !== process.pid) {
396
409
  try { process.kill(lockPid, 0); /* PID alive — skip, don't corrupt */ return null; } catch { /* PID dead — safe to break */ }
397
410
  }
398
- } catch {}
411
+ } catch (e) { log.debug("lock PID check failed:", e.message); }
399
412
  try { fs.unlinkSync(lockPath); } catch {}
400
413
  try { fs.writeFileSync(lockPath, String(process.pid), { flag: 'wx' }); } catch { return fn(); }
401
414
  break;
@@ -419,20 +432,20 @@ function getAgents() {
419
432
  const hb = JSON.parse(fs.readFileSync(path.join(DATA_DIR, f), 'utf8'));
420
433
  if (hb.last_activity) agents[name].last_activity = hb.last_activity;
421
434
  if (hb.pid) agents[name].pid = hb.pid;
422
- } catch {}
435
+ } catch (e) { log.debug("heartbeat merge failed:", e.message); }
423
436
  }
424
437
  }
425
- } catch {}
438
+ } catch (e) { log.debug("heartbeat scan failed:", e.message); }
426
439
  return agents;
427
440
  }, 1500);
428
441
  }
429
442
 
430
443
  function saveAgents(agents) {
431
- // Safe write: serialize first, then write complete string
432
- // This minimizes the window where the file could be truncated
433
444
  const data = JSON.stringify(agents);
434
445
  if (data && data.length > 2) {
435
446
  fs.writeFileSync(AGENTS_FILE, data);
447
+ } else {
448
+ log.debug('[neohive/agents.json] skipped write (empty {}): ' + AGENTS_FILE);
436
449
  }
437
450
  invalidateCache('agents');
438
451
  }
@@ -447,7 +460,7 @@ function touchHeartbeat(name) {
447
460
  last_activity: new Date().toISOString(),
448
461
  pid: process.pid,
449
462
  }));
450
- } catch {}
463
+ } catch (e) { log.debug("heartbeat write failed:", e.message); }
451
464
  }
452
465
 
453
466
 
@@ -468,8 +481,8 @@ function isPidAlive(pid, lastActivity) {
468
481
  const cached = _pidAliveCache[cacheKey];
469
482
  if (cached && Date.now() - cached.ts < 5000) return cached.alive;
470
483
 
471
- // Faster stale detection in autonomous mode (30s vs 60s) for quicker dead agent recovery
472
- const STALE_THRESHOLD = isAutonomousMode() ? 30000 : 60000;
484
+ // 30s stale threshold — 3x the 10s heartbeat interval, catches dead agents faster
485
+ const STALE_THRESHOLD = 30000;
473
486
  let alive = false;
474
487
 
475
488
  // PRIORITY 1: Trust heartbeat freshness over PID status
@@ -588,6 +601,15 @@ function buildMessageResponse(msg, consumedIds) {
588
601
  }
589
602
  } catch (e) { log.debug('total message estimate failed:', e.message); }
590
603
 
604
+ // Task nudge: remind agent of their outstanding tasks
605
+ let taskReminder;
606
+ try {
607
+ const myTasks = getTasks().filter(t => t.assignee === registeredName && (t.status === 'pending' || t.status === 'in_progress'));
608
+ if (myTasks.length > 0) {
609
+ taskReminder = { pending: myTasks.filter(t => t.status === 'pending').length, in_progress: myTasks.filter(t => t.status === 'in_progress').length, tasks: myTasks.map(t => ({ id: t.id, title: t.title, status: t.status })) };
610
+ }
611
+ } catch (e) { log.debug('task reminder in listen failed:', e.message); }
612
+
591
613
  return {
592
614
  success: true,
593
615
  message: {
@@ -595,11 +617,14 @@ function buildMessageResponse(msg, consumedIds) {
595
617
  from: msg.from,
596
618
  content: msg.content,
597
619
  timestamp: msg.timestamp,
620
+ priority: classifyPriority(msg),
598
621
  ...(msg.reply_to && { reply_to: msg.reply_to }),
599
622
  ...(msg.thread_id && { thread_id: msg.thread_id }),
600
623
  },
601
624
  pending_count: pendingCount,
602
625
  agents_online: agentsOnline,
626
+ coordinator_mode: getConfig().coordinator_mode || 'responsive',
627
+ ...(taskReminder && { task_reminder: taskReminder }),
603
628
  };
604
629
  }
605
630
 
@@ -616,9 +641,11 @@ function autoCompact() {
616
641
 
617
642
  const messages = lines.map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean);
618
643
 
619
- // Collect consumed IDs — for __group__ messages, only check ALIVE agents
644
+ // Collect consumed IDs — for __group__ messages, check ALL registered agents (alive + dead)
645
+ // This prevents message loss when agents reconnect after a crash
620
646
  const agents = getAgents();
621
- const aliveAgentNames = Object.keys(agents).filter(n => isPidAlive(agents[n].pid, agents[n].last_activity));
647
+ const allAgentNames = Object.keys(agents);
648
+ const retentionMs = (parseInt(process.env.NEOHIVE_RETENTION_HOURS) || 24) * 3600000;
622
649
  const allConsumed = new Set();
623
650
  const perAgentConsumed = {};
624
651
  if (fs.existsSync(DATA_DIR)) {
@@ -629,18 +656,23 @@ function autoCompact() {
629
656
  const ids = JSON.parse(fs.readFileSync(path.join(DATA_DIR, f), 'utf8'));
630
657
  perAgentConsumed[agentName] = new Set(ids);
631
658
  ids.forEach(id => allConsumed.add(id));
632
- } catch {}
659
+ } catch (e) { log.debug("consumed ID read failed:", e.message); }
633
660
  }
634
661
  }
635
662
  }
636
663
 
637
664
  // Keep messages that are NOT fully consumed
638
- // For __group__ messages: consumed when ALL ALIVE agents have consumed it (dead agents don't block)
665
+ // For __group__ messages: consumed when ALL registered agents consumed OR message exceeds retention period
639
666
  // For direct messages: consumed when the recipient has consumed it
640
667
  const active = messages.filter(m => {
641
668
  if (m.to === '__group__') {
642
- // __group__: check if all alive agents (except sender) have consumed
643
- return !aliveAgentNames.every(n => n === m.from || (perAgentConsumed[n] && perAgentConsumed[n].has(m.id)));
669
+ // Time-based retention: critical messages get 2x retention
670
+ const msgTime = new Date(m.timestamp).getTime();
671
+ const msgPriority = classifyPriority(m);
672
+ const effectiveRetention = msgPriority === 'critical' ? retentionMs * 2 : retentionMs;
673
+ if (msgTime < Date.now() - effectiveRetention) return false;
674
+ // Check ALL registered agents (alive + dead) to prevent loss on reconnect
675
+ return !allAgentNames.every(n => n === m.from || (perAgentConsumed[n] && perAgentConsumed[n].has(m.id)));
644
676
  }
645
677
  // Direct: standard check
646
678
  if (!allConsumed.has(m.id)) return true;
@@ -657,9 +689,23 @@ function autoCompact() {
657
689
  }
658
690
 
659
691
  // Rewrite messages.jsonl atomically — write to temp file then rename
692
+ // Capture pre-compaction size to detect messages appended during compaction
693
+ const preCompactSize = Buffer.byteLength(content, 'utf8') + 1; // +1 for trailing newline trimmed earlier
660
694
  const newContent = active.map(m => JSON.stringify(m)).join('\n') + (active.length ? '\n' : '');
661
695
  const tmpFile = msgFile + '.tmp';
662
696
  fs.writeFileSync(tmpFile, newContent);
697
+ // Check for messages appended after our initial read
698
+ let lateMessages = '';
699
+ try {
700
+ const currentSize = fs.statSync(msgFile).size;
701
+ if (currentSize > preCompactSize) {
702
+ const fd = fs.openSync(msgFile, 'r');
703
+ const lateBuf = Buffer.alloc(currentSize - preCompactSize);
704
+ fs.readSync(fd, lateBuf, 0, lateBuf.length, preCompactSize);
705
+ fs.closeSync(fd);
706
+ lateMessages = lateBuf.toString('utf8');
707
+ }
708
+ } catch (e) { log.debug('late message check during compaction:', e.message); }
663
709
  try {
664
710
  fs.renameSync(tmpFile, msgFile);
665
711
  } catch {
@@ -668,7 +714,12 @@ function autoCompact() {
668
714
  try { fs.unlinkSync(tmpFile); } catch {}
669
715
  return;
670
716
  }
671
- lastReadOffset = Buffer.byteLength(newContent, 'utf8');
717
+ // Re-append any messages that arrived during compaction
718
+ if (lateMessages.trim()) {
719
+ fs.appendFileSync(msgFile, lateMessages);
720
+ log.info('Re-appended ' + lateMessages.trim().split('\n').length + ' messages that arrived during compaction');
721
+ }
722
+ lastReadOffset = fs.statSync(msgFile).size;
672
723
 
673
724
  // Trim consumed ID files — keep only IDs still in active messages
674
725
  const activeIds = new Set(active.map(m => m.id));
@@ -840,6 +891,21 @@ function saveWorkflows(workflows) {
840
891
  });
841
892
  }
842
893
 
894
+ // Save a checkpoint after a workflow step completes
895
+ function saveWorkflowCheckpoint(wf, step) {
896
+ if (!wf.checkpoints) wf.checkpoints = [];
897
+ wf.checkpoints.push({
898
+ step_id: step.id,
899
+ step_description: step.description,
900
+ completed_at: step.completed_at,
901
+ completed_by: step.assignee || registeredName,
902
+ output: step.verification || step.notes || null,
903
+ files_changed: step.files_changed || [],
904
+ step_states: wf.steps.map(s => ({ id: s.id, status: s.status, assignee: s.assignee || null })),
905
+ });
906
+ if (wf.checkpoints.length > 100) wf.checkpoints = wf.checkpoints.slice(-100);
907
+ }
908
+
843
909
  // --- Autonomous mode detection ---
844
910
  function isAutonomousMode() {
845
911
  const workflows = getWorkflows();
@@ -897,8 +963,11 @@ function findUnassignedTasks(skills) {
897
963
  const words = ((t.title || '') + ' ' + (t.description || '')).toLowerCase().split(/\W+/).filter(w => w.length > 3);
898
964
  words.forEach(w => historyKeywords.add(w));
899
965
  }
900
- // Add explicit skills
966
+ // Add explicit skills from function param AND agent card
901
967
  if (skills) skills.forEach(s => historyKeywords.add(s.toLowerCase()));
968
+ const cards = readJsonFile(AGENT_CARDS_FILE) || {};
969
+ const myCard = cards[registeredName];
970
+ if (myCard && myCard.skills) myCard.skills.forEach(s => historyKeywords.add(s));
902
971
 
903
972
  // Score each task by affinity (keyword overlap with agent's history + skills)
904
973
  // Scale fix: cache task keyword sets to avoid O(N*M) recomputation at 100 agents
@@ -1054,12 +1123,14 @@ let _guideCache = { key: null, result: null };
1054
1123
  function buildGuide(level = 'standard') {
1055
1124
  const agents = getAgents();
1056
1125
  const aliveCount = Object.values(agents).filter(a => isPidAlive(a.pid, a.last_activity)).length;
1057
- const mode = getConfig().conversation_mode || 'direct';
1126
+ const config = getConfig();
1127
+ const mode = config.conversation_mode || 'direct';
1128
+ const coordMode = config.coordinator_mode || 'responsive';
1058
1129
 
1059
1130
  // Cache check: reuse cached guide if nothing changed (saves rebuilding 20-50 rules)
1060
1131
  let rulesMtime = 0;
1061
1132
  try { rulesMtime = fs.existsSync(RULES_FILE) ? fs.statSync(RULES_FILE).mtimeMs : 0; } catch {}
1062
- const cacheKey = `${level}:${aliveCount}:${mode}:${registeredName}:${rulesMtime}`;
1133
+ const cacheKey = `${level}:${aliveCount}:${mode}:${coordMode}:${registeredName}:${rulesMtime}`;
1063
1134
  if (_guideCache.key === cacheKey && _guideCache.result) return _guideCache.result;
1064
1135
 
1065
1136
  const channels = getChannelsData();
@@ -1072,6 +1143,7 @@ function buildGuide(level = 'standard') {
1072
1143
  const isQualityLead = myRole === 'quality';
1073
1144
  const isMonitor = myRole === 'monitor';
1074
1145
  const isAdvisor = myRole === 'advisor';
1146
+ const isLeadRole = myRole === 'lead' || myRole === 'manager' || myRole === 'coordinator';
1075
1147
  let qualityLeadName = null;
1076
1148
  for (const [pName, prof] of Object.entries(profiles)) {
1077
1149
  if (prof.role && prof.role.toLowerCase() === 'quality' && pName !== registeredName) { qualityLeadName = pName; break; }
@@ -1143,7 +1215,7 @@ function buildGuide(level = 'standard') {
1143
1215
  try {
1144
1216
  const content = fs.readFileSync(guideFile, 'utf8').trim();
1145
1217
  if (content) projectRules = content.split(/\r?\n/).filter(l => l.trim() && !l.startsWith('#')).map(l => l.replace(/^[-*]\s*/, '').trim()).filter(Boolean);
1146
- } catch {}
1218
+ } catch (e) { log.debug("guide file read failed:", e.message); }
1147
1219
  }
1148
1220
 
1149
1221
  // Inject dashboard-managed rules into guide
@@ -1170,7 +1242,7 @@ function buildGuide(level = 'standard') {
1170
1242
  quality_lead: qualityLeadName || undefined,
1171
1243
  tool_categories: {
1172
1244
  'WORK LOOP': 'get_work, verify_and_advance, retry_with_improvement',
1173
- 'MESSAGING': 'send_message, broadcast, check_messages, get_history, handoff, share_file',
1245
+ 'MESSAGING': 'send_message, broadcast, check_messages, consume_messages, get_history, handoff, share_file',
1174
1246
  'COORDINATION': 'get_briefing, log_decision, get_decisions, kb_write, kb_read, kb_list',
1175
1247
  'TASKS': 'create_task, update_task, list_tasks, suggest_task',
1176
1248
  'QUALITY': 'request_review, submit_review',
@@ -1189,6 +1261,17 @@ function buildGuide(level = 'standard') {
1189
1261
  }
1190
1262
  }
1191
1263
 
1264
+ // Lead/Coordinator mode: responsive (stay with human) vs autonomous (run in listen loop)
1265
+ if (isLeadRole && aliveCount >= 2) {
1266
+ const coordinatorMode = getConfig().coordinator_mode || 'responsive';
1267
+ if (coordinatorMode === 'responsive') {
1268
+ rules.push('RESPONSIVE COORDINATOR PATTERN: Use consume_messages() at the start of each interaction to check for agent updates non-blockingly. Process all returned messages, assign work, then return to the human immediately. Do NOT block in listen() — you need to stay responsive to both agents and the user.');
1269
+ } else {
1270
+ rules.push('AUTONOMOUS COORDINATOR PATTERN: Use listen() to wait for agent results. Process responses, delegate follow-up work, and continue the listen loop. Only return to the human when all tasks are complete or when you hit a blocker that requires human input.');
1271
+ }
1272
+ rules.push('CRITICAL: You are a Coordinator. You MUST NOT edit files, write code, or use tools like Edit/Write/Bash for code changes. Your tools are: send_message, create_task, update_task, create_workflow, advance_workflow, workflow_status, list_tasks, consume_messages, broadcast, kb_write, kb_read, log_decision. Delegate ALL code work to other agents.');
1273
+ }
1274
+
1192
1275
  // Tier 0 — THE one rule (always included at every level)
1193
1276
  const listenCmd = isManagedMode() ? 'listen()' : (mode === 'group' ? 'listen_group()' : 'listen()');
1194
1277
  rules.push(`AFTER EVERY ACTION, call ${listenCmd}. This is how you receive messages. NEVER skip this. NEVER use sleep(). NEVER poll with check_messages(). ${listenCmd} is your ONLY way to receive messages.`);
@@ -1247,7 +1330,7 @@ function buildGuide(level = 'standard') {
1247
1330
  try {
1248
1331
  const content = fs.readFileSync(guideFile, 'utf8').trim();
1249
1332
  if (content) projectRules = content.split(/\r?\n/).filter(l => l.trim() && !l.startsWith('#')).map(l => l.replace(/^[-*]\s*/, '').trim()).filter(Boolean);
1250
- } catch {}
1333
+ } catch (e) { log.debug("guide file read failed:", e.message); }
1251
1334
  }
1252
1335
 
1253
1336
  // Inject dashboard-managed rules into guide
@@ -1266,7 +1349,7 @@ function buildGuide(level = 'standard') {
1266
1349
  ? '1. Call list_agents() to see who is online. 2. Send a message or call listen() to wait.'
1267
1350
  : '1. Call get_briefing() for project context. 2. Call listen_group() to join. 3. Respond and listen_group() again.',
1268
1351
  tool_categories: {
1269
- 'MESSAGING': 'send_message, broadcast, listen_group, listen, check_messages, get_history, get_summary, search_messages, handoff, share_file',
1352
+ 'MESSAGING': 'send_message, broadcast, listen_group, listen, check_messages, consume_messages, get_history, get_summary, search_messages, handoff, share_file',
1270
1353
  'COORDINATION': 'get_briefing, log_decision, get_decisions, kb_write, kb_read, kb_list, call_vote, cast_vote, vote_status',
1271
1354
  'TASKS': 'create_task, update_task, list_tasks, declare_dependency, check_dependencies, suggest_task',
1272
1355
  'QUALITY': 'update_progress, get_progress, request_review, submit_review, get_reputation',
@@ -1292,6 +1375,17 @@ function buildGuide(level = 'standard') {
1292
1375
  };
1293
1376
  }
1294
1377
 
1378
+ // Task reminder: show agent's pending/in_progress tasks so they remember to update them
1379
+ if (registeredName) {
1380
+ try {
1381
+ const myTasks = getTasks().filter(t => t.assignee === registeredName && (t.status === 'pending' || t.status === 'in_progress'));
1382
+ if (myTasks.length > 0) {
1383
+ result.your_tasks = myTasks.map(t => ({ id: t.id, title: t.title, status: t.status }));
1384
+ rules.push(`TASK STATUS: You have ${myTasks.length} task(s). Use update_task(task_id, "in_progress") when starting and update_task(task_id, "done") when complete. Your tasks: ${myTasks.map(t => t.id + ' "' + t.title.substring(0, 40) + '" (' + t.status + ')').join('; ')}`);
1385
+ }
1386
+ } catch (e) { log.debug('task reminder in guide failed:', e.message); }
1387
+ }
1388
+
1295
1389
  // Cache the result for subsequent calls with same params
1296
1390
  _guideCache = { key: cacheKey, result };
1297
1391
  return result;
@@ -1299,7 +1393,7 @@ function buildGuide(level = 'standard') {
1299
1393
 
1300
1394
  // --- Tool implementations ---
1301
1395
 
1302
- function toolRegister(name, provider = null) {
1396
+ function toolRegister(name, provider = null, skills = null) {
1303
1397
  ensureDataDir();
1304
1398
  migrateIfNeeded(); // run data migrations on first register
1305
1399
  sanitizeName(name);
@@ -1311,12 +1405,9 @@ function toolRegister(name, provider = null) {
1311
1405
  return { error: `Agent "${name}" is already registered by a live process. Choose a different name.` };
1312
1406
  }
1313
1407
 
1314
- // If name was previously registered by a dead process, verify token to prevent impersonation
1315
- if (agents[name] && agents[name].token && !isPidAlive(agents[name].pid, agents[name].last_activity)) {
1316
- // Dead agent — only allow re-registration from the same process (same token)
1317
- if (registeredToken && registeredToken !== agents[name].token) {
1318
- return { error: `Agent "${name}" was previously registered by another process. Choose a different name.` };
1319
- }
1408
+ // Dead agent name reclaim — allow any process to take a dead agent's name
1409
+ if (agents[name] && !isPidAlive(agents[name].pid, agents[name].last_activity)) {
1410
+ log.info(`Agent "${name}" reclaimed (previous PID ${agents[name].pid} is dead)`);
1320
1411
  }
1321
1412
 
1322
1413
  // Prevent re-registration under a different name from the same process
@@ -1327,74 +1418,99 @@ function toolRegister(name, provider = null) {
1327
1418
 
1328
1419
  const now = new Date().toISOString();
1329
1420
  const token = (agents[name] && agents[name].token) || generateToken();
1330
- agents[name] = { pid: process.pid, timestamp: now, last_activity: now, provider: provider || 'unknown', branch: currentBranch, token, started_at: now };
1421
+ agents[name] = { pid: process.pid, ppid: process.ppid, timestamp: now, last_activity: now, provider: provider || 'unknown', branch: currentBranch, token, started_at: now };
1331
1422
  saveAgents(agents);
1332
1423
  registeredName = name;
1333
- registeredToken = token;
1424
+ registeredToken = token;
1425
+
1426
+ // Auto-create profile if not exists
1427
+ const profiles = getProfiles();
1428
+ if (!profiles[name]) {
1429
+ profiles[name] = { display_name: name, avatar: '', bio: '', role: '', created_at: now };
1430
+ saveProfiles(profiles);
1431
+ }
1432
+
1433
+ // Save agent card with skills
1434
+ const cards = readJsonFile(AGENT_CARDS_FILE) || {};
1435
+ cards[name] = {
1436
+ name,
1437
+ provider: provider || 'unknown',
1438
+ skills: Array.isArray(skills) ? skills.map(s => String(s).toLowerCase().substring(0, 30)).slice(0, 20) : [],
1439
+ registered_at: now,
1440
+ };
1441
+ writeJsonFile(AGENT_CARDS_FILE, cards);
1334
1442
 
1335
- // Auto-create profile if not exists
1336
- const profiles = getProfiles();
1337
- if (!profiles[name]) {
1338
- profiles[name] = { display_name: name, avatar: '', bio: '', role: '', created_at: now };
1339
- saveProfiles(profiles);
1340
- }
1443
+ // Start heartbeat — updates last_activity every 10s so dashboard knows we're alive
1444
+ // Deterministic jitter per agent to spread writes across the interval (prevents lock storms at 10 agents)
1445
+ const heartbeatJitter = name.split('').reduce((h, c) => h + c.charCodeAt(0), 0) % 2000;
1446
+ if (heartbeatInterval) clearInterval(heartbeatInterval);
1447
+ heartbeatInterval = setInterval(() => {
1448
+ try {
1449
+ // Scale fix: write per-agent heartbeat file instead of lock+read+write agents.json
1450
+ // Eliminates write contention — each agent writes only its own file, no locking needed
1451
+ touchHeartbeat(registeredName);
1452
+ const agents = getAgents(); // cached + merges heartbeat files automatically
1453
+ // Managed mode: detect dead manager and dead turn holder
1454
+ if (isManagedMode()) {
1455
+ const managed = getManagedConfig();
1456
+ let managedChanged = false;
1457
+
1458
+ // Dead manager detection
1459
+ if (managed.manager && managed.manager !== registeredName) {
1460
+ if (agents[managed.manager] && !isPidAlive(agents[managed.manager].pid, agents[managed.manager].last_activity)) {
1461
+ managed.manager = null;
1462
+ managed.floor = 'closed';
1463
+ managed.turn_current = null;
1464
+ managed.turn_queue = [];
1465
+ managedChanged = true;
1466
+ saveManagedConfig(managed);
1467
+ broadcastSystemMessage(`[SYSTEM] Manager disconnected. Call claim_manager() to take over as the new manager.`);
1468
+ }
1469
+ }
1341
1470
 
1342
- // Start heartbeat — updates last_activity every 10s so dashboard knows we're alive
1343
- // Deterministic jitter per agent to spread writes across the interval (prevents lock storms at 10 agents)
1344
- const heartbeatJitter = name.split('').reduce((h, c) => h + c.charCodeAt(0), 0) % 2000;
1345
- if (heartbeatInterval) clearInterval(heartbeatInterval);
1346
- heartbeatInterval = setInterval(() => {
1347
- try {
1348
- // Scale fix: write per-agent heartbeat file instead of lock+read+write agents.json
1349
- // Eliminates write contention — each agent writes only its own file, no locking needed
1350
- touchHeartbeat(registeredName);
1351
- const agents = getAgents(); // cached + merges heartbeat files automatically
1352
- // Managed mode: detect dead manager and dead turn holder
1353
- if (isManagedMode()) {
1354
- const managed = getManagedConfig();
1355
- let managedChanged = false;
1356
-
1357
- // Dead manager detection
1358
- if (managed.manager && managed.manager !== registeredName) {
1359
- if (agents[managed.manager] && !isPidAlive(agents[managed.manager].pid, agents[managed.manager].last_activity)) {
1360
- managed.manager = null;
1361
- managed.floor = 'closed';
1362
- managed.turn_current = null;
1363
- managed.turn_queue = [];
1364
- managedChanged = true;
1365
- saveManagedConfig(managed);
1366
- broadcastSystemMessage(`[SYSTEM] Manager disconnected. Call claim_manager() to take over as the new manager.`);
1471
+ // Dead turn holder detection — unstick the floor
1472
+ if (!managedChanged && managed.turn_current && managed.turn_current !== registeredName && managed.manager) {
1473
+ if (agents[managed.turn_current] && !isPidAlive(agents[managed.turn_current].pid, agents[managed.turn_current].last_activity)) {
1474
+ const deadAgent = managed.turn_current;
1475
+ managed.turn_current = null;
1476
+ managed.floor = 'closed';
1477
+ managed.turn_queue = [];
1478
+ saveManagedConfig(managed);
1479
+ if (managed.manager !== registeredName) {
1480
+ sendSystemMessage(managed.manager, `[FLOOR] ${deadAgent} disconnected while holding the floor. Floor returned to you.`);
1481
+ }
1482
+ }
1367
1483
  }
1368
1484
  }
1369
-
1370
- // Dead turn holder detection — unstick the floor
1371
- if (!managedChanged && managed.turn_current && managed.turn_current !== registeredName && managed.manager) {
1372
- if (agents[managed.turn_current] && !isPidAlive(agents[managed.turn_current].pid, agents[managed.turn_current].last_activity)) {
1373
- const deadAgent = managed.turn_current;
1374
- managed.turn_current = null;
1375
- managed.floor = 'closed';
1376
- managed.turn_queue = [];
1377
- saveManagedConfig(managed);
1378
- if (managed.manager !== registeredName) {
1379
- sendSystemMessage(managed.manager, `[FLOOR] ${deadAgent} disconnected while holding the floor. Floor returned to you.`);
1485
+ // Clean stale listening_since flags (listen times out at 5min, clear after 6min)
1486
+ for (const [aName, aInfo] of Object.entries(agents)) {
1487
+ if (aInfo.listening_since) {
1488
+ const listenAge = Date.now() - new Date(aInfo.listening_since).getTime();
1489
+ if (listenAge > 360000) {
1490
+ aInfo.listening_since = null;
1380
1491
  }
1381
1492
  }
1382
1493
  }
1383
- }
1384
- // Snapshot dead agents BEFORE cleanup (for auto-recovery)
1385
- snapshotDeadAgents(agents);
1386
- // Clean up file locks held by dead agents
1387
- cleanStaleLocks();
1388
- cleanStaleChannelMembers();
1389
- // Auto-escalation: notify team about long-blocked tasks
1390
- escalateBlockedTasks();
1391
- // Stand-up meetings: periodic team check-ins
1392
- triggerStandupIfDue();
1393
- // Watchdog: nudge idle agents, reassign stuck work (autonomous mode only)
1394
- watchdogCheck();
1395
- } catch {}
1396
- }, 10000 + heartbeatJitter);
1397
- heartbeatInterval.unref(); // Don't prevent process exit
1494
+ // Agent status change notifications — detect agents going offline/online
1495
+ detectAgentStatusChanges(agents);
1496
+ // Snapshot dead agents BEFORE cleanup (for auto-recovery)
1497
+ snapshotDeadAgents(agents);
1498
+ // Clean up file locks held by dead agents
1499
+ cleanStaleLocks();
1500
+ cleanStaleChannelMembers();
1501
+ // Auto-escalation: notify team about long-blocked tasks
1502
+ escalateBlockedTasks();
1503
+ // Stand-up meetings: periodic team check-ins
1504
+ triggerStandupIfDue();
1505
+ // Auto-reassign stuck workflow steps from dead agents
1506
+ checkStuckWorkflowSteps();
1507
+ // Stale task detection: warn about tasks in_progress for >30 minutes without update
1508
+ checkStaleTasks();
1509
+ // Watchdog: nudge idle agents, reassign stuck work (autonomous mode only)
1510
+ watchdogCheck();
1511
+ } catch (e) { log.warn("heartbeat loop error:", e.message); }
1512
+ }, 10000 + heartbeatJitter);
1513
+ heartbeatInterval.unref(); // Don't prevent process exit
1398
1514
 
1399
1515
  // Fire join event + recovery data for returning agents
1400
1516
  const config = getConfig();
@@ -1456,7 +1572,7 @@ function toolRegister(name, provider = null) {
1456
1572
  // Clean up snapshot after loading
1457
1573
  try { fs.unlinkSync(recoveryFile); } catch {}
1458
1574
  }
1459
- } catch {}
1575
+ } catch (e) { log.debug("recovery file parse failed:", e.message); }
1460
1576
  }
1461
1577
 
1462
1578
  // Notify other agents
@@ -1470,7 +1586,7 @@ function toolRegister(name, provider = null) {
1470
1586
  if (roleAssignments && roleAssignments[name]) {
1471
1587
  result.your_role = roleAssignments[name];
1472
1588
  }
1473
- } catch {}
1589
+ } catch (e) { log.debug("role assignment failed:", e.message); }
1474
1590
  }
1475
1591
 
1476
1592
  return result;
@@ -1502,7 +1618,7 @@ function setListening(isListening) {
1502
1618
  saveAgents(agents);
1503
1619
  }
1504
1620
  } finally { unlockAgentsFile(); }
1505
- } catch {}
1621
+ } catch (e) { log.debug("register workspace status failed:", e.message); }
1506
1622
  }
1507
1623
 
1508
1624
  function toolListAgents() {
@@ -1519,7 +1635,7 @@ function toolListAgents() {
1519
1635
  registered_at: info.timestamp,
1520
1636
  last_activity: lastActivity,
1521
1637
  idle_seconds: alive ? idleSeconds : null,
1522
- status: !alive ? 'dead' : idleSeconds > 60 ? 'sleeping' : 'active',
1638
+ status: !alive ? 'offline' : (info.listening_since && alive) ? 'listening' : idleSeconds > 30 ? 'idle' : 'working',
1523
1639
  listening_since: info.listening_since || null,
1524
1640
  is_listening: !!(info.listening_since && alive),
1525
1641
  last_listened_at: info.last_listened_at || null,
@@ -1534,12 +1650,12 @@ function toolListAgents() {
1534
1650
  try {
1535
1651
  const ws = getWorkspace(name);
1536
1652
  if (ws._status) result[name].current_status = ws._status;
1537
- } catch {}
1653
+ } catch (e) { log.debug("workspace status read failed:", e.message); }
1538
1654
  }
1539
1655
  return { agents: result };
1540
1656
  }
1541
1657
 
1542
- async function toolSendMessage(content, to = null, reply_to = null, channel = null) {
1658
+ async function toolSendMessage(content, to = null, reply_to = null, channel = null, priority = null) {
1543
1659
  if (!registeredName) {
1544
1660
  return { error: 'You must call register() first' };
1545
1661
  }
@@ -1691,7 +1807,8 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1691
1807
  }
1692
1808
  }
1693
1809
 
1694
- if (!agents[to]) {
1810
+ // Allow sending to __user__ (human via dashboard) even though they're not a registered agent
1811
+ if (to !== '__user__' && !agents[to]) {
1695
1812
  return { error: `Agent "${to}" is not registered` };
1696
1813
  }
1697
1814
 
@@ -1699,8 +1816,8 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1699
1816
  return { error: 'Cannot send a message to yourself' };
1700
1817
  }
1701
1818
 
1702
- // Permission check
1703
- if (!canSendTo(registeredName, to)) {
1819
+ // Permission check (skip for __user__ — human always has read access)
1820
+ if (to !== '__user__' && !canSendTo(registeredName, to)) {
1704
1821
  return { error: `Permission denied: you are not allowed to send messages to "${to}"` };
1705
1822
  }
1706
1823
 
@@ -1741,6 +1858,7 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1741
1858
  to: isGroup ? '__group__' : to,
1742
1859
  content,
1743
1860
  timestamp: new Date().toISOString(),
1861
+ ...(priority && ['critical', 'normal', 'low'].includes(priority) && { priority }),
1744
1862
  ...(isGroup && to && { addressed_to: [to] }),
1745
1863
  ...(channel && { channel }),
1746
1864
  ...(reply_to && { reply_to }),
@@ -1829,7 +1947,7 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1829
1947
  result._decision_hint = `Related decision exists: "${overlap.decision}" (topic: ${overlap.topic || 'general'}). Check get_decisions() before re-debating.`;
1830
1948
  }
1831
1949
  }
1832
- } catch {}
1950
+ } catch (e) { log.debug("listen channel watcher setup failed:", e.message); }
1833
1951
  }
1834
1952
  if (_cooldownApplied > 0) result.cooldown_applied_ms = _cooldownApplied;
1835
1953
  if (channel) result.channel = channel;
@@ -1862,6 +1980,25 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1862
1980
  result.you_have_messages = myPending.length;
1863
1981
  result.urgent = `You have ${myPending.length} unread message(s) waiting. Call listen_group() after this to read them.`;
1864
1982
  }
1983
+
1984
+ // Coordinator enforcement: warn if sending work assignment without creating a task first
1985
+ const senderProfile = getProfiles()[registeredName];
1986
+ const senderRole = senderProfile && senderProfile.role ? senderProfile.role.toLowerCase() : '';
1987
+ const isSenderLead = senderRole === 'lead' || senderRole === 'manager' || senderRole === 'coordinator';
1988
+ if (isSenderLead && to && to !== '__user__' && to !== '__all__' && to !== '__group__') {
1989
+ const assignmentKeywords = /\b(implement|fix|build|add|create|update|redesign|refactor|write|deploy|test|review|research|investigate)\b/i;
1990
+ if (assignmentKeywords.test(content)) {
1991
+ const recentTasks = getTasks().filter(t => {
1992
+ if (t.assignee !== to) return false;
1993
+ const age = Date.now() - new Date(t.created_at).getTime();
1994
+ return age < 60000; // created in last 60 seconds
1995
+ });
1996
+ if (recentTasks.length === 0) {
1997
+ result.task_warning = `No task created for this assignment to ${to}. Use create_task(title, description, "${to}") to formally track this work.`;
1998
+ }
1999
+ }
2000
+ }
2001
+
1865
2002
  return result;
1866
2003
  }
1867
2004
 
@@ -2043,8 +2180,13 @@ function toolCheckMessages(from = null) {
2043
2180
  if (m.addressed_to && m.addressed_to.includes(registeredName)) addressedCount++;
2044
2181
  }
2045
2182
 
2183
+ // Include pending notification count
2184
+ const allNotifs = getNotifications();
2185
+ const unreadNotifs = allNotifs.filter(n => !n.read_by.includes(registeredName));
2186
+
2046
2187
  const result = {
2047
2188
  count: unconsumed.length,
2189
+ pending_notifications: unreadNotifs.length,
2048
2190
  // Scale fix: return previews not full content — agent gets full content via listen_group()
2049
2191
  messages: unconsumed.map(m => ({
2050
2192
  id: m.id,
@@ -2068,6 +2210,60 @@ function toolCheckMessages(from = null) {
2068
2210
  return result;
2069
2211
  }
2070
2212
 
2213
// Drain the caller's unread inbox in one call: return the full message
// bodies and persist their consumed state immediately.
//   from  — optional sender name to filter by (null = all senders)
//   limit — optional cap on how many messages to consume this call
// Returns { success, count, messages, remaining, agents_online, coordinator_mode }
// or { error } when the caller has not registered yet.
function toolConsumeMessages(from = null, limit = null) {
  if (!registeredName) {
    return { error: 'You must call register() first' };
  }

  let batch = getUnconsumedMessages(registeredName, from);
  if (limit && limit > 0 && batch.length > limit) {
    batch = batch.slice(0, limit);
  }

  if (batch.length === 0) {
    return { success: true, count: 0, messages: [] };
  }

  // Persist consumption state for every message in this batch
  const consumedIds = getConsumedIds(registeredName);
  batch.forEach((msg) => {
    consumedIds.add(msg.id);
    markAsRead(registeredName, msg.id);
  });
  saveConsumedIds(registeredName, consumedIds);

  // Fast-forward the read offset to the current end of the message log
  const messagesPath = getMessagesFile(currentBranch);
  if (fs.existsSync(messagesPath)) {
    lastReadOffset = fs.statSync(messagesPath).size;
  }

  touchActivity();

  // Anything still unread after this batch (e.g. messages beyond `limit`)
  const stillUnread = getUnconsumedMessages(registeredName, null);

  const agents = getAgents();
  const onlineCount = Object.values(agents).filter((info) => isPidAlive(info.pid, info.last_activity)).length;

  return {
    success: true,
    count: batch.length,
    messages: batch.map((m) => ({
      id: m.id,
      from: m.from,
      content: m.content,
      timestamp: m.timestamp,
      ...(m.reply_to && { reply_to: m.reply_to }),
      ...(m.thread_id && { thread_id: m.thread_id }),
      ...(m.addressed_to && { addressed_to: m.addressed_to }),
    })),
    remaining: stillUnread.length,
    agents_online: onlineCount,
    coordinator_mode: getConfig().coordinator_mode || 'responsive',
  };
}
2266
+
2071
2267
  function toolAckMessage(messageId) {
2072
2268
  if (!registeredName) {
2073
2269
  return { error: 'You must call register() first' };
@@ -2599,7 +2795,7 @@ async function toolListenGroup() {
2599
2795
  });
2600
2796
  chWatcher.on('error', () => {});
2601
2797
  channelWatchers.push(chWatcher);
2602
- } catch {}
2798
+ } catch (e) { log.debug("channel watcher setup failed:", e.message); }
2603
2799
  }
2604
2800
  }
2605
2801
  } catch {
@@ -2638,6 +2834,72 @@ async function toolListenGroup() {
2638
2834
  });
2639
2835
  }
2640
2836
 
2837
// Auto speaker selection for group messages — decides whether THIS agent
// should respond. Priority: 1) @mentioned agents, 2) skill match against
// the agent-card registry, 3) round-robin fallback.
let _lastSpeakerIndex = 0;
function selectSpeaker(msg, agentName, aliveAgentNames) {
  // Explicit @mentions trump everything else
  const mentions = msg.addressed_to;
  if (mentions && mentions.length > 0) return mentions.includes(agentName);

  // A direct message to this agent always warrants a reply
  if (msg.to === agentName) return true;

  // System traffic is informational — nobody needs to respond
  if (msg.system || msg.from === '__system__') return false;

  // Skill-based routing: respond when this agent's card matches the content
  const cards = readJsonFile(AGENT_CARDS_FILE) || {};
  const ownCard = cards[agentName];
  const hasSkills = ownCard && ownCard.skills && ownCard.skills.length > 0;
  if (hasSkills && msg.content) {
    const haystack = msg.content.toLowerCase();
    const cardMatches = (card) => Boolean(card && card.skills && card.skills.some((skill) => haystack.includes(skill)));
    if (cardMatches(ownCard)) {
      // Any OTHER live agent (excluding the sender) whose skills also match
      const rivals = aliveAgentNames.filter((n) => n !== agentName && n !== msg.from && cardMatches(cards[n]));
      // Sole matcher responds outright
      if (rivals.length === 0) return true;
      // Tie-break deterministically: first name in alphabetical order wins
      return [agentName, ...rivals].sort()[0] === agentName;
    }
  }

  // Round-robin fallback for unaddressed group chatter
  const pool = aliveAgentNames.filter((n) => n !== msg.from).sort();
  if (pool.length === 0) return false;
  const isChosen = pool[_lastSpeakerIndex % pool.length] === agentName;
  if (isChosen) _lastSpeakerIndex += 1;
  return isChosen;
}
2881
+
2882
// Message priority classification: critical > normal > low
// Critical: task assignments, human messages, workflow handoffs, system events
// Normal: regular agent-to-agent chat
// Low: status updates, acknowledgements
function classifyPriority(msg) {
  // An explicitly tagged message keeps its tag
  if (msg.priority) return msg.priority;

  // Anything from the human operator is always urgent
  if (msg.from === '__user__') return 'critical';

  const body = msg.content;

  // System traffic: only workflow/task/approval events are urgent
  if (msg.system || msg.from === '__system__') {
    const urgent = body && ['[Workflow', '[TASK', '[APPROVAL'].some((marker) => body.includes(marker));
    return urgent ? 'critical' : 'normal';
  }

  // Agent-to-agent: classify by conventional content markers
  if (body) {
    if (['[Workflow', '[HANDOFF]', '[PLAN'].some((marker) => body.includes(marker))) return 'critical';
    if (['[STATUS]', '[ACK]', '[PROGRESS]'].some((prefix) => body.startsWith(prefix))) return 'low';
  }

  // Structured handoffs are urgent even without a content marker
  if (msg.type === 'handoff') return 'critical';
  return 'normal';
}
2902
+
2641
2903
  // Build the response for listen_group — kept lean to reduce context accumulation
2642
2904
  // Context/history removed: agents should call get_history() when they need it
2643
2905
  function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
@@ -2648,12 +2910,16 @@ function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
2648
2910
  const wasAddressed = batch.some(m => m.addressed_to && m.addressed_to.includes(agentName));
2649
2911
  sendLimit = wasAddressed ? 2 : 1;
2650
2912
 
2651
- // Sort batch by priority: system > threaded replies > direct > broadcast
2913
+ // Sort batch by priority: critical(0) > normal(1) > low(2), then by type
2914
+ const PRIORITY_ORDER = { critical: 0, normal: 1, low: 2 };
2652
2915
  function messagePriority(m) {
2653
- if (m.system || m.from === '__system__') return 0;
2654
- if (m.reply_to || m.thread_id) return 1;
2655
- if (!m.broadcast) return 2;
2656
- return 3;
2916
+ const prio = PRIORITY_ORDER[classifyPriority(m)] || 1;
2917
+ // Sub-sort within same priority: system > threaded > direct > broadcast
2918
+ let subPrio = 3;
2919
+ if (m.system || m.from === '__system__') subPrio = 0;
2920
+ else if (m.reply_to || m.thread_id) subPrio = 1;
2921
+ else if (!m.broadcast) subPrio = 2;
2922
+ return prio * 10 + subPrio;
2657
2923
  }
2658
2924
  batch.sort((a, b) => {
2659
2925
  const pa = messagePriority(a), pb = messagePriority(b);
@@ -2697,6 +2963,7 @@ function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
2697
2963
  return {
2698
2964
  id: m.id, from: m.from, to: m.to, content: m.content,
2699
2965
  timestamp: m.timestamp,
2966
+ priority: classifyPriority(m),
2700
2967
  age_seconds: ageSec,
2701
2968
  ...(ageSec > 30 && { delayed: true }),
2702
2969
  ...(m.reply_to && { reply_to: m.reply_to }),
@@ -2704,7 +2971,7 @@ function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
2704
2971
  ...(m.addressed_to && { addressed_to: m.addressed_to }),
2705
2972
  ...(m.to === '__group__' && {
2706
2973
  addressed_to_you: !m.addressed_to || m.addressed_to.includes(agentName),
2707
- should_respond: !m.addressed_to || m.addressed_to.includes(agentName),
2974
+ should_respond: selectSpeaker(m, agentName, agentNames),
2708
2975
  }),
2709
2976
  };
2710
2977
  }),
@@ -2744,6 +3011,16 @@ function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
2744
3011
  result.next_action = isAutonomousMode()
2745
3012
  ? 'Process these messages, then call get_work() to continue the proactive work loop. Do NOT call listen_group() — use get_work() instead.'
2746
3013
  : 'After processing these messages and sending your response, call listen_group() again immediately. Never stop listening.';
3014
+ result.coordinator_mode = getConfig().coordinator_mode || 'responsive';
3015
+
3016
+ // Task nudge: remind agent of their outstanding tasks
3017
+ try {
3018
+ const myTasks = getTasks().filter(t => t.assignee === agentName && (t.status === 'pending' || t.status === 'in_progress'));
3019
+ if (myTasks.length > 0) {
3020
+ result.task_reminder = { pending: myTasks.filter(t => t.status === 'pending').length, in_progress: myTasks.filter(t => t.status === 'in_progress').length, tasks: myTasks.map(t => ({ id: t.id, title: t.title, status: t.status })) };
3021
+ }
3022
+ } catch (e) { log.debug('task reminder in listen_group failed:', e.message); }
3023
+
2747
3024
  return result;
2748
3025
  }
2749
3026
 
@@ -3086,7 +3363,7 @@ function toolUpdateTask(taskId, status, notes = null) {
3086
3363
  } else if (status === 'blocked') {
3087
3364
  saveWorkspace(registeredName, Object.assign(getWorkspace(registeredName), { _status: `BLOCKED on: ${task.title}`, _status_since: new Date().toISOString() }));
3088
3365
  }
3089
- } catch {}
3366
+ } catch (e) { log.warn("verify_and_advance failed:", e.message); }
3090
3367
 
3091
3368
  // Task-channel auto-join: when claiming a task that has a channel, auto-join it
3092
3369
  if (status === 'in_progress' && task.channel) {
@@ -3100,6 +3377,7 @@ function toolUpdateTask(taskId, status, notes = null) {
3100
3377
  // Event hooks: task completion
3101
3378
  if (status === 'done') {
3102
3379
  fireEvent('task_complete', { title: task.title, created_by: task.created_by });
3380
+ appendNotification('task_done', registeredName, `Task "${task.title}" completed by ${registeredName}`, task.id);
3103
3381
  // Check if this resolves any dependencies
3104
3382
  const deps = getDeps();
3105
3383
  for (const dep of deps) {
@@ -3128,6 +3406,50 @@ function toolUpdateTask(taskId, status, notes = null) {
3128
3406
  if (aliveOthers.length > 0) {
3129
3407
  broadcastSystemMessage(`[REVIEW NEEDED] ${registeredName} completed task "${task.title}". Team: please review the work and call submit_review() if applicable.`, registeredName);
3130
3408
  }
3409
+
3410
+ // Auto-sync: advance matching workflow step when task is done
3411
+ try {
3412
+ const workflows = getWorkflows();
3413
+ let wfChanged = false;
3414
+ for (const wf of workflows) {
3415
+ if (wf.status !== 'active') continue;
3416
+ for (const step of wf.steps) {
3417
+ if (step.status !== 'in_progress') continue;
3418
+ if (step.assignee !== registeredName) continue;
3419
+ // Match by assignee — the agent who completed the task also has an in_progress step
3420
+ step.status = 'done';
3421
+ step.completed_at = new Date().toISOString();
3422
+ step.notes = `Auto-completed via task "${task.title}"`;
3423
+ saveWorkflowCheckpoint(wf, step);
3424
+ // Start next ready steps
3425
+ const nextSteps = findReadySteps(wf);
3426
+ for (const ns of nextSteps) {
3427
+ if (ns.requires_approval) {
3428
+ ns.status = 'awaiting_approval';
3429
+ ns.approval_requested_at = new Date().toISOString();
3430
+ sendSystemMessage('__user__', `[APPROVAL NEEDED] Workflow "${wf.name}" — Step ${ns.id}: "${ns.description}". Approve or reject from the dashboard.`);
3431
+ } else {
3432
+ ns.status = 'in_progress';
3433
+ ns.started_at = new Date().toISOString();
3434
+ if (ns.assignee && ns.assignee !== registeredName) {
3435
+ const handoffContent = `[Workflow "${wf.name}"] Step ${ns.id} assigned to you: ${ns.description}`;
3436
+ messageSeq++;
3437
+ const hMsg = { id: generateId(), seq: messageSeq, from: registeredName, to: ns.assignee, content: handoffContent, timestamp: new Date().toISOString(), type: 'handoff' };
3438
+ fs.appendFileSync(getMessagesFile(currentBranch), JSON.stringify(hMsg) + '\n');
3439
+ fs.appendFileSync(getHistoryFile(currentBranch), JSON.stringify(hMsg) + '\n');
3440
+ }
3441
+ }
3442
+ }
3443
+ if (wf.steps.every(s => s.status === 'done')) wf.status = 'completed';
3444
+ wf.updated_at = new Date().toISOString();
3445
+ wfChanged = true;
3446
+ broadcastSystemMessage(`[WORKFLOW] Step "${step.description}" auto-advanced via task completion by ${registeredName}`);
3447
+ break; // one step per task completion
3448
+ }
3449
+ if (wfChanged) break;
3450
+ }
3451
+ if (wfChanged) saveWorkflows(workflows);
3452
+ } catch (e) { log.warn('auto-advance workflow on task done failed:', e.message); }
3131
3453
  }
3132
3454
 
3133
3455
  return { success: true, task_id: task.id, status: task.status, title: task.title };
@@ -3203,7 +3525,7 @@ function toolSearchMessages(query, from = null, limit = 20) {
3203
3525
  allMessages = allMessages.concat(chMsgs);
3204
3526
  }
3205
3527
  }
3206
- } catch {}
3528
+ } catch (e) { log.warn("get_work search failed:", e.message); }
3207
3529
  // Sort by timestamp descending for newest-first results
3208
3530
  allMessages.sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp));
3209
3531
 
@@ -3233,7 +3555,7 @@ function toolSearchMessages(query, from = null, limit = 20) {
3233
3555
  allMessages = allMessages.concat(readJsonl(chFile));
3234
3556
  }
3235
3557
  }
3236
- } catch {}
3558
+ } catch (e) { log.debug("get_work detail failed:", e.message); }
3237
3559
  allMessages.sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp));
3238
3560
  for (let i = 0; i < allMessages.length && results.length < limit; i++) {
3239
3561
  const m = allMessages[i];
@@ -3411,7 +3733,8 @@ function toolCreateWorkflow(name, steps, autonomous = false, parallel = false) {
3411
3733
  description: step.description.substring(0, 200),
3412
3734
  assignee: step.assignee || null,
3413
3735
  depends_on: Array.isArray(step.depends_on) ? step.depends_on : [],
3414
- status: 'pending', // all start pending; we'll activate ready ones below
3736
+ requires_approval: !!step.requires_approval,
3737
+ status: 'pending',
3415
3738
  started_at: null,
3416
3739
  completed_at: null,
3417
3740
  notes: '',
@@ -3500,11 +3823,37 @@ function toolAdvanceWorkflow(workflowId, notes) {
3500
3823
  currentStep.completed_at = new Date().toISOString();
3501
3824
  if (notes) currentStep.notes = notes.substring(0, 500);
3502
3825
 
3826
+ // Save checkpoint
3827
+ saveWorkflowCheckpoint(wf, currentStep);
3828
+
3829
+ // Auto-sync: mark matching in_progress tasks as done
3830
+ try {
3831
+ const tasks = getTasks();
3832
+ const matchingTask = tasks.find(t =>
3833
+ t.status === 'in_progress' && t.assignee === registeredName
3834
+ );
3835
+ if (matchingTask) {
3836
+ matchingTask.status = 'done';
3837
+ matchingTask.updated_at = new Date().toISOString();
3838
+ matchingTask.notes.push({ by: '__system__', text: `Auto-completed via workflow step "${currentStep.description}"`, at: new Date().toISOString() });
3839
+ saveTasks(tasks);
3840
+ }
3841
+ } catch (e) { log.warn('auto-complete task on workflow advance failed:', e.message); }
3842
+
3503
3843
  // Find all ready steps (supports parallel via depends_on)
3504
3844
  const nextSteps = findReadySteps(wf);
3505
3845
  if (nextSteps.length > 0) {
3506
3846
  const agents = getAgents();
3507
3847
  for (const step of nextSteps) {
3848
+ // Check if step requires human approval before starting
3849
+ if (step.requires_approval) {
3850
+ step.status = 'awaiting_approval';
3851
+ step.approval_requested_at = new Date().toISOString();
3852
+ sendSystemMessage('__user__',
3853
+ `[APPROVAL NEEDED] Workflow "${wf.name}" — Step ${step.id}: "${step.description}". Approve or reject from the dashboard.`
3854
+ );
3855
+ continue;
3856
+ }
3508
3857
  step.status = 'in_progress';
3509
3858
  step.started_at = new Date().toISOString();
3510
3859
  if (step.assignee && agents[step.assignee] && step.assignee !== registeredName && canSendTo(registeredName, step.assignee)) {
@@ -3524,6 +3873,7 @@ function toolAdvanceWorkflow(workflowId, notes) {
3524
3873
 
3525
3874
  const doneCount = wf.steps.filter(s => s.status === 'done').length;
3526
3875
  const pct = Math.round((doneCount / wf.steps.length) * 100);
3876
+ appendNotification('workflow_advanced', registeredName, `Workflow "${wf.name}" step ${currentStep.id} done (${pct}%)`, wf.id);
3527
3877
 
3528
3878
  return {
3529
3879
  success: true,
@@ -3535,14 +3885,32 @@ function toolAdvanceWorkflow(workflowId, notes) {
3535
3885
  };
3536
3886
  }
3537
3887
 
3538
- function toolWorkflowStatus(workflowId) {
3888
+ function toolWorkflowStatus(workflowId, action, checkpointIndex) {
3539
3889
  const workflows = getWorkflows();
3890
+
3891
+ // Rollback action
3892
+ if (action === 'rollback' && workflowId && checkpointIndex !== undefined) {
3893
+ const wf = workflows.find(w => w.id === workflowId);
3894
+ if (!wf) return { error: `Workflow not found: ${workflowId}` };
3895
+ if (!wf.checkpoints || !wf.checkpoints[checkpointIndex]) return { error: 'Checkpoint not found' };
3896
+ const checkpoint = wf.checkpoints[checkpointIndex];
3897
+ for (const savedStep of checkpoint.step_states) {
3898
+ const step = wf.steps.find(s => s.id === savedStep.id);
3899
+ if (step) { step.status = savedStep.status; step.assignee = savedStep.assignee; }
3900
+ }
3901
+ wf.updated_at = new Date().toISOString();
3902
+ saveWorkflows(workflows);
3903
+ broadcastSystemMessage(`[WORKFLOW] Rolled back "${wf.name}" to checkpoint: step "${checkpoint.step_description}"`);
3904
+ return { success: true, rolled_back_to: checkpoint };
3905
+ }
3906
+
3540
3907
  if (workflowId) {
3541
3908
  const wf = workflows.find(w => w.id === workflowId);
3542
3909
  if (!wf) return { error: `Workflow not found: ${workflowId}` };
3543
3910
  const doneCount = wf.steps.filter(s => s.status === 'done').length;
3544
3911
  const pct = Math.round((doneCount / wf.steps.length) * 100);
3545
3912
  const result = { workflow: wf, progress: `${doneCount}/${wf.steps.length} (${pct}%)` };
3913
+ if (wf.checkpoints) result.checkpoints = wf.checkpoints.length;
3546
3914
  if (wf.status === 'completed') result.report = generateCompletionReport(wf);
3547
3915
  return result;
3548
3916
  }
@@ -3550,7 +3918,7 @@ function toolWorkflowStatus(workflowId) {
3550
3918
  count: workflows.length,
3551
3919
  workflows: workflows.map(w => {
3552
3920
  const doneCount = w.steps.filter(s => s.status === 'done').length;
3553
- return { id: w.id, name: w.name, status: w.status, steps: w.steps.length, done: doneCount, progress: Math.round((doneCount / w.steps.length) * 100) + '%' };
3921
+ return { id: w.id, name: w.name, status: w.status, steps: w.steps.length, done: doneCount, progress: Math.round((doneCount / w.steps.length) * 100) + '%', checkpoints: w.checkpoints ? w.checkpoints.length : 0 };
3554
3922
  }),
3555
3923
  };
3556
3924
  }
@@ -3880,7 +4248,8 @@ async function toolVerifyAndAdvance(params) {
3880
4248
  // AUTO-ADVANCE
3881
4249
  currentStep.status = 'done';
3882
4250
  currentStep.completed_at = new Date().toISOString();
3883
- clearCheckpoint(registeredName, workflow_id, currentStep.id); // Item 8: clear checkpoint on completion
4251
+ saveWorkflowCheckpoint(wf, currentStep);
4252
+ clearCheckpoint(registeredName, workflow_id, currentStep.id);
3884
4253
  return advanceToNextSteps(false);
3885
4254
  }
3886
4255
 
@@ -3888,6 +4257,7 @@ async function toolVerifyAndAdvance(params) {
3888
4257
  // ADVANCE BUT FLAG
3889
4258
  currentStep.status = 'done';
3890
4259
  currentStep.completed_at = new Date().toISOString();
4260
+ saveWorkflowCheckpoint(wf, currentStep);
3891
4261
  currentStep.flagged = true;
3892
4262
  currentStep.flag_reason = `Low confidence (${confidence}%). May need review later.`;
3893
4263
  clearCheckpoint(registeredName, workflow_id, currentStep.id); // Item 8: clear checkpoint
@@ -4057,6 +4427,67 @@ function reassignWorkFrom(deadAgentName) {
4057
4427
  return reassignCount;
4058
4428
  }
4059
4429
 
4430
// Auto-reassign workflow steps from dead agents after timeout.
// Runs from the heartbeat loop: any in_progress step whose assignee's
// process is no longer alive and whose runtime exceeds
// NEOHIVE_STEP_TIMEOUT_MINUTES (default 5) is reset to pending so the
// next available agent can pick it up via get_work().
function checkStuckWorkflowSteps() {
  if (!registeredName) return;
  const workflows = getWorkflows();
  const agents = getAgents();
  // FIX: always pass a radix to parseInt; NaN (unset/garbage env) still
  // falls back to the 5-minute default via ||.
  const timeoutMs = (Number.parseInt(process.env.NEOHIVE_STEP_TIMEOUT_MINUTES, 10) || 5) * 60000;
  let changed = false;

  for (const wf of workflows) {
    if (wf.status !== 'active') continue;
    if (wf.paused) continue;

    for (const step of wf.steps) {
      // Only running, assigned steps with a known start time can be stuck
      if (step.status !== 'in_progress') continue;
      if (!step.assignee) continue;
      if (!step.started_at) continue;

      const elapsed = Date.now() - new Date(step.started_at).getTime();
      if (elapsed < timeoutMs) continue;

      // Leave the step alone while its assignee's process is still alive
      const agentInfo = agents[step.assignee];
      if (agentInfo && isPidAlive(agentInfo.pid, agentInfo.last_activity)) continue;

      log.warn(`Workflow step ${step.id} reassigned: ${step.assignee} offline for ${Math.round(elapsed / 60000)}min`);
      const deadAgent = step.assignee;
      // Reset to pending (and record provenance) so another agent can claim it
      step.status = 'pending';
      step.assignee = null;
      step.reassigned_from = deadAgent;
      step.reassigned_at = new Date().toISOString();
      changed = true;

      broadcastSystemMessage(
        `[WORKFLOW] Step "${step.description}" reassigned — ${deadAgent} went offline. Next available agent will pick it up via get_work().`
      );
    }
  }

  if (changed) saveWorkflows(workflows);
}
4469
+
4470
// Stale task detection: warn about tasks in_progress for >30 minutes without update
// Task ids already announced — each task is warned about at most once per
// server lifetime (the set is never pruned while the process runs).
const _staleTaskWarned = new Set();
function checkStaleTasks() {
  try {
    const STALE_AFTER_MS = 30 * 60 * 1000; // 30 minutes
    const now = Date.now();
    for (const task of getTasks()) {
      // Only running tasks with a known last-update time can go stale
      if (task.status !== 'in_progress' || !task.updated_at) continue;
      const sinceUpdate = now - new Date(task.updated_at).getTime();
      if (sinceUpdate < STALE_AFTER_MS) continue;
      // De-duplicate repeated heartbeat passes over the same stale task
      if (_staleTaskWarned.has(task.id)) continue;
      _staleTaskWarned.add(task.id);
      const mins = Math.round(sinceUpdate / 60000);
      broadcastSystemMessage(`[WARNING] Stale task: "${task.title}" assigned to ${task.assignee || 'unassigned'} — in_progress for ${mins}min without update. Agent should call update_task("${task.id}", "done") or report a blocker.`);
      log.warn(`Stale task detected: ${task.id} "${task.title}" (${mins}min)`);
    }
  } catch (e) { log.debug('stale task check failed:', e.message); }
}
4490
+
4060
4491
  function watchdogCheck() {
4061
4492
  // Run in autonomous mode always, AND in group mode when agents are idle 5+ min
4062
4493
  if (!isAutonomousMode() && !isGroupMode()) return;
@@ -4160,7 +4591,7 @@ function watchdogCheck() {
4160
4591
  sendSystemMessage(worker, `[REBALANCE] You've been moved from ${quietTeam.name} to ${busyTeam.name} — they have ${busyTeam.pendingTasks} pending tasks and need help.`);
4161
4592
  }
4162
4593
  }
4163
- } catch {}
4594
+ } catch (e) { log.warn("escalate blocked tasks failed:", e.message); }
4164
4595
 
4165
4596
  // UE5 safety: detect stale UE5 locks (ue5-editor, ue5-compile)
4166
4597
  try {
@@ -4185,7 +4616,7 @@ function watchdogCheck() {
4185
4616
  }
4186
4617
  }
4187
4618
  if (locksChanged) writeJsonFile(LOCKS_FILE, locks);
4188
- } catch {}
4619
+ } catch (e) { log.warn("stale lock cleanup failed:", e.message); }
4189
4620
 
4190
4621
  if (agentsChanged) saveAgents(agents);
4191
4622
  if (workflowsChanged) saveWorkflows(workflows);
@@ -4407,7 +4838,7 @@ function generateCompletionReport(workflow) {
4407
4838
  totalRetries += relevant.length;
4408
4839
  for (const r of relevant) retryDetails.push({ agent: name, task: r.task, attempt: r.attempt });
4409
4840
  }
4410
- } catch {}
4841
+ } catch (e) { log.debug("auto-plan retry scan failed:", e.message); }
4411
4842
  }
4412
4843
 
4413
4844
  const report = {
@@ -4585,7 +5016,7 @@ function autoAssignRoles() {
4585
5016
  }
4586
5017
  }
4587
5018
  saveChannelsData(channels);
4588
- } catch {}
5019
+ } catch (e) { log.warn("stale channel cleanup failed:", e.message); }
4589
5020
  }
4590
5021
 
4591
5022
  return assignments;
@@ -4941,7 +5372,7 @@ function toolForkConversation(fromMessageId, branchName) {
4941
5372
  saveAgents(agents);
4942
5373
  }
4943
5374
  } finally { unlockAgentsFile(); }
4944
- } catch {}
5375
+ } catch (e) { log.warn("auto role rebalance failed:", e.message); }
4945
5376
 
4946
5377
  return { success: true, branch: branchName, forked_from: branches[branchName].forked_from, messages_copied: forkedHistory.length };
4947
5378
  }
@@ -4965,7 +5396,7 @@ function toolSwitchBranch(branchName) {
4965
5396
  saveAgents(agents);
4966
5397
  }
4967
5398
  } finally { unlockAgentsFile(); }
4968
- } catch {}
5399
+ } catch (e) { log.warn("quality lead failover failed:", e.message); }
4969
5400
 
4970
5401
  return { success: true, branch: branchName, message: `Switched to branch "${branchName}". Read offset reset.` };
4971
5402
  }
@@ -5029,6 +5460,71 @@ function getReviews() { return cachedRead('reviews', () => readJsonFile(REVIEWS_
5029
5460
  function getDeps() { return cachedRead('deps', () => readJsonFile(DEPS_FILE) || [], 2000); }
5030
5461
  function getRules() { return cachedRead('rules', () => readJsonFile(RULES_FILE) || [], 2000); }
5031
5462
 
5463
+ // --- Notification system ---
5464
+ const MAX_NOTIFICATIONS = 500;
5465
+
5466
+ function getNotifications() {
5467
+ return readJsonFile(NOTIFICATIONS_FILE) || [];
5468
+ }
5469
+
5470
+ function saveNotifications(notifs) {
5471
+ // Prune to max cap
5472
+ if (notifs.length > MAX_NOTIFICATIONS) {
5473
+ notifs = notifs.slice(notifs.length - MAX_NOTIFICATIONS);
5474
+ }
5475
+ writeJsonFile(NOTIFICATIONS_FILE, notifs);
5476
+ }
5477
+
5478
+ function appendNotification(type, sourceAgent, summary, relatedId) {
5479
+ const notifs = getNotifications();
5480
+ notifs.push({
5481
+ id: 'notif_' + Date.now().toString(36) + Math.random().toString(36).slice(2, 6),
5482
+ type: type,
5483
+ source_agent: sourceAgent || registeredName || '__system__',
5484
+ related_id: relatedId || null,
5485
+ summary: summary,
5486
+ timestamp: new Date().toISOString(),
5487
+ read_by: [],
5488
+ });
5489
+ saveNotifications(notifs);
5490
+ }
5491
+
5492
+ function toolGetNotifications(since, type) {
5493
+ if (!registeredName) return { error: 'You must call register() first' };
5494
+ let notifs = getNotifications();
5495
+ // Filter unread for this agent
5496
+ notifs = notifs.filter(n => !n.read_by.includes(registeredName));
5497
+ if (since) {
5498
+ const sinceTs = new Date(since).getTime();
5499
+ notifs = notifs.filter(n => new Date(n.timestamp).getTime() > sinceTs);
5500
+ }
5501
+ if (type) {
5502
+ notifs = notifs.filter(n => n.type === type);
5503
+ }
5504
+ // Mark as read
5505
+ if (notifs.length > 0) {
5506
+ const allNotifs = getNotifications();
5507
+ const readIds = new Set(notifs.map(n => n.id));
5508
+ for (const n of allNotifs) {
5509
+ if (readIds.has(n.id) && !n.read_by.includes(registeredName)) {
5510
+ n.read_by.push(registeredName);
5511
+ }
5512
+ }
5513
+ saveNotifications(allNotifs);
5514
+ }
5515
+ return {
5516
+ count: notifs.length,
5517
+ notifications: notifs.map(n => ({
5518
+ id: n.id,
5519
+ type: n.type,
5520
+ source_agent: n.source_agent,
5521
+ related_id: n.related_id,
5522
+ summary: n.summary,
5523
+ timestamp: n.timestamp,
5524
+ })),
5525
+ };
5526
+ }
5527
+
5032
5528
  // --- Channel helpers ---
5033
5529
  const CHANNELS_FILE_PATH = path.join(DATA_DIR, 'channels.json');
5034
5530
 
@@ -5166,7 +5662,7 @@ function escalateBlockedTasks() {
5166
5662
  }
5167
5663
  }
5168
5664
  if (changed) saveTasks(tasks);
5169
- } catch {}
5665
+ } catch (e) { log.warn("watchdog check failed:", e.message); }
5170
5666
  }
5171
5667
 
5172
5668
  // Stand-up meetings: periodic team check-ins triggered by heartbeat
@@ -5207,7 +5703,27 @@ function triggerStandupIfDue() {
5207
5703
  summary += ' Each agent: report what you did, what\'s blocked, what\'s next. Then call listen_group().';
5208
5704
 
5209
5705
  broadcastSystemMessage(summary, registeredName);
5210
- } catch {}
5706
+ } catch (e) { log.warn("standup trigger failed:", e.message); }
5707
+ }
5708
+
5709
+ // --- Agent status change detection (heartbeat-driven) ---
5710
+ const _prevAgentAlive = {};
5711
+ function detectAgentStatusChanges(agents) {
5712
+ for (const [name, info] of Object.entries(agents)) {
5713
+ if (name === registeredName) continue;
5714
+ const alive = isPidAlive(info.pid, info.last_activity);
5715
+ const wasAlive = _prevAgentAlive[name];
5716
+ if (wasAlive !== undefined && wasAlive !== alive) {
5717
+ if (!alive) {
5718
+ broadcastSystemMessage(`[STATUS] ${name} is unreachable`, name);
5719
+ appendNotification('agent_offline', name, `${name} went offline`, null);
5720
+ } else {
5721
+ broadcastSystemMessage(`[STATUS] ${name} is back online`, null);
5722
+ appendNotification('agent_online', name, `${name} came back online`, null);
5723
+ }
5724
+ }
5725
+ _prevAgentAlive[name] = alive;
5726
+ }
5211
5727
  }
5212
5728
 
5213
5729
  // Auto-recovery: snapshot dead agent state before cleanup
@@ -5249,7 +5765,7 @@ function snapshotDeadAgents(agents) {
5249
5765
  kb_entries_written: kbKeysWritten,
5250
5766
  });
5251
5767
  }
5252
- } catch {}
5768
+ } catch (e) { log.warn("dead agent snapshot failed:", e.message); }
5253
5769
 
5254
5770
  // Quality Lead instant failover: if dead agent was Quality Lead, promote replacement immediately
5255
5771
  try {
@@ -5301,7 +5817,7 @@ function snapshotDeadAgents(agents) {
5301
5817
  broadcastSystemMessage(`[MONITOR FAILOVER] ${name} (Monitor) went offline. ${newMonitor} has been auto-promoted.`, newMonitor);
5302
5818
  }
5303
5819
  }
5304
- } catch {}
5820
+ } catch (e) { log.warn("monitor failover failed:", e.message); }
5305
5821
  }
5306
5822
  }
5307
5823
 
@@ -6118,6 +6634,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
6118
6634
  type: 'string',
6119
6635
  description: 'AI provider/CLI name (e.g. "Claude", "OpenAI", "Gemini"). Shown in dashboard.',
6120
6636
  },
6637
+ skills: {
6638
+ type: 'array',
6639
+ items: { type: 'string' },
6640
+ description: 'Skills like "python", "testing", "frontend", "design". Used for smart task routing.',
6641
+ },
6121
6642
  },
6122
6643
  required: ['name'],
6123
6644
  },
@@ -6152,6 +6673,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
6152
6673
  type: 'string',
6153
6674
  description: 'Channel to send to (optional — omit for #general). Use join_channel() first to create channels.',
6154
6675
  },
6676
+ priority: {
6677
+ type: 'string',
6678
+ enum: ['critical', 'normal', 'low'],
6679
+ description: 'Message priority (optional — auto-classified if omitted). Critical messages are delivered first and retained longer.',
6680
+ },
6155
6681
  },
6156
6682
  required: ['content'],
6157
6683
  },
@@ -6226,6 +6752,40 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
6226
6752
  },
6227
6753
  },
6228
6754
  },
6755
+ {
6756
+ name: 'consume_messages',
6757
+ description: 'Non-blocking check that returns ALL unconsumed messages with full content AND marks them as consumed. Unlike check_messages (peek-only) or listen (blocking), this is a one-shot "grab everything and mark it read" call. Ideal for agents that need to process a batch of messages without blocking.',
6758
+ inputSchema: {
6759
+ type: 'object',
6760
+ properties: {
6761
+ from: {
6762
+ type: 'string',
6763
+ description: 'Only consume messages from this specific agent (optional)',
6764
+ },
6765
+ limit: {
6766
+ type: 'number',
6767
+ description: 'Max number of messages to consume (default: all)',
6768
+ },
6769
+ },
6770
+ },
6771
+ },
6772
+ {
6773
+ name: 'get_notifications',
6774
+ description: 'Get unread notifications (task completions, workflow advances, agent status changes). Returns and marks as read. Non-blocking — use this instead of listen() when you need a quick status update without waiting.',
6775
+ inputSchema: {
6776
+ type: 'object',
6777
+ properties: {
6778
+ since: {
6779
+ type: 'string',
6780
+ description: 'Only return notifications after this ISO timestamp (optional)',
6781
+ },
6782
+ type: {
6783
+ type: 'string',
6784
+ description: 'Filter by type: task_done, workflow_advanced, agent_online, agent_offline, approval_needed (optional)',
6785
+ },
6786
+ },
6787
+ },
6788
+ },
6229
6789
  {
6230
6790
  name: 'ack_message',
6231
6791
  description: 'Acknowledge that you have processed a message. Lets the sender verify delivery via get_history.',
@@ -6454,11 +7014,13 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
6454
7014
  },
6455
7015
  {
6456
7016
  name: 'workflow_status',
6457
- description: 'Get status of a specific workflow or all workflows. Shows step progress and completion percentage.',
7017
+ description: 'Get status of a specific workflow or all workflows. Shows step progress, checkpoints, and completion percentage. Use action="rollback" to rollback to a checkpoint.',
6458
7018
  inputSchema: {
6459
7019
  type: 'object',
6460
7020
  properties: {
6461
7021
  workflow_id: { type: 'string', description: 'Workflow ID (optional — omit for all workflows)' },
7022
+ action: { type: 'string', enum: ['status', 'rollback'], description: 'Action (default: status)' },
7023
+ checkpoint_index: { type: 'number', description: 'Checkpoint index to rollback to (for rollback action)' },
6462
7024
  },
6463
7025
  },
6464
7026
  },
@@ -6804,13 +7366,13 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
6804
7366
 
6805
7367
  switch (name) {
6806
7368
  case 'register':
6807
- result = toolRegister(args.name, args?.provider);
7369
+ result = toolRegister(args.name, args?.provider, args?.skills);
6808
7370
  break;
6809
7371
  case 'list_agents':
6810
7372
  result = toolListAgents();
6811
7373
  break;
6812
7374
  case 'send_message':
6813
- result = await toolSendMessage(args.content, args?.to, args?.reply_to, args?.channel);
7375
+ result = await toolSendMessage(args.content, args?.to, args?.reply_to, args?.channel, args?.priority);
6814
7376
  break;
6815
7377
  case 'wait_for_reply':
6816
7378
  result = await toolWaitForReply(args?.timeout_seconds, args?.from);
@@ -6827,6 +7389,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
6827
7389
  case 'check_messages':
6828
7390
  result = toolCheckMessages(args?.from);
6829
7391
  break;
7392
+ case 'consume_messages':
7393
+ result = toolConsumeMessages(args?.from, args?.limit);
7394
+ break;
7395
+ case 'get_notifications':
7396
+ result = toolGetNotifications(args?.since, args?.type);
7397
+ break;
6830
7398
  case 'ack_message':
6831
7399
  result = toolAckMessage(args.message_id);
6832
7400
  break;
@@ -6876,7 +7444,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
6876
7444
  result = toolAdvanceWorkflow(args.workflow_id, args?.notes);
6877
7445
  break;
6878
7446
  case 'workflow_status':
6879
- result = toolWorkflowStatus(args?.workflow_id);
7447
+ result = toolWorkflowStatus(args?.workflow_id, args?.action, args?.checkpoint_index);
6880
7448
  break;
6881
7449
  case 'fork_conversation':
6882
7450
  result = toolForkConversation(args?.from_message_id, args.branch_name);
@@ -7028,7 +7596,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7028
7596
 
7029
7597
  // Global hook: on non-listen tools, check for pending messages and nudge with escalating urgency
7030
7598
  // Enhanced nudge: includes sender names, addressed count, and message preview
7031
- const listenTools = ['listen', 'listen_group', 'listen_codex', 'wait_for_reply', 'check_messages'];
7599
+ const listenTools = ['listen', 'listen_group', 'listen_codex', 'wait_for_reply', 'check_messages', 'consume_messages'];
7032
7600
  if (registeredName && !listenTools.includes(name) && (isGroupMode() || isManagedMode())) {
7033
7601
  try {
7034
7602
  const pending = getUnconsumedMessages(registeredName);
@@ -7064,7 +7632,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7064
7632
  result._nudge = `${pending.length} messages waiting${addressedHint}: ${senderSummary}. Latest: "${preview}...". Call listen_group().`;
7065
7633
  }
7066
7634
  }
7067
- } catch {}
7635
+ } catch (e) { log.debug("nudge detection failed:", e.message); }
7068
7636
  }
7069
7637
 
7070
7638
  // Global hook: reputation tracking
@@ -7094,6 +7662,35 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7094
7662
  try { autoCompress(); } catch (e) { log.debug('auto-compress failed:', e.message); }
7095
7663
  }
7096
7664
 
7665
+ // Coordinator mode hint: inject into every tool response for lead/manager/coordinator agents
7666
+ if (registeredName && typeof result === 'object' && result !== null) {
7667
+ try {
7668
+ const prof = getProfiles()[registeredName];
7669
+ const role = prof && prof.role ? prof.role.toLowerCase() : '';
7670
+ if (role === 'lead' || role === 'manager' || role === 'coordinator') {
7671
+ const coordMode = getConfig().coordinator_mode || 'responsive';
7672
+ result.coordinator_mode = coordMode;
7673
+ result.coordinator_hint = coordMode === 'responsive'
7674
+ ? 'MODE: Stay with me — do NOT call listen(). Use consume_messages/workflow_status between human interactions.'
7675
+ : 'MODE: Run autonomously — use listen() to wait for agent results.';
7676
+ }
7677
+ } catch (e) { log.debug('coordinator mode hint failed:', e.message); }
7678
+ }
7679
+
7680
+ // Unread message hint: check if agent has pending messages on every tool call
7681
+ // This ensures agents see messages even when they forget to call listen()
7682
+ if (registeredName && typeof result === 'object' && result !== null && !listenTools.includes(name)) {
7683
+ try {
7684
+ const unread = getUnconsumedMessages(registeredName);
7685
+ if (unread.length > 0) {
7686
+ const latest = unread[unread.length - 1];
7687
+ result.unread_messages = unread.length;
7688
+ result.unread_preview = `${latest.from}: "${latest.content.substring(0, 100).replace(/\n/g, ' ')}"`;
7689
+ result.unread_action = `You have ${unread.length} unread message(s). Call listen() to receive them.`;
7690
+ }
7691
+ } catch (e) { log.debug('unread message hint failed:', e.message); }
7692
+ }
7693
+
7097
7694
  return {
7098
7695
  content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
7099
7696
  };
@@ -7161,14 +7758,141 @@ async function main() {
7161
7758
  console.error('Fix: Run "npx neohive doctor" to diagnose the issue.');
7162
7759
  process.exit(1);
7163
7760
  }
7164
- try {
7165
- const transport = new StdioServerTransport();
7166
- await server.connect(transport);
7167
- console.error('Neohive MCP server v6.0.0 running (66 tools)');
7168
- } catch (e) {
7169
- console.error('ERROR: MCP server failed to start: ' + e.message);
7170
- console.error('Fix: Run "npx neohive doctor" to check your setup.');
7171
- process.exit(1);
7761
+
7762
+ // HTTP persistent server mode: --http flag or NEOHIVE_TRANSPORT=http
7763
+ const useHttp = process.argv.includes('--http') || process.env.NEOHIVE_TRANSPORT === 'http';
7764
+
7765
+ if (useHttp) {
7766
+ try {
7767
+ const http = require('http');
7768
+ const { randomUUID } = require('crypto');
7769
+ const { StreamableHTTPServerTransport } = require('@modelcontextprotocol/sdk/server/streamableHttp.js');
7770
+ const { isInitializeRequest } = require('@modelcontextprotocol/sdk/types.js');
7771
+
7772
+ const PORT = parseInt(process.env.NEOHIVE_SERVER_PORT || '4321', 10);
7773
+ const sessions = {};
7774
+
7775
+ const httpServer = http.createServer(async (req, res) => {
7776
+ // CORS headers for local dev
7777
+ res.setHeader('Access-Control-Allow-Origin', '*');
7778
+ res.setHeader('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
7779
+ res.setHeader('Access-Control-Allow-Headers', 'Content-Type, mcp-session-id');
7780
+ res.setHeader('Access-Control-Expose-Headers', 'mcp-session-id');
7781
+
7782
+ if (req.method === 'OPTIONS') {
7783
+ res.writeHead(204);
7784
+ res.end();
7785
+ return;
7786
+ }
7787
+
7788
+ // Health check endpoint
7789
+ if (req.url === '/health') {
7790
+ res.writeHead(200, { 'Content-Type': 'application/json' });
7791
+ res.end(JSON.stringify({ status: 'ok', sessions: Object.keys(sessions).length }));
7792
+ return;
7793
+ }
7794
+
7795
+ if (req.url === '/mcp') {
7796
+ if (req.method === 'POST') {
7797
+ // Parse JSON body
7798
+ let body = '';
7799
+ for await (const chunk of req) body += chunk;
7800
+ let parsed;
7801
+ try { parsed = JSON.parse(body); } catch {
7802
+ res.writeHead(400, { 'Content-Type': 'application/json' });
7803
+ res.end(JSON.stringify({ jsonrpc: '2.0', error: { code: -32700, message: 'Parse error' }, id: null }));
7804
+ return;
7805
+ }
7806
+
7807
+ const sessionId = req.headers['mcp-session-id'];
7808
+
7809
+ if (sessionId && sessions[sessionId]) {
7810
+ // Existing session — route to its transport
7811
+ await sessions[sessionId].transport.handleRequest(req, res, parsed);
7812
+ } else if (!sessionId && isInitializeRequest(parsed)) {
7813
+ // New session initialization
7814
+ const transport = new StreamableHTTPServerTransport({
7815
+ sessionIdGenerator: () => randomUUID(),
7816
+ onsessioninitialized: (sid) => {
7817
+ sessions[sid] = { transport, createdAt: Date.now() };
7818
+ console.error(`[HTTP] Session created: ${sid}`);
7819
+ },
7820
+ });
7821
+
7822
+ transport.onclose = () => {
7823
+ const sid = transport.sessionId;
7824
+ if (sid && sessions[sid]) {
7825
+ delete sessions[sid];
7826
+ console.error(`[HTTP] Session closed: ${sid}`);
7827
+ }
7828
+ };
7829
+
7830
+ await server.connect(transport);
7831
+ await transport.handleRequest(req, res, parsed);
7832
+ } else {
7833
+ res.writeHead(400, { 'Content-Type': 'application/json' });
7834
+ res.end(JSON.stringify({ jsonrpc: '2.0', error: { code: -32000, message: 'Bad Request: No valid session ID' }, id: null }));
7835
+ }
7836
+ } else if (req.method === 'GET') {
7837
+ // SSE stream for server-initiated notifications
7838
+ const sessionId = req.headers['mcp-session-id'];
7839
+ if (sessionId && sessions[sessionId]) {
7840
+ await sessions[sessionId].transport.handleRequest(req, res);
7841
+ } else {
7842
+ res.writeHead(400, { 'Content-Type': 'application/json' });
7843
+ res.end(JSON.stringify({ error: 'Missing or invalid session ID' }));
7844
+ }
7845
+ } else if (req.method === 'DELETE') {
7846
+ // Session termination
7847
+ const sessionId = req.headers['mcp-session-id'];
7848
+ if (sessionId && sessions[sessionId]) {
7849
+ await sessions[sessionId].transport.close();
7850
+ delete sessions[sessionId];
7851
+ res.writeHead(200, { 'Content-Type': 'application/json' });
7852
+ res.end(JSON.stringify({ success: true }));
7853
+ } else {
7854
+ res.writeHead(404, { 'Content-Type': 'application/json' });
7855
+ res.end(JSON.stringify({ error: 'Session not found' }));
7856
+ }
7857
+ } else {
7858
+ res.writeHead(405, { Allow: 'GET, POST, DELETE' });
7859
+ res.end('Method Not Allowed');
7860
+ }
7861
+ } else {
7862
+ res.writeHead(404);
7863
+ res.end('Not Found');
7864
+ }
7865
+ });
7866
+
7867
+ httpServer.listen(PORT, () => {
7868
+ console.error(`Neohive MCP server v6.0.0 running in HTTP mode on port ${PORT}`);
7869
+ console.error(`Endpoint: http://localhost:${PORT}/mcp`);
7870
+ console.error(`Health: http://localhost:${PORT}/health`);
7871
+ });
7872
+
7873
+ // Graceful shutdown
7874
+ process.on('SIGINT', () => {
7875
+ console.error('\n[HTTP] Shutting down...');
7876
+ for (const sid of Object.keys(sessions)) {
7877
+ try { sessions[sid].transport.close(); } catch {}
7878
+ }
7879
+ httpServer.close(() => process.exit(0));
7880
+ });
7881
+ } catch (e) {
7882
+ console.error('ERROR: HTTP server failed to start: ' + e.message);
7883
+ console.error('Fix: Ensure @modelcontextprotocol/sdk is up to date.');
7884
+ process.exit(1);
7885
+ }
7886
+ } else {
7887
+ // Default: stdio transport (one agent per process)
7888
+ try {
7889
+ const transport = new StdioServerTransport();
7890
+ await server.connect(transport);
7891
+ } catch (e) {
7892
+ console.error('ERROR: MCP server failed to start: ' + e.message);
7893
+ console.error('Fix: Run "npx neohive doctor" to check your setup.');
7894
+ process.exit(1);
7895
+ }
7172
7896
  }
7173
7897
  }
7174
7898