neohive 6.3.0 → 6.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -82,7 +82,7 @@ const SERVER_CONFIG = {
82
82
  // Polling / Heartbeat intervals (ms)
83
83
  HEARTBEAT_INTERVAL_MS: 15000, // how often agents write heartbeat files
84
84
  POLL_INTERVAL_MS: 2000, // message polling cycle
85
- AUTONOMOUS_LISTEN_MS: 30000, // max listen timeout in autonomous mode
85
+ AUTONOMOUS_LISTEN_MS: 90000, // max listen timeout in autonomous mode
86
86
  CODEX_LISTEN_MS: 90000, // max listen timeout for Codex agents
87
87
 
88
88
  // Agent health thresholds (ms)
@@ -124,6 +124,7 @@ let currentBranch = 'main'; // which branch this agent is on
124
124
  let lastSentAt = 0; // timestamp of last sent message (for group cooldown)
125
125
  let sendsSinceLastListen = 0; // enforced: must listen between sends in group mode
126
126
  let consecutiveNonListenCalls = 0; // escalating listen() enforcement counter
127
+ let pendingUserReply = false; // true when __user__ message received but not yet replied to
127
128
  let _isCurrentlyListening = false; // true when agent is in a listen() call
128
129
  let sendLimit = 1; // default: 1 send per listen cycle (2 if addressed)
129
130
  let unaddressedSends = 0; // response budget: unaddressed sends counter
@@ -585,9 +586,27 @@ function buildMessageResponse(msg, consumedIds) {
585
586
  } catch (e) { log.debug('task reminder in listen failed:', e.message); }
586
587
 
587
588
  const isSystemMsg = msg.from === '__system__' || msg.system === true;
588
- const nextAction = isSystemMsg
589
- ? 'Process this message, then call listen().'
590
- : `Do what this message asks. When finished, send_message(to="${msg.from}") with what you did and files changed, then call listen().`;
589
+ if (msg.from === '__user__') pendingUserReply = true;
590
+
591
+ // Generate a specific next_action for review requests so reviewers know to read the file
592
+ let nextAction;
593
+ if (isSystemMsg && msg.content) {
594
+ const reviewMatch = msg.content.match(/submit_review\("(rev_[a-z0-9]+)"/);
595
+ const fileMatch = msg.content.match(/read(?:ing)?(?: the)? (?:file )?"([^"]+)"/i) ||
596
+ msg.content.match(/review of "([^"]+)"/i);
597
+ if (reviewMatch) {
598
+ const reviewId = reviewMatch[1];
599
+ const filePath = fileMatch ? fileMatch[1] : null;
600
+ nextAction = filePath
601
+ ? `REVIEW REQUIRED: Read "${filePath}" first, then call submit_review("${reviewId}", "approved"/"changes_requested", "<your findings — min 50 chars>"). Do NOT submit without reading the file.`
602
+ : `REVIEW REQUIRED: Read the relevant files for this review, then call submit_review("${reviewId}", "approved"/"changes_requested", "<your findings — min 50 chars>"). Feedback is required.`;
603
+ }
604
+ }
605
+ if (!nextAction) {
606
+ nextAction = isSystemMsg
607
+ ? 'Process this message, then call listen().'
608
+ : `Do what this message asks. When finished, send_message(to="${msg.from}") with what you did and files changed, then call listen().`;
609
+ }
591
610
 
592
611
  return {
593
612
  success: true,
@@ -1164,7 +1183,7 @@ function buildGuide(level = 'standard') {
1164
1183
  } else {
1165
1184
  rules.push('ROLE: Managed agent. The manager controls your turn.');
1166
1185
  rules.push('LOOP: listen() → receive work → update_task(id, "in_progress") → do work → update_task(id, "done") → send_message(manager, summary) → listen(). Never stop.');
1167
- rules.push('Never call get_work() or check_messages() in managed mode.');
1186
+ rules.push('Never call get_work() or messages() in managed mode.');
1168
1187
  }
1169
1188
  rules.push('Keep messages short (2-3 paragraphs). Report what you did and what files changed.');
1170
1189
  }
@@ -1238,18 +1257,20 @@ function buildGuide(level = 'standard') {
1238
1257
  }
1239
1258
  }
1240
1259
 
1241
- if (isLeadRole && aliveCount >= 2) {
1260
+ if (isLeadRole) {
1242
1261
  const coordinatorMode = getConfig().coordinator_mode || 'responsive';
1243
1262
  if (coordinatorMode === 'responsive') {
1244
- rules.push('COORDINATOR: Use consume_messages() to check updates non-blockingly. Do NOT block in listen() — stay responsive to the user.');
1263
+ rules.push('COORDINATOR: Use messages(action="check") to check updates non-blockingly. Do NOT block in listen() — stay responsive to the user.');
1245
1264
  } else {
1246
1265
  rules.push('COORDINATOR: Use listen() to wait for agent results. Only return to human when all tasks are done or blocked.');
1247
1266
  }
1248
1267
  rules.push('Coordinators do NOT edit files or write code. Delegate ALL code work to other agents.');
1249
1268
  }
1250
1269
 
1251
- const listenCmd = isManagedMode() ? 'listen()' : (mode === 'group' ? 'listen_group()' : 'listen()');
1252
- rules.push(`After EVERY action, call ${listenCmd}. Never use sleep() or poll with check_messages().`);
1270
+ const listenCmd = isManagedMode() ? 'listen()' : (mode === 'group' ? 'listen(mode="group")' : 'listen()');
1271
+ if (!isLeadRole) {
1272
+ rules.push(`After EVERY action, call ${listenCmd}. Never use sleep() or poll with messages().`);
1273
+ }
1253
1274
 
1254
1275
  if (level === 'minimal') {
1255
1276
  rules.push('Lock files before editing (lock_file/unlock_file).');
@@ -1543,6 +1564,15 @@ function toolRegister(name, provider = null, skills = null) {
1543
1564
  nextAction = 'Call get_briefing() to load project context';
1544
1565
  }
1545
1566
 
1567
+ // Lead/coordinator gets role-specific next_action regardless of agent count
1568
+ const myRoleStr = (guide.your_role || '').toLowerCase();
1569
+ if (myRoleStr === 'lead' || myRoleStr === 'manager' || myRoleStr === 'coordinator') {
1570
+ const coordinatorMode = getConfig().coordinator_mode || 'responsive';
1571
+ nextAction = coordinatorMode === 'autonomous'
1572
+ ? 'Call get_briefing() to load project context, then listen() to coordinate your team.'
1573
+ : 'Call get_briefing() to load project context, then messages(action="check") to check for pending work.';
1574
+ }
1575
+
1546
1576
  // --- Build the result: next_action FIRST, then context ---
1547
1577
  const result = {
1548
1578
  success: true,
@@ -1665,7 +1695,7 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1665
1695
  const effectiveSendLimit = isAutonomousMode() ? 5 : sendLimit;
1666
1696
  const myRole = (getProfiles()[registeredName] || {}).role;
1667
1697
  if (isGroupMode() && sendsSinceLastListen >= effectiveSendLimit && myRole !== 'Coordinator') {
1668
- return { error: `You must call listen_group() before sending again. You've sent ${sendsSinceLastListen} message(s) without listening (limit: ${effectiveSendLimit}). This prevents message storms.` };
1698
+ return { error: `You must call listen() before sending again. You've sent ${sendsSinceLastListen} message(s) without listening (limit: ${effectiveSendLimit}). This prevents message storms.` };
1669
1699
  }
1670
1700
 
1671
1701
  // Response budget: track unaddressed sends, hint when depleted
@@ -1925,6 +1955,9 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1925
1955
  sendsSinceLastListen++;
1926
1956
  if (isGroupMode() && !msg.addressed_to) { unaddressedSends++; }
1927
1957
 
1958
+ // Clear pending user reply flag when agent successfully replies to __user__
1959
+ if (to === '__user__') pendingUserReply = false;
1960
+
1928
1961
  const result = { success: true, messageId: msg.id, from: msg.from, to: msg.to };
1929
1962
 
1930
1963
  // Decision overlap hint: warn if message content overlaps with existing decisions
@@ -1960,7 +1993,7 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1960
1993
  if (!recipientAlive) {
1961
1994
  result.warning = `Agent "${to}" appears offline (PID not running). Message queued but may not be received until they reconnect.`;
1962
1995
  } else if (to !== '__user__' && agents[to] && !agents[to].listening_since) {
1963
- result.note = `Agent "${to}" is currently working (not in listen mode). Message queued — they'll see it when they finish their current task and call listen_group().`;
1996
+ result.note = `Agent "${to}" is currently working (not in listen mode). Message queued — they'll see it when they finish their current task and call listen().`;
1964
1997
  }
1965
1998
 
1966
1999
  // Coordinator enforcement: warn if sending work assignment without creating a task first
@@ -2003,7 +2036,7 @@ function toolBroadcast(content) {
2003
2036
  const effectiveSendLimitBcast = isAutonomousMode() ? 5 : sendLimit;
2004
2037
  const myRole = (getProfiles()[registeredName] || {}).role;
2005
2038
  if (isGroupMode() && sendsSinceLastListen >= effectiveSendLimitBcast && myRole !== 'Coordinator') {
2006
- return { error: `You must call listen_group() before broadcasting again. You've sent ${sendsSinceLastListen} message(s) without listening (limit: ${effectiveSendLimitBcast}).` };
2039
+ return { error: `You must call listen() before broadcasting again. You've sent ${sendsSinceLastListen} message(s) without listening (limit: ${effectiveSendLimitBcast}).` };
2007
2040
  }
2008
2041
 
2009
2042
  const rateErr = checkRateLimit(content, '__broadcast__');
@@ -2134,104 +2167,11 @@ async function toolWaitForReply(timeoutSeconds = 300, from = null) {
2134
2167
  };
2135
2168
  }
2136
2169
 
2137
- function toolCheckMessages(from = null) {
2138
- if (!registeredName) {
2139
- return { error: 'You must call register() first' };
2140
- }
2141
-
2142
- const unconsumed = getUnconsumedMessages(registeredName, from);
2143
-
2144
- // Rich summary: senders, addressed count, urgency — same as enhanced nudge
2145
- const senders = {};
2146
- let addressedCount = 0;
2147
- for (const m of unconsumed) {
2148
- senders[m.from] = (senders[m.from] || 0) + 1;
2149
- if (m.addressed_to && m.addressed_to.includes(registeredName)) addressedCount++;
2150
- }
2151
-
2152
- // Include pending notification count
2153
- const allNotifs = getNotifications();
2154
- const unreadNotifs = allNotifs.filter(n => !n.read_by.includes(registeredName));
2155
-
2156
- const result = {
2157
- count: unconsumed.length,
2158
- pending_notifications: unreadNotifs.length,
2159
- // Scale fix: return previews not full content — agent gets full content via listen_group()
2160
- messages: unconsumed.map(m => ({
2161
- id: m.id,
2162
- from: m.from,
2163
- preview: m.content.substring(0, 120),
2164
- timestamp: m.timestamp,
2165
- ...(m.addressed_to && { addressed_to: m.addressed_to }),
2166
- })),
2167
- };
2168
-
2169
- if (unconsumed.length > 0) {
2170
- result.senders = senders;
2171
- result.addressed_to_you = addressedCount;
2172
- const latest = unconsumed[unconsumed.length - 1];
2173
- result.preview = `${latest.from}: "${latest.content.substring(0, 80).replace(/\n/g, ' ')}..."`;
2174
- const oldestAge = Math.round((Date.now() - new Date(unconsumed[0].timestamp).getTime()) / 1000);
2175
- result.urgency = oldestAge > 120 ? 'critical' : oldestAge > 30 ? 'urgent' : 'normal';
2176
- result.action_required = 'You have unread messages. Call listen() to receive and process them. Do NOT call check_messages() again — it does not consume messages and you will see the same messages repeatedly.';
2177
- }
2178
-
2179
- return result;
2180
- }
2170
+ // toolCheckMessages and toolConsumeMessages removed — dead code.
2171
+ // Routing goes through: case 'messages' → messaging.handlers['check_messages' | 'consume_messages']
2172
+ // Source of truth: agent-bridge/tools/messaging.js
2181
2173
 
2182
- function toolConsumeMessages(from = null, limit = null) {
2183
- if (!registeredName) {
2184
- return { error: 'You must call register() first' };
2185
- }
2186
-
2187
- let unconsumed = getUnconsumedMessages(registeredName, from);
2188
- if (limit && limit > 0 && unconsumed.length > limit) {
2189
- unconsumed = unconsumed.slice(0, limit);
2190
- }
2191
-
2192
- if (unconsumed.length === 0) {
2193
- return { success: true, count: 0, messages: [] };
2194
- }
2195
-
2196
- // Mark all as consumed
2197
- const consumed = getConsumedIds(registeredName);
2198
- for (const msg of unconsumed) {
2199
- consumed.add(msg.id);
2200
- markAsRead(registeredName, msg.id);
2201
- }
2202
- saveConsumedIds(registeredName, consumed);
2203
-
2204
- // Update read offset
2205
- const msgFile = getMessagesFile(currentBranch);
2206
- if (fs.existsSync(msgFile)) {
2207
- lastReadOffset = fs.statSync(msgFile).size;
2208
- }
2209
-
2210
- touchActivity();
2211
-
2212
- // Count remaining unconsumed after this batch
2213
- const remaining = getUnconsumedMessages(registeredName, null);
2214
-
2215
- const agents = getAgents();
2216
- const agentsOnline = Object.entries(agents).filter(([, info]) => isPidAlive(info.pid, info.last_activity)).length;
2217
-
2218
- return {
2219
- success: true,
2220
- count: unconsumed.length,
2221
- messages: unconsumed.map(m => ({
2222
- id: m.id,
2223
- from: m.from,
2224
- content: m.content,
2225
- timestamp: m.timestamp,
2226
- ...(m.reply_to && { reply_to: m.reply_to }),
2227
- ...(m.thread_id && { thread_id: m.thread_id }),
2228
- ...(m.addressed_to && { addressed_to: m.addressed_to }),
2229
- })),
2230
- remaining: remaining.length,
2231
- agents_online: agentsOnline,
2232
- coordinator_mode: getConfig().coordinator_mode || 'responsive',
2233
- };
2234
- }
2174
+ // toolConsumeMessages removed — dead code. See agent-bridge/tools/messaging.js
2235
2175
 
2236
2176
  function toolAckMessage(messageId) {
2237
2177
  if (!registeredName) {
@@ -2282,6 +2222,9 @@ async function toolListen(from = null, outcome = null, task_id = null, summary =
2282
2222
  if (newStatus) toolUpdateTask(task_id, newStatus, summary || '');
2283
2223
  }
2284
2224
 
2225
+ // Clear pending user reply flag — warning was shown, agent is now entering the listen loop
2226
+ pendingUserReply = false;
2227
+
2285
2228
  // Auto-detect group/managed mode and delegate to toolListenGroup
2286
2229
  // This prevents agents from calling the "wrong" listen function
2287
2230
  if (isGroupMode() || isManagedMode()) {
@@ -2385,12 +2328,15 @@ async function toolListen(from = null, outcome = null, task_id = null, summary =
2385
2328
 
2386
2329
  heartbeatTimer = setInterval(() => { touchHeartbeat(registeredName); }, 15000);
2387
2330
 
2331
+ const listenTimeoutMs = (getConfig().listen_poll_interval || 120) * 1000;
2388
2332
  timer = setTimeout(() => {
2389
2333
  touchActivity();
2390
2334
  autoCompact();
2391
2335
  if (checkMessages()) return;
2392
- setupWatcher();
2393
- }, 300000);
2336
+ // Return cleanly so Claude sees retry:true rather than a client-side MCP timeout
2337
+ setListening(false);
2338
+ done({ retry: true, next_action: 'No messages. Call listen() again immediately.' });
2339
+ }, listenTimeoutMs);
2394
2340
  }
2395
2341
 
2396
2342
  setupWatcher();
@@ -2409,10 +2355,10 @@ async function toolListenCodex(from = null, outcome = null, task_id = null, summ
2409
2355
  const taskList = getTasks();
2410
2356
  const task = taskList.find(t => t.id === task_id);
2411
2357
  if (!task) {
2412
- return { error: true, message: `Invalid task_id "${task_id}" — task does not exist. Check list_tasks() and call listen_codex() again with the correct task_id.` };
2358
+ return { error: true, message: `Invalid task_id "${task_id}" — task does not exist. Check list_tasks() and call listen(mode="codex") again with the correct task_id.` };
2413
2359
  }
2414
2360
  if (task.assignee && task.assignee !== registeredName) {
2415
- return { error: true, message: `Task "${task_id}" is assigned to ${task.assignee}, not to you (${registeredName}). You cannot update another agent's task via listen_codex().` };
2361
+ return { error: true, message: `Task "${task_id}" is assigned to ${task.assignee}, not to you (${registeredName}). You cannot update another agent's task via listen(mode="codex").` };
2416
2362
  }
2417
2363
  const statusMap = { completed: 'done', blocked: 'blocked', failed: 'blocked_permanent' };
2418
2364
  const newStatus = statusMap[outcome];
@@ -2540,9 +2486,9 @@ function toolSetConversationMode(mode) {
2540
2486
  }
2541
2487
 
2542
2488
  const messages = {
2543
- group: 'Group mode enabled. Use listen_group() to receive batched messages. All messages are shared with everyone.',
2489
+ group: 'Group mode enabled. Use listen(mode="group") to receive batched messages. All messages are shared with everyone.',
2544
2490
  direct: 'Direct mode enabled. Use listen() for point-to-point messaging.',
2545
- managed: 'Managed mode enabled. Call claim_manager() to become the manager, or wait for the manager to give you the floor via yield_floor(). Use listen() or listen_group() to receive messages.',
2491
+ managed: 'Managed mode enabled. Call claim_manager() to become the manager, or wait for the manager to give you the floor via yield_floor(). Use listen() to receive messages.',
2546
2492
  };
2547
2493
  return { success: true, mode, message: messages[mode] };
2548
2494
  }
@@ -2703,10 +2649,10 @@ async function toolListenGroup(outcome = null, task_id = null, summary = null) {
2703
2649
  const taskList = getTasks();
2704
2650
  const task = taskList.find(t => t.id === task_id);
2705
2651
  if (!task) {
2706
- return { error: true, message: `Invalid task_id "${task_id}" — task does not exist. Check list_tasks() and call listen_group() again with the correct task_id.` };
2652
+ return { error: true, message: `Invalid task_id "${task_id}" — task does not exist. Check list_tasks() and call listen() again with the correct task_id.` };
2707
2653
  }
2708
2654
  if (task.assignee && task.assignee !== registeredName) {
2709
- return { error: true, message: `Task "${task_id}" is assigned to ${task.assignee}, not to you (${registeredName}). You cannot update another agent's task via listen_group().` };
2655
+ return { error: true, message: `Task "${task_id}" is assigned to ${task.assignee}, not to you (${registeredName}). You cannot update another agent's task via listen().` };
2710
2656
  }
2711
2657
  const statusMap = { completed: 'done', blocked: 'blocked', failed: 'blocked_permanent' };
2712
2658
  const newStatus = statusMap[outcome];
@@ -2722,9 +2668,12 @@ async function toolListenGroup(outcome = null, task_id = null, summary = null) {
2722
2668
 
2723
2669
  const consumed = getConsumedIds(registeredName);
2724
2670
 
2725
- // Autonomous mode: cap listen at 30s — agents should use get_work() instead
2726
- const autonomousTimeout = isAutonomousMode() ? SERVER_CONFIG.AUTONOMOUS_LISTEN_MS : null;
2727
- const MAX_LISTEN_MS = 300000; // 5 minutes — MCP has no tool timeout, heartbeat keeps agent alive
2671
+ // Autonomous mode: cap listen at 90s — agents should use get_work() instead
2672
+ // Responsive mode (Stay with me) overrides autonomous timeout — always uses configured listen interval
2673
+ const coordinatorMode = getConfig().coordinator_mode || 'responsive';
2674
+ const autonomousTimeout = (coordinatorMode !== 'responsive' && isAutonomousMode()) ? SERVER_CONFIG.AUTONOMOUS_LISTEN_MS : null;
2675
+ const configuredListenMs = (getConfig().listen_poll_interval || 120) * 1000;
2676
+ const MAX_LISTEN_MS = configuredListenMs; // configurable via dashboard settings (default 2 min)
2728
2677
  const listenStart = Date.now();
2729
2678
 
2730
2679
  // Helper: collect unconsumed messages from all sources (general + channels)
@@ -2957,8 +2906,8 @@ function classifyPriority(msg) {
2957
2906
  return 'normal';
2958
2907
  }
2959
2908
 
2960
- // Build the response for listen_group — kept lean to reduce context accumulation
2961
- // Context/history removed: agents should call get_history() when they need it
2909
+ // Build the response for listen (group mode) — kept lean to reduce context accumulation
2910
+ // Context/history removed: agents should call messages(action="history") when they need it
2962
2911
  function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
2963
2912
  saveConsumedIds(agentName, consumed);
2964
2913
  touchActivity();
@@ -3065,6 +3014,10 @@ function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
3065
3014
  }
3066
3015
  }
3067
3016
 
3017
+ if (batch.some(m => m.from === '__user__')) {
3018
+ pendingUserReply = true;
3019
+ }
3020
+
3068
3021
  if (isAutonomousMode()) {
3069
3022
  result.next_action = 'Process these messages, then call get_work().';
3070
3023
  } else if (result.should_respond === false) {
@@ -3439,7 +3392,7 @@ function toolUpdateTask(taskId, status, notes = null) {
3439
3392
  task.status = 'in_review';
3440
3393
  task.updated_at = new Date().toISOString();
3441
3394
  saveTasks(tasks);
3442
- broadcastSystemMessage(`[REVIEW GATE] ${registeredName} tried to mark "${task.title}" done but no review exists. Auto-created review ${reviewId}. A reviewer must approve before this task can be completed.`, registeredName);
3395
+ broadcastSystemMessage(`[REVIEW GATE] ${registeredName} tried to mark "${task.title}" done but no review exists. Auto-created review ${reviewId}. To review: (1) read the relevant files for "${task.title}", (2) call submit_review("${reviewId}", "approved"/"changes_requested", "<your findings — min 50 chars>"). Feedback is required.`, registeredName);
3443
3396
  logViolation('review_gate_blocked', registeredName, `Task "${task.title}" (${task.id}) blocked — no approved review. Auto-created ${reviewId}.`);
3444
3397
  touchActivity();
3445
3398
  return {
@@ -4275,12 +4228,14 @@ async function toolGetWork(params = {}) {
4275
4228
  // 9. Truly idle — try role rebalancing before returning
4276
4229
  rebalanceRoles(); // Item 5: check if workload requires role changes
4277
4230
  touchActivity();
4231
+ const config = getConfig();
4232
+ const idleInterval = config.idle_poll_interval || 90;
4278
4233
  const idleResult = {
4279
4234
  type: 'idle',
4280
- next_action: isManagedMode() ? 'Call listen() to wait for work.' : 'Call get_work() again in 30 seconds.',
4235
+ next_action: isManagedMode() ? 'Call listen() to wait for work.' : `Call get_work() again in ${idleInterval} seconds.`,
4281
4236
  instruction: isManagedMode()
4282
4237
  ? 'No work available right now. Call listen() to wait for the manager to assign work or give you the floor.'
4283
- : 'No work available right now. Call get_work() again in 30 seconds.'
4238
+ : `No work available right now. Call get_work() again in ${idleInterval} seconds.`
4284
4239
  };
4285
4240
  // Item 4: warn demoted agents
4286
4241
  const agentRep = getReputation();
@@ -5510,7 +5465,7 @@ function toolStartPlan(params) {
5510
5465
  broadcastSystemMessage(
5511
5466
  `[PLAN LAUNCHED] "${name}" — ${steps.length} steps, autonomous mode, ${useParallel ? 'parallel' : 'sequential'}. ` +
5512
5467
  `${startedSteps.length} step(s) started. ` +
5513
- `All agents: call get_work() to enter the autonomous work loop. Do NOT call listen_group().`
5468
+ `All agents: call get_work() to enter the autonomous work loop. Do NOT call listen().`
5514
5469
  );
5515
5470
 
5516
5471
  touchActivity();
@@ -5819,6 +5774,59 @@ function toolListChannels() {
5819
5774
  return { channels: result, your_channels: getAgentChannels(registeredName) };
5820
5775
  }
5821
5776
 
5777
// --- Self-healing Watchdog: reclaim tasks from dead/stale agents ---
// Specified in GEMINI.md: runs every 60s; scans in_progress tasks.
/**
 * Scan every in_progress task and reclaim the ones whose assignee is gone.
 *
 * An assignee counts as gone when it is missing from the agent registry, or
 * when isPidAlive() reports it dead AND its last_activity is older than the
 * stale threshold. For each such task, retry_count is incremented and
 * updated_at refreshed; below the retry limit the task is reset to 'pending'
 * and unassigned, at the limit it is marked 'blocked_permanent' with a
 * blocked_reason. Every outcome is announced through broadcastSystemMessage(),
 * and the task list is saved once at the end if anything changed.
 *
 * Does nothing until this process has a registeredName. Failures are logged
 * with log.warn and never thrown.
 */
function runSelfHealingWatchdog() {
  if (!registeredName) return;

  const STALE_THRESHOLD_MS = 300000; // 5 minutes
  const MAX_RETRIES = 3; // attempts before a task is parked as blocked_permanent

  try {
    const tasks = getTasks();
    const agents = getAgents();
    const now = Date.now();
    let changed = false;

    for (const task of tasks) {
      if (task.status !== 'in_progress' || !task.assignee) continue;

      const assignee = agents[task.assignee];
      let isStale = false;

      if (!assignee) {
        isStale = true; // Assignee no longer in registry
      } else {
        // A missing OR unparsable timestamp means "never active". Without the
        // NaN guard, `now - NaN > threshold` is always false and a dead
        // agent's task would stay in_progress forever.
        const parsed = assignee.last_activity ? new Date(assignee.last_activity).getTime() : 0;
        const lastActivity = Number.isNaN(parsed) ? 0 : parsed;
        const heartbeatStale = now - lastActivity > STALE_THRESHOLD_MS;
        const pidDead = !isPidAlive(assignee.pid, assignee.last_activity);

        // Both signals are required so a briefly quiet but live agent keeps its task.
        isStale = pidDead && heartbeatStale;
      }

      if (!isStale) continue;

      const retryCount = (task.retry_count || 0) + 1;
      task.retry_count = retryCount;
      task.updated_at = new Date().toISOString();
      changed = true;

      if (retryCount >= MAX_RETRIES) {
        // NOTE(review): this reason text is also used when the assignee is simply
        // missing from the registry — confirm whether it should differ in that case.
        task.status = 'blocked_permanent';
        task.blocked_reason = `Agent "${task.assignee}" failed ${MAX_RETRIES} times (PID dead + heartbeat stale >5min). Coordinator intervention required.`;
        broadcastSystemMessage(`⛔ [WATCHDOG: POISON PILL] Task "${task.title}" marked as blocked_permanent after ${MAX_RETRIES} failed attempts by ${task.assignee}. Coordinator intervention required.`, registeredName);
      } else {
        const oldAssignee = task.assignee;
        task.status = 'pending';
        task.assignee = null;
        broadcastSystemMessage(`↺ [WATCHDOG: RECLAIMED] Task "${task.title}" reclaimed from stale agent "${oldAssignee}" (retry ${retryCount}/${MAX_RETRIES}). Reset to pending.`, registeredName);
      }
    }

    if (changed) saveTasks(tasks);
  } catch (e) { log.warn("Self-healing watchdog failed:", e.message); }
}
5829
+
5822
5830
  // Auto-escalation: notify team about tasks blocked for >5 minutes
5823
5831
  // Uses task.escalated_at field for cross-process dedup (file-based, not in-memory)
5824
5832
  function escalateBlockedTasks() {
@@ -5878,7 +5886,7 @@ function triggerStandupIfDue() {
5878
5886
  if (inProgress.length > 0) summary += ` In progress: ${inProgress.map(t => `"${t.title}" (${t.assignee || '?'})`).join(', ')}.`;
5879
5887
  if (blocked.length > 0) summary += ` BLOCKED: ${blocked.map(t => `"${t.title}" (${t.assignee || '?'})`).join(', ')}.`;
5880
5888
  if (recentDone.length > 0) summary += ` Recently done: ${recentDone.length} task(s).`;
5881
- summary += ' Each agent: report what you did, what\'s blocked, what\'s next. Then call listen_group().';
5889
+ summary += ' Each agent: report what you did, what\'s blocked, what\'s next. Then call listen().';
5882
5890
 
5883
5891
  broadcastSystemMessage(summary, registeredName);
5884
5892
  } catch (e) { log.warn("standup trigger failed:", e.message); }
@@ -6172,7 +6180,7 @@ function toolGetGuide(level = 'standard') {
6172
6180
  const guide = buildGuide(level);
6173
6181
  guide.your_name = registeredName;
6174
6182
  if (level !== 'minimal') {
6175
- guide.workflow = '1. get_briefing → 2. list_tasks/suggest_task → 3. claim task → 4. lock_file → 5. work → 6. unlock_file → 7. update_task done → 8. listen_group';
6183
+ guide.workflow = '1. get_briefing → 2. list_tasks/suggest_task → 3. claim task → 4. lock_file → 5. work → 6. unlock_file → 7. update_task done → 8. listen()';
6176
6184
  }
6177
6185
  return guide;
6178
6186
  }
@@ -7183,7 +7191,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
7183
7191
  tools: [
7184
7192
  {
7185
7193
  name: 'register',
7186
- description: 'Register this agent\'s identity. Must be called first. Returns a collaboration guide with all tool categories, critical rules, and workflow patterns — READ IT CAREFULLY before doing anything else. Then call get_briefing() for project context, then listen_group() to join the conversation.',
7194
+ description: 'Register this agent\'s identity. Must be called first. Returns a collaboration guide with all tool categories, critical rules, and workflow patterns — READ IT CAREFULLY before doing anything else. Then call get_briefing() for project context, then listen() to join the conversation.',
7187
7195
  inputSchema: {
7188
7196
  type: 'object',
7189
7197
  properties: {
@@ -7281,7 +7289,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
7281
7289
  },
7282
7290
  {
7283
7291
  name: 'listen',
7284
- description: 'Listen for messages. Use mode="standard" (default, direct 1:1), mode="group" (group/managed conversation, batched), or mode="codex" (Codex CLI — returns after 90s). Auto-detects mode from conversation state when mode is omitted. Replaces listen_group and listen_codex (now deprecated aliases).',
7292
+ description: 'Listen for messages. Use mode="standard" (default, direct 1:1), mode="group" (group/managed conversation, batched), or mode="codex" (Codex CLI — returns after 90s). Auto-detects mode from conversation state when mode is omitted.',
7285
7293
  inputSchema: {
7286
7294
  type: 'object',
7287
7295
  properties: {
@@ -7586,7 +7594,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7586
7594
  // Escalating listen() enforcement — block tools after too many non-listen calls
7587
7595
  // send_message is exempt so blocked agents can escalate to coordinator before calling listen()
7588
7596
  // messages is exempt (unified query tool — replaces check_messages/consume_messages)
7589
- const listenExemptTools = new Set(['register', 'get_briefing', 'get_guide', 'listen', 'wait_for_reply', 'update_profile', 'list_agents', 'add_rule', 'remove_rule', 'toggle_rule', 'list_rules', 'send_message', 'messages']);
7597
+ // lock_file and unlock_file are safety housekeeping, not comms — they are exempt from the listen counter
7598
+ const listenExemptTools = new Set(['register', 'get_briefing', 'get_guide', 'listen', 'wait_for_reply', 'update_profile', 'list_agents', 'add_rule', 'remove_rule', 'toggle_rule', 'list_rules', 'send_message', 'messages', 'lock_file', 'unlock_file']);
7590
7599
  if (listenExemptTools.has(name)) {
7591
7600
  if (name === 'listen' || name === 'wait_for_reply') {
7592
7601
  consecutiveNonListenCalls = 0;
@@ -7608,7 +7617,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7608
7617
 
7609
7618
  if (!isCoordinatorExempt) {
7610
7619
  consecutiveNonListenCalls++;
7611
- if (consecutiveNonListenCalls >= 5) {
7620
+ if (consecutiveNonListenCalls >= 15) {
7612
7621
  const coordinator = (() => {
7613
7622
  try {
7614
7623
  const profs = getProfiles();
@@ -7625,6 +7634,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7625
7634
  `Do NOT skip step 1. Do NOT call any other tool. Start with send_message now.`
7626
7635
  }],
7627
7636
  isError: true,
7637
+ next_action: `Call send_message(to="${coordinator}", content="I was blocked after ${consecutiveNonListenCalls} calls without listen(). I need to call ${name}. Should I proceed?") then immediately call listen().`,
7628
7638
  };
7629
7639
  }
7630
7640
  }
@@ -7832,6 +7842,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7832
7842
  if (last3.length >= 3 && last3.every(c => c.tool === name && c.argsHash === argsHash)) {
7833
7843
  result._stuck_hint = `You have called ${name} 3 times with the same error. Consider: broadcasting for help, trying a different approach, or calling suggest_task() to find other work.`;
7834
7844
  }
7845
+ result.next_action = 'Fix the error above, then call listen() to continue.';
7835
7846
  return {
7836
7847
  content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
7837
7848
  isError: true,
@@ -7880,18 +7891,30 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7880
7891
 
7881
7892
  if (isResponsiveCoordinator) {
7882
7893
  // Responsive coordinators must NEVER be told to call listen().
7883
- // Replace any tool-set listen() directive with consume_messages() or nothing.
7884
- if (!result.next_action || /\blisten\(\)/i.test(result.next_action)) {
7885
- try {
7886
- const pending = getUnconsumedMessages(registeredName);
7887
- if (pending.length > 0) {
7888
- result.next_action = `${pending.length} agent update(s) waiting. Call consume_messages() to read them.`;
7889
- } else {
7890
- delete result.next_action;
7891
- }
7892
- } catch {
7893
- delete result.next_action;
7894
+ // Three cases:
7895
+ // 1. No next_action set by tool → inject messages(action="consume") hint if pending, else nothing
7896
+ // 2. Bare listen() directive → replace entirely with coordinator hint
7897
+ // 3. Compound "Do X, then listen()." → strip the listen() tail, keep the lead instruction
7898
+ const na = result.next_action || '';
7899
+ const bareListenRe = /^call listen\(\)/i;
7900
+ const tailListenRe = /,?\s*then call listen\(\)[^.]*\./i;
7901
+ try {
7902
+ const pending = getUnconsumedMessages(registeredName);
7903
+ const pendingHint = pending.length > 0
7904
+ ? `${pending.length} agent update(s) waiting. Call messages(action="consume") to read them.`
7905
+ : null;
7906
+ if (!na || bareListenRe.test(na)) {
7907
+ // No guidance or bare listen() — replace with coordinator hint or nothing
7908
+ if (pendingHint) result.next_action = pendingHint;
7909
+ else delete result.next_action;
7910
+ } else if (tailListenRe.test(na)) {
7911
+ // Compound instruction ending in "then call listen()" — strip just the listen() tail
7912
+ const stripped = na.replace(tailListenRe, '.').replace(/\.\.$/, '.').trim();
7913
+ result.next_action = pendingHint ? `${stripped} Then: ${pendingHint}` : stripped;
7894
7914
  }
7915
+ // else: next_action has no listen() reference — preserve as-is
7916
+ } catch {
7917
+ if (bareListenRe.test(na)) delete result.next_action;
7895
7918
  }
7896
7919
  } else {
7897
7920
  if (!result.next_action) {
@@ -7911,8 +7934,13 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7911
7934
  } catch {}
7912
7935
  }
7913
7936
 
7914
- if (consecutiveNonListenCalls >= 3) {
7915
- result.next_action = `WARNING: ${consecutiveNonListenCalls} calls without listen(). Tools BLOCKED at 5. Call listen() NOW.`;
7937
+ if (consecutiveNonListenCalls >= 10) {
7938
+ result.next_action = `WARNING: ${consecutiveNonListenCalls} calls without listen(). Tools BLOCKED at 15. Call listen() NOW.`;
7939
+ }
7940
+
7941
+ // Soft-enforce user reply: remind agent they have an unanswered user message
7942
+ if (pendingUserReply && result.next_action && name !== 'send_message') {
7943
+ result.next_action += " NOTE: You have an unanswered user message — call send_message(to='__user__') before your next listen().";
7916
7944
  }
7917
7945
  }
7918
7946
  }
@@ -8035,9 +8063,19 @@ function autoReclaimDeadSeat() {
8035
8063
  autoReclaimedName = true; // mark as auto-reclaimed so toolRegister() can override it
8036
8064
  registeredToken = agents[bestName].token || '';
8037
8065
  touchHeartbeat(bestName);
8038
- // Start 10s heartbeat interval so the agent stays alive past the first 30s window
8066
+ // Start 10s heartbeat interval; watchdog runs every 60s (6 ticks)
8039
8067
  if (heartbeatInterval) clearInterval(heartbeatInterval);
8040
- heartbeatInterval = setInterval(() => { touchHeartbeat(registeredName); }, 10000);
8068
+ let watchdogTick = 0;
8069
+ heartbeatInterval = setInterval(() => {
8070
+ touchHeartbeat(registeredName);
8071
+ watchdogTick++;
8072
+ if (watchdogTick >= 6) {
8073
+ watchdogTick = 0;
8074
+ runSelfHealingWatchdog();
8075
+ escalateBlockedTasks();
8076
+ triggerStandupIfDue();
8077
+ }
8078
+ }, 10000);
8041
8079
  heartbeatInterval.unref();
8042
8080
  console.error(`[neohive] Auto-reclaimed seat "${bestName}" (previous PID dead)`);
8043
8081
  } catch (e) {