neohive 6.0.2 → 6.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/server.js CHANGED
@@ -19,9 +19,12 @@ const _agents = require('./lib/agents');
19
19
  const _messaging = require('./lib/messaging');
20
20
  const _compact = require('./lib/compact');
21
21
 
22
- // --- Structured logging ---
23
- const LOG_LEVEL = (process.env.NEOHIVE_LOG_LEVEL || 'warn').toLowerCase();
22
+ const DATA_DIR = _config.DATA_DIR;
23
+
24
+ const _envLog = process.env.NEOHIVE_LOG_LEVEL;
25
+ const LOG_LEVEL = (_envLog != null && String(_envLog).trim() !== '' ? String(_envLog).trim() : 'warn').toLowerCase();
24
26
  const LOG_LEVELS = { error: 0, warn: 1, info: 2, debug: 3 };
27
+
25
28
  const log = {
26
29
  error: (...args) => { if (LOG_LEVELS[LOG_LEVEL] >= 0) process.stderr.write('[NEOHIVE:ERROR] ' + args.map(String).join(' ') + '\n'); },
27
30
  warn: (...args) => { if (LOG_LEVELS[LOG_LEVEL] >= 1) process.stderr.write('[NEOHIVE:WARN] ' + args.map(String).join(' ') + '\n'); },
@@ -29,8 +32,17 @@ const log = {
29
32
  debug: (...args) => { if (LOG_LEVELS[LOG_LEVEL] >= 3) process.stderr.write('[NEOHIVE:DEBUG] ' + args.map(String).join(' ') + '\n'); },
30
33
  };
31
34
 
32
- // Data dir lives in the project where Claude Code runs, not where the package is installed
33
- const DATA_DIR = process.env.NEOHIVE_DATA_DIR || path.join(process.cwd(), '.neohive');
35
+ const _rawNeohiveEnv = String(process.env.NEOHIVE_DATA_DIR || '');
36
+ if (_rawNeohiveEnv && /\$\{|\$\s*workspaceFolder/i.test(_rawNeohiveEnv)) {
37
+ log.warn('[neohive] NEOHIVE_DATA_DIR looks unexpanded (' + _rawNeohiveEnv.substring(0, 60) + '…). Node will not substitute ${workspaceFolder}. Use an absolute path (re-run npx neohive init --cursor) or set env in Cursor. Effective DATA_DIR=' + DATA_DIR);
38
+ }
39
+
40
+ // Auto-migrate from .agent-bridge/ to .neohive/ (v5 → v6 rename)
41
+ const _legacyDir = path.join(path.dirname(DATA_DIR), '.agent-bridge');
42
+ if (!fs.existsSync(DATA_DIR) && fs.existsSync(_legacyDir)) {
43
+ try { fs.renameSync(_legacyDir, DATA_DIR); } catch {}
44
+ }
45
+
34
46
  const MESSAGES_FILE = path.join(DATA_DIR, 'messages.jsonl');
35
47
  const HISTORY_FILE = path.join(DATA_DIR, 'history.jsonl');
36
48
  const AGENTS_FILE = path.join(DATA_DIR, 'agents.json');
@@ -46,11 +58,12 @@ const LOCKS_FILE = path.join(DATA_DIR, 'locks.json');
46
58
  const PROGRESS_FILE = path.join(DATA_DIR, 'progress.json');
47
59
  const VOTES_FILE = path.join(DATA_DIR, 'votes.json');
48
60
  const REVIEWS_FILE = path.join(DATA_DIR, 'reviews.json');
61
+ const NOTIFICATIONS_FILE = path.join(DATA_DIR, 'notifications.json');
49
62
  const DEPS_FILE = path.join(DATA_DIR, 'dependencies.json');
50
63
  const REPUTATION_FILE = path.join(DATA_DIR, 'reputation.json');
51
64
  const COMPRESSED_FILE = path.join(DATA_DIR, 'compressed.json');
52
65
  const RULES_FILE = path.join(DATA_DIR, 'rules.json');
53
- // Plugins removed in v3.4.3 — unnecessary attack surface, CLIs have their own extension systems
66
+ const AGENT_CARDS_FILE = path.join(DATA_DIR, 'agent-cards.json');
54
67
 
55
68
  // In-memory state for this process
56
69
  let registeredName = null;
@@ -94,7 +107,7 @@ function lockConfigFile() {
94
107
  while (Date.now() - start < maxWait) {
95
108
  try { fs.writeFileSync(CONFIG_LOCK, String(process.pid), { flag: 'wx' }); return true; }
96
109
  catch { /* lock exists, wait */ }
97
- const wait = Date.now(); while (Date.now() - wait < 50) {} // busy-wait 50ms
110
+ try { Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 50); } catch {} // non-blocking 50ms wait
98
111
  }
99
112
  try { fs.unlinkSync(CONFIG_LOCK); } catch {}
100
113
  try { fs.writeFileSync(CONFIG_LOCK, String(process.pid), { flag: 'wx' }); return true; } catch {}
@@ -242,7 +255,7 @@ function migrateIfNeeded() {
242
255
  if (fs.existsSync(DATA_VERSION_FILE)) {
243
256
  dataVersion = parseInt(fs.readFileSync(DATA_VERSION_FILE, 'utf8').trim()) || 0;
244
257
  }
245
- } catch {}
258
+ } catch (e) { log.debug("data version read failed:", e.message); }
246
259
  if (dataVersion >= CURRENT_DATA_VERSION) return;
247
260
 
248
261
  // Run migrations in order
@@ -254,7 +267,7 @@ function migrateIfNeeded() {
254
267
  try { fs.writeFileSync(DATA_VERSION_FILE, String(CURRENT_DATA_VERSION)); } catch {}
255
268
  }
256
269
 
257
- const RESERVED_NAMES = ['__system__', '__all__', '__open__', '__close__', 'system', 'dashboard', 'Dashboard'];
270
+ const RESERVED_NAMES = ['__system__', '__all__', '__open__', '__close__', '__user__', 'system', 'dashboard', 'Dashboard'];
258
271
 
259
272
  function sanitizeName(name) {
260
273
  if (typeof name !== 'string' || !/^[a-zA-Z0-9_-]{1,20}$/.test(name)) {
@@ -307,7 +320,7 @@ function trimConsumedIds(agentName, ids) {
307
320
  for (const id of ids) {
308
321
  if (!currentIds.has(id)) ids.delete(id);
309
322
  }
310
- } catch {}
323
+ } catch (e) { log.debug("consumed ID trim failed:", e.message); }
311
324
  }
312
325
 
313
326
  function readJsonl(file) {
@@ -368,7 +381,7 @@ function lockAgentsFile() {
368
381
  while (Date.now() - start < maxWait) {
369
382
  try { fs.writeFileSync(AGENTS_LOCK, String(process.pid), { flag: 'wx' }); return true; }
370
383
  catch { /* lock exists, wait with exponential backoff */ }
371
- const wait = Date.now(); while (Date.now() - wait < backoff) {}
384
+ try { Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, backoff); } catch {}
372
385
  backoff = Math.min(backoff * 2, 500);
373
386
  }
374
387
  // Force-break stale lock after timeout
@@ -386,7 +399,7 @@ function withFileLock(filePath, fn) {
386
399
  while (Date.now() - start < maxWait) {
387
400
  try { fs.writeFileSync(lockPath, String(process.pid), { flag: 'wx' }); break; }
388
401
  catch { /* lock exists, wait with exponential backoff */ }
389
- const wait = Date.now(); while (Date.now() - wait < backoff) {}
402
+ try { Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, backoff); } catch {}
390
403
  backoff = Math.min(backoff * 2, 500);
391
404
  if (Date.now() - start >= maxWait) {
392
405
  // Force-break stale lock — only if holding PID is dead
@@ -395,7 +408,7 @@ function withFileLock(filePath, fn) {
395
408
  if (lockPid && lockPid !== process.pid) {
396
409
  try { process.kill(lockPid, 0); /* PID alive — skip, don't corrupt */ return null; } catch { /* PID dead — safe to break */ }
397
410
  }
398
- } catch {}
411
+ } catch (e) { log.debug("lock PID check failed:", e.message); }
399
412
  try { fs.unlinkSync(lockPath); } catch {}
400
413
  try { fs.writeFileSync(lockPath, String(process.pid), { flag: 'wx' }); } catch { return fn(); }
401
414
  break;
@@ -419,20 +432,20 @@ function getAgents() {
419
432
  const hb = JSON.parse(fs.readFileSync(path.join(DATA_DIR, f), 'utf8'));
420
433
  if (hb.last_activity) agents[name].last_activity = hb.last_activity;
421
434
  if (hb.pid) agents[name].pid = hb.pid;
422
- } catch {}
435
+ } catch (e) { log.debug("heartbeat merge failed:", e.message); }
423
436
  }
424
437
  }
425
- } catch {}
438
+ } catch (e) { log.debug("heartbeat scan failed:", e.message); }
426
439
  return agents;
427
440
  }, 1500);
428
441
  }
429
442
 
430
443
  function saveAgents(agents) {
431
- // Safe write: serialize first, then write complete string
432
- // This minimizes the window where the file could be truncated
433
444
  const data = JSON.stringify(agents);
434
445
  if (data && data.length > 2) {
435
446
  fs.writeFileSync(AGENTS_FILE, data);
447
+ } else {
448
+ log.debug('[neohive/agents.json] skipped write (empty {}): ' + AGENTS_FILE);
436
449
  }
437
450
  invalidateCache('agents');
438
451
  }
@@ -447,7 +460,7 @@ function touchHeartbeat(name) {
447
460
  last_activity: new Date().toISOString(),
448
461
  pid: process.pid,
449
462
  }));
450
- } catch {}
463
+ } catch (e) { log.debug("heartbeat write failed:", e.message); }
451
464
  }
452
465
 
453
466
 
@@ -468,8 +481,8 @@ function isPidAlive(pid, lastActivity) {
468
481
  const cached = _pidAliveCache[cacheKey];
469
482
  if (cached && Date.now() - cached.ts < 5000) return cached.alive;
470
483
 
471
- // Faster stale detection in autonomous mode (30s vs 60s) for quicker dead agent recovery
472
- const STALE_THRESHOLD = isAutonomousMode() ? 30000 : 60000;
484
+ // 30s stale threshold — 3x the 10s heartbeat interval, catches dead agents faster
485
+ const STALE_THRESHOLD = 30000;
473
486
  let alive = false;
474
487
 
475
488
  // PRIORITY 1: Trust heartbeat freshness over PID status
@@ -588,6 +601,15 @@ function buildMessageResponse(msg, consumedIds) {
588
601
  }
589
602
  } catch (e) { log.debug('total message estimate failed:', e.message); }
590
603
 
604
+ // Task nudge: remind agent of their outstanding tasks
605
+ let taskReminder;
606
+ try {
607
+ const myTasks = getTasks().filter(t => t.assignee === registeredName && (t.status === 'pending' || t.status === 'in_progress'));
608
+ if (myTasks.length > 0) {
609
+ taskReminder = { pending: myTasks.filter(t => t.status === 'pending').length, in_progress: myTasks.filter(t => t.status === 'in_progress').length, tasks: myTasks.map(t => ({ id: t.id, title: t.title, status: t.status })) };
610
+ }
611
+ } catch (e) { log.debug('task reminder in listen failed:', e.message); }
612
+
591
613
  return {
592
614
  success: true,
593
615
  message: {
@@ -595,11 +617,14 @@ function buildMessageResponse(msg, consumedIds) {
595
617
  from: msg.from,
596
618
  content: msg.content,
597
619
  timestamp: msg.timestamp,
620
+ priority: classifyPriority(msg),
598
621
  ...(msg.reply_to && { reply_to: msg.reply_to }),
599
622
  ...(msg.thread_id && { thread_id: msg.thread_id }),
600
623
  },
601
624
  pending_count: pendingCount,
602
625
  agents_online: agentsOnline,
626
+ coordinator_mode: getConfig().coordinator_mode || 'responsive',
627
+ ...(taskReminder && { task_reminder: taskReminder }),
603
628
  };
604
629
  }
605
630
 
@@ -616,9 +641,11 @@ function autoCompact() {
616
641
 
617
642
  const messages = lines.map(l => { try { return JSON.parse(l); } catch { return null; } }).filter(Boolean);
618
643
 
619
- // Collect consumed IDs — for __group__ messages, only check ALIVE agents
644
+ // Collect consumed IDs — for __group__ messages, check ALL registered agents (alive + dead)
645
+ // This prevents message loss when agents reconnect after a crash
620
646
  const agents = getAgents();
621
- const aliveAgentNames = Object.keys(agents).filter(n => isPidAlive(agents[n].pid, agents[n].last_activity));
647
+ const allAgentNames = Object.keys(agents);
648
+ const retentionMs = (parseInt(process.env.NEOHIVE_RETENTION_HOURS) || 24) * 3600000;
622
649
  const allConsumed = new Set();
623
650
  const perAgentConsumed = {};
624
651
  if (fs.existsSync(DATA_DIR)) {
@@ -629,18 +656,23 @@ function autoCompact() {
629
656
  const ids = JSON.parse(fs.readFileSync(path.join(DATA_DIR, f), 'utf8'));
630
657
  perAgentConsumed[agentName] = new Set(ids);
631
658
  ids.forEach(id => allConsumed.add(id));
632
- } catch {}
659
+ } catch (e) { log.debug("consumed ID read failed:", e.message); }
633
660
  }
634
661
  }
635
662
  }
636
663
 
637
664
  // Keep messages that are NOT fully consumed
638
- // For __group__ messages: consumed when ALL ALIVE agents have consumed it (dead agents don't block)
665
+ // For __group__ messages: consumed when ALL registered agents consumed OR message exceeds retention period
639
666
  // For direct messages: consumed when the recipient has consumed it
640
667
  const active = messages.filter(m => {
641
668
  if (m.to === '__group__') {
642
- // __group__: check if all alive agents (except sender) have consumed
643
- return !aliveAgentNames.every(n => n === m.from || (perAgentConsumed[n] && perAgentConsumed[n].has(m.id)));
669
+ // Time-based retention: critical messages get 2x retention
670
+ const msgTime = new Date(m.timestamp).getTime();
671
+ const msgPriority = classifyPriority(m);
672
+ const effectiveRetention = msgPriority === 'critical' ? retentionMs * 2 : retentionMs;
673
+ if (msgTime < Date.now() - effectiveRetention) return false;
674
+ // Check ALL registered agents (alive + dead) to prevent loss on reconnect
675
+ return !allAgentNames.every(n => n === m.from || (perAgentConsumed[n] && perAgentConsumed[n].has(m.id)));
644
676
  }
645
677
  // Direct: standard check
646
678
  if (!allConsumed.has(m.id)) return true;
@@ -657,9 +689,23 @@ function autoCompact() {
657
689
  }
658
690
 
659
691
  // Rewrite messages.jsonl atomically — write to temp file then rename
692
+ // Capture pre-compaction size to detect messages appended during compaction
693
+ const preCompactSize = Buffer.byteLength(content, 'utf8') + 1; // +1 for trailing newline trimmed earlier
660
694
  const newContent = active.map(m => JSON.stringify(m)).join('\n') + (active.length ? '\n' : '');
661
695
  const tmpFile = msgFile + '.tmp';
662
696
  fs.writeFileSync(tmpFile, newContent);
697
+ // Check for messages appended after our initial read
698
+ let lateMessages = '';
699
+ try {
700
+ const currentSize = fs.statSync(msgFile).size;
701
+ if (currentSize > preCompactSize) {
702
+ const fd = fs.openSync(msgFile, 'r');
703
+ const lateBuf = Buffer.alloc(currentSize - preCompactSize);
704
+ fs.readSync(fd, lateBuf, 0, lateBuf.length, preCompactSize);
705
+ fs.closeSync(fd);
706
+ lateMessages = lateBuf.toString('utf8');
707
+ }
708
+ } catch (e) { log.debug('late message check during compaction:', e.message); }
663
709
  try {
664
710
  fs.renameSync(tmpFile, msgFile);
665
711
  } catch {
@@ -668,7 +714,12 @@ function autoCompact() {
668
714
  try { fs.unlinkSync(tmpFile); } catch {}
669
715
  return;
670
716
  }
671
- lastReadOffset = Buffer.byteLength(newContent, 'utf8');
717
+ // Re-append any messages that arrived during compaction
718
+ if (lateMessages.trim()) {
719
+ fs.appendFileSync(msgFile, lateMessages);
720
+ log.info('Re-appended ' + lateMessages.trim().split('\n').length + ' messages that arrived during compaction');
721
+ }
722
+ lastReadOffset = fs.statSync(msgFile).size;
672
723
 
673
724
  // Trim consumed ID files — keep only IDs still in active messages
674
725
  const activeIds = new Set(active.map(m => m.id));
@@ -840,6 +891,21 @@ function saveWorkflows(workflows) {
840
891
  });
841
892
  }
842
893
 
894
+ // Save a checkpoint after a workflow step completes
895
+ function saveWorkflowCheckpoint(wf, step) {
896
+ if (!wf.checkpoints) wf.checkpoints = [];
897
+ wf.checkpoints.push({
898
+ step_id: step.id,
899
+ step_description: step.description,
900
+ completed_at: step.completed_at,
901
+ completed_by: step.assignee || registeredName,
902
+ output: step.verification || step.notes || null,
903
+ files_changed: step.files_changed || [],
904
+ step_states: wf.steps.map(s => ({ id: s.id, status: s.status, assignee: s.assignee || null })),
905
+ });
906
+ if (wf.checkpoints.length > 100) wf.checkpoints = wf.checkpoints.slice(-100);
907
+ }
908
+
843
909
  // --- Autonomous mode detection ---
844
910
  function isAutonomousMode() {
845
911
  const workflows = getWorkflows();
@@ -897,8 +963,11 @@ function findUnassignedTasks(skills) {
897
963
  const words = ((t.title || '') + ' ' + (t.description || '')).toLowerCase().split(/\W+/).filter(w => w.length > 3);
898
964
  words.forEach(w => historyKeywords.add(w));
899
965
  }
900
- // Add explicit skills
966
+ // Add explicit skills from function param AND agent card
901
967
  if (skills) skills.forEach(s => historyKeywords.add(s.toLowerCase()));
968
+ const cards = readJsonFile(AGENT_CARDS_FILE) || {};
969
+ const myCard = cards[registeredName];
970
+ if (myCard && myCard.skills) myCard.skills.forEach(s => historyKeywords.add(s));
902
971
 
903
972
  // Score each task by affinity (keyword overlap with agent's history + skills)
904
973
  // Scale fix: cache task keyword sets to avoid O(N*M) recomputation at 100 agents
@@ -1054,12 +1123,14 @@ let _guideCache = { key: null, result: null };
1054
1123
  function buildGuide(level = 'standard') {
1055
1124
  const agents = getAgents();
1056
1125
  const aliveCount = Object.values(agents).filter(a => isPidAlive(a.pid, a.last_activity)).length;
1057
- const mode = getConfig().conversation_mode || 'direct';
1126
+ const config = getConfig();
1127
+ const mode = config.conversation_mode || 'direct';
1128
+ const coordMode = config.coordinator_mode || 'responsive';
1058
1129
 
1059
1130
  // Cache check: reuse cached guide if nothing changed (saves rebuilding 20-50 rules)
1060
1131
  let rulesMtime = 0;
1061
1132
  try { rulesMtime = fs.existsSync(RULES_FILE) ? fs.statSync(RULES_FILE).mtimeMs : 0; } catch {}
1062
- const cacheKey = `${level}:${aliveCount}:${mode}:${registeredName}:${rulesMtime}`;
1133
+ const cacheKey = `${level}:${aliveCount}:${mode}:${coordMode}:${registeredName}:${rulesMtime}`;
1063
1134
  if (_guideCache.key === cacheKey && _guideCache.result) return _guideCache.result;
1064
1135
 
1065
1136
  const channels = getChannelsData();
@@ -1072,6 +1143,7 @@ function buildGuide(level = 'standard') {
1072
1143
  const isQualityLead = myRole === 'quality';
1073
1144
  const isMonitor = myRole === 'monitor';
1074
1145
  const isAdvisor = myRole === 'advisor';
1146
+ const isLeadRole = myRole === 'lead' || myRole === 'manager' || myRole === 'coordinator';
1075
1147
  let qualityLeadName = null;
1076
1148
  for (const [pName, prof] of Object.entries(profiles)) {
1077
1149
  if (prof.role && prof.role.toLowerCase() === 'quality' && pName !== registeredName) { qualityLeadName = pName; break; }
@@ -1143,7 +1215,7 @@ function buildGuide(level = 'standard') {
1143
1215
  try {
1144
1216
  const content = fs.readFileSync(guideFile, 'utf8').trim();
1145
1217
  if (content) projectRules = content.split(/\r?\n/).filter(l => l.trim() && !l.startsWith('#')).map(l => l.replace(/^[-*]\s*/, '').trim()).filter(Boolean);
1146
- } catch {}
1218
+ } catch (e) { log.debug("guide file read failed:", e.message); }
1147
1219
  }
1148
1220
 
1149
1221
  // Inject dashboard-managed rules into guide
@@ -1170,7 +1242,7 @@ function buildGuide(level = 'standard') {
1170
1242
  quality_lead: qualityLeadName || undefined,
1171
1243
  tool_categories: {
1172
1244
  'WORK LOOP': 'get_work, verify_and_advance, retry_with_improvement',
1173
- 'MESSAGING': 'send_message, broadcast, check_messages, get_history, handoff, share_file',
1245
+ 'MESSAGING': 'send_message, broadcast, check_messages, consume_messages, get_history, handoff, share_file',
1174
1246
  'COORDINATION': 'get_briefing, log_decision, get_decisions, kb_write, kb_read, kb_list',
1175
1247
  'TASKS': 'create_task, update_task, list_tasks, suggest_task',
1176
1248
  'QUALITY': 'request_review, submit_review',
@@ -1189,6 +1261,17 @@ function buildGuide(level = 'standard') {
1189
1261
  }
1190
1262
  }
1191
1263
 
1264
+ // Lead/Coordinator mode: responsive (stay with human) vs autonomous (run in listen loop)
1265
+ if (isLeadRole && aliveCount >= 2) {
1266
+ const coordinatorMode = getConfig().coordinator_mode || 'responsive';
1267
+ if (coordinatorMode === 'responsive') {
1268
+ rules.push('RESPONSIVE COORDINATOR PATTERN: Use consume_messages() at the start of each interaction to check for agent updates non-blockingly. Process all returned messages, assign work, then return to the human immediately. Do NOT block in listen() — you need to stay responsive to both agents and the user.');
1269
+ } else {
1270
+ rules.push('AUTONOMOUS COORDINATOR PATTERN: Use listen() to wait for agent results. Process responses, delegate follow-up work, and continue the listen loop. Only return to the human when all tasks are complete or when you hit a blocker that requires human input.');
1271
+ }
1272
+ rules.push('CRITICAL: You are a Coordinator. You MUST NOT edit files, write code, or use tools like Edit/Write/Bash for code changes. Your tools are: send_message, create_task, update_task, create_workflow, advance_workflow, workflow_status, list_tasks, consume_messages, broadcast, kb_write, kb_read, log_decision. Delegate ALL code work to other agents.');
1273
+ }
1274
+
1192
1275
  // Tier 0 — THE one rule (always included at every level)
1193
1276
  const listenCmd = isManagedMode() ? 'listen()' : (mode === 'group' ? 'listen_group()' : 'listen()');
1194
1277
  rules.push(`AFTER EVERY ACTION, call ${listenCmd}. This is how you receive messages. NEVER skip this. NEVER use sleep(). NEVER poll with check_messages(). ${listenCmd} is your ONLY way to receive messages.`);
@@ -1247,7 +1330,7 @@ function buildGuide(level = 'standard') {
1247
1330
  try {
1248
1331
  const content = fs.readFileSync(guideFile, 'utf8').trim();
1249
1332
  if (content) projectRules = content.split(/\r?\n/).filter(l => l.trim() && !l.startsWith('#')).map(l => l.replace(/^[-*]\s*/, '').trim()).filter(Boolean);
1250
- } catch {}
1333
+ } catch (e) { log.debug("guide file read failed:", e.message); }
1251
1334
  }
1252
1335
 
1253
1336
  // Inject dashboard-managed rules into guide
@@ -1266,7 +1349,7 @@ function buildGuide(level = 'standard') {
1266
1349
  ? '1. Call list_agents() to see who is online. 2. Send a message or call listen() to wait.'
1267
1350
  : '1. Call get_briefing() for project context. 2. Call listen_group() to join. 3. Respond and listen_group() again.',
1268
1351
  tool_categories: {
1269
- 'MESSAGING': 'send_message, broadcast, listen_group, listen, check_messages, get_history, get_summary, search_messages, handoff, share_file',
1352
+ 'MESSAGING': 'send_message, broadcast, listen_group, listen, check_messages, consume_messages, get_history, get_summary, search_messages, handoff, share_file',
1270
1353
  'COORDINATION': 'get_briefing, log_decision, get_decisions, kb_write, kb_read, kb_list, call_vote, cast_vote, vote_status',
1271
1354
  'TASKS': 'create_task, update_task, list_tasks, declare_dependency, check_dependencies, suggest_task',
1272
1355
  'QUALITY': 'update_progress, get_progress, request_review, submit_review, get_reputation',
@@ -1292,6 +1375,17 @@ function buildGuide(level = 'standard') {
1292
1375
  };
1293
1376
  }
1294
1377
 
1378
+ // Task reminder: show agent's pending/in_progress tasks so they remember to update them
1379
+ if (registeredName) {
1380
+ try {
1381
+ const myTasks = getTasks().filter(t => t.assignee === registeredName && (t.status === 'pending' || t.status === 'in_progress'));
1382
+ if (myTasks.length > 0) {
1383
+ result.your_tasks = myTasks.map(t => ({ id: t.id, title: t.title, status: t.status }));
1384
+ rules.push(`TASK STATUS: You have ${myTasks.length} task(s). Use update_task(task_id, "in_progress") when starting and update_task(task_id, "done") when complete. Your tasks: ${myTasks.map(t => t.id + ' "' + t.title.substring(0, 40) + '" (' + t.status + ')').join('; ')}`);
1385
+ }
1386
+ } catch (e) { log.debug('task reminder in guide failed:', e.message); }
1387
+ }
1388
+
1295
1389
  // Cache the result for subsequent calls with same params
1296
1390
  _guideCache = { key: cacheKey, result };
1297
1391
  return result;
@@ -1299,7 +1393,7 @@ function buildGuide(level = 'standard') {
1299
1393
 
1300
1394
  // --- Tool implementations ---
1301
1395
 
1302
- function toolRegister(name, provider = null) {
1396
+ function toolRegister(name, provider = null, skills = null) {
1303
1397
  ensureDataDir();
1304
1398
  migrateIfNeeded(); // run data migrations on first register
1305
1399
  sanitizeName(name);
@@ -1311,12 +1405,9 @@ function toolRegister(name, provider = null) {
1311
1405
  return { error: `Agent "${name}" is already registered by a live process. Choose a different name.` };
1312
1406
  }
1313
1407
 
1314
- // If name was previously registered by a dead process, verify token to prevent impersonation
1315
- if (agents[name] && agents[name].token && !isPidAlive(agents[name].pid, agents[name].last_activity)) {
1316
- // Dead agent — only allow re-registration from the same process (same token)
1317
- if (registeredToken && registeredToken !== agents[name].token) {
1318
- return { error: `Agent "${name}" was previously registered by another process. Choose a different name.` };
1319
- }
1408
+ // Dead agent name reclaim — allow any process to take a dead agent's name
1409
+ if (agents[name] && !isPidAlive(agents[name].pid, agents[name].last_activity)) {
1410
+ log.info(`Agent "${name}" reclaimed (previous PID ${agents[name].pid} is dead)`);
1320
1411
  }
1321
1412
 
1322
1413
  // Prevent re-registration under a different name from the same process
@@ -1327,74 +1418,99 @@ function toolRegister(name, provider = null) {
1327
1418
 
1328
1419
  const now = new Date().toISOString();
1329
1420
  const token = (agents[name] && agents[name].token) || generateToken();
1330
- agents[name] = { pid: process.pid, timestamp: now, last_activity: now, provider: provider || 'unknown', branch: currentBranch, token, started_at: now };
1421
+ agents[name] = { pid: process.pid, ppid: process.ppid, timestamp: now, last_activity: now, provider: provider || 'unknown', branch: currentBranch, token, started_at: now };
1331
1422
  saveAgents(agents);
1332
1423
  registeredName = name;
1333
- registeredToken = token;
1424
+ registeredToken = token;
1425
+
1426
+ // Auto-create profile if not exists
1427
+ const profiles = getProfiles();
1428
+ if (!profiles[name]) {
1429
+ profiles[name] = { display_name: name, avatar: '', bio: '', role: '', created_at: now };
1430
+ saveProfiles(profiles);
1431
+ }
1432
+
1433
+ // Save agent card with skills
1434
+ const cards = readJsonFile(AGENT_CARDS_FILE) || {};
1435
+ cards[name] = {
1436
+ name,
1437
+ provider: provider || 'unknown',
1438
+ skills: Array.isArray(skills) ? skills.map(s => String(s).toLowerCase().substring(0, 30)).slice(0, 20) : [],
1439
+ registered_at: now,
1440
+ };
1441
+ writeJsonFile(AGENT_CARDS_FILE, cards);
1334
1442
 
1335
- // Auto-create profile if not exists
1336
- const profiles = getProfiles();
1337
- if (!profiles[name]) {
1338
- profiles[name] = { display_name: name, avatar: '', bio: '', role: '', created_at: now };
1339
- saveProfiles(profiles);
1340
- }
1443
+ // Start heartbeat — updates last_activity every 10s so dashboard knows we're alive
1444
+ // Deterministic jitter per agent to spread writes across the interval (prevents lock storms at 10 agents)
1445
+ const heartbeatJitter = name.split('').reduce((h, c) => h + c.charCodeAt(0), 0) % 2000;
1446
+ if (heartbeatInterval) clearInterval(heartbeatInterval);
1447
+ heartbeatInterval = setInterval(() => {
1448
+ try {
1449
+ // Scale fix: write per-agent heartbeat file instead of lock+read+write agents.json
1450
+ // Eliminates write contention — each agent writes only its own file, no locking needed
1451
+ touchHeartbeat(registeredName);
1452
+ const agents = getAgents(); // cached + merges heartbeat files automatically
1453
+ // Managed mode: detect dead manager and dead turn holder
1454
+ if (isManagedMode()) {
1455
+ const managed = getManagedConfig();
1456
+ let managedChanged = false;
1457
+
1458
+ // Dead manager detection
1459
+ if (managed.manager && managed.manager !== registeredName) {
1460
+ if (agents[managed.manager] && !isPidAlive(agents[managed.manager].pid, agents[managed.manager].last_activity)) {
1461
+ managed.manager = null;
1462
+ managed.floor = 'closed';
1463
+ managed.turn_current = null;
1464
+ managed.turn_queue = [];
1465
+ managedChanged = true;
1466
+ saveManagedConfig(managed);
1467
+ broadcastSystemMessage(`[SYSTEM] Manager disconnected. Call claim_manager() to take over as the new manager.`);
1468
+ }
1469
+ }
1341
1470
 
1342
- // Start heartbeat — updates last_activity every 10s so dashboard knows we're alive
1343
- // Deterministic jitter per agent to spread writes across the interval (prevents lock storms at 10 agents)
1344
- const heartbeatJitter = name.split('').reduce((h, c) => h + c.charCodeAt(0), 0) % 2000;
1345
- if (heartbeatInterval) clearInterval(heartbeatInterval);
1346
- heartbeatInterval = setInterval(() => {
1347
- try {
1348
- // Scale fix: write per-agent heartbeat file instead of lock+read+write agents.json
1349
- // Eliminates write contention — each agent writes only its own file, no locking needed
1350
- touchHeartbeat(registeredName);
1351
- const agents = getAgents(); // cached + merges heartbeat files automatically
1352
- // Managed mode: detect dead manager and dead turn holder
1353
- if (isManagedMode()) {
1354
- const managed = getManagedConfig();
1355
- let managedChanged = false;
1356
-
1357
- // Dead manager detection
1358
- if (managed.manager && managed.manager !== registeredName) {
1359
- if (agents[managed.manager] && !isPidAlive(agents[managed.manager].pid, agents[managed.manager].last_activity)) {
1360
- managed.manager = null;
1361
- managed.floor = 'closed';
1362
- managed.turn_current = null;
1363
- managed.turn_queue = [];
1364
- managedChanged = true;
1365
- saveManagedConfig(managed);
1366
- broadcastSystemMessage(`[SYSTEM] Manager disconnected. Call claim_manager() to take over as the new manager.`);
1471
+ // Dead turn holder detection — unstick the floor
1472
+ if (!managedChanged && managed.turn_current && managed.turn_current !== registeredName && managed.manager) {
1473
+ if (agents[managed.turn_current] && !isPidAlive(agents[managed.turn_current].pid, agents[managed.turn_current].last_activity)) {
1474
+ const deadAgent = managed.turn_current;
1475
+ managed.turn_current = null;
1476
+ managed.floor = 'closed';
1477
+ managed.turn_queue = [];
1478
+ saveManagedConfig(managed);
1479
+ if (managed.manager !== registeredName) {
1480
+ sendSystemMessage(managed.manager, `[FLOOR] ${deadAgent} disconnected while holding the floor. Floor returned to you.`);
1481
+ }
1482
+ }
1367
1483
  }
1368
1484
  }
1369
-
1370
- // Dead turn holder detection — unstick the floor
1371
- if (!managedChanged && managed.turn_current && managed.turn_current !== registeredName && managed.manager) {
1372
- if (agents[managed.turn_current] && !isPidAlive(agents[managed.turn_current].pid, agents[managed.turn_current].last_activity)) {
1373
- const deadAgent = managed.turn_current;
1374
- managed.turn_current = null;
1375
- managed.floor = 'closed';
1376
- managed.turn_queue = [];
1377
- saveManagedConfig(managed);
1378
- if (managed.manager !== registeredName) {
1379
- sendSystemMessage(managed.manager, `[FLOOR] ${deadAgent} disconnected while holding the floor. Floor returned to you.`);
1485
+ // Clean stale listening_since flags (listen times out at 5min, clear after 6min)
1486
+ for (const [aName, aInfo] of Object.entries(agents)) {
1487
+ if (aInfo.listening_since) {
1488
+ const listenAge = Date.now() - new Date(aInfo.listening_since).getTime();
1489
+ if (listenAge > 360000) {
1490
+ aInfo.listening_since = null;
1380
1491
  }
1381
1492
  }
1382
1493
  }
1383
- }
1384
- // Snapshot dead agents BEFORE cleanup (for auto-recovery)
1385
- snapshotDeadAgents(agents);
1386
- // Clean up file locks held by dead agents
1387
- cleanStaleLocks();
1388
- cleanStaleChannelMembers();
1389
- // Auto-escalation: notify team about long-blocked tasks
1390
- escalateBlockedTasks();
1391
- // Stand-up meetings: periodic team check-ins
1392
- triggerStandupIfDue();
1393
- // Watchdog: nudge idle agents, reassign stuck work (autonomous mode only)
1394
- watchdogCheck();
1395
- } catch {}
1396
- }, 10000 + heartbeatJitter);
1397
- heartbeatInterval.unref(); // Don't prevent process exit
1494
+ // Agent status change notifications — detect agents going offline/online
1495
+ detectAgentStatusChanges(agents);
1496
+ // Snapshot dead agents BEFORE cleanup (for auto-recovery)
1497
+ snapshotDeadAgents(agents);
1498
+ // Clean up file locks held by dead agents
1499
+ cleanStaleLocks();
1500
+ cleanStaleChannelMembers();
1501
+ // Auto-escalation: notify team about long-blocked tasks
1502
+ escalateBlockedTasks();
1503
+ // Stand-up meetings: periodic team check-ins
1504
+ triggerStandupIfDue();
1505
+ // Auto-reassign stuck workflow steps from dead agents
1506
+ checkStuckWorkflowSteps();
1507
+ // Stale task detection: warn about tasks in_progress for >30 minutes without update
1508
+ checkStaleTasks();
1509
+ // Watchdog: nudge idle agents, reassign stuck work (autonomous mode only)
1510
+ watchdogCheck();
1511
+ } catch (e) { log.warn("heartbeat loop error:", e.message); }
1512
+ }, 10000 + heartbeatJitter);
1513
+ heartbeatInterval.unref(); // Don't prevent process exit
1398
1514
 
1399
1515
  // Fire join event + recovery data for returning agents
1400
1516
  const config = getConfig();
@@ -1456,7 +1572,7 @@ function toolRegister(name, provider = null) {
1456
1572
  // Clean up snapshot after loading
1457
1573
  try { fs.unlinkSync(recoveryFile); } catch {}
1458
1574
  }
1459
- } catch {}
1575
+ } catch (e) { log.debug("recovery file parse failed:", e.message); }
1460
1576
  }
1461
1577
 
1462
1578
  // Notify other agents
@@ -1470,7 +1586,7 @@ function toolRegister(name, provider = null) {
1470
1586
  if (roleAssignments && roleAssignments[name]) {
1471
1587
  result.your_role = roleAssignments[name];
1472
1588
  }
1473
- } catch {}
1589
+ } catch (e) { log.debug("role assignment failed:", e.message); }
1474
1590
  }
1475
1591
 
1476
1592
  return result;
@@ -1502,7 +1618,7 @@ function setListening(isListening) {
1502
1618
  saveAgents(agents);
1503
1619
  }
1504
1620
  } finally { unlockAgentsFile(); }
1505
- } catch {}
1621
+ } catch (e) { log.debug("register workspace status failed:", e.message); }
1506
1622
  }
1507
1623
 
1508
1624
  function toolListAgents() {
@@ -1519,7 +1635,7 @@ function toolListAgents() {
1519
1635
  registered_at: info.timestamp,
1520
1636
  last_activity: lastActivity,
1521
1637
  idle_seconds: alive ? idleSeconds : null,
1522
- status: !alive ? 'dead' : idleSeconds > 60 ? 'sleeping' : 'active',
1638
+ status: !alive ? 'offline' : (info.listening_since && alive) ? 'listening' : idleSeconds > 30 ? 'idle' : 'working',
1523
1639
  listening_since: info.listening_since || null,
1524
1640
  is_listening: !!(info.listening_since && alive),
1525
1641
  last_listened_at: info.last_listened_at || null,
@@ -1534,12 +1650,12 @@ function toolListAgents() {
1534
1650
  try {
1535
1651
  const ws = getWorkspace(name);
1536
1652
  if (ws._status) result[name].current_status = ws._status;
1537
- } catch {}
1653
+ } catch (e) { log.debug("workspace status read failed:", e.message); }
1538
1654
  }
1539
1655
  return { agents: result };
1540
1656
  }
1541
1657
 
1542
- async function toolSendMessage(content, to = null, reply_to = null, channel = null) {
1658
+ async function toolSendMessage(content, to = null, reply_to = null, channel = null, priority = null) {
1543
1659
  if (!registeredName) {
1544
1660
  return { error: 'You must call register() first' };
1545
1661
  }
@@ -1691,7 +1807,8 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1691
1807
  }
1692
1808
  }
1693
1809
 
1694
- if (!agents[to]) {
1810
+ // Allow sending to __user__ (human via dashboard) even though they're not a registered agent
1811
+ if (to !== '__user__' && !agents[to]) {
1695
1812
  return { error: `Agent "${to}" is not registered` };
1696
1813
  }
1697
1814
 
@@ -1699,8 +1816,8 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1699
1816
  return { error: 'Cannot send a message to yourself' };
1700
1817
  }
1701
1818
 
1702
- // Permission check
1703
- if (!canSendTo(registeredName, to)) {
1819
+ // Permission check (skip for __user__ — human always has read access)
1820
+ if (to !== '__user__' && !canSendTo(registeredName, to)) {
1704
1821
  return { error: `Permission denied: you are not allowed to send messages to "${to}"` };
1705
1822
  }
1706
1823
 
@@ -1741,6 +1858,7 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1741
1858
  to: isGroup ? '__group__' : to,
1742
1859
  content,
1743
1860
  timestamp: new Date().toISOString(),
1861
+ ...(priority && ['critical', 'normal', 'low'].includes(priority) && { priority }),
1744
1862
  ...(isGroup && to && { addressed_to: [to] }),
1745
1863
  ...(channel && { channel }),
1746
1864
  ...(reply_to && { reply_to }),
@@ -1829,7 +1947,7 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1829
1947
  result._decision_hint = `Related decision exists: "${overlap.decision}" (topic: ${overlap.topic || 'general'}). Check get_decisions() before re-debating.`;
1830
1948
  }
1831
1949
  }
1832
- } catch {}
1950
+ } catch (e) { log.debug("listen channel watcher setup failed:", e.message); }
1833
1951
  }
1834
1952
  if (_cooldownApplied > 0) result.cooldown_applied_ms = _cooldownApplied;
1835
1953
  if (channel) result.channel = channel;
@@ -1862,6 +1980,25 @@ async function toolSendMessage(content, to = null, reply_to = null, channel = nu
1862
1980
  result.you_have_messages = myPending.length;
1863
1981
  result.urgent = `You have ${myPending.length} unread message(s) waiting. Call listen_group() after this to read them.`;
1864
1982
  }
1983
+
1984
+ // Coordinator enforcement: warn if sending work assignment without creating a task first
1985
+ const senderProfile = getProfiles()[registeredName];
1986
+ const senderRole = senderProfile && senderProfile.role ? senderProfile.role.toLowerCase() : '';
1987
+ const isSenderLead = senderRole === 'lead' || senderRole === 'manager' || senderRole === 'coordinator';
1988
+ if (isSenderLead && to && to !== '__user__' && to !== '__all__' && to !== '__group__') {
1989
+ const assignmentKeywords = /\b(implement|fix|build|add|create|update|redesign|refactor|write|deploy|test|review|research|investigate)\b/i;
1990
+ if (assignmentKeywords.test(content)) {
1991
+ const recentTasks = getTasks().filter(t => {
1992
+ if (t.assignee !== to) return false;
1993
+ const age = Date.now() - new Date(t.created_at).getTime();
1994
+ return age < 60000; // created in last 60 seconds
1995
+ });
1996
+ if (recentTasks.length === 0) {
1997
+ result.task_warning = `No task created for this assignment to ${to}. Use create_task(title, description, "${to}") to formally track this work.`;
1998
+ }
1999
+ }
2000
+ }
2001
+
1865
2002
  return result;
1866
2003
  }
1867
2004
 
@@ -2043,8 +2180,13 @@ function toolCheckMessages(from = null) {
2043
2180
  if (m.addressed_to && m.addressed_to.includes(registeredName)) addressedCount++;
2044
2181
  }
2045
2182
 
2183
+ // Include pending notification count
2184
+ const allNotifs = getNotifications();
2185
+ const unreadNotifs = allNotifs.filter(n => !n.read_by.includes(registeredName));
2186
+
2046
2187
  const result = {
2047
2188
  count: unconsumed.length,
2189
+ pending_notifications: unreadNotifs.length,
2048
2190
  // Scale fix: return previews not full content — agent gets full content via listen_group()
2049
2191
  messages: unconsumed.map(m => ({
2050
2192
  id: m.id,
@@ -2068,6 +2210,60 @@ function toolCheckMessages(from = null) {
2068
2210
  return result;
2069
2211
  }
2070
2212
 
2213
// Drain the caller's unread inbox in one call: return the full message
// bodies and persist their consumed state immediately.
//   from  — optional sender name to filter by (null = all senders)
//   limit — optional cap on how many messages to consume this call
// Returns { success, count, messages, remaining, agents_online, coordinator_mode }
// or { error } when the caller has not registered yet.
function toolConsumeMessages(from = null, limit = null) {
  if (!registeredName) {
    return { error: 'You must call register() first' };
  }

  let batch = getUnconsumedMessages(registeredName, from);
  if (limit && limit > 0 && batch.length > limit) {
    batch = batch.slice(0, limit);
  }

  if (batch.length === 0) {
    return { success: true, count: 0, messages: [] };
  }

  // Persist consumption state for every message in this batch
  const consumedIds = getConsumedIds(registeredName);
  batch.forEach((msg) => {
    consumedIds.add(msg.id);
    markAsRead(registeredName, msg.id);
  });
  saveConsumedIds(registeredName, consumedIds);

  // Fast-forward the read offset to the current end of the message log
  const messagesPath = getMessagesFile(currentBranch);
  if (fs.existsSync(messagesPath)) {
    lastReadOffset = fs.statSync(messagesPath).size;
  }

  touchActivity();

  // Anything still unread after this batch (e.g. messages beyond `limit`)
  const stillUnread = getUnconsumedMessages(registeredName, null);

  const agents = getAgents();
  const onlineCount = Object.values(agents).filter((info) => isPidAlive(info.pid, info.last_activity)).length;

  return {
    success: true,
    count: batch.length,
    messages: batch.map((m) => ({
      id: m.id,
      from: m.from,
      content: m.content,
      timestamp: m.timestamp,
      ...(m.reply_to && { reply_to: m.reply_to }),
      ...(m.thread_id && { thread_id: m.thread_id }),
      ...(m.addressed_to && { addressed_to: m.addressed_to }),
    })),
    remaining: stillUnread.length,
    agents_online: onlineCount,
    coordinator_mode: getConfig().coordinator_mode || 'responsive',
  };
}
2266
+
2071
2267
  function toolAckMessage(messageId) {
2072
2268
  if (!registeredName) {
2073
2269
  return { error: 'You must call register() first' };
@@ -2599,7 +2795,7 @@ async function toolListenGroup() {
2599
2795
  });
2600
2796
  chWatcher.on('error', () => {});
2601
2797
  channelWatchers.push(chWatcher);
2602
- } catch {}
2798
+ } catch (e) { log.debug("channel watcher setup failed:", e.message); }
2603
2799
  }
2604
2800
  }
2605
2801
  } catch {
@@ -2638,6 +2834,72 @@ async function toolListenGroup() {
2638
2834
  });
2639
2835
  }
2640
2836
 
2837
// Auto speaker selection for group messages — decides whether THIS agent
// should respond. Priority: 1) @mentioned agents, 2) skill match against
// the agent-card registry, 3) round-robin fallback.
let _lastSpeakerIndex = 0;
function selectSpeaker(msg, agentName, aliveAgentNames) {
  // Explicit @mentions trump everything else
  const mentions = msg.addressed_to;
  if (mentions && mentions.length > 0) return mentions.includes(agentName);

  // A direct message to this agent always warrants a reply
  if (msg.to === agentName) return true;

  // System traffic is informational — nobody needs to respond
  if (msg.system || msg.from === '__system__') return false;

  // Skill-based routing: respond when this agent's card matches the content
  const cards = readJsonFile(AGENT_CARDS_FILE) || {};
  const ownCard = cards[agentName];
  const hasSkills = ownCard && ownCard.skills && ownCard.skills.length > 0;
  if (hasSkills && msg.content) {
    const haystack = msg.content.toLowerCase();
    const cardMatches = (card) => Boolean(card && card.skills && card.skills.some((skill) => haystack.includes(skill)));
    if (cardMatches(ownCard)) {
      // Any OTHER live agent (excluding the sender) whose skills also match
      const rivals = aliveAgentNames.filter((n) => n !== agentName && n !== msg.from && cardMatches(cards[n]));
      // Sole matcher responds outright
      if (rivals.length === 0) return true;
      // Tie-break deterministically: first name in alphabetical order wins
      return [agentName, ...rivals].sort()[0] === agentName;
    }
  }

  // Round-robin fallback for unaddressed group chatter
  const pool = aliveAgentNames.filter((n) => n !== msg.from).sort();
  if (pool.length === 0) return false;
  const isChosen = pool[_lastSpeakerIndex % pool.length] === agentName;
  if (isChosen) _lastSpeakerIndex += 1;
  return isChosen;
}
2881
+
2882
// Message priority classification: critical > normal > low
// Critical: task assignments, human messages, workflow handoffs, system events
// Normal: regular agent-to-agent chat
// Low: status updates, acknowledgements
function classifyPriority(msg) {
  // An explicitly tagged message keeps its tag
  if (msg.priority) return msg.priority;

  // Anything from the human operator is always urgent
  if (msg.from === '__user__') return 'critical';

  const body = msg.content;

  // System traffic: only workflow/task/approval events are urgent
  if (msg.system || msg.from === '__system__') {
    const urgent = body && ['[Workflow', '[TASK', '[APPROVAL'].some((marker) => body.includes(marker));
    return urgent ? 'critical' : 'normal';
  }

  // Agent-to-agent: classify by conventional content markers
  if (body) {
    if (['[Workflow', '[HANDOFF]', '[PLAN'].some((marker) => body.includes(marker))) return 'critical';
    if (['[STATUS]', '[ACK]', '[PROGRESS]'].some((prefix) => body.startsWith(prefix))) return 'low';
  }

  // Structured handoffs are urgent even without a content marker
  if (msg.type === 'handoff') return 'critical';
  return 'normal';
}
2902
+
2641
2903
  // Build the response for listen_group — kept lean to reduce context accumulation
2642
2904
  // Context/history removed: agents should call get_history() when they need it
2643
2905
  function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
@@ -2648,12 +2910,16 @@ function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
2648
2910
  const wasAddressed = batch.some(m => m.addressed_to && m.addressed_to.includes(agentName));
2649
2911
  sendLimit = wasAddressed ? 2 : 1;
2650
2912
 
2651
- // Sort batch by priority: system > threaded replies > direct > broadcast
2913
+ // Sort batch by priority: critical(0) > normal(1) > low(2), then by type
2914
+ const PRIORITY_ORDER = { critical: 0, normal: 1, low: 2 };
2652
2915
  function messagePriority(m) {
2653
- if (m.system || m.from === '__system__') return 0;
2654
- if (m.reply_to || m.thread_id) return 1;
2655
- if (!m.broadcast) return 2;
2656
- return 3;
2916
+ const prio = PRIORITY_ORDER[classifyPriority(m)] || 1;
2917
+ // Sub-sort within same priority: system > threaded > direct > broadcast
2918
+ let subPrio = 3;
2919
+ if (m.system || m.from === '__system__') subPrio = 0;
2920
+ else if (m.reply_to || m.thread_id) subPrio = 1;
2921
+ else if (!m.broadcast) subPrio = 2;
2922
+ return prio * 10 + subPrio;
2657
2923
  }
2658
2924
  batch.sort((a, b) => {
2659
2925
  const pa = messagePriority(a), pb = messagePriority(b);
@@ -2697,6 +2963,7 @@ function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
2697
2963
  return {
2698
2964
  id: m.id, from: m.from, to: m.to, content: m.content,
2699
2965
  timestamp: m.timestamp,
2966
+ priority: classifyPriority(m),
2700
2967
  age_seconds: ageSec,
2701
2968
  ...(ageSec > 30 && { delayed: true }),
2702
2969
  ...(m.reply_to && { reply_to: m.reply_to }),
@@ -2704,7 +2971,7 @@ function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
2704
2971
  ...(m.addressed_to && { addressed_to: m.addressed_to }),
2705
2972
  ...(m.to === '__group__' && {
2706
2973
  addressed_to_you: !m.addressed_to || m.addressed_to.includes(agentName),
2707
- should_respond: !m.addressed_to || m.addressed_to.includes(agentName),
2974
+ should_respond: selectSpeaker(m, agentName, agentNames),
2708
2975
  }),
2709
2976
  };
2710
2977
  }),
@@ -2744,6 +3011,16 @@ function buildListenGroupResponse(batch, consumed, agentName, listenStart) {
2744
3011
  result.next_action = isAutonomousMode()
2745
3012
  ? 'Process these messages, then call get_work() to continue the proactive work loop. Do NOT call listen_group() — use get_work() instead.'
2746
3013
  : 'After processing these messages and sending your response, call listen_group() again immediately. Never stop listening.';
3014
+ result.coordinator_mode = getConfig().coordinator_mode || 'responsive';
3015
+
3016
+ // Task nudge: remind agent of their outstanding tasks
3017
+ try {
3018
+ const myTasks = getTasks().filter(t => t.assignee === agentName && (t.status === 'pending' || t.status === 'in_progress'));
3019
+ if (myTasks.length > 0) {
3020
+ result.task_reminder = { pending: myTasks.filter(t => t.status === 'pending').length, in_progress: myTasks.filter(t => t.status === 'in_progress').length, tasks: myTasks.map(t => ({ id: t.id, title: t.title, status: t.status })) };
3021
+ }
3022
+ } catch (e) { log.debug('task reminder in listen_group failed:', e.message); }
3023
+
2747
3024
  return result;
2748
3025
  }
2749
3026
 
@@ -3086,7 +3363,7 @@ function toolUpdateTask(taskId, status, notes = null) {
3086
3363
  } else if (status === 'blocked') {
3087
3364
  saveWorkspace(registeredName, Object.assign(getWorkspace(registeredName), { _status: `BLOCKED on: ${task.title}`, _status_since: new Date().toISOString() }));
3088
3365
  }
3089
- } catch {}
3366
+ } catch (e) { log.warn("verify_and_advance failed:", e.message); }
3090
3367
 
3091
3368
  // Task-channel auto-join: when claiming a task that has a channel, auto-join it
3092
3369
  if (status === 'in_progress' && task.channel) {
@@ -3100,6 +3377,7 @@ function toolUpdateTask(taskId, status, notes = null) {
3100
3377
  // Event hooks: task completion
3101
3378
  if (status === 'done') {
3102
3379
  fireEvent('task_complete', { title: task.title, created_by: task.created_by });
3380
+ appendNotification('task_done', registeredName, `Task "${task.title}" completed by ${registeredName}`, task.id);
3103
3381
  // Check if this resolves any dependencies
3104
3382
  const deps = getDeps();
3105
3383
  for (const dep of deps) {
@@ -3128,6 +3406,50 @@ function toolUpdateTask(taskId, status, notes = null) {
3128
3406
  if (aliveOthers.length > 0) {
3129
3407
  broadcastSystemMessage(`[REVIEW NEEDED] ${registeredName} completed task "${task.title}". Team: please review the work and call submit_review() if applicable.`, registeredName);
3130
3408
  }
3409
+
3410
+ // Auto-sync: advance matching workflow step when task is done
3411
+ try {
3412
+ const workflows = getWorkflows();
3413
+ let wfChanged = false;
3414
+ for (const wf of workflows) {
3415
+ if (wf.status !== 'active') continue;
3416
+ for (const step of wf.steps) {
3417
+ if (step.status !== 'in_progress') continue;
3418
+ if (step.assignee !== registeredName) continue;
3419
+ // Match by assignee — the agent who completed the task also has an in_progress step
3420
+ step.status = 'done';
3421
+ step.completed_at = new Date().toISOString();
3422
+ step.notes = `Auto-completed via task "${task.title}"`;
3423
+ saveWorkflowCheckpoint(wf, step);
3424
+ // Start next ready steps
3425
+ const nextSteps = findReadySteps(wf);
3426
+ for (const ns of nextSteps) {
3427
+ if (ns.requires_approval) {
3428
+ ns.status = 'awaiting_approval';
3429
+ ns.approval_requested_at = new Date().toISOString();
3430
+ sendSystemMessage('__user__', `[APPROVAL NEEDED] Workflow "${wf.name}" — Step ${ns.id}: "${ns.description}". Approve or reject from the dashboard.`);
3431
+ } else {
3432
+ ns.status = 'in_progress';
3433
+ ns.started_at = new Date().toISOString();
3434
+ if (ns.assignee && ns.assignee !== registeredName) {
3435
+ const handoffContent = `[Workflow "${wf.name}"] Step ${ns.id} assigned to you: ${ns.description}`;
3436
+ messageSeq++;
3437
+ const hMsg = { id: generateId(), seq: messageSeq, from: registeredName, to: ns.assignee, content: handoffContent, timestamp: new Date().toISOString(), type: 'handoff' };
3438
+ fs.appendFileSync(getMessagesFile(currentBranch), JSON.stringify(hMsg) + '\n');
3439
+ fs.appendFileSync(getHistoryFile(currentBranch), JSON.stringify(hMsg) + '\n');
3440
+ }
3441
+ }
3442
+ }
3443
+ if (wf.steps.every(s => s.status === 'done')) wf.status = 'completed';
3444
+ wf.updated_at = new Date().toISOString();
3445
+ wfChanged = true;
3446
+ broadcastSystemMessage(`[WORKFLOW] Step "${step.description}" auto-advanced via task completion by ${registeredName}`);
3447
+ break; // one step per task completion
3448
+ }
3449
+ if (wfChanged) break;
3450
+ }
3451
+ if (wfChanged) saveWorkflows(workflows);
3452
+ } catch (e) { log.warn('auto-advance workflow on task done failed:', e.message); }
3131
3453
  }
3132
3454
 
3133
3455
  return { success: true, task_id: task.id, status: task.status, title: task.title };
@@ -3203,7 +3525,7 @@ function toolSearchMessages(query, from = null, limit = 20) {
3203
3525
  allMessages = allMessages.concat(chMsgs);
3204
3526
  }
3205
3527
  }
3206
- } catch {}
3528
+ } catch (e) { log.warn("get_work search failed:", e.message); }
3207
3529
  // Sort by timestamp descending for newest-first results
3208
3530
  allMessages.sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp));
3209
3531
 
@@ -3233,7 +3555,7 @@ function toolSearchMessages(query, from = null, limit = 20) {
3233
3555
  allMessages = allMessages.concat(readJsonl(chFile));
3234
3556
  }
3235
3557
  }
3236
- } catch {}
3558
+ } catch (e) { log.debug("get_work detail failed:", e.message); }
3237
3559
  allMessages.sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp));
3238
3560
  for (let i = 0; i < allMessages.length && results.length < limit; i++) {
3239
3561
  const m = allMessages[i];
@@ -3411,7 +3733,8 @@ function toolCreateWorkflow(name, steps, autonomous = false, parallel = false) {
3411
3733
  description: step.description.substring(0, 200),
3412
3734
  assignee: step.assignee || null,
3413
3735
  depends_on: Array.isArray(step.depends_on) ? step.depends_on : [],
3414
- status: 'pending', // all start pending; we'll activate ready ones below
3736
+ requires_approval: !!step.requires_approval,
3737
+ status: 'pending',
3415
3738
  started_at: null,
3416
3739
  completed_at: null,
3417
3740
  notes: '',
@@ -3500,11 +3823,37 @@ function toolAdvanceWorkflow(workflowId, notes) {
3500
3823
  currentStep.completed_at = new Date().toISOString();
3501
3824
  if (notes) currentStep.notes = notes.substring(0, 500);
3502
3825
 
3826
+ // Save checkpoint
3827
+ saveWorkflowCheckpoint(wf, currentStep);
3828
+
3829
+ // Auto-sync: mark matching in_progress tasks as done
3830
+ try {
3831
+ const tasks = getTasks();
3832
+ const matchingTask = tasks.find(t =>
3833
+ t.status === 'in_progress' && t.assignee === registeredName
3834
+ );
3835
+ if (matchingTask) {
3836
+ matchingTask.status = 'done';
3837
+ matchingTask.updated_at = new Date().toISOString();
3838
+ matchingTask.notes.push({ by: '__system__', text: `Auto-completed via workflow step "${currentStep.description}"`, at: new Date().toISOString() });
3839
+ saveTasks(tasks);
3840
+ }
3841
+ } catch (e) { log.warn('auto-complete task on workflow advance failed:', e.message); }
3842
+
3503
3843
  // Find all ready steps (supports parallel via depends_on)
3504
3844
  const nextSteps = findReadySteps(wf);
3505
3845
  if (nextSteps.length > 0) {
3506
3846
  const agents = getAgents();
3507
3847
  for (const step of nextSteps) {
3848
+ // Check if step requires human approval before starting
3849
+ if (step.requires_approval) {
3850
+ step.status = 'awaiting_approval';
3851
+ step.approval_requested_at = new Date().toISOString();
3852
+ sendSystemMessage('__user__',
3853
+ `[APPROVAL NEEDED] Workflow "${wf.name}" — Step ${step.id}: "${step.description}". Approve or reject from the dashboard.`
3854
+ );
3855
+ continue;
3856
+ }
3508
3857
  step.status = 'in_progress';
3509
3858
  step.started_at = new Date().toISOString();
3510
3859
  if (step.assignee && agents[step.assignee] && step.assignee !== registeredName && canSendTo(registeredName, step.assignee)) {
@@ -3524,6 +3873,7 @@ function toolAdvanceWorkflow(workflowId, notes) {
3524
3873
 
3525
3874
  const doneCount = wf.steps.filter(s => s.status === 'done').length;
3526
3875
  const pct = Math.round((doneCount / wf.steps.length) * 100);
3876
+ appendNotification('workflow_advanced', registeredName, `Workflow "${wf.name}" step ${currentStep.id} done (${pct}%)`, wf.id);
3527
3877
 
3528
3878
  return {
3529
3879
  success: true,
@@ -3535,14 +3885,32 @@ function toolAdvanceWorkflow(workflowId, notes) {
3535
3885
  };
3536
3886
  }
3537
3887
 
3538
- function toolWorkflowStatus(workflowId) {
3888
+ function toolWorkflowStatus(workflowId, action, checkpointIndex) {
3539
3889
  const workflows = getWorkflows();
3890
+
3891
+ // Rollback action
3892
+ if (action === 'rollback' && workflowId && checkpointIndex !== undefined) {
3893
+ const wf = workflows.find(w => w.id === workflowId);
3894
+ if (!wf) return { error: `Workflow not found: ${workflowId}` };
3895
+ if (!wf.checkpoints || !wf.checkpoints[checkpointIndex]) return { error: 'Checkpoint not found' };
3896
+ const checkpoint = wf.checkpoints[checkpointIndex];
3897
+ for (const savedStep of checkpoint.step_states) {
3898
+ const step = wf.steps.find(s => s.id === savedStep.id);
3899
+ if (step) { step.status = savedStep.status; step.assignee = savedStep.assignee; }
3900
+ }
3901
+ wf.updated_at = new Date().toISOString();
3902
+ saveWorkflows(workflows);
3903
+ broadcastSystemMessage(`[WORKFLOW] Rolled back "${wf.name}" to checkpoint: step "${checkpoint.step_description}"`);
3904
+ return { success: true, rolled_back_to: checkpoint };
3905
+ }
3906
+
3540
3907
  if (workflowId) {
3541
3908
  const wf = workflows.find(w => w.id === workflowId);
3542
3909
  if (!wf) return { error: `Workflow not found: ${workflowId}` };
3543
3910
  const doneCount = wf.steps.filter(s => s.status === 'done').length;
3544
3911
  const pct = Math.round((doneCount / wf.steps.length) * 100);
3545
3912
  const result = { workflow: wf, progress: `${doneCount}/${wf.steps.length} (${pct}%)` };
3913
+ if (wf.checkpoints) result.checkpoints = wf.checkpoints.length;
3546
3914
  if (wf.status === 'completed') result.report = generateCompletionReport(wf);
3547
3915
  return result;
3548
3916
  }
@@ -3550,7 +3918,7 @@ function toolWorkflowStatus(workflowId) {
3550
3918
  count: workflows.length,
3551
3919
  workflows: workflows.map(w => {
3552
3920
  const doneCount = w.steps.filter(s => s.status === 'done').length;
3553
- return { id: w.id, name: w.name, status: w.status, steps: w.steps.length, done: doneCount, progress: Math.round((doneCount / w.steps.length) * 100) + '%' };
3921
+ return { id: w.id, name: w.name, status: w.status, steps: w.steps.length, done: doneCount, progress: Math.round((doneCount / w.steps.length) * 100) + '%', checkpoints: w.checkpoints ? w.checkpoints.length : 0 };
3554
3922
  }),
3555
3923
  };
3556
3924
  }
@@ -3880,7 +4248,8 @@ async function toolVerifyAndAdvance(params) {
3880
4248
  // AUTO-ADVANCE
3881
4249
  currentStep.status = 'done';
3882
4250
  currentStep.completed_at = new Date().toISOString();
3883
- clearCheckpoint(registeredName, workflow_id, currentStep.id); // Item 8: clear checkpoint on completion
4251
+ saveWorkflowCheckpoint(wf, currentStep);
4252
+ clearCheckpoint(registeredName, workflow_id, currentStep.id);
3884
4253
  return advanceToNextSteps(false);
3885
4254
  }
3886
4255
 
@@ -3888,6 +4257,7 @@ async function toolVerifyAndAdvance(params) {
3888
4257
  // ADVANCE BUT FLAG
3889
4258
  currentStep.status = 'done';
3890
4259
  currentStep.completed_at = new Date().toISOString();
4260
+ saveWorkflowCheckpoint(wf, currentStep);
3891
4261
  currentStep.flagged = true;
3892
4262
  currentStep.flag_reason = `Low confidence (${confidence}%). May need review later.`;
3893
4263
  clearCheckpoint(registeredName, workflow_id, currentStep.id); // Item 8: clear checkpoint
@@ -4057,6 +4427,67 @@ function reassignWorkFrom(deadAgentName) {
4057
4427
  return reassignCount;
4058
4428
  }
4059
4429
 
4430
// Auto-reassign workflow steps from dead agents after timeout.
// Runs from the heartbeat loop: any in_progress step whose assignee's
// process is no longer alive and whose runtime exceeds
// NEOHIVE_STEP_TIMEOUT_MINUTES (default 5) is reset to pending so the
// next available agent can pick it up via get_work().
function checkStuckWorkflowSteps() {
  if (!registeredName) return;
  const workflows = getWorkflows();
  const agents = getAgents();
  // FIX: always pass a radix to parseInt; NaN (unset/garbage env) still
  // falls back to the 5-minute default via ||.
  const timeoutMs = (Number.parseInt(process.env.NEOHIVE_STEP_TIMEOUT_MINUTES, 10) || 5) * 60000;
  let changed = false;

  for (const wf of workflows) {
    if (wf.status !== 'active') continue;
    if (wf.paused) continue;

    for (const step of wf.steps) {
      // Only running, assigned steps with a known start time can be stuck
      if (step.status !== 'in_progress') continue;
      if (!step.assignee) continue;
      if (!step.started_at) continue;

      const elapsed = Date.now() - new Date(step.started_at).getTime();
      if (elapsed < timeoutMs) continue;

      // Leave the step alone while its assignee's process is still alive
      const agentInfo = agents[step.assignee];
      if (agentInfo && isPidAlive(agentInfo.pid, agentInfo.last_activity)) continue;

      log.warn(`Workflow step ${step.id} reassigned: ${step.assignee} offline for ${Math.round(elapsed / 60000)}min`);
      const deadAgent = step.assignee;
      // Reset to pending (and record provenance) so another agent can claim it
      step.status = 'pending';
      step.assignee = null;
      step.reassigned_from = deadAgent;
      step.reassigned_at = new Date().toISOString();
      changed = true;

      broadcastSystemMessage(
        `[WORKFLOW] Step "${step.description}" reassigned — ${deadAgent} went offline. Next available agent will pick it up via get_work().`
      );
    }
  }

  if (changed) saveWorkflows(workflows);
}
4469
+
4470
// Stale task detection: warn about tasks in_progress for >30 minutes without update
// Task ids already announced — each task is warned about at most once per
// server lifetime (the set is never pruned while the process runs).
const _staleTaskWarned = new Set();
function checkStaleTasks() {
  try {
    const STALE_AFTER_MS = 30 * 60 * 1000; // 30 minutes
    const now = Date.now();
    for (const task of getTasks()) {
      // Only running tasks with a known last-update time can go stale
      if (task.status !== 'in_progress' || !task.updated_at) continue;
      const sinceUpdate = now - new Date(task.updated_at).getTime();
      if (sinceUpdate < STALE_AFTER_MS) continue;
      // De-duplicate repeated heartbeat passes over the same stale task
      if (_staleTaskWarned.has(task.id)) continue;
      _staleTaskWarned.add(task.id);
      const mins = Math.round(sinceUpdate / 60000);
      broadcastSystemMessage(`[WARNING] Stale task: "${task.title}" assigned to ${task.assignee || 'unassigned'} — in_progress for ${mins}min without update. Agent should call update_task("${task.id}", "done") or report a blocker.`);
      log.warn(`Stale task detected: ${task.id} "${task.title}" (${mins}min)`);
    }
  } catch (e) { log.debug('stale task check failed:', e.message); }
}
4490
+
4060
4491
  function watchdogCheck() {
4061
4492
  // Run in autonomous mode always, AND in group mode when agents are idle 5+ min
4062
4493
  if (!isAutonomousMode() && !isGroupMode()) return;
@@ -4160,7 +4591,7 @@ function watchdogCheck() {
4160
4591
  sendSystemMessage(worker, `[REBALANCE] You've been moved from ${quietTeam.name} to ${busyTeam.name} — they have ${busyTeam.pendingTasks} pending tasks and need help.`);
4161
4592
  }
4162
4593
  }
4163
- } catch {}
4594
+ } catch (e) { log.warn("escalate blocked tasks failed:", e.message); }
4164
4595
 
4165
4596
  // UE5 safety: detect stale UE5 locks (ue5-editor, ue5-compile)
4166
4597
  try {
@@ -4185,7 +4616,7 @@ function watchdogCheck() {
4185
4616
  }
4186
4617
  }
4187
4618
  if (locksChanged) writeJsonFile(LOCKS_FILE, locks);
4188
- } catch {}
4619
+ } catch (e) { log.warn("stale lock cleanup failed:", e.message); }
4189
4620
 
4190
4621
  if (agentsChanged) saveAgents(agents);
4191
4622
  if (workflowsChanged) saveWorkflows(workflows);
@@ -4407,7 +4838,7 @@ function generateCompletionReport(workflow) {
4407
4838
  totalRetries += relevant.length;
4408
4839
  for (const r of relevant) retryDetails.push({ agent: name, task: r.task, attempt: r.attempt });
4409
4840
  }
4410
- } catch {}
4841
+ } catch (e) { log.debug("auto-plan retry scan failed:", e.message); }
4411
4842
  }
4412
4843
 
4413
4844
  const report = {
@@ -4585,7 +5016,7 @@ function autoAssignRoles() {
4585
5016
  }
4586
5017
  }
4587
5018
  saveChannelsData(channels);
4588
- } catch {}
5019
+ } catch (e) { log.warn("stale channel cleanup failed:", e.message); }
4589
5020
  }
4590
5021
 
4591
5022
  return assignments;
@@ -4941,7 +5372,7 @@ function toolForkConversation(fromMessageId, branchName) {
4941
5372
  saveAgents(agents);
4942
5373
  }
4943
5374
  } finally { unlockAgentsFile(); }
4944
- } catch {}
5375
+ } catch (e) { log.warn("auto role rebalance failed:", e.message); }
4945
5376
 
4946
5377
  return { success: true, branch: branchName, forked_from: branches[branchName].forked_from, messages_copied: forkedHistory.length };
4947
5378
  }
@@ -4965,7 +5396,7 @@ function toolSwitchBranch(branchName) {
4965
5396
  saveAgents(agents);
4966
5397
  }
4967
5398
  } finally { unlockAgentsFile(); }
4968
- } catch {}
5399
+ } catch (e) { log.warn("quality lead failover failed:", e.message); }
4969
5400
 
4970
5401
  return { success: true, branch: branchName, message: `Switched to branch "${branchName}". Read offset reset.` };
4971
5402
  }
@@ -5029,6 +5460,71 @@ function getReviews() { return cachedRead('reviews', () => readJsonFile(REVIEWS_
5029
5460
  function getDeps() { return cachedRead('deps', () => readJsonFile(DEPS_FILE) || [], 2000); }
5030
5461
  function getRules() { return cachedRead('rules', () => readJsonFile(RULES_FILE) || [], 2000); }
5031
5462
 
5463
+ // --- Notification system ---
5464
+ const MAX_NOTIFICATIONS = 500;
5465
+
5466
+ function getNotifications() {
5467
+ return readJsonFile(NOTIFICATIONS_FILE) || [];
5468
+ }
5469
+
5470
+ function saveNotifications(notifs) {
5471
+ // Prune to max cap
5472
+ if (notifs.length > MAX_NOTIFICATIONS) {
5473
+ notifs = notifs.slice(notifs.length - MAX_NOTIFICATIONS);
5474
+ }
5475
+ writeJsonFile(NOTIFICATIONS_FILE, notifs);
5476
+ }
5477
+
5478
+ function appendNotification(type, sourceAgent, summary, relatedId) {
5479
+ const notifs = getNotifications();
5480
+ notifs.push({
5481
+ id: 'notif_' + Date.now().toString(36) + Math.random().toString(36).slice(2, 6),
5482
+ type: type,
5483
+ source_agent: sourceAgent || registeredName || '__system__',
5484
+ related_id: relatedId || null,
5485
+ summary: summary,
5486
+ timestamp: new Date().toISOString(),
5487
+ read_by: [],
5488
+ });
5489
+ saveNotifications(notifs);
5490
+ }
5491
+
5492
+ function toolGetNotifications(since, type) {
5493
+ if (!registeredName) return { error: 'You must call register() first' };
5494
+ let notifs = getNotifications();
5495
+ // Filter unread for this agent
5496
+ notifs = notifs.filter(n => !n.read_by.includes(registeredName));
5497
+ if (since) {
5498
+ const sinceTs = new Date(since).getTime();
5499
+ notifs = notifs.filter(n => new Date(n.timestamp).getTime() > sinceTs);
5500
+ }
5501
+ if (type) {
5502
+ notifs = notifs.filter(n => n.type === type);
5503
+ }
5504
+ // Mark as read
5505
+ if (notifs.length > 0) {
5506
+ const allNotifs = getNotifications();
5507
+ const readIds = new Set(notifs.map(n => n.id));
5508
+ for (const n of allNotifs) {
5509
+ if (readIds.has(n.id) && !n.read_by.includes(registeredName)) {
5510
+ n.read_by.push(registeredName);
5511
+ }
5512
+ }
5513
+ saveNotifications(allNotifs);
5514
+ }
5515
+ return {
5516
+ count: notifs.length,
5517
+ notifications: notifs.map(n => ({
5518
+ id: n.id,
5519
+ type: n.type,
5520
+ source_agent: n.source_agent,
5521
+ related_id: n.related_id,
5522
+ summary: n.summary,
5523
+ timestamp: n.timestamp,
5524
+ })),
5525
+ };
5526
+ }
5527
+
5032
5528
  // --- Channel helpers ---
5033
5529
  const CHANNELS_FILE_PATH = path.join(DATA_DIR, 'channels.json');
5034
5530
 
@@ -5166,7 +5662,7 @@ function escalateBlockedTasks() {
5166
5662
  }
5167
5663
  }
5168
5664
  if (changed) saveTasks(tasks);
5169
- } catch {}
5665
+ } catch (e) { log.warn("watchdog check failed:", e.message); }
5170
5666
  }
5171
5667
 
5172
5668
  // Stand-up meetings: periodic team check-ins triggered by heartbeat
@@ -5207,7 +5703,27 @@ function triggerStandupIfDue() {
5207
5703
  summary += ' Each agent: report what you did, what\'s blocked, what\'s next. Then call listen_group().';
5208
5704
 
5209
5705
  broadcastSystemMessage(summary, registeredName);
5210
- } catch {}
5706
+ } catch (e) { log.warn("standup trigger failed:", e.message); }
5707
+ }
5708
+
5709
+ // --- Agent status change detection (heartbeat-driven) ---
5710
+ const _prevAgentAlive = {};
5711
+ function detectAgentStatusChanges(agents) {
5712
+ for (const [name, info] of Object.entries(agents)) {
5713
+ if (name === registeredName) continue;
5714
+ const alive = isPidAlive(info.pid, info.last_activity);
5715
+ const wasAlive = _prevAgentAlive[name];
5716
+ if (wasAlive !== undefined && wasAlive !== alive) {
5717
+ if (!alive) {
5718
+ broadcastSystemMessage(`[STATUS] ${name} is unreachable`, name);
5719
+ appendNotification('agent_offline', name, `${name} went offline`, null);
5720
+ } else {
5721
+ broadcastSystemMessage(`[STATUS] ${name} is back online`, null);
5722
+ appendNotification('agent_online', name, `${name} came back online`, null);
5723
+ }
5724
+ }
5725
+ _prevAgentAlive[name] = alive;
5726
+ }
5211
5727
  }
5212
5728
 
5213
5729
  // Auto-recovery: snapshot dead agent state before cleanup
@@ -5249,7 +5765,7 @@ function snapshotDeadAgents(agents) {
5249
5765
  kb_entries_written: kbKeysWritten,
5250
5766
  });
5251
5767
  }
5252
- } catch {}
5768
+ } catch (e) { log.warn("dead agent snapshot failed:", e.message); }
5253
5769
 
5254
5770
  // Quality Lead instant failover: if dead agent was Quality Lead, promote replacement immediately
5255
5771
  try {
@@ -5301,7 +5817,7 @@ function snapshotDeadAgents(agents) {
5301
5817
  broadcastSystemMessage(`[MONITOR FAILOVER] ${name} (Monitor) went offline. ${newMonitor} has been auto-promoted.`, newMonitor);
5302
5818
  }
5303
5819
  }
5304
- } catch {}
5820
+ } catch (e) { log.warn("monitor failover failed:", e.message); }
5305
5821
  }
5306
5822
  }
5307
5823
 
@@ -6118,6 +6634,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
6118
6634
  type: 'string',
6119
6635
  description: 'AI provider/CLI name (e.g. "Claude", "OpenAI", "Gemini"). Shown in dashboard.',
6120
6636
  },
6637
+ skills: {
6638
+ type: 'array',
6639
+ items: { type: 'string' },
6640
+ description: 'Skills like "python", "testing", "frontend", "design". Used for smart task routing.',
6641
+ },
6121
6642
  },
6122
6643
  required: ['name'],
6123
6644
  },
@@ -6152,6 +6673,11 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
6152
6673
  type: 'string',
6153
6674
  description: 'Channel to send to (optional — omit for #general). Use join_channel() first to create channels.',
6154
6675
  },
6676
+ priority: {
6677
+ type: 'string',
6678
+ enum: ['critical', 'normal', 'low'],
6679
+ description: 'Message priority (optional — auto-classified if omitted). Critical messages are delivered first and retained longer.',
6680
+ },
6155
6681
  },
6156
6682
  required: ['content'],
6157
6683
  },
@@ -6226,6 +6752,40 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
6226
6752
  },
6227
6753
  },
6228
6754
  },
6755
+ {
6756
+ name: 'consume_messages',
6757
+ description: 'Non-blocking check that returns ALL unconsumed messages with full content AND marks them as consumed. Unlike check_messages (peek-only) or listen (blocking), this is a one-shot "grab everything and mark it read" call. Ideal for agents that need to process a batch of messages without blocking.',
6758
+ inputSchema: {
6759
+ type: 'object',
6760
+ properties: {
6761
+ from: {
6762
+ type: 'string',
6763
+ description: 'Only consume messages from this specific agent (optional)',
6764
+ },
6765
+ limit: {
6766
+ type: 'number',
6767
+ description: 'Max number of messages to consume (default: all)',
6768
+ },
6769
+ },
6770
+ },
6771
+ },
6772
+ {
6773
+ name: 'get_notifications',
6774
+ description: 'Get unread notifications (task completions, workflow advances, agent status changes). Returns and marks as read. Non-blocking — use this instead of listen() when you need a quick status update without waiting.',
6775
+ inputSchema: {
6776
+ type: 'object',
6777
+ properties: {
6778
+ since: {
6779
+ type: 'string',
6780
+ description: 'Only return notifications after this ISO timestamp (optional)',
6781
+ },
6782
+ type: {
6783
+ type: 'string',
6784
+ description: 'Filter by type: task_done, workflow_advanced, agent_online, agent_offline, approval_needed (optional)',
6785
+ },
6786
+ },
6787
+ },
6788
+ },
6229
6789
  {
6230
6790
  name: 'ack_message',
6231
6791
  description: 'Acknowledge that you have processed a message. Lets the sender verify delivery via get_history.',
@@ -6454,11 +7014,13 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
6454
7014
  },
6455
7015
  {
6456
7016
  name: 'workflow_status',
6457
- description: 'Get status of a specific workflow or all workflows. Shows step progress and completion percentage.',
7017
+ description: 'Get status of a specific workflow or all workflows. Shows step progress, checkpoints, and completion percentage. Use action="rollback" to rollback to a checkpoint.',
6458
7018
  inputSchema: {
6459
7019
  type: 'object',
6460
7020
  properties: {
6461
7021
  workflow_id: { type: 'string', description: 'Workflow ID (optional — omit for all workflows)' },
7022
+ action: { type: 'string', enum: ['status', 'rollback'], description: 'Action (default: status)' },
7023
+ checkpoint_index: { type: 'number', description: 'Checkpoint index to rollback to (for rollback action)' },
6462
7024
  },
6463
7025
  },
6464
7026
  },
@@ -6804,13 +7366,13 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
6804
7366
 
6805
7367
  switch (name) {
6806
7368
  case 'register':
6807
- result = toolRegister(args.name, args?.provider);
7369
+ result = toolRegister(args.name, args?.provider, args?.skills);
6808
7370
  break;
6809
7371
  case 'list_agents':
6810
7372
  result = toolListAgents();
6811
7373
  break;
6812
7374
  case 'send_message':
6813
- result = await toolSendMessage(args.content, args?.to, args?.reply_to, args?.channel);
7375
+ result = await toolSendMessage(args.content, args?.to, args?.reply_to, args?.channel, args?.priority);
6814
7376
  break;
6815
7377
  case 'wait_for_reply':
6816
7378
  result = await toolWaitForReply(args?.timeout_seconds, args?.from);
@@ -6827,6 +7389,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
6827
7389
  case 'check_messages':
6828
7390
  result = toolCheckMessages(args?.from);
6829
7391
  break;
7392
+ case 'consume_messages':
7393
+ result = toolConsumeMessages(args?.from, args?.limit);
7394
+ break;
7395
+ case 'get_notifications':
7396
+ result = toolGetNotifications(args?.since, args?.type);
7397
+ break;
6830
7398
  case 'ack_message':
6831
7399
  result = toolAckMessage(args.message_id);
6832
7400
  break;
@@ -6876,7 +7444,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
6876
7444
  result = toolAdvanceWorkflow(args.workflow_id, args?.notes);
6877
7445
  break;
6878
7446
  case 'workflow_status':
6879
- result = toolWorkflowStatus(args?.workflow_id);
7447
+ result = toolWorkflowStatus(args?.workflow_id, args?.action, args?.checkpoint_index);
6880
7448
  break;
6881
7449
  case 'fork_conversation':
6882
7450
  result = toolForkConversation(args?.from_message_id, args.branch_name);
@@ -7028,7 +7596,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7028
7596
 
7029
7597
  // Global hook: on non-listen tools, check for pending messages and nudge with escalating urgency
7030
7598
  // Enhanced nudge: includes sender names, addressed count, and message preview
7031
- const listenTools = ['listen', 'listen_group', 'listen_codex', 'wait_for_reply', 'check_messages'];
7599
+ const listenTools = ['listen', 'listen_group', 'listen_codex', 'wait_for_reply', 'check_messages', 'consume_messages'];
7032
7600
  if (registeredName && !listenTools.includes(name) && (isGroupMode() || isManagedMode())) {
7033
7601
  try {
7034
7602
  const pending = getUnconsumedMessages(registeredName);
@@ -7064,7 +7632,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7064
7632
  result._nudge = `${pending.length} messages waiting${addressedHint}: ${senderSummary}. Latest: "${preview}...". Call listen_group().`;
7065
7633
  }
7066
7634
  }
7067
- } catch {}
7635
+ } catch (e) { log.debug("nudge detection failed:", e.message); }
7068
7636
  }
7069
7637
 
7070
7638
  // Global hook: reputation tracking
@@ -7094,6 +7662,35 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
7094
7662
  try { autoCompress(); } catch (e) { log.debug('auto-compress failed:', e.message); }
7095
7663
  }
7096
7664
 
7665
+ // Coordinator mode hint: inject into every tool response for lead/manager/coordinator agents
7666
+ if (registeredName && typeof result === 'object' && result !== null) {
7667
+ try {
7668
+ const prof = getProfiles()[registeredName];
7669
+ const role = prof && prof.role ? prof.role.toLowerCase() : '';
7670
+ if (role === 'lead' || role === 'manager' || role === 'coordinator') {
7671
+ const coordMode = getConfig().coordinator_mode || 'responsive';
7672
+ result.coordinator_mode = coordMode;
7673
+ result.coordinator_hint = coordMode === 'responsive'
7674
+ ? 'MODE: Stay with me — do NOT call listen(). Use consume_messages/workflow_status between human interactions.'
7675
+ : 'MODE: Run autonomously — use listen() to wait for agent results.';
7676
+ }
7677
+ } catch (e) { log.debug('coordinator mode hint failed:', e.message); }
7678
+ }
7679
+
7680
+ // Unread message hint: check if agent has pending messages on every tool call
7681
+ // This ensures agents see messages even when they forget to call listen()
7682
+ if (registeredName && typeof result === 'object' && result !== null && !listenTools.includes(name)) {
7683
+ try {
7684
+ const unread = getUnconsumedMessages(registeredName);
7685
+ if (unread.length > 0) {
7686
+ const latest = unread[unread.length - 1];
7687
+ result.unread_messages = unread.length;
7688
+ result.unread_preview = `${latest.from}: "${latest.content.substring(0, 100).replace(/\n/g, ' ')}"`;
7689
+ result.unread_action = `You have ${unread.length} unread message(s). Call listen() to receive them.`;
7690
+ }
7691
+ } catch (e) { log.debug('unread message hint failed:', e.message); }
7692
+ }
7693
+
7097
7694
  return {
7098
7695
  content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
7099
7696
  };
@@ -7161,14 +7758,141 @@ async function main() {
7161
7758
  console.error('Fix: Run "npx neohive doctor" to diagnose the issue.');
7162
7759
  process.exit(1);
7163
7760
  }
7164
- try {
7165
- const transport = new StdioServerTransport();
7166
- await server.connect(transport);
7167
- console.error('Neohive MCP server v6.0.0 running (66 tools)');
7168
- } catch (e) {
7169
- console.error('ERROR: MCP server failed to start: ' + e.message);
7170
- console.error('Fix: Run "npx neohive doctor" to check your setup.');
7171
- process.exit(1);
7761
+
7762
+ // HTTP persistent server mode: --http flag or NEOHIVE_TRANSPORT=http
7763
+ const useHttp = process.argv.includes('--http') || process.env.NEOHIVE_TRANSPORT === 'http';
7764
+
7765
+ if (useHttp) {
7766
+ try {
7767
+ const http = require('http');
7768
+ const { randomUUID } = require('crypto');
7769
+ const { StreamableHTTPServerTransport } = require('@modelcontextprotocol/sdk/server/streamableHttp.js');
7770
+ const { isInitializeRequest } = require('@modelcontextprotocol/sdk/types.js');
7771
+
7772
+ const PORT = parseInt(process.env.NEOHIVE_SERVER_PORT || '4321', 10);
7773
+ const sessions = {};
7774
+
7775
+ const httpServer = http.createServer(async (req, res) => {
7776
+ // CORS headers for local dev
7777
+ res.setHeader('Access-Control-Allow-Origin', '*');
7778
+ res.setHeader('Access-Control-Allow-Methods', 'GET, POST, DELETE, OPTIONS');
7779
+ res.setHeader('Access-Control-Allow-Headers', 'Content-Type, mcp-session-id');
7780
+ res.setHeader('Access-Control-Expose-Headers', 'mcp-session-id');
7781
+
7782
+ if (req.method === 'OPTIONS') {
7783
+ res.writeHead(204);
7784
+ res.end();
7785
+ return;
7786
+ }
7787
+
7788
+ // Health check endpoint
7789
+ if (req.url === '/health') {
7790
+ res.writeHead(200, { 'Content-Type': 'application/json' });
7791
+ res.end(JSON.stringify({ status: 'ok', sessions: Object.keys(sessions).length }));
7792
+ return;
7793
+ }
7794
+
7795
+ if (req.url === '/mcp') {
7796
+ if (req.method === 'POST') {
7797
+ // Parse JSON body
7798
+ let body = '';
7799
+ for await (const chunk of req) body += chunk;
7800
+ let parsed;
7801
+ try { parsed = JSON.parse(body); } catch {
7802
+ res.writeHead(400, { 'Content-Type': 'application/json' });
7803
+ res.end(JSON.stringify({ jsonrpc: '2.0', error: { code: -32700, message: 'Parse error' }, id: null }));
7804
+ return;
7805
+ }
7806
+
7807
+ const sessionId = req.headers['mcp-session-id'];
7808
+
7809
+ if (sessionId && sessions[sessionId]) {
7810
+ // Existing session — route to its transport
7811
+ await sessions[sessionId].transport.handleRequest(req, res, parsed);
7812
+ } else if (!sessionId && isInitializeRequest(parsed)) {
7813
+ // New session initialization
7814
+ const transport = new StreamableHTTPServerTransport({
7815
+ sessionIdGenerator: () => randomUUID(),
7816
+ onsessioninitialized: (sid) => {
7817
+ sessions[sid] = { transport, createdAt: Date.now() };
7818
+ console.error(`[HTTP] Session created: ${sid}`);
7819
+ },
7820
+ });
7821
+
7822
+ transport.onclose = () => {
7823
+ const sid = transport.sessionId;
7824
+ if (sid && sessions[sid]) {
7825
+ delete sessions[sid];
7826
+ console.error(`[HTTP] Session closed: ${sid}`);
7827
+ }
7828
+ };
7829
+
7830
+ await server.connect(transport);
7831
+ await transport.handleRequest(req, res, parsed);
7832
+ } else {
7833
+ res.writeHead(400, { 'Content-Type': 'application/json' });
7834
+ res.end(JSON.stringify({ jsonrpc: '2.0', error: { code: -32000, message: 'Bad Request: No valid session ID' }, id: null }));
7835
+ }
7836
+ } else if (req.method === 'GET') {
7837
+ // SSE stream for server-initiated notifications
7838
+ const sessionId = req.headers['mcp-session-id'];
7839
+ if (sessionId && sessions[sessionId]) {
7840
+ await sessions[sessionId].transport.handleRequest(req, res);
7841
+ } else {
7842
+ res.writeHead(400, { 'Content-Type': 'application/json' });
7843
+ res.end(JSON.stringify({ error: 'Missing or invalid session ID' }));
7844
+ }
7845
+ } else if (req.method === 'DELETE') {
7846
+ // Session termination
7847
+ const sessionId = req.headers['mcp-session-id'];
7848
+ if (sessionId && sessions[sessionId]) {
7849
+ await sessions[sessionId].transport.close();
7850
+ delete sessions[sessionId];
7851
+ res.writeHead(200, { 'Content-Type': 'application/json' });
7852
+ res.end(JSON.stringify({ success: true }));
7853
+ } else {
7854
+ res.writeHead(404, { 'Content-Type': 'application/json' });
7855
+ res.end(JSON.stringify({ error: 'Session not found' }));
7856
+ }
7857
+ } else {
7858
+ res.writeHead(405, { Allow: 'GET, POST, DELETE' });
7859
+ res.end('Method Not Allowed');
7860
+ }
7861
+ } else {
7862
+ res.writeHead(404);
7863
+ res.end('Not Found');
7864
+ }
7865
+ });
7866
+
7867
+ httpServer.listen(PORT, () => {
7868
+ console.error(`Neohive MCP server v6.0.0 running in HTTP mode on port ${PORT}`);
7869
+ console.error(`Endpoint: http://localhost:${PORT}/mcp`);
7870
+ console.error(`Health: http://localhost:${PORT}/health`);
7871
+ });
7872
+
7873
+ // Graceful shutdown
7874
+ process.on('SIGINT', () => {
7875
+ console.error('\n[HTTP] Shutting down...');
7876
+ for (const sid of Object.keys(sessions)) {
7877
+ try { sessions[sid].transport.close(); } catch {}
7878
+ }
7879
+ httpServer.close(() => process.exit(0));
7880
+ });
7881
+ } catch (e) {
7882
+ console.error('ERROR: HTTP server failed to start: ' + e.message);
7883
+ console.error('Fix: Ensure @modelcontextprotocol/sdk is up to date.');
7884
+ process.exit(1);
7885
+ }
7886
+ } else {
7887
+ // Default: stdio transport (one agent per process)
7888
+ try {
7889
+ const transport = new StdioServerTransport();
7890
+ await server.connect(transport);
7891
+ } catch (e) {
7892
+ console.error('ERROR: MCP server failed to start: ' + e.message);
7893
+ console.error('Fix: Run "npx neohive doctor" to check your setup.');
7894
+ process.exit(1);
7895
+ }
7172
7896
  }
7173
7897
  }
7174
7898