openclaw-node-harness 2.0.4 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/README.md +646 -3
  2. package/bin/hyperagent.mjs +419 -0
  3. package/bin/lane-watchdog.js +23 -2
  4. package/bin/mesh-agent.js +439 -28
  5. package/bin/mesh-bridge.js +69 -3
  6. package/bin/mesh-health-publisher.js +41 -1
  7. package/bin/mesh-task-daemon.js +821 -26
  8. package/bin/mesh.js +411 -20
  9. package/config/claude-settings.json +95 -0
  10. package/config/daemon.json.template +2 -1
  11. package/config/git-hooks/pre-commit +13 -0
  12. package/config/git-hooks/pre-push +12 -0
  13. package/config/harness-rules.json +174 -0
  14. package/config/plan-templates/team-bugfix.yaml +52 -0
  15. package/config/plan-templates/team-deploy.yaml +50 -0
  16. package/config/plan-templates/team-feature.yaml +71 -0
  17. package/config/roles/qa-engineer.yaml +36 -0
  18. package/config/roles/solidity-dev.yaml +51 -0
  19. package/config/roles/tech-architect.yaml +36 -0
  20. package/config/rules/framework/solidity.md +22 -0
  21. package/config/rules/framework/typescript.md +21 -0
  22. package/config/rules/framework/unity.md +21 -0
  23. package/config/rules/universal/design-docs.md +18 -0
  24. package/config/rules/universal/git-hygiene.md +18 -0
  25. package/config/rules/universal/security.md +19 -0
  26. package/config/rules/universal/test-standards.md +19 -0
  27. package/identity/DELEGATION.md +6 -6
  28. package/install.sh +296 -10
  29. package/lib/agent-activity.js +2 -2
  30. package/lib/circling-parser.js +119 -0
  31. package/lib/exec-safety.js +105 -0
  32. package/lib/hyperagent-store.mjs +652 -0
  33. package/lib/kanban-io.js +24 -31
  34. package/lib/llm-providers.js +16 -0
  35. package/lib/mcp-knowledge/bench.mjs +118 -0
  36. package/lib/mcp-knowledge/core.mjs +530 -0
  37. package/lib/mcp-knowledge/package.json +25 -0
  38. package/lib/mcp-knowledge/server.mjs +252 -0
  39. package/lib/mcp-knowledge/test.mjs +802 -0
  40. package/lib/memory-budget.mjs +261 -0
  41. package/lib/mesh-collab.js +483 -165
  42. package/lib/mesh-harness.js +427 -0
  43. package/lib/mesh-plans.js +79 -50
  44. package/lib/mesh-tasks.js +132 -49
  45. package/lib/nats-resolve.js +4 -4
  46. package/lib/plan-templates.js +226 -0
  47. package/lib/pre-compression-flush.mjs +322 -0
  48. package/lib/role-loader.js +292 -0
  49. package/lib/rule-loader.js +358 -0
  50. package/lib/session-store.mjs +461 -0
  51. package/lib/transcript-parser.mjs +292 -0
  52. package/mission-control/drizzle/soul_schema_update.sql +29 -0
  53. package/mission-control/drizzle.config.ts +1 -4
  54. package/mission-control/package-lock.json +1571 -83
  55. package/mission-control/package.json +6 -2
  56. package/mission-control/scripts/gen-chronology.js +3 -3
  57. package/mission-control/scripts/import-pipeline-v2.js +0 -16
  58. package/mission-control/scripts/import-pipeline.js +0 -15
  59. package/mission-control/src/app/api/cowork/clusters/[id]/members/route.ts +117 -0
  60. package/mission-control/src/app/api/cowork/clusters/[id]/route.ts +84 -0
  61. package/mission-control/src/app/api/cowork/clusters/route.ts +141 -0
  62. package/mission-control/src/app/api/cowork/dispatch/route.ts +128 -0
  63. package/mission-control/src/app/api/cowork/events/route.ts +65 -0
  64. package/mission-control/src/app/api/cowork/intervene/route.ts +259 -0
  65. package/mission-control/src/app/api/cowork/sessions/[id]/route.ts +37 -0
  66. package/mission-control/src/app/api/cowork/sessions/route.ts +64 -0
  67. package/mission-control/src/app/api/diagnostics/route.ts +97 -0
  68. package/mission-control/src/app/api/diagnostics/test-runner/route.ts +990 -0
  69. package/mission-control/src/app/api/memory/search/route.ts +6 -3
  70. package/mission-control/src/app/api/mesh/events/route.ts +95 -19
  71. package/mission-control/src/app/api/mesh/identity/route.ts +11 -0
  72. package/mission-control/src/app/api/mesh/tasks/[id]/route.ts +92 -0
  73. package/mission-control/src/app/api/mesh/tasks/route.ts +91 -0
  74. package/mission-control/src/app/api/souls/[id]/evolution/route.ts +21 -5
  75. package/mission-control/src/app/api/souls/[id]/prompt/route.ts +7 -1
  76. package/mission-control/src/app/api/souls/[id]/propagate/route.ts +14 -2
  77. package/mission-control/src/app/api/tasks/[id]/handoff/route.ts +8 -2
  78. package/mission-control/src/app/api/tasks/[id]/route.ts +90 -4
  79. package/mission-control/src/app/api/tasks/route.ts +21 -30
  80. package/mission-control/src/app/api/workspace/read/route.ts +11 -0
  81. package/mission-control/src/app/cowork/page.tsx +261 -0
  82. package/mission-control/src/app/diagnostics/page.tsx +385 -0
  83. package/mission-control/src/app/graph/page.tsx +26 -0
  84. package/mission-control/src/app/memory/page.tsx +1 -1
  85. package/mission-control/src/app/obsidian/page.tsx +36 -6
  86. package/mission-control/src/app/roadmap/page.tsx +24 -0
  87. package/mission-control/src/app/souls/page.tsx +2 -2
  88. package/mission-control/src/components/board/execution-config.tsx +431 -0
  89. package/mission-control/src/components/board/kanban-board.tsx +75 -9
  90. package/mission-control/src/components/board/kanban-column.tsx +135 -19
  91. package/mission-control/src/components/board/task-card.tsx +55 -2
  92. package/mission-control/src/components/board/unified-task-dialog.tsx +82 -4
  93. package/mission-control/src/components/cowork/cluster-card.tsx +176 -0
  94. package/mission-control/src/components/cowork/create-cluster-dialog.tsx +251 -0
  95. package/mission-control/src/components/cowork/dispatch-form.tsx +423 -0
  96. package/mission-control/src/components/cowork/role-picker.tsx +102 -0
  97. package/mission-control/src/components/cowork/session-card.tsx +284 -0
  98. package/mission-control/src/components/layout/sidebar.tsx +39 -2
  99. package/mission-control/src/lib/__tests__/daily-log.test.ts +82 -0
  100. package/mission-control/src/lib/__tests__/memory-md.test.ts +87 -0
  101. package/mission-control/src/lib/__tests__/mesh-kv-sync.test.ts +465 -0
  102. package/mission-control/src/lib/__tests__/mocks/mock-kv.ts +131 -0
  103. package/mission-control/src/lib/__tests__/status-kanban.test.ts +46 -0
  104. package/mission-control/src/lib/__tests__/task-markdown.test.ts +188 -0
  105. package/mission-control/src/lib/__tests__/wikilinks.test.ts +175 -0
  106. package/mission-control/src/lib/config.ts +67 -0
  107. package/mission-control/src/lib/db/index.ts +85 -1
  108. package/mission-control/src/lib/db/schema.ts +61 -3
  109. package/mission-control/src/lib/hooks.ts +309 -0
  110. package/mission-control/src/lib/memory/entities.ts +3 -2
  111. package/mission-control/src/lib/memory/extract.ts +2 -1
  112. package/mission-control/src/lib/memory/retrieval.ts +3 -2
  113. package/mission-control/src/lib/nats.ts +66 -1
  114. package/mission-control/src/lib/parsers/task-markdown.ts +52 -2
  115. package/mission-control/src/lib/parsers/transcript.ts +4 -4
  116. package/mission-control/src/lib/scheduler.ts +12 -11
  117. package/mission-control/src/lib/sync/mesh-kv.ts +279 -0
  118. package/mission-control/src/lib/sync/tasks.ts +23 -1
  119. package/mission-control/src/lib/task-id.ts +32 -0
  120. package/mission-control/src/lib/tts/index.ts +33 -9
  121. package/mission-control/src/middleware.ts +82 -0
  122. package/mission-control/tsconfig.json +2 -1
  123. package/mission-control/vitest.config.ts +14 -0
  124. package/package.json +15 -2
  125. package/services/launchd/ai.openclaw.log-rotate.plist +11 -0
  126. package/services/launchd/ai.openclaw.mesh-deploy-listener.plist +4 -0
  127. package/services/launchd/ai.openclaw.mesh-health-publisher.plist +4 -0
  128. package/services/launchd/ai.openclaw.mission-control.plist +1 -1
  129. package/services/service-manifest.json +1 -1
  130. package/skills/cc-godmode/references/agents.md +8 -8
  131. package/uninstall.sh +37 -9
  132. package/workspace-bin/memory-daemon.mjs +199 -5
  133. package/workspace-bin/session-search.mjs +204 -0
  134. package/workspace-bin/web-fetch.mjs +65 -0
@@ -35,16 +35,29 @@ const { connect, StringCodec } = require('nats');
35
35
  const { createTask, TaskStore, TASK_STATUS, KV_BUCKET } = require('../lib/mesh-tasks');
36
36
  const { createSession, CollabStore, COLLAB_STATUS, COLLAB_KV_BUCKET } = require('../lib/mesh-collab');
37
37
  const { createPlan, autoRoutePlan, PlanStore, PLAN_STATUS, SUBTASK_STATUS, PLANS_KV_BUCKET } = require('../lib/mesh-plans');
38
+ const { findRole, findRoleByScope, validateRequiredOutputs, checkForbiddenPatterns } = require('../lib/role-loader');
38
39
  const os = require('os');
40
+ const path = require('path');
41
+
42
+ // Role search directories
43
+ const ROLE_DIRS = [
44
+ path.join(process.env.HOME || '/root', '.openclaw', 'roles'),
45
+ path.join(__dirname, '..', 'config', 'roles'),
46
+ ];
39
47
 
40
48
  const sc = StringCodec();
41
49
  const { NATS_URL } = require('../lib/nats-resolve');
42
50
  const BUDGET_CHECK_INTERVAL = 30000; // 30s
43
51
  const STALL_MINUTES = parseInt(process.env.MESH_STALL_MINUTES || '5'); // no heartbeat for this long → stalled
52
+ const CIRCLING_STEP_TIMEOUT_MS = parseInt(process.env.MESH_CIRCLING_STEP_TIMEOUT_MS || String(10 * 60 * 1000)); // 10 min default
44
53
  const NODE_ID = os.hostname().toLowerCase().replace(/[^a-z0-9-]/g, '-');
45
54
 
46
55
  let nc, store, collabStore, planStore;
47
56
 
57
+ // Active step timers for circling sessions — keyed by sessionId.
58
+ // Cleared when the step completes normally; fires degrade logic if step hangs.
59
+ const circlingStepTimers = new Map();
60
+
48
61
  // ── Logging ─────────────────────────────────────────
49
62
 
50
63
  function log(msg) {
@@ -223,15 +236,90 @@ async function handleComplete(msg) {
223
236
  const { task_id, result } = parseRequest(msg);
224
237
  if (!task_id) return respondError(msg, 'task_id is required');
225
238
 
226
- const task = await store.markCompleted(task_id, result || { success: true });
227
- if (!task) return respondError(msg, `Task ${task_id} not found`);
239
+ // Determine if this task requires human review before completing.
240
+ // requires_review logic:
241
+ // - explicit true/false on task → honor it
242
+ // - null (default) → auto-compute:
243
+ // * mode: human → always (by definition)
244
+ // * mode: soul → always (creative/strategic work, no mechanical verification)
245
+ // * collab_mesh without metric → yes (peer review without mechanical check)
246
+ // * solo_mesh WITH metric → no (metric IS the verification)
247
+ // * solo_mesh WITHOUT metric → yes (no mechanical check = human must validate)
248
+ // * local → no (Daedalus/companion handles these interactively)
249
+ const existingTask = await store.get(task_id);
250
+ if (!existingTask) return respondError(msg, `Task ${task_id} not found`);
251
+
252
+ let needsReview = existingTask.requires_review;
253
+ if (needsReview === null || needsReview === undefined) {
254
+ const mode = existingTask.collaboration ? 'collab_mesh' : (existingTask.tags?.includes('soul') ? 'soul' : 'solo_mesh');
255
+ const hasMetric = !!existingTask.metric;
256
+
257
+ if (mode === 'soul' || existingTask.tags?.includes('human')) {
258
+ needsReview = true;
259
+ } else if (mode === 'collab_mesh' && !hasMetric) {
260
+ needsReview = true;
261
+ } else if (mode === 'solo_mesh' && !hasMetric) {
262
+ needsReview = true;
263
+ } else {
264
+ needsReview = false;
265
+ }
266
+ }
228
267
 
229
- const elapsed = task.started_at
230
- ? ((new Date(task.completed_at) - new Date(task.started_at)) / 60000).toFixed(1)
231
- : '?';
268
+ // Role-based post-completion validation — runs UNCONDITIONALLY on all tasks
269
+ // with a role, regardless of review status. Validation results are included
270
+ // in the pending_review metadata so human reviewers see structured checks.
271
+ let roleValidation = { passed: true, issues: [] };
272
+ if (existingTask.role) {
273
+ const role = findRole(existingTask.role, ROLE_DIRS);
274
+ if (role) {
275
+ const outputFiles = result?.artifacts || [];
276
+ const harnessFiles = (result?.harness?.violations || []).flatMap(v => v.files || []);
277
+ const allFiles = [...new Set([...outputFiles, ...harnessFiles])];
278
+
279
+ if (allFiles.length > 0) {
280
+ const reqResult = validateRequiredOutputs(role, allFiles, null);
281
+ if (!reqResult.passed) {
282
+ roleValidation.passed = false;
283
+ roleValidation.issues.push(...reqResult.failures.map(f => `[required_output] ${f.description}: ${f.detail}`));
284
+ }
285
+ }
232
286
 
233
- log(`COMPLETE ${task_id} in ${elapsed}m: ${result?.summary || 'no summary'}`);
234
- publishEvent('completed', task);
287
+ if (!roleValidation.passed) {
288
+ log(`ROLE VALIDATION FAILED for ${task_id} (role: ${role.id}): ${roleValidation.issues.length} issue(s)`);
289
+ for (const issue of roleValidation.issues) log(` - ${issue}`);
290
+ needsReview = true; // force review if validation failed on auto-complete path
291
+ } else {
292
+ log(`ROLE VALIDATION PASSED for ${task_id} (role: ${role.id})`);
293
+ }
294
+ }
295
+ }
296
+
297
+ let task;
298
+ if (needsReview) {
299
+ // Gate: task goes to pending_review instead of completed
300
+ // Include role validation results in the review metadata
301
+ const enrichedResult = {
302
+ ...(result || { success: true }),
303
+ role_validation: roleValidation,
304
+ };
305
+ task = await store.markPendingReview(task_id, enrichedResult);
306
+ const elapsed = task.started_at
307
+ ? ((new Date(task.review_requested_at) - new Date(task.started_at)) / 60000).toFixed(1)
308
+ : '?';
309
+ log(`PENDING REVIEW ${task_id} in ${elapsed}m: ${result?.summary || 'no summary'}`);
310
+ log(` Approve: mesh task approve ${task_id} | Reject: mesh task reject ${task_id} --reason "..."`);
311
+ publishEvent('pending_review', task);
312
+ // Update plan subtask status so `mesh plan show` reflects pending_review
313
+ await updatePlanSubtaskStatus(task_id, 'pending_review');
314
+ // Do NOT advance plan wave — task is not yet "completed" for dependency purposes
315
+ } else {
316
+ task = await store.markCompleted(task_id, result || { success: true });
317
+ const elapsed = task.started_at
318
+ ? ((new Date(task.completed_at) - new Date(task.started_at)) / 60000).toFixed(1)
319
+ : '?';
320
+ log(`COMPLETE ${task_id} in ${elapsed}m: ${result?.summary || 'no summary'}`);
321
+ publishEvent('completed', task);
322
+ }
235
323
 
236
324
  // NOTE: no cleanupTaskCollabSession here — collab tasks complete via
237
325
  // evaluateRound → markCompleted on the session, then store.markCompleted
@@ -241,8 +329,10 @@ async function handleComplete(msg) {
241
329
  collabStore.clearAuditErrorCount(task.collab_session_id);
242
330
  }
243
331
 
244
- // Check if this task belongs to a plan
245
- await checkPlanProgress(task_id, 'completed');
332
+ // Only advance plan if actually completed (not pending_review)
333
+ if (task.status === TASK_STATUS.COMPLETED) {
334
+ await checkPlanProgress(task_id, 'completed');
335
+ }
246
336
 
247
337
  respond(msg, task);
248
338
  }
@@ -262,10 +352,52 @@ async function handleFail(msg) {
262
352
  publishEvent('failed', task);
263
353
  await cleanupTaskCollabSession(task, `Parent task ${task_id} failed: ${reason}`);
264
354
 
265
- // Check if this task belongs to a plan
355
// Phase F: Escalation — if the task has a role with escalation mapping,
// create an escalation task before cascading failure through the plan.
// `escalationTaskId` keeps the ID of the task that was actually stored, so the
// reply reports that task instead of rebuilding an ID from a later timestamp.
let escalated = false;
let escalationTaskId = null;
if (task.role) {
  const role = findRole(task.role, ROLE_DIRS);
  if (role && role.escalation) {
    // Determine failure type for escalation routing. Only string reasons are
    // inspected; any other value routes as a metric failure rather than
    // throwing here, after the 'failed' event has already been published.
    const reasonText = typeof reason === 'string' ? reason : '';
    let failureType = 'on_metric_failure';
    if (reasonText.includes('Budget exceeded')) failureType = 'on_budget_exceeded';
    if (reasonText.includes('scope')) failureType = 'on_scope_violation';

    const escalationTarget = role.escalation[failureType];
    if (escalationTarget) {
      const escalationTask = createTask({
        task_id: `ESC-${task_id}-${Date.now()}`,
        title: `[Escalation] ${task.title}`,
        description: [
          `Escalated from ${task_id} (role: ${task.role}, failure: ${failureType}).`,
          `Original reason: ${reason}`,
          '',
          `Original description: ${task.description}`,
        ].join('\n'),
        budget_minutes: Math.ceil(task.budget_minutes * 1.5), // 50% more budget
        metric: task.metric,
        scope: task.scope,
        success_criteria: task.success_criteria,
        // A 'human' target has no role to load; it is routed through the review gate instead.
        role: escalationTarget === 'human' ? null : escalationTarget,
        requires_review: escalationTarget === 'human' ? true : null,
        tags: [...(task.tags || []), 'escalation', `escalated_from:${task_id}`],
        plan_id: task.plan_id,
        subtask_id: task.subtask_id, // Wire back to original plan subtask for recovery
      });
      await store.put(escalationTask);
      publishEvent('submitted', escalationTask);
      log(`ESCALATED ${task_id} → ${escalationTask.task_id} (target role: ${escalationTarget})`);
      escalated = true;
      escalationTaskId = escalationTask.task_id;
    }
  }
}

// Check if this task belongs to a plan (escalation doesn't block cascade —
// the escalation task is independent. If the plan has abort_on_critical_fail
// and this was critical, it still aborts. The escalation is a parallel attempt.)
await checkPlanProgress(task_id, 'failed');

respond(msg, { ...task, escalated, escalation_task_id: escalationTaskId });
269
401
  }
270
402
 
271
403
  /**
@@ -367,6 +499,44 @@ async function handleCancel(msg) {
367
499
  respond(msg, task);
368
500
  }
369
501
 
502
+ // ── Task Review (Approval Gate) ─────────────────────
503
+
504
/**
 * mesh.tasks.approve — a human reviewer signs off on a task that is waiting
 * in pending_review.
 *
 * On success the task is announced with a 'completed' event and plan progress
 * is evaluated, which was held back while the task awaited review.
 *
 * @param {object} msg - Incoming request message; its payload must carry `task_id`.
 * @returns {Promise<*>} Result of `respondError` on a rejected request, otherwise undefined.
 */
async function handleTaskApprove(msg) {
  const request = parseRequest(msg);
  const taskId = request.task_id;
  if (!taskId) {
    return respondError(msg, 'task_id is required');
  }

  // A falsy result is reported as "not found or not in pending_review status".
  const approvedTask = await store.markApproved(taskId);
  if (!approvedTask) {
    return respondError(msg, `Task ${taskId} not found or not in pending_review status`);
  }

  log(`APPROVED ${taskId}: human review passed`);
  publishEvent('completed', approvedTask);

  // The plan wave could not advance while the task sat in pending_review.
  await checkPlanProgress(taskId, 'completed');

  respond(msg, approvedTask);
}
523
+
524
/**
 * mesh.tasks.reject — a human reviewer turns down a task that is waiting in
 * pending_review.
 *
 * The rejection reason (or a default) is handed to the store, and a
 * 'rejected' event is published. Per the log line, the task is re-queued for
 * another attempt.
 *
 * @param {object} msg - Incoming request message; payload carries `task_id` and optional `reason`.
 * @returns {Promise<*>} Result of `respondError` on a rejected request, otherwise undefined.
 */
async function handleTaskReject(msg) {
  const request = parseRequest(msg);
  const taskId = request.task_id;
  const rejectionReason = request.reason;
  if (!taskId) {
    return respondError(msg, 'task_id is required');
  }

  // The stored default and the logged default are intentionally different strings.
  const storedReason = rejectionReason || 'Rejected by reviewer';
  const rejectedTask = await store.markRejected(taskId, storedReason);
  if (!rejectedTask) {
    return respondError(msg, `Task ${taskId} not found or not in pending_review status`);
  }

  const loggedReason = rejectionReason || 'no reason';
  log(`REJECTED ${taskId}: ${loggedReason} — re-queued for retry`);
  publishEvent('rejected', rejectedTask);
  respond(msg, rejectedTask);
}
539
+
370
540
  // ── Budget Enforcement + Stall Detection ────────────
371
541
 
372
542
  async function detectStalls() {
@@ -435,6 +605,9 @@ async function detectStalls() {
435
605
  );
436
606
  if (releasedTask) publishEvent('released', releasedTask);
437
607
 
608
+ // Update plan progress if this task belongs to a plan
609
+ await checkPlanProgress(task.task_id, 'failed');
610
+
438
611
  // Notify the agent's node (fire-and-forget)
439
612
  if (task.owner) {
440
613
  nc.publish(`mesh.agent.${task.owner}.stall`, sc.encode(JSON.stringify({
@@ -446,6 +619,31 @@ async function detectStalls() {
446
619
  }
447
620
  }
448
621
 
622
/**
 * Triage tasks sitting in the "proposed" state.
 *
 * Worker nodes write proposals straight into KV; this pass either promotes
 * each one to the queue or rejects it. A proposal is accepted only when it
 * carries both a title and an origin.
 *
 * @returns {Promise<void>}
 */
async function processProposals() {
  const proposals = await store.list({ status: TASK_STATUS.PROPOSED });

  for (const proposal of proposals) {
    const hasRequiredFields = Boolean(proposal.title && proposal.origin);

    if (hasRequiredFields) {
      // Accept: the proposal becomes a regular queued task.
      proposal.status = TASK_STATUS.QUEUED;
      await store.put(proposal);
      log(`ACCEPTED proposal ${proposal.task_id} from ${proposal.origin}: "${proposal.title}"`);
      publishEvent('submitted', proposal);
    } else {
      // Reject: record why, so the proposer can see what was missing.
      proposal.status = TASK_STATUS.REJECTED;
      proposal.result = { success: false, summary: 'Missing required fields (title, origin)' };
      await store.put(proposal);
      log(`REJECTED ${proposal.task_id}: missing required fields`);
      publishEvent('rejected', proposal);
    }
  }
}
646
+
449
647
  async function enforceBudgets() {
450
648
  const overBudget = await store.findOverBudget();
451
649
 
@@ -461,6 +659,19 @@ async function enforceBudgets() {
461
659
  );
462
660
  if (failedTask) publishEvent('failed', failedTask);
463
661
 
662
+ // Clean up any collab session for this task
663
+ if (collabStore && task.collab_session_id) {
664
+ try {
665
+ await collabStore.markAborted(task.collab_session_id, `Budget exceeded for task ${task.task_id}`);
666
+ log(`BUDGET → COLLAB: aborted session ${task.collab_session_id}`);
667
+ } catch (err) {
668
+ log(`BUDGET → COLLAB ERROR: ${err.message}`);
669
+ }
670
+ }
671
+
672
+ // Update plan progress if this task belongs to a plan
673
+ await checkPlanProgress(task.task_id, 'failed');
674
+
464
675
  // Publish notification so the agent knows
465
676
  nc.publish(`mesh.agent.${task.owner}.budget_exceeded`, sc.encode(JSON.stringify({
466
677
  task_id: task.task_id,
@@ -537,7 +748,23 @@ async function handleCollabJoin(msg) {
537
748
 
538
749
  // Check if recruiting should close → start first round
539
750
  if (collabStore.isRecruitingDone(session)) {
540
- await startCollabRound(session.session_id);
751
+ // Circling Strategy: assign worker_node_id before starting
752
+ if (session.mode === 'circling_strategy' && session.circling) {
753
+ const freshSession = await collabStore.get(session.session_id);
754
+ if (freshSession.circling && !freshSession.circling.worker_node_id) {
755
+ // Assign all role IDs at recruiting close — stable for the session lifetime.
756
+ const workerNode = freshSession.nodes.find(n => n.role === 'worker') || freshSession.nodes[0];
757
+ freshSession.circling.worker_node_id = workerNode.node_id;
758
+ const reviewers = freshSession.nodes.filter(n => n.node_id !== workerNode.node_id);
759
+ freshSession.circling.reviewerA_node_id = reviewers[0]?.node_id || null;
760
+ freshSession.circling.reviewerB_node_id = reviewers[1]?.node_id || null;
761
+ await collabStore.put(freshSession);
762
+ log(`CIRCLING: Roles assigned → Worker: ${workerNode.node_id}, RevA: ${reviewers[0]?.node_id}, RevB: ${reviewers[1]?.node_id}`);
763
+ }
764
+ await startCirclingStep(session.session_id);
765
+ } else {
766
+ await startCollabRound(session.session_id);
767
+ }
541
768
  }
542
769
 
543
770
  respond(msg, session);
@@ -561,6 +788,12 @@ async function handleCollabLeave(msg) {
561
788
  if (session.status === COLLAB_STATUS.ACTIVE && session.nodes.length < session.min_nodes) {
562
789
  await collabStore.markAborted(session_id, `Below min_nodes: ${session.nodes.length} < ${session.min_nodes}`);
563
790
  publishCollabEvent('aborted', session);
791
+ } else if (session.status === COLLAB_STATUS.ACTIVE) {
792
+ // Re-check if the round is now complete (removed node excluded from quorum)
793
+ const updated = await collabStore.get(session_id);
794
+ if (updated && collabStore.isRoundComplete(updated)) {
795
+ await evaluateRound(session_id);
796
+ }
564
797
  }
565
798
 
566
799
  respond(msg, session);
@@ -636,11 +869,74 @@ async function handleCollabReflect(msg) {
636
869
  });
637
870
  publishCollabEvent('reflection_received', session);
638
871
 
872
+ // Circling Strategy: handle two-step barrier, artifact storage, directed handoffs
873
+ if (session.mode === 'circling_strategy' && session.circling) {
874
+ // Store circling artifacts
875
+ if (reflection.circling_artifacts && reflection.circling_artifacts.length > 0) {
876
+ const { current_subround, current_step } = session.circling;
877
+ const isWorker = reflection.node_id === session.circling.worker_node_id;
878
+ // Use stored reviewer IDs for stable identity (falls back to array-index if not set)
879
+ let nodeRole;
880
+ if (isWorker) {
881
+ nodeRole = 'worker';
882
+ } else if (session.circling.reviewerA_node_id && session.circling.reviewerB_node_id) {
883
+ nodeRole = reflection.node_id === session.circling.reviewerA_node_id ? 'reviewerA' : 'reviewerB';
884
+ } else {
885
+ const reviewerNodes = session.nodes.filter(n => n.node_id !== session.circling.worker_node_id);
886
+ nodeRole = reviewerNodes[0]?.node_id === reflection.node_id ? 'reviewerA' : 'reviewerB';
887
+ }
888
+
889
+ for (const art of reflection.circling_artifacts) {
890
+ const key = `sr${current_subround}_step${current_step}_${nodeRole}_${art.type}`;
891
+ await collabStore.storeArtifact(session_id, key, art.content);
892
+ log(`CIRCLING ARTIFACT: ${key} stored (${(art.content || '').length} chars)`);
893
+ }
894
+ } else if (reflection.parse_failed) {
895
+ // Parse failure: record and check retry threshold.
896
+ // If a node consistently fails, the barrier still advances (the reflection counts)
897
+ // but downstream nodes get [UNAVAILABLE] placeholders. After 3 failures for the
898
+ // same node+step, log a critical warning — the only full recovery is the daemon's
899
+ // global stall timeout. See: mesh-collab.js recordArtifactFailure / getArtifactFailureCount
900
+ const failCount = await collabStore.recordArtifactFailure(session_id, reflection.node_id);
901
+ log(`CIRCLING PARSE FAILURE: ${reflection.node_id} in ${session_id} (attempt ${failCount})`);
902
+ await collabStore.appendAudit(session_id, 'artifact_parse_failed', {
903
+ node_id: reflection.node_id,
904
+ step: session.circling.current_step,
905
+ subround: session.circling.current_subround,
906
+ failure_count: failCount,
907
+ });
908
+ if (failCount >= 3) {
909
+ log(`CIRCLING CRITICAL: ${reflection.node_id} failed ${failCount}x at SR${session.circling.current_subround}/Step${session.circling.current_step} — no artifacts will be available for downstream nodes`);
910
+ }
911
+ } else {
912
+ // No artifacts but not a parse failure — unexpected
913
+ log(`CIRCLING WARNING: ${reflection.node_id} submitted reflection without artifacts in ${session_id}`);
914
+ }
915
+
916
+ // Check if current circling step is complete (all 3 nodes submitted)
917
+ const freshSession = await collabStore.get(session_id);
918
+ if (collabStore.isCirclingStepComplete(freshSession)) {
919
+ clearCirclingStepTimer(session_id);
920
+ const nextState = await collabStore.advanceCirclingStep(session_id);
921
+ if (!nextState) {
922
+ log(`CIRCLING ERROR: advanceCirclingStep returned null for ${session_id}`);
923
+ } else if (nextState.phase === 'complete') {
924
+ // Finalization done — complete the session
925
+ await completeCirclingSession(session_id);
926
+ } else if (nextState.needsGate) {
927
+ // Automation tier gate — wait for human approval
928
+ log(`CIRCLING GATE: ${session_id} SR${nextState.subround} — waiting for human approval (tier ${freshSession.circling.automation_tier})`);
929
+ publishCollabEvent('circling_gate', freshSession);
930
+ } else {
931
+ // Auto-advance to next step
932
+ await startCirclingStep(session_id);
933
+ }
934
+ }
639
935
  // Sequential mode: advance turn, notify next node or evaluate round
640
936
  // Parallel mode: check if all reflections are in → evaluate convergence
641
937
  // NOTE: Node.js single-threaded event loop prevents concurrent execution of this
642
938
  // handler — no mutex needed. advanceTurn() is safe without CAS here.
643
- if (session.mode === 'sequential') {
939
+ } else if (session.mode === 'sequential') {
644
940
  const nextNodeId = await collabStore.advanceTurn(session_id);
645
941
  if (nextNodeId) {
646
942
  // Notify only the next-turn node with accumulated intra-round intel
@@ -928,6 +1224,275 @@ async function evaluateRound(sessionId) {
928
1224
  }
929
1225
  }
930
1226
 
1227
+ // ── Circling Strategy Functions ──────────────────────
1228
+
1229
/**
 * Start a circling step: compile directed inputs and notify each node.
 * Called after advanceCirclingStep transitions the state machine.
 * Also creates a new round in the session (for reflection storage).
 *
 * Side effects, in order: stamps `circling.step_started_at` on the session and
 * persists it, starts a new round, writes a `circling_step_started` audit entry,
 * publishes the collab event, publishes one `.round` message per node, and arms
 * the in-memory step timeout for the session.
 *
 * @param {string} sessionId - ID of the collab session to start the step for.
 * @returns {Promise<void>} Resolves once all nodes are notified and the timer is armed.
 *   Returns early (no notifications, no timer) when the session is missing, is not a
 *   circling session, or startRound() yields nothing.
 */
async function startCirclingStep(sessionId) {
  const session = await collabStore.get(sessionId);
  if (!session || !session.circling) return;

  const { phase, current_subround, current_step } = session.circling;

  // Record step start time for timeout rehydration after daemon restart
  session.circling.step_started_at = new Date().toISOString();
  await collabStore.put(session);

  // Start a new round in the session for reflection storage
  // (each step gets its own round to keep reflections organized)
  const round = await collabStore.startRound(sessionId);
  if (!round) {
    log(`CIRCLING ERROR: startRound failed for ${sessionId} (aborted?)`);
    return;
  }

  // Re-read: startRound() ran after our put(), so the local copy may be outdated.
  const freshSession = await collabStore.get(sessionId);
  const parentTask = await store.get(freshSession.task_id);
  const taskDescription = parentTask?.description || '';

  const stepLabel = phase === 'init' ? 'Init'
    : phase === 'finalization' ? 'Finalization'
    : `SR${current_subround} Step${current_step}`;
  log(`CIRCLING ${sessionId} ${stepLabel} START (${freshSession.nodes.length} nodes)`);

  await collabStore.appendAudit(sessionId, 'circling_step_started', {
    phase, subround: current_subround, step: current_step,
    nodes: freshSession.nodes.map(n => n.node_id),
  });
  publishCollabEvent('circling_step_started', freshSession);

  // Notify each node with their directed input
  for (const node of freshSession.nodes) {
    const directedInput = collabStore.compileDirectedInput(freshSession, node.node_id, taskDescription);

    nc.publish(`mesh.collab.${sessionId}.node.${node.node_id}.round`, sc.encode(JSON.stringify({
      session_id: sessionId,
      task_id: freshSession.task_id,
      round_number: freshSession.current_round,
      directed_input: directedInput,
      shared_intel: '', // empty for circling — uses directed_input instead
      my_scope: node.scope,
      my_role: node.role,
      mode: 'circling_strategy',
      circling_phase: phase,
      circling_step: current_step,
      circling_subround: current_subround,
    })));
  }

  // Set step-level timeout. If the barrier isn't met within CIRCLING_STEP_TIMEOUT_MS,
  // mark unresponsive nodes as dead and force-advance with degraded input.
  clearCirclingStepTimer(sessionId);
  const stepSnapshot = { phase, subround: current_subround, step: current_step };
  const timer = setTimeout(async () => {
    // The handler is async; nobody awaits a timer callback, so a rejection here
    // would surface as an unhandled rejection. Catch and log it instead.
    try {
      await handleCirclingStepTimeout(sessionId, stepSnapshot);
    } catch (err) {
      log(`CIRCLING STEP TIMEOUT ERROR: ${sessionId} timeout handler failed: ${err?.message ?? err}`);
    }
  }, CIRCLING_STEP_TIMEOUT_MS);
  circlingStepTimers.set(sessionId, timer);
}
1293
+
1294
/**
 * Timeout handler for a single circling step.
 *
 * Drops the session's timer entry, then re-reads the session. If the session's
 * phase/sub-round/step no longer match the snapshot taken when the timer was
 * armed, the timer is stale and nothing happens. Otherwise every node that is
 * not already `dead` and has no reflection for the current step in the latest
 * round is marked `dead` (with an audit entry), and the step barrier is checked
 * again: if it is now satisfied the protocol advances (complete / gate / next
 * step); if not, the session is aborted and the parent task released.
 *
 * @param {string} sessionId - Collab session whose step timed out.
 * @param {{phase: string, subround: number, step: number}} stepSnapshot -
 *   Position of the state machine when the timer was created.
 * @returns {Promise<void>}
 */
async function handleCirclingStepTimeout(sessionId, stepSnapshot) {
  circlingStepTimers.delete(sessionId);

  const session = await collabStore.get(sessionId);
  if (!session || !session.circling) return;

  const { phase, current_subround, current_step } = session.circling;

  // A mismatch on any coordinate means the step already moved on — timer is stale.
  const stillOnSnapshotStep =
    phase === stepSnapshot.phase &&
    current_subround === stepSnapshot.subround &&
    current_step === stepSnapshot.step;
  if (!stillOnSnapshotStep) return;

  const position = `${phase}/SR${current_subround}/Step${current_step}`;
  log(`CIRCLING STEP TIMEOUT: ${sessionId} ${position} — forcing advance`);

  const latestRound = session.rounds[session.rounds.length - 1];
  if (!latestRound) return;

  // Collect the IDs of nodes that did submit for this step.
  const respondedIds = new Set();
  for (const reflection of latestRound.reflections) {
    if (reflection.circling_step === current_step) {
      respondedIds.add(reflection.node_id);
    }
  }

  // Everyone else (that is not already dead) gets marked dead, one at a time.
  for (const member of session.nodes) {
    if (member.status === 'dead' || respondedIds.has(member.node_id)) continue;

    await collabStore.setNodeStatus(sessionId, member.node_id, 'dead');
    log(`CIRCLING STEP TIMEOUT: marked ${member.node_id} as dead (no submission within ${CIRCLING_STEP_TIMEOUT_MS / 60000}m)`);
    await collabStore.appendAudit(sessionId, 'node_marked_dead', {
      node_id: member.node_id,
      reason: `Circling step timeout: no reflection for ${position}`,
    });
  }

  // Evaluate the barrier again against the updated node statuses.
  const refreshed = await collabStore.get(sessionId);
  if (!collabStore.isCirclingStepComplete(refreshed)) {
    // Barrier still unmet after marking non-responders dead — abort the session.
    log(`CIRCLING STEP TIMEOUT: ${sessionId} — no active nodes remain. Aborting.`);
    await collabStore.markAborted(sessionId, `All nodes timed out at ${position}`);
    publishCollabEvent('aborted', await collabStore.get(sessionId));
    await store.markReleased(session.task_id, `Circling session aborted: all nodes timed out`);
    return;
  }

  const nextState = await collabStore.advanceCirclingStep(sessionId);
  if (!nextState) {
    log(`CIRCLING STEP TIMEOUT ERROR: advanceCirclingStep returned null for ${sessionId}`);
    return;
  }
  if (nextState.phase === 'complete') {
    await completeCirclingSession(sessionId);
    return;
  }
  if (nextState.needsGate) {
    log(`CIRCLING GATE: ${sessionId} SR${nextState.subround} — waiting for human approval (timeout-forced)`);
    publishCollabEvent('circling_gate', refreshed);
    return;
  }
  await startCirclingStep(sessionId);
}
1359
+
1360
/**
 * Cancel the pending step timer registered for a session, if any, and remove
 * its entry from `circlingStepTimers`. No-op when nothing is registered.
 *
 * @param {string} sessionId - Collab session whose timer should be cleared.
 */
function clearCirclingStepTimer(sessionId) {
  const pendingTimer = circlingStepTimers.get(sessionId);
  if (!pendingTimer) return;

  clearTimeout(pendingTimer);
  circlingStepTimers.delete(sessionId);
}
1367
+
1368
/**
 * Finish a circling session once finalization has run.
 *
 * Looks at the reflections of the last round. If any of them carries a
 * `blocked` vote, an escalation is audited, a `circling_gate` event is
 * published and the function returns without completing anything. Otherwise
 * the session is marked converged and completed, a `session_completed` audit
 * entry is written, the parent task is completed with the session's result,
 * and `completed` events are published for both the task and the session.
 *
 * @param {string} sessionId - Collab session to complete.
 * @returns {Promise<void>}
 */
async function completeCirclingSession(sessionId) {
  clearCirclingStepTimer(sessionId);

  const session = await collabStore.get(sessionId);
  if (!session || !session.circling) return;

  const finalRound = session.rounds[session.rounds.length - 1];
  if (!finalRound) return;

  // Any blocked vote turns completion into an escalation gate.
  const dissent = finalRound.reflections.filter((reflection) => reflection.vote === 'blocked');
  if (dissent.length > 0) {
    log(`CIRCLING ESCALATION ${sessionId}: ${dissent.length} blocked vote(s) in finalization`);
    await collabStore.appendAudit(sessionId, 'circling_escalation', {
      blocked_nodes: dissent.map((reflection) => reflection.node_id),
      summaries: dissent.map((reflection) => reflection.summary),
    });
    // Gate on finalization (all tiers)
    publishCollabEvent('circling_gate', session);
    return;
  }

  // Nobody blocked: gather the worker's latest outputs and complete.
  const finalArtifact = collabStore.getLatestArtifact(session, 'worker', 'workArtifact');
  const completionDiff = collabStore.getLatestArtifact(session, 'worker', 'completionDiff');
  const subroundsRun = session.circling.current_subround;

  log(`CIRCLING COMPLETED ${sessionId}: ${subroundsRun} sub-rounds`);
  await collabStore.markConverged(sessionId);

  const nodeCount = session.nodes.length;
  const diffNote = completionDiff ? 'CompletionDiff available.' : '';
  const contributionPairs = finalRound.reflections.map((reflection) => [reflection.node_id, reflection.summary]);

  await collabStore.markCompleted(sessionId, {
    artifacts: finalArtifact ? ['workArtifact'] : [],
    summary: `Circling Strategy completed: ${subroundsRun} sub-rounds, ${nodeCount} nodes. ${diffNote}`,
    node_contributions: Object.fromEntries(contributionPairs),
    circling_final_artifact: finalArtifact,
    circling_completion_diff: completionDiff,
  });
  await collabStore.appendAudit(sessionId, 'session_completed', {
    outcome: 'circling_finalized',
    subrounds: subroundsRun,
    node_count: nodeCount,
  });

  // Propagate the stored session result to the parent task.
  const completedSession = await collabStore.get(sessionId);
  await store.markCompleted(session.task_id, completedSession.result);
  publishEvent('completed', await store.get(session.task_id));
  publishCollabEvent('completed', completedSession);
}
1424
+
1425
/**
 * mesh.collab.gate.approve — Human approves a circling tier gate.
 * Resumes the circling protocol after a gate point.
 *
 * When the session's circling phase is `complete` or `finalization`, approval
 * force-completes the session ("accept anyway") and completes the parent task.
 * The stored result mirrors the one written by completeCirclingSession,
 * including `circling_completion_diff`, and a `session_completed` audit entry
 * is written so gate-approved completions show up in the audit trail too.
 * For any other phase the next step is started.
 *
 * NOTE(review): a gate raised while phase is `finalization` is force-completed
 * here rather than resumed — confirm that advanceCirclingStep never gates with
 * phase === 'finalization' before the finalization step has actually run.
 *
 * @param {object} msg - Incoming request message; its parsed payload must carry `session_id`.
 * @returns {Promise<*>} Result of respond()/respondError() for the request.
 */
async function handleCirclingGateApprove(msg) {
  const { session_id } = parseRequest(msg);
  if (!session_id) return respondError(msg, 'session_id required');

  const session = await collabStore.get(session_id);
  if (!session || !session.circling) return respondError(msg, 'Not a circling session');

  log(`CIRCLING GATE APPROVED: ${session_id} — resuming`);
  await collabStore.appendAudit(session_id, 'gate_approved', {
    phase: session.circling.phase,
    subround: session.circling.current_subround,
  });

  // If finalization phase with blocked votes, the gate approve means "accept anyway"
  if (session.circling.phase === 'complete' || session.circling.phase === 'finalization') {
    // Force complete
    const lastRound = session.rounds[session.rounds.length - 1];
    const finalArtifact = collabStore.getLatestArtifact(session, 'worker', 'workArtifact');
    const completionDiff = collabStore.getLatestArtifact(session, 'worker', 'completionDiff');
    await collabStore.markConverged(session_id);
    await collabStore.markCompleted(session_id, {
      artifacts: finalArtifact ? ['workArtifact'] : [],
      summary: `Circling completed via gate approval after ${session.circling.current_subround} sub-rounds`,
      node_contributions: Object.fromEntries(
        (lastRound?.reflections || []).map(r => [r.node_id, r.summary])
      ),
      circling_final_artifact: finalArtifact,
      // Same result shape as the normal completion path.
      circling_completion_diff: completionDiff,
    });
    await collabStore.appendAudit(session_id, 'session_completed', {
      outcome: 'circling_gate_approved',
      subrounds: session.circling.current_subround,
      node_count: session.nodes.length,
    });
    const completedSession = await collabStore.get(session_id);
    await store.markCompleted(session.task_id, completedSession.result);
    publishEvent('completed', await store.get(session.task_id));
    publishCollabEvent('completed', completedSession);
  } else {
    // Mid-protocol gate (tier 3) — resume next step
    await startCirclingStep(session_id);
  }

  respond(msg, { approved: true });
}
1467
+
1468
/**
 * mesh.collab.gate.reject — Human rejects a circling tier gate.
 * Forces another sub-round.
 *
 * Writes a `gate_rejected` audit entry, then resets the circling state to
 * phase `circling`, step 1, with both `current_subround` and `max_subrounds`
 * incremented by one, persists it, and starts the next step.
 *
 * @param {object} msg - Incoming request message; its parsed payload must carry `session_id`.
 * @returns {Promise<*>} Result of respond()/respondError() for the request.
 */
async function handleCirclingGateReject(msg) {
  const { session_id } = parseRequest(msg);
  if (!session_id) return respondError(msg, 'session_id required');

  const gatedSession = await collabStore.get(session_id);
  if (!gatedSession || !gatedSession.circling) return respondError(msg, 'Not a circling session');

  log(`CIRCLING GATE REJECTED: ${session_id} — forcing another sub-round`);
  await collabStore.appendAudit(session_id, 'gate_rejected', {
    phase: gatedSession.circling.phase,
    subround: gatedSession.circling.current_subround,
  });

  // Re-read before mutating. The copy fetched above predates appendAudit(); if the
  // audit trail is persisted on the session record (not visible from here — TODO
  // confirm), writing that stale copy back would drop the entry just appended.
  const session = await collabStore.get(session_id);
  if (!session || !session.circling) return respondError(msg, 'Not a circling session');

  // Reset to circling phase, increment subround, step 1
  session.circling.phase = 'circling';
  session.circling.max_subrounds++; // allow one more
  session.circling.current_step = 1;
  session.circling.current_subround++;
  await collabStore.put(session);

  await startCirclingStep(session_id);
  respond(msg, { rejected: true, new_subround: session.circling.current_subround });
}
1495
+
931
1496
  // ── Collab Recruiting Timer ─────────────────────────
932
1497
 
933
1498
  /**
@@ -941,7 +1506,31 @@ async function checkRecruitingDeadlines() {
941
1506
 
942
1507
  if (session.nodes.length >= session.min_nodes) {
943
1508
  log(`COLLAB RECRUIT DONE ${session.session_id}: ${session.nodes.length} nodes joined. Starting round 1.`);
944
- await startCollabRound(session.session_id);
1509
+ if (session.mode === 'circling_strategy' && session.circling) {
1510
+ // Circling requires exactly 3 nodes (1 worker + 2 reviewers).
1511
+ // Even if min_nodes was misconfigured, refuse to start with <3.
1512
+ const hasWorker = session.nodes.some(n => n.role === 'worker');
1513
+ const reviewerCount = session.nodes.filter(n => n.role === 'reviewer').length;
1514
+ if (session.nodes.length < 3 || !hasWorker || reviewerCount < 2) {
1515
+ log(`COLLAB RECRUIT FAILED ${session.session_id}: circling requires 1 worker + 2 reviewers, got ${session.nodes.length} nodes (worker: ${hasWorker}, reviewers: ${reviewerCount}). Aborting.`);
1516
+ await collabStore.markAborted(session.session_id, `Circling requires 1 worker + 2 reviewers; got ${session.nodes.length} nodes`);
1517
+ publishCollabEvent('aborted', await collabStore.get(session.session_id));
1518
+ await store.markReleased(session.task_id, `Circling session failed: insufficient role distribution`);
1519
+ continue;
1520
+ }
1521
+ // Assign all role IDs if not yet assigned
1522
+ if (!session.circling.worker_node_id) {
1523
+ const workerNode = session.nodes.find(n => n.role === 'worker') || session.nodes[0];
1524
+ session.circling.worker_node_id = workerNode.node_id;
1525
+ const reviewers = session.nodes.filter(n => n.node_id !== workerNode.node_id);
1526
+ session.circling.reviewerA_node_id = reviewers[0]?.node_id || null;
1527
+ session.circling.reviewerB_node_id = reviewers[1]?.node_id || null;
1528
+ await collabStore.put(session);
1529
+ }
1530
+ await startCirclingStep(session.session_id);
1531
+ } else {
1532
+ await startCollabRound(session.session_id);
1533
+ }
945
1534
  } else {
946
1535
  log(`COLLAB RECRUIT FAILED ${session.session_id}: only ${session.nodes.length}/${session.min_nodes} nodes. Aborting.`);
947
1536
  await collabStore.markAborted(session.session_id, `Not enough nodes: ${session.nodes.length} < ${session.min_nodes}`);
@@ -952,6 +1541,46 @@ async function checkRecruitingDeadlines() {
952
1541
  }
953
1542
  }
954
1543
 
1544
+ // ── Circling Step Timeout Sweep ──────────────────────
1545
+
1546
/**
 * Periodic sweep for stale circling steps. Handles timer rehydration after
 * daemon restart — in-memory timers are lost on crash, but step_started_at
 * is stored on the session and survives.
 *
 * For each active circling session that has no in-memory timer, checks whether
 * the current step has been running longer than CIRCLING_STEP_TIMEOUT_MS. If so,
 * fires the timeout handler (which marks dead nodes and force-advances).
 *
 * Failures are isolated per session: an error while handling one session is
 * logged and the sweep continues with the next one. A failure to list sessions
 * is logged and ends this sweep. The function itself never rejects.
 *
 * @returns {Promise<void>}
 */
async function sweepCirclingStepTimeouts() {
  let active;
  try {
    active = await collabStore.list({ status: COLLAB_STATUS.ACTIVE });
  } catch (err) {
    log(`CIRCLING SWEEP ERROR: ${err.message}`);
    return;
  }

  for (const session of active) {
    try {
      if (session.mode !== 'circling_strategy' || !session.circling) continue;
      if (session.circling.phase === 'complete') continue;
      if (!session.circling.step_started_at) continue;

      // Skip if an in-memory timer is already tracking this session
      if (circlingStepTimers.has(session.session_id)) continue;

      // An unparseable timestamp yields NaN, which fails the comparison below
      // and therefore leaves the session untouched.
      const elapsed = Date.now() - new Date(session.circling.step_started_at).getTime();
      if (elapsed > CIRCLING_STEP_TIMEOUT_MS) {
        log(`CIRCLING SWEEP: ${session.session_id} step stale (${(elapsed / 60000).toFixed(1)}m elapsed). Firing timeout handler.`);
        const stepSnapshot = {
          phase: session.circling.phase,
          subround: session.circling.current_subround,
          step: session.circling.current_step,
        };
        await handleCirclingStepTimeout(session.session_id, stepSnapshot);
      }
    } catch (err) {
      // One broken session must not prevent the remaining sessions from being swept.
      log(`CIRCLING SWEEP ERROR: ${session?.session_id}: ${err.message}`);
    }
  }
}
1583
+
955
1584
  // ── Plan Event Publishing ───────────────────────────
956
1585
 
957
1586
  function publishPlanEvent(eventType, plan) {
@@ -1136,6 +1765,11 @@ async function advancePlanWave(planId) {
1136
1765
  case 'solo_mesh':
1137
1766
  case 'collab_mesh': {
1138
1767
  // Submit as mesh task — inherit routing fields from parent task
1768
+ // Auto-assign role from scope if subtask doesn't specify one
1769
+ const subtaskRole = st.role || (st.scope && st.scope.length > 0
1770
+ ? (findRoleByScope(st.scope, ROLE_DIRS)?.id || null)
1771
+ : null);
1772
+
1139
1773
  const meshTask = createTask({
1140
1774
  task_id: st.subtask_id,
1141
1775
  title: st.title,
@@ -1146,8 +1780,12 @@ async function advancePlanWave(planId) {
1146
1780
  success_criteria: st.success_criteria,
1147
1781
  tags: ['plan', planId],
1148
1782
  collaboration: st.delegation.collaboration || undefined,
1783
+ plan_id: planId,
1784
+ subtask_id: st.subtask_id,
1785
+ role: subtaskRole,
1149
1786
  ...inheritedRouting,
1150
1787
  });
1788
+ if (subtaskRole) log(` → AUTO-ROLE ${st.subtask_id}: ${subtaskRole} (matched from scope)`);
1151
1789
  await store.put(meshTask);
1152
1790
  st.mesh_task_id = meshTask.task_id;
1153
1791
  publishEvent('submitted', meshTask);
@@ -1201,33 +1839,175 @@ async function advancePlanWave(planId) {
1201
1839
  publishPlanEvent('wave_started', plan);
1202
1840
  }
1203
1841
 
1842
/**
 * Set the status of the plan subtask that corresponds to a mesh task, and
 * persist the plan — without triggering wave advancement. Intended for
 * intermediate states such as pending_review.
 *
 * Silently does nothing when the task is unknown, has no `plan_id`, the plan
 * cannot be loaded, or no subtask matches the task ID (by `mesh_task_id` or
 * `subtask_id`).
 *
 * @param {string} taskId - Mesh task ID.
 * @param {string} newStatus - Status value to store on the matching subtask.
 * @returns {Promise<void>}
 */
async function updatePlanSubtaskStatus(taskId, newStatus) {
  const meshTask = await store.get(taskId);
  const owningPlanId = meshTask?.plan_id;
  if (!owningPlanId) return;

  const owningPlan = await planStore.get(owningPlanId);
  if (!owningPlan) return;

  const matchesTask = (candidate) =>
    candidate.mesh_task_id === taskId || candidate.subtask_id === taskId;
  const subtask = owningPlan.subtasks.find(matchesTask);
  if (!subtask) return;

  subtask.status = newStatus;
  await planStore.put(owningPlan);
  log(`PLAN SUBTASK ${subtask.subtask_id} → ${newStatus} (no wave advance)`);
}
1857
+
1204
1858
  // ── Plan Progress on Task Completion ────────────────
1205
1859
 
1206
1860
  /**
1207
1861
  * When a mesh task completes, check if it belongs to a plan and update accordingly.
1208
- * Called after handleComplete/handleFail.
1862
+ * Called after handleComplete/handleFail and from detectStalls/enforceBudgets.
1209
1863
  */
1210
1864
  async function checkPlanProgress(taskId, status) {
1211
- // Look for plans that reference this task
1212
- const allPlans = await planStore.list({ status: PLAN_STATUS.EXECUTING });
1213
- for (const plan of allPlans) {
1214
- const st = plan.subtasks.find(s => s.mesh_task_id === taskId || s.subtask_id === taskId);
1215
- if (!st) continue;
1865
+ let plan = null;
1866
+ let st = null;
1867
+
1868
+ // Fast path: O(1) lookup via plan_id back-reference on the task
1869
+ const task = await store.get(taskId);
1870
+ if (task && task.plan_id) {
1871
+ plan = await planStore.get(task.plan_id);
1872
+ if (plan) {
1873
+ // Match by mesh_task_id, subtask_id, OR the task's subtask_id field
1874
+ // (escalation tasks carry the original subtask_id for plan recovery)
1875
+ st = plan.subtasks.find(s =>
1876
+ s.mesh_task_id === taskId ||
1877
+ s.subtask_id === taskId ||
1878
+ (task.subtask_id && s.subtask_id === task.subtask_id)
1879
+ );
1880
+ }
1881
+ }
1882
+
1883
+ // LEGACY: Remove after 2026-06-01. O(n*m) fallback for tasks created before
1884
+ // plan_id back-reference was added. Track invocations to know when safe to delete.
1885
+ if (!st) {
1886
+ const allPlans = await planStore.list({ status: PLAN_STATUS.EXECUTING });
1887
+ for (const p of allPlans) {
1888
+ const found = p.subtasks.find(s => s.mesh_task_id === taskId || s.subtask_id === taskId);
1889
+ if (found) {
1890
+ plan = p;
1891
+ st = found;
1892
+ break;
1893
+ }
1894
+ }
1895
+ }
1896
+
1897
+ if (!plan || !st) return;
1898
+
1899
+ // Escalation recovery: if a subtask was FAILED/BLOCKED but an escalation task
1900
+ // completes successfully for it, override status to COMPLETED and unblock dependents.
1901
+ const isEscalationRecovery = (
1902
+ status === 'completed' &&
1903
+ (st.status === SUBTASK_STATUS.FAILED || st.status === SUBTASK_STATUS.BLOCKED) &&
1904
+ task && task.tags && task.tags.includes('escalation')
1905
+ );
1906
+
1907
+ if (isEscalationRecovery) {
1908
+ log(`ESCALATION RECOVERY ${plan.plan_id}: subtask ${st.subtask_id} recovered by ${taskId}`);
1909
+ st.status = SUBTASK_STATUS.COMPLETED;
1910
+ st.result = { success: true, summary: `Recovered by escalation task ${taskId}` };
1911
+ // Unblock any dependents that were blocked by the original failure
1912
+ for (const dep of plan.subtasks) {
1913
+ if (dep.status === SUBTASK_STATUS.BLOCKED && dep.depends_on.includes(st.subtask_id)) {
1914
+ dep.status = SUBTASK_STATUS.PENDING;
1915
+ dep.result = null;
1916
+ log(` UNBLOCKED: ${dep.subtask_id} (dependency ${st.subtask_id} recovered)`);
1917
+ }
1918
+ }
1919
+ await planStore.put(plan);
1920
+ publishPlanEvent('subtask_recovered', plan);
1921
+ await advancePlanWave(plan.plan_id);
1922
+ return;
1923
+ }
1924
+
1925
+ st.status = status === 'completed' ? SUBTASK_STATUS.COMPLETED : SUBTASK_STATUS.FAILED;
1926
+ await planStore.put(plan);
1927
+
1928
+ log(`PLAN PROGRESS ${plan.plan_id}: subtask ${st.subtask_id} → ${st.status}`);
1216
1929
 
1217
- st.status = status === 'completed' ? SUBTASK_STATUS.COMPLETED : SUBTASK_STATUS.FAILED;
1930
+ if (st.status === SUBTASK_STATUS.COMPLETED) {
1931
+ publishPlanEvent('subtask_completed', plan);
1932
+ await advancePlanWave(plan.plan_id);
1933
+ return;
1934
+ }
1935
+
1936
+ // Subtask failed — apply failure policy
1937
+ if (st.status === SUBTASK_STATUS.FAILED) {
1938
+ publishPlanEvent('subtask_failed', plan);
1939
+
1940
+ // Cascade: block all transitive dependents
1941
+ const blockedIds = cascadeFailure(plan, st.subtask_id);
1218
1942
  await planStore.put(plan);
1219
1943
 
1220
- log(`PLAN PROGRESS ${plan.plan_id}: subtask ${st.subtask_id} → ${st.status}`);
1944
+ const policy = plan.failure_policy || 'continue_best_effort';
1945
+
1946
+ if (policy === 'abort_on_first_fail') {
1947
+ await planStore.markAborted(plan.plan_id, `Subtask ${st.subtask_id} failed (abort_on_first_fail)`);
1948
+ publishPlanEvent('aborted', await planStore.get(plan.plan_id));
1949
+ log(`PLAN ABORTED ${plan.plan_id}: ${st.subtask_id} failed (abort_on_first_fail policy)`);
1950
+ return;
1951
+ }
1952
+
1953
+ if (policy === 'abort_on_critical_fail') {
1954
+ // Check direct failure
1955
+ if (st.critical) {
1956
+ await planStore.markAborted(plan.plan_id, `Critical subtask ${st.subtask_id} failed (abort_on_critical_fail)`);
1957
+ publishPlanEvent('aborted', await planStore.get(plan.plan_id));
1958
+ log(`PLAN ABORTED ${plan.plan_id}: critical subtask ${st.subtask_id} failed`);
1959
+ return;
1960
+ }
1221
1961
 
1222
- if (st.status === SUBTASK_STATUS.COMPLETED) {
1223
- publishPlanEvent('subtask_completed', plan);
1224
- await advancePlanWave(plan.plan_id);
1962
+ // Check if cascade blocked any critical subtasks — a blocked critical is
1963
+ // functionally equivalent to a failed critical (the plan can't achieve its goal)
1964
+ const blockedCritical = plan.subtasks.filter(
1965
+ s => blockedIds.has(s.subtask_id) && s.critical
1966
+ );
1967
+ if (blockedCritical.length > 0) {
1968
+ const ids = blockedCritical.map(s => s.subtask_id).join(', ');
1969
+ await planStore.markAborted(
1970
+ plan.plan_id,
1971
+ `Critical subtask(s) ${ids} blocked by failed dependency ${st.subtask_id} (abort_on_critical_fail)`
1972
+ );
1973
+ publishPlanEvent('aborted', await planStore.get(plan.plan_id));
1974
+ log(`PLAN ABORTED ${plan.plan_id}: critical subtask(s) [${ids}] blocked by ${st.subtask_id}`);
1975
+ return;
1976
+ }
1225
1977
  }
1226
1978
 
1227
- break;
1979
+ // continue_best_effort: try to advance independent branches
1980
+ await advancePlanWave(plan.plan_id);
1228
1981
  }
1229
1982
  }
1230
1983
 
1984
/**
 * Cascade failure: BFS from failed subtask, mark all transitive dependents as BLOCKED.
 * Mutates plan.subtasks in place.
 *
 * Only subtasks currently PENDING or QUEUED are blocked; a dependent in any
 * other state is left untouched and the walk does not continue through it.
 * Subtasks without a `depends_on` list are treated as having no dependencies.
 *
 * @param {{subtasks: Array<object>}} plan - Plan whose subtasks are inspected and mutated.
 * @param {string} failedSubtaskId - ID of the subtask that failed (recorded in each blocked subtask's result summary).
 * @returns {Set<string>} IDs of all newly-blocked subtasks
 */
function cascadeFailure(plan, failedSubtaskId) {
  const blocked = new Set();
  const queue = [failedSubtaskId];

  // Index cursor instead of shift(): same visiting order without re-indexing the array.
  for (let head = 0; head < queue.length; head++) {
    const current = queue[head];
    for (const st of plan.subtasks) {
      if (blocked.has(st.subtask_id)) continue;
      if (!(st.depends_on ?? []).includes(current)) continue;
      if (st.status !== SUBTASK_STATUS.PENDING && st.status !== SUBTASK_STATUS.QUEUED) continue;

      st.status = SUBTASK_STATUS.BLOCKED;
      st.result = { success: false, summary: `Blocked by failed dependency: ${failedSubtaskId}` };
      blocked.add(st.subtask_id);
      queue.push(st.subtask_id);
      log(` CASCADE: ${st.subtask_id} blocked by ${failedSubtaskId}`);
    }
  }

  return blocked;
}
2010
+
1231
2011
  // ── Main ────────────────────────────────────────────
1232
2012
 
1233
2013
  async function main() {
@@ -1265,6 +2045,8 @@ async function main() {
1265
2045
  'mesh.tasks.list': handleList,
1266
2046
  'mesh.tasks.get': handleGet,
1267
2047
  'mesh.tasks.cancel': handleCancel,
2048
+ 'mesh.tasks.approve': handleTaskApprove,
2049
+ 'mesh.tasks.reject': handleTaskReject,
1268
2050
  // Collab handlers
1269
2051
  'mesh.collab.create': handleCollabCreate,
1270
2052
  'mesh.collab.join': handleCollabJoin,
@@ -1273,6 +2055,9 @@ async function main() {
1273
2055
  'mesh.collab.find': handleCollabFind,
1274
2056
  'mesh.collab.reflect': handleCollabReflect,
1275
2057
  'mesh.collab.recruiting': handleCollabRecruiting,
2058
+ // Circling Strategy gate handlers
2059
+ 'mesh.collab.gate.approve': handleCirclingGateApprove,
2060
+ 'mesh.collab.gate.reject': handleCirclingGateReject,
1276
2061
  // Plan handlers
1277
2062
  'mesh.plans.create': handlePlanCreate,
1278
2063
  'mesh.plans.get': handlePlanGet,
@@ -1300,12 +2085,16 @@ async function main() {
1300
2085
  }
1301
2086
 
1302
2087
  // Start enforcement loops
2088
+ const proposalTimer = setInterval(processProposals, BUDGET_CHECK_INTERVAL);
1303
2089
  const budgetTimer = setInterval(enforceBudgets, BUDGET_CHECK_INTERVAL);
1304
2090
  const stallTimer = setInterval(detectStalls, BUDGET_CHECK_INTERVAL);
1305
2091
  const recruitTimer = setInterval(checkRecruitingDeadlines, 5000); // check every 5s
2092
+ const circlingStepSweepTimer = setInterval(sweepCirclingStepTimeouts, 60000); // every 60s
2093
+ log(`Proposal processing: every ${BUDGET_CHECK_INTERVAL / 1000}s`);
1306
2094
  log(`Budget enforcement: every ${BUDGET_CHECK_INTERVAL / 1000}s`);
1307
2095
  log(`Stall detection: every ${BUDGET_CHECK_INTERVAL / 1000}s (threshold: ${STALL_MINUTES}m)`);
1308
2096
  log(`Collab recruiting check: every 5s`);
2097
+ log(`Circling step timeout sweep: every 60s (threshold: ${CIRCLING_STEP_TIMEOUT_MS / 60000}m)`);
1309
2098
 
1310
2099
 
1311
2100
  log('Task daemon ready.');
@@ -1313,9 +2102,15 @@ async function main() {
1313
2102
  // Shutdown handler
1314
2103
  const shutdown = async () => {
1315
2104
  log('Shutting down...');
2105
+ clearInterval(proposalTimer);
1316
2106
  clearInterval(budgetTimer);
1317
2107
  clearInterval(stallTimer);
1318
2108
  clearInterval(recruitTimer);
2109
+ if (circlingStepSweepTimer) clearInterval(circlingStepSweepTimer);
2110
+ if (circlingStepTimers) {
2111
+ for (const timer of circlingStepTimers.values()) clearTimeout(timer);
2112
+ circlingStepTimers.clear();
2113
+ }
1319
2114
  for (const sub of subs) sub.unsubscribe();
1320
2115
  await nc.drain();
1321
2116
  process.exit(0);