openclaw-node-harness 2.0.3 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +646 -3
- package/bin/hyperagent.mjs +419 -0
- package/bin/mesh-agent.js +603 -81
- package/bin/mesh-bridge.js +340 -11
- package/bin/mesh-deploy-listener.js +119 -97
- package/bin/mesh-deploy.js +8 -0
- package/bin/mesh-task-daemon.js +1005 -40
- package/bin/mesh.js +423 -6
- package/config/claude-settings.json +95 -0
- package/config/daemon.json.template +2 -1
- package/config/git-hooks/pre-commit +13 -0
- package/config/git-hooks/pre-push +12 -0
- package/config/harness-rules.json +174 -0
- package/config/plan-templates/team-bugfix.yaml +52 -0
- package/config/plan-templates/team-deploy.yaml +50 -0
- package/config/plan-templates/team-feature.yaml +71 -0
- package/config/roles/qa-engineer.yaml +36 -0
- package/config/roles/solidity-dev.yaml +51 -0
- package/config/roles/tech-architect.yaml +36 -0
- package/config/rules/framework/solidity.md +22 -0
- package/config/rules/framework/typescript.md +21 -0
- package/config/rules/framework/unity.md +21 -0
- package/config/rules/universal/design-docs.md +18 -0
- package/config/rules/universal/git-hygiene.md +18 -0
- package/config/rules/universal/security.md +19 -0
- package/config/rules/universal/test-standards.md +19 -0
- package/identity/DELEGATION.md +6 -6
- package/install.sh +300 -8
- package/lib/circling-parser.js +119 -0
- package/lib/hyperagent-store.mjs +652 -0
- package/lib/kanban-io.js +59 -10
- package/lib/mcp-knowledge/bench.mjs +118 -0
- package/lib/mcp-knowledge/core.mjs +528 -0
- package/lib/mcp-knowledge/package.json +25 -0
- package/lib/mcp-knowledge/server.mjs +245 -0
- package/lib/mcp-knowledge/test.mjs +802 -0
- package/lib/memory-budget.mjs +261 -0
- package/lib/mesh-collab.js +354 -4
- package/lib/mesh-harness.js +427 -0
- package/lib/mesh-plans.js +13 -5
- package/lib/mesh-registry.js +11 -2
- package/lib/mesh-tasks.js +67 -0
- package/lib/plan-templates.js +226 -0
- package/lib/pre-compression-flush.mjs +320 -0
- package/lib/role-loader.js +292 -0
- package/lib/rule-loader.js +358 -0
- package/lib/session-store.mjs +458 -0
- package/lib/transcript-parser.mjs +292 -0
- package/mission-control/drizzle/soul_schema_update.sql +29 -0
- package/mission-control/drizzle.config.ts +1 -4
- package/mission-control/package-lock.json +1571 -83
- package/mission-control/package.json +6 -2
- package/mission-control/scripts/gen-chronology.js +3 -3
- package/mission-control/scripts/import-pipeline-v2.js +0 -16
- package/mission-control/scripts/import-pipeline.js +0 -15
- package/mission-control/src/app/api/cowork/clusters/[id]/members/route.ts +117 -0
- package/mission-control/src/app/api/cowork/clusters/[id]/route.ts +84 -0
- package/mission-control/src/app/api/cowork/clusters/route.ts +141 -0
- package/mission-control/src/app/api/cowork/dispatch/route.ts +128 -0
- package/mission-control/src/app/api/cowork/events/route.ts +65 -0
- package/mission-control/src/app/api/cowork/intervene/route.ts +259 -0
- package/mission-control/src/app/api/cowork/sessions/[id]/route.ts +37 -0
- package/mission-control/src/app/api/cowork/sessions/route.ts +64 -0
- package/mission-control/src/app/api/diagnostics/route.ts +97 -0
- package/mission-control/src/app/api/diagnostics/test-runner/route.ts +990 -0
- package/mission-control/src/app/api/mesh/events/route.ts +95 -19
- package/mission-control/src/app/api/mesh/identity/route.ts +11 -0
- package/mission-control/src/app/api/mesh/tasks/[id]/route.ts +92 -0
- package/mission-control/src/app/api/mesh/tasks/route.ts +91 -0
- package/mission-control/src/app/api/tasks/[id]/handoff/route.ts +1 -1
- package/mission-control/src/app/api/tasks/[id]/route.ts +90 -4
- package/mission-control/src/app/api/tasks/route.ts +21 -30
- package/mission-control/src/app/cowork/page.tsx +261 -0
- package/mission-control/src/app/diagnostics/page.tsx +385 -0
- package/mission-control/src/app/graph/page.tsx +26 -0
- package/mission-control/src/app/memory/page.tsx +1 -1
- package/mission-control/src/app/obsidian/page.tsx +36 -6
- package/mission-control/src/app/roadmap/page.tsx +24 -0
- package/mission-control/src/app/souls/page.tsx +2 -2
- package/mission-control/src/components/board/execution-config.tsx +431 -0
- package/mission-control/src/components/board/kanban-board.tsx +75 -9
- package/mission-control/src/components/board/kanban-column.tsx +135 -19
- package/mission-control/src/components/board/task-card.tsx +55 -2
- package/mission-control/src/components/board/unified-task-dialog.tsx +82 -4
- package/mission-control/src/components/cowork/cluster-card.tsx +176 -0
- package/mission-control/src/components/cowork/create-cluster-dialog.tsx +251 -0
- package/mission-control/src/components/cowork/dispatch-form.tsx +423 -0
- package/mission-control/src/components/cowork/role-picker.tsx +102 -0
- package/mission-control/src/components/cowork/session-card.tsx +284 -0
- package/mission-control/src/components/layout/sidebar.tsx +39 -2
- package/mission-control/src/lib/__tests__/daily-log.test.ts +82 -0
- package/mission-control/src/lib/__tests__/memory-md.test.ts +87 -0
- package/mission-control/src/lib/__tests__/mesh-kv-sync.test.ts +465 -0
- package/mission-control/src/lib/__tests__/mocks/mock-kv.ts +131 -0
- package/mission-control/src/lib/__tests__/status-kanban.test.ts +46 -0
- package/mission-control/src/lib/__tests__/task-markdown.test.ts +188 -0
- package/mission-control/src/lib/__tests__/wikilinks.test.ts +175 -0
- package/mission-control/src/lib/config.ts +58 -0
- package/mission-control/src/lib/db/index.ts +69 -0
- package/mission-control/src/lib/db/schema.ts +61 -3
- package/mission-control/src/lib/hooks.ts +309 -0
- package/mission-control/src/lib/memory/entities.ts +3 -2
- package/mission-control/src/lib/nats.ts +66 -1
- package/mission-control/src/lib/parsers/task-markdown.ts +52 -2
- package/mission-control/src/lib/parsers/transcript.ts +4 -4
- package/mission-control/src/lib/scheduler.ts +12 -11
- package/mission-control/src/lib/sync/mesh-kv.ts +279 -0
- package/mission-control/src/lib/sync/tasks.ts +23 -1
- package/mission-control/src/lib/task-id.ts +32 -0
- package/mission-control/src/lib/tts/index.ts +33 -9
- package/mission-control/tsconfig.json +2 -1
- package/mission-control/vitest.config.ts +14 -0
- package/package.json +15 -2
- package/services/service-manifest.json +1 -1
- package/skills/cc-godmode/references/agents.md +8 -8
- package/workspace-bin/memory-daemon.mjs +199 -5
- package/workspace-bin/session-search.mjs +204 -0
- package/workspace-bin/web-fetch.mjs +65 -0
package/bin/mesh-task-daemon.js
CHANGED
|
@@ -35,16 +35,29 @@ const { connect, StringCodec } = require('nats');
|
|
|
35
35
|
const { createTask, TaskStore, TASK_STATUS, KV_BUCKET } = require('../lib/mesh-tasks');
|
|
36
36
|
const { createSession, CollabStore, COLLAB_STATUS, COLLAB_KV_BUCKET } = require('../lib/mesh-collab');
|
|
37
37
|
const { createPlan, autoRoutePlan, PlanStore, PLAN_STATUS, SUBTASK_STATUS, PLANS_KV_BUCKET } = require('../lib/mesh-plans');
|
|
38
|
+
const { findRole, findRoleByScope, validateRequiredOutputs, checkForbiddenPatterns } = require('../lib/role-loader');
|
|
38
39
|
const os = require('os');
|
|
40
|
+
const path = require('path');
|
|
41
|
+
|
|
42
|
+
// Role search directories
|
|
43
|
+
const ROLE_DIRS = [
|
|
44
|
+
path.join(process.env.HOME || '/root', '.openclaw', 'roles'),
|
|
45
|
+
path.join(__dirname, '..', 'config', 'roles'),
|
|
46
|
+
];
|
|
39
47
|
|
|
40
48
|
const sc = StringCodec();
|
|
41
49
|
const { NATS_URL } = require('../lib/nats-resolve');
|
|
42
50
|
const BUDGET_CHECK_INTERVAL = 30000; // 30s
|
|
43
51
|
const STALL_MINUTES = parseInt(process.env.MESH_STALL_MINUTES || '5'); // no heartbeat for this long → stalled
|
|
52
|
+
const CIRCLING_STEP_TIMEOUT_MS = parseInt(process.env.MESH_CIRCLING_STEP_TIMEOUT_MS || String(10 * 60 * 1000)); // 10 min default
|
|
44
53
|
const NODE_ID = os.hostname().toLowerCase().replace(/[^a-z0-9-]/g, '-');
|
|
45
54
|
|
|
46
55
|
let nc, store, collabStore, planStore;
|
|
47
56
|
|
|
57
|
+
// Active step timers for circling sessions — keyed by sessionId.
|
|
58
|
+
// Cleared when the step completes normally; fires degrade logic if step hangs.
|
|
59
|
+
const circlingStepTimers = new Map();
|
|
60
|
+
|
|
48
61
|
// ── Logging ─────────────────────────────────────────
|
|
49
62
|
|
|
50
63
|
function log(msg) {
|
|
@@ -131,6 +144,36 @@ async function handleSubmit(msg) {
|
|
|
131
144
|
respond(msg, task);
|
|
132
145
|
}
|
|
133
146
|
|
|
147
|
+
/**
|
|
148
|
+
* Abort any collab session tied to a task that is being terminated.
|
|
149
|
+
* Shared by handleFail, handleRelease, handleCancel.
|
|
150
|
+
*
|
|
151
|
+
* NOT called from handleComplete — that path goes through evaluateRound
|
|
152
|
+
* which already calls collabStore.markCompleted() on the session.
|
|
153
|
+
*
|
|
154
|
+
* markAborted() is idempotent: no-op if session is already completed/aborted.
|
|
155
|
+
* This makes double-abort safe (e.g. stall detection → release race).
|
|
156
|
+
*/
|
|
157
|
+
async function cleanupTaskCollabSession(task, reason) {
|
|
158
|
+
if (!task.collab_session_id || !collabStore) return;
|
|
159
|
+
try {
|
|
160
|
+
// markAborted returns null if session doesn't exist or is already completed/aborted.
|
|
161
|
+
// Non-null means we actually transitioned the session to aborted.
|
|
162
|
+
const session = await collabStore.markAborted(task.collab_session_id, reason);
|
|
163
|
+
if (session) {
|
|
164
|
+
await collabStore.appendAudit(task.collab_session_id, 'session_aborted', { reason });
|
|
165
|
+
publishCollabEvent('aborted', session);
|
|
166
|
+
log(`COLLAB ABORTED ${task.collab_session_id}: ${reason}`);
|
|
167
|
+
}
|
|
168
|
+
// Clean up audit error rate-limit counter
|
|
169
|
+
// NOTE: sessions expiring via KV TTL bypass this — residual Map entry is negligible
|
|
170
|
+
// for a homelab mesh but worth noting.
|
|
171
|
+
collabStore.clearAuditErrorCount(task.collab_session_id);
|
|
172
|
+
} catch (err) {
|
|
173
|
+
log(`COLLAB CLEANUP WARN: could not abort session ${task.collab_session_id}: ${err.message}`);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
|
|
134
177
|
/**
|
|
135
178
|
* mesh.tasks.claim — Agent requests the next available task.
|
|
136
179
|
* Expects: { node_id }
|
|
@@ -193,18 +236,103 @@ async function handleComplete(msg) {
|
|
|
193
236
|
const { task_id, result } = parseRequest(msg);
|
|
194
237
|
if (!task_id) return respondError(msg, 'task_id is required');
|
|
195
238
|
|
|
196
|
-
|
|
197
|
-
|
|
239
|
+
// Determine if this task requires human review before completing.
|
|
240
|
+
// requires_review logic:
|
|
241
|
+
// - explicit true/false on task → honor it
|
|
242
|
+
// - null (default) → auto-compute:
|
|
243
|
+
// * mode: human → always (by definition)
|
|
244
|
+
// * mode: soul → always (creative/strategic work, no mechanical verification)
|
|
245
|
+
// * collab_mesh without metric → yes (peer review without mechanical check)
|
|
246
|
+
// * solo_mesh WITH metric → no (metric IS the verification)
|
|
247
|
+
// * solo_mesh WITHOUT metric → yes (no mechanical check = human must validate)
|
|
248
|
+
// * local → no (Daedalus/companion handles these interactively)
|
|
249
|
+
const existingTask = await store.get(task_id);
|
|
250
|
+
if (!existingTask) return respondError(msg, `Task ${task_id} not found`);
|
|
251
|
+
|
|
252
|
+
let needsReview = existingTask.requires_review;
|
|
253
|
+
if (needsReview === null || needsReview === undefined) {
|
|
254
|
+
const mode = existingTask.collaboration ? 'collab_mesh' : (existingTask.tags?.includes('soul') ? 'soul' : 'solo_mesh');
|
|
255
|
+
const hasMetric = !!existingTask.metric;
|
|
256
|
+
|
|
257
|
+
if (mode === 'soul' || existingTask.tags?.includes('human')) {
|
|
258
|
+
needsReview = true;
|
|
259
|
+
} else if (mode === 'collab_mesh' && !hasMetric) {
|
|
260
|
+
needsReview = true;
|
|
261
|
+
} else if (mode === 'solo_mesh' && !hasMetric) {
|
|
262
|
+
needsReview = true;
|
|
263
|
+
} else {
|
|
264
|
+
needsReview = false;
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
// Role-based post-completion validation — runs UNCONDITIONALLY on all tasks
|
|
269
|
+
// with a role, regardless of review status. Validation results are included
|
|
270
|
+
// in the pending_review metadata so human reviewers see structured checks.
|
|
271
|
+
let roleValidation = { passed: true, issues: [] };
|
|
272
|
+
if (existingTask.role) {
|
|
273
|
+
const role = findRole(existingTask.role, ROLE_DIRS);
|
|
274
|
+
if (role) {
|
|
275
|
+
const outputFiles = result?.artifacts || [];
|
|
276
|
+
const harnessFiles = (result?.harness?.violations || []).flatMap(v => v.files || []);
|
|
277
|
+
const allFiles = [...new Set([...outputFiles, ...harnessFiles])];
|
|
278
|
+
|
|
279
|
+
if (allFiles.length > 0) {
|
|
280
|
+
const reqResult = validateRequiredOutputs(role, allFiles, null);
|
|
281
|
+
if (!reqResult.passed) {
|
|
282
|
+
roleValidation.passed = false;
|
|
283
|
+
roleValidation.issues.push(...reqResult.failures.map(f => `[required_output] ${f.description}: ${f.detail}`));
|
|
284
|
+
}
|
|
285
|
+
}
|
|
198
286
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
287
|
+
if (!roleValidation.passed) {
|
|
288
|
+
log(`ROLE VALIDATION FAILED for ${task_id} (role: ${role.id}): ${roleValidation.issues.length} issue(s)`);
|
|
289
|
+
for (const issue of roleValidation.issues) log(` - ${issue}`);
|
|
290
|
+
needsReview = true; // force review if validation failed on auto-complete path
|
|
291
|
+
} else {
|
|
292
|
+
log(`ROLE VALIDATION PASSED for ${task_id} (role: ${role.id})`);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
}
|
|
202
296
|
|
|
203
|
-
|
|
204
|
-
|
|
297
|
+
let task;
|
|
298
|
+
if (needsReview) {
|
|
299
|
+
// Gate: task goes to pending_review instead of completed
|
|
300
|
+
// Include role validation results in the review metadata
|
|
301
|
+
const enrichedResult = {
|
|
302
|
+
...(result || { success: true }),
|
|
303
|
+
role_validation: roleValidation,
|
|
304
|
+
};
|
|
305
|
+
task = await store.markPendingReview(task_id, enrichedResult);
|
|
306
|
+
const elapsed = task.started_at
|
|
307
|
+
? ((new Date(task.review_requested_at) - new Date(task.started_at)) / 60000).toFixed(1)
|
|
308
|
+
: '?';
|
|
309
|
+
log(`PENDING REVIEW ${task_id} in ${elapsed}m: ${result?.summary || 'no summary'}`);
|
|
310
|
+
log(` Approve: mesh task approve ${task_id} | Reject: mesh task reject ${task_id} --reason "..."`);
|
|
311
|
+
publishEvent('pending_review', task);
|
|
312
|
+
// Update plan subtask status so `mesh plan show` reflects pending_review
|
|
313
|
+
await updatePlanSubtaskStatus(task_id, 'pending_review');
|
|
314
|
+
// Do NOT advance plan wave — task is not yet "completed" for dependency purposes
|
|
315
|
+
} else {
|
|
316
|
+
task = await store.markCompleted(task_id, result || { success: true });
|
|
317
|
+
const elapsed = task.started_at
|
|
318
|
+
? ((new Date(task.completed_at) - new Date(task.started_at)) / 60000).toFixed(1)
|
|
319
|
+
: '?';
|
|
320
|
+
log(`COMPLETE ${task_id} in ${elapsed}m: ${result?.summary || 'no summary'}`);
|
|
321
|
+
publishEvent('completed', task);
|
|
322
|
+
}
|
|
205
323
|
|
|
206
|
-
//
|
|
207
|
-
|
|
324
|
+
// NOTE: no cleanupTaskCollabSession here — collab tasks complete via
|
|
325
|
+
// evaluateRound → markCompleted on the session, then store.markCompleted
|
|
326
|
+
// on the parent task. Calling cleanupTaskCollabSession would markAborted
|
|
327
|
+
// on an already-completed session. Clean up audit counter only.
|
|
328
|
+
if (task.collab_session_id && collabStore) {
|
|
329
|
+
collabStore.clearAuditErrorCount(task.collab_session_id);
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
// Only advance plan if actually completed (not pending_review)
|
|
333
|
+
if (task.status === TASK_STATUS.COMPLETED) {
|
|
334
|
+
await checkPlanProgress(task_id, 'completed');
|
|
335
|
+
}
|
|
208
336
|
|
|
209
337
|
respond(msg, task);
|
|
210
338
|
}
|
|
@@ -222,11 +350,54 @@ async function handleFail(msg) {
|
|
|
222
350
|
|
|
223
351
|
log(`FAIL ${task_id}: ${reason}`);
|
|
224
352
|
publishEvent('failed', task);
|
|
353
|
+
await cleanupTaskCollabSession(task, `Parent task ${task_id} failed: ${reason}`);
|
|
354
|
+
|
|
355
|
+
// Phase F: Escalation — if the task has a role with escalation mapping,
|
|
356
|
+
// create an escalation task before cascading failure through the plan.
|
|
357
|
+
let escalated = false;
|
|
358
|
+
if (task.role) {
|
|
359
|
+
const role = findRole(task.role, ROLE_DIRS);
|
|
360
|
+
if (role && role.escalation) {
|
|
361
|
+
// Determine failure type for escalation routing
|
|
362
|
+
let failureType = 'on_metric_failure';
|
|
363
|
+
if (reason && reason.includes('Budget exceeded')) failureType = 'on_budget_exceeded';
|
|
364
|
+
if (reason && reason.includes('scope')) failureType = 'on_scope_violation';
|
|
365
|
+
|
|
366
|
+
const escalationTarget = role.escalation[failureType];
|
|
367
|
+
if (escalationTarget) {
|
|
368
|
+
const escalationTask = createTask({
|
|
369
|
+
task_id: `ESC-${task_id}-${Date.now()}`,
|
|
370
|
+
title: `[Escalation] ${task.title}`,
|
|
371
|
+
description: [
|
|
372
|
+
`Escalated from ${task_id} (role: ${task.role}, failure: ${failureType}).`,
|
|
373
|
+
`Original reason: ${reason}`,
|
|
374
|
+
'',
|
|
375
|
+
`Original description: ${task.description}`,
|
|
376
|
+
].join('\n'),
|
|
377
|
+
budget_minutes: Math.ceil(task.budget_minutes * 1.5), // 50% more budget
|
|
378
|
+
metric: task.metric,
|
|
379
|
+
scope: task.scope,
|
|
380
|
+
success_criteria: task.success_criteria,
|
|
381
|
+
role: escalationTarget === 'human' ? null : escalationTarget,
|
|
382
|
+
requires_review: escalationTarget === 'human' ? true : null,
|
|
383
|
+
tags: [...(task.tags || []), 'escalation', `escalated_from:${task_id}`],
|
|
384
|
+
plan_id: task.plan_id,
|
|
385
|
+
subtask_id: task.subtask_id, // Wire back to original plan subtask for recovery
|
|
386
|
+
});
|
|
387
|
+
await store.put(escalationTask);
|
|
388
|
+
publishEvent('submitted', escalationTask);
|
|
389
|
+
log(`ESCALATED ${task_id} → ${escalationTask.task_id} (target role: ${escalationTarget})`);
|
|
390
|
+
escalated = true;
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
}
|
|
225
394
|
|
|
226
|
-
// Check if this task belongs to a plan
|
|
395
|
+
// Check if this task belongs to a plan (escalation doesn't block cascade —
|
|
396
|
+
// the escalation task is independent. If the plan has abort_on_critical_fail
|
|
397
|
+
// and this was critical, it still aborts. The escalation is a parallel attempt.)
|
|
227
398
|
await checkPlanProgress(task_id, 'failed');
|
|
228
399
|
|
|
229
|
-
respond(msg, task);
|
|
400
|
+
respond(msg, { ...task, escalated, escalation_task_id: escalated ? `ESC-${task_id}-${Date.now()}` : null });
|
|
230
401
|
}
|
|
231
402
|
|
|
232
403
|
/**
|
|
@@ -302,6 +473,7 @@ async function handleRelease(msg) {
|
|
|
302
473
|
|
|
303
474
|
log(`RELEASED ${task_id}: ${reason || 'no reason'} (needs human triage)`);
|
|
304
475
|
publishEvent('released', task);
|
|
476
|
+
await cleanupTaskCollabSession(task, `Parent task ${task_id} released: ${reason || 'human triage'}`);
|
|
305
477
|
respond(msg, task);
|
|
306
478
|
}
|
|
307
479
|
|
|
@@ -323,6 +495,45 @@ async function handleCancel(msg) {
|
|
|
323
495
|
|
|
324
496
|
log(`CANCEL ${task_id}: ${reason || 'no reason'}`);
|
|
325
497
|
publishEvent('cancelled', task);
|
|
498
|
+
await cleanupTaskCollabSession(task, `Parent task ${task_id} cancelled: ${reason || 'no reason'}`);
|
|
499
|
+
respond(msg, task);
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
// ── Task Review (Approval Gate) ─────────────────────
|
|
503
|
+
|
|
504
|
+
/**
|
|
505
|
+
* mesh.tasks.approve — Human approves a pending_review task.
|
|
506
|
+
* Transitions to completed and advances plan wave if applicable.
|
|
507
|
+
*/
|
|
508
|
+
async function handleTaskApprove(msg) {
|
|
509
|
+
const { task_id } = parseRequest(msg);
|
|
510
|
+
if (!task_id) return respondError(msg, 'task_id is required');
|
|
511
|
+
|
|
512
|
+
const task = await store.markApproved(task_id);
|
|
513
|
+
if (!task) return respondError(msg, `Task ${task_id} not found or not in pending_review status`);
|
|
514
|
+
|
|
515
|
+
log(`APPROVED ${task_id}: human review passed`);
|
|
516
|
+
publishEvent('completed', task);
|
|
517
|
+
|
|
518
|
+
// Now advance plan wave (this was blocked while in pending_review)
|
|
519
|
+
await checkPlanProgress(task_id, 'completed');
|
|
520
|
+
|
|
521
|
+
respond(msg, task);
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
/**
|
|
525
|
+
* mesh.tasks.reject — Human rejects a pending_review task.
|
|
526
|
+
* Re-queues the task with rejection reason injected for next attempt.
|
|
527
|
+
*/
|
|
528
|
+
async function handleTaskReject(msg) {
|
|
529
|
+
const { task_id, reason } = parseRequest(msg);
|
|
530
|
+
if (!task_id) return respondError(msg, 'task_id is required');
|
|
531
|
+
|
|
532
|
+
const task = await store.markRejected(task_id, reason || 'Rejected by reviewer');
|
|
533
|
+
if (!task) return respondError(msg, `Task ${task_id} not found or not in pending_review status`);
|
|
534
|
+
|
|
535
|
+
log(`REJECTED ${task_id}: ${reason || 'no reason'} — re-queued for retry`);
|
|
536
|
+
publishEvent('rejected', task);
|
|
326
537
|
respond(msg, task);
|
|
327
538
|
}
|
|
328
539
|
|
|
@@ -358,6 +569,35 @@ async function detectStalls() {
|
|
|
358
569
|
}
|
|
359
570
|
}
|
|
360
571
|
|
|
572
|
+
// Mark stalled node as dead in any collab sessions it belongs to.
|
|
573
|
+
// This unblocks isRoundComplete() which otherwise waits forever for
|
|
574
|
+
// a reflection from a crashed node.
|
|
575
|
+
// Uses findActiveSessionsByNode() — O(sessions) single pass instead of
|
|
576
|
+
// the previous O(sessions × nodes) list-then-find pattern.
|
|
577
|
+
if (task.owner && collabStore) {
|
|
578
|
+
try {
|
|
579
|
+
const sessions = await collabStore.findActiveSessionsByNode(task.owner);
|
|
580
|
+
for (const session of sessions) {
|
|
581
|
+
const node = session.nodes.find(n => n.node_id === task.owner);
|
|
582
|
+
if (node && node.status !== 'dead') {
|
|
583
|
+
await collabStore.setNodeStatus(session.session_id, task.owner, 'dead');
|
|
584
|
+
log(`STALL → COLLAB: marked ${task.owner} as dead in session ${session.session_id}`);
|
|
585
|
+
await collabStore.appendAudit(session.session_id, 'node_marked_dead', {
|
|
586
|
+
node_id: task.owner, reason: `Stall detected: no heartbeat for ${silentMin}m`,
|
|
587
|
+
});
|
|
588
|
+
|
|
589
|
+
// Re-check if the round is now complete (dead nodes excluded)
|
|
590
|
+
const updated = await collabStore.get(session.session_id);
|
|
591
|
+
if (updated && collabStore.isRoundComplete(updated)) {
|
|
592
|
+
await evaluateRound(session.session_id);
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
} catch (err) {
|
|
597
|
+
log(`STALL → COLLAB ERROR: ${err.message}`);
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
|
|
361
601
|
const releasedTask = await store.markReleased(
|
|
362
602
|
task.task_id,
|
|
363
603
|
`Stall detected: no agent heartbeat for ${silentMin}m, alive check failed`,
|
|
@@ -365,6 +605,9 @@ async function detectStalls() {
|
|
|
365
605
|
);
|
|
366
606
|
if (releasedTask) publishEvent('released', releasedTask);
|
|
367
607
|
|
|
608
|
+
// Update plan progress if this task belongs to a plan
|
|
609
|
+
await checkPlanProgress(task.task_id, 'failed');
|
|
610
|
+
|
|
368
611
|
// Notify the agent's node (fire-and-forget)
|
|
369
612
|
if (task.owner) {
|
|
370
613
|
nc.publish(`mesh.agent.${task.owner}.stall`, sc.encode(JSON.stringify({
|
|
@@ -376,6 +619,31 @@ async function detectStalls() {
|
|
|
376
619
|
}
|
|
377
620
|
}
|
|
378
621
|
|
|
622
|
+
/**
|
|
623
|
+
* Process proposed tasks — worker nodes write tasks with status "proposed"
|
|
624
|
+
* directly to KV. The lead daemon validates and transitions them.
|
|
625
|
+
*/
|
|
626
|
+
async function processProposals() {
|
|
627
|
+
const proposed = await store.list({ status: TASK_STATUS.PROPOSED });
|
|
628
|
+
for (const task of proposed) {
|
|
629
|
+
// Basic validation: must have title and origin
|
|
630
|
+
if (!task.title || !task.origin) {
|
|
631
|
+
task.status = TASK_STATUS.REJECTED;
|
|
632
|
+
task.result = { success: false, summary: 'Missing required fields (title, origin)' };
|
|
633
|
+
await store.put(task);
|
|
634
|
+
log(`REJECTED ${task.task_id}: missing required fields`);
|
|
635
|
+
publishEvent('rejected', task);
|
|
636
|
+
continue;
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
// Accept: transition to queued
|
|
640
|
+
task.status = TASK_STATUS.QUEUED;
|
|
641
|
+
await store.put(task);
|
|
642
|
+
log(`ACCEPTED proposal ${task.task_id} from ${task.origin}: "${task.title}"`);
|
|
643
|
+
publishEvent('submitted', task);
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
|
|
379
647
|
async function enforceBudgets() {
|
|
380
648
|
const overBudget = await store.findOverBudget();
|
|
381
649
|
|
|
@@ -391,6 +659,19 @@ async function enforceBudgets() {
|
|
|
391
659
|
);
|
|
392
660
|
if (failedTask) publishEvent('failed', failedTask);
|
|
393
661
|
|
|
662
|
+
// Clean up any collab session for this task
|
|
663
|
+
if (collabStore && task.collab_session_id) {
|
|
664
|
+
try {
|
|
665
|
+
await collabStore.markAborted(task.collab_session_id, `Budget exceeded for task ${task.task_id}`);
|
|
666
|
+
log(`BUDGET → COLLAB: aborted session ${task.collab_session_id}`);
|
|
667
|
+
} catch (err) {
|
|
668
|
+
log(`BUDGET → COLLAB ERROR: ${err.message}`);
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
// Update plan progress if this task belongs to a plan
|
|
673
|
+
await checkPlanProgress(task.task_id, 'failed');
|
|
674
|
+
|
|
394
675
|
// Publish notification so the agent knows
|
|
395
676
|
nc.publish(`mesh.agent.${task.owner}.budget_exceeded`, sc.encode(JSON.stringify({
|
|
396
677
|
task_id: task.task_id,
|
|
@@ -467,7 +748,23 @@ async function handleCollabJoin(msg) {
|
|
|
467
748
|
|
|
468
749
|
// Check if recruiting should close → start first round
|
|
469
750
|
if (collabStore.isRecruitingDone(session)) {
|
|
470
|
-
|
|
751
|
+
// Circling Strategy: assign worker_node_id before starting
|
|
752
|
+
if (session.mode === 'circling_strategy' && session.circling) {
|
|
753
|
+
const freshSession = await collabStore.get(session.session_id);
|
|
754
|
+
if (freshSession.circling && !freshSession.circling.worker_node_id) {
|
|
755
|
+
// Assign all role IDs at recruiting close — stable for the session lifetime.
|
|
756
|
+
const workerNode = freshSession.nodes.find(n => n.role === 'worker') || freshSession.nodes[0];
|
|
757
|
+
freshSession.circling.worker_node_id = workerNode.node_id;
|
|
758
|
+
const reviewers = freshSession.nodes.filter(n => n.node_id !== workerNode.node_id);
|
|
759
|
+
freshSession.circling.reviewerA_node_id = reviewers[0]?.node_id || null;
|
|
760
|
+
freshSession.circling.reviewerB_node_id = reviewers[1]?.node_id || null;
|
|
761
|
+
await collabStore.put(freshSession);
|
|
762
|
+
log(`CIRCLING: Roles assigned → Worker: ${workerNode.node_id}, RevA: ${reviewers[0]?.node_id}, RevB: ${reviewers[1]?.node_id}`);
|
|
763
|
+
}
|
|
764
|
+
await startCirclingStep(session.session_id);
|
|
765
|
+
} else {
|
|
766
|
+
await startCollabRound(session.session_id);
|
|
767
|
+
}
|
|
471
768
|
}
|
|
472
769
|
|
|
473
770
|
respond(msg, session);
|
|
@@ -491,6 +788,12 @@ async function handleCollabLeave(msg) {
|
|
|
491
788
|
if (session.status === COLLAB_STATUS.ACTIVE && session.nodes.length < session.min_nodes) {
|
|
492
789
|
await collabStore.markAborted(session_id, `Below min_nodes: ${session.nodes.length} < ${session.min_nodes}`);
|
|
493
790
|
publishCollabEvent('aborted', session);
|
|
791
|
+
} else if (session.status === COLLAB_STATUS.ACTIVE) {
|
|
792
|
+
// Re-check if the round is now complete (removed node excluded from quorum)
|
|
793
|
+
const updated = await collabStore.get(session_id);
|
|
794
|
+
if (updated && collabStore.isRoundComplete(updated)) {
|
|
795
|
+
await evaluateRound(session_id);
|
|
796
|
+
}
|
|
494
797
|
}
|
|
495
798
|
|
|
496
799
|
respond(msg, session);
|
|
@@ -526,6 +829,26 @@ async function handleCollabFind(msg) {
|
|
|
526
829
|
respond(msg, session);
|
|
527
830
|
}
|
|
528
831
|
|
|
832
|
+
/**
|
|
833
|
+
* mesh.collab.recruiting — List all sessions currently recruiting nodes.
|
|
834
|
+
* Used by agents to discover collab sessions they should join.
|
|
835
|
+
* Returns: array of { session_id, task_id, mode, min_nodes, max_nodes, current_nodes, recruiting_deadline }
|
|
836
|
+
*/
|
|
837
|
+
async function handleCollabRecruiting(msg) {
|
|
838
|
+
const recruiting = await collabStore.list({ status: COLLAB_STATUS.RECRUITING });
|
|
839
|
+
const summaries = recruiting.map(s => ({
|
|
840
|
+
session_id: s.session_id,
|
|
841
|
+
task_id: s.task_id,
|
|
842
|
+
mode: s.mode,
|
|
843
|
+
min_nodes: s.min_nodes,
|
|
844
|
+
max_nodes: s.max_nodes,
|
|
845
|
+
current_nodes: s.nodes.length,
|
|
846
|
+
node_ids: s.nodes.map(n => n.node_id || n.id),
|
|
847
|
+
recruiting_deadline: s.recruiting_deadline,
|
|
848
|
+
}));
|
|
849
|
+
respond(msg, summaries);
|
|
850
|
+
}
|
|
851
|
+
|
|
529
852
|
/**
|
|
530
853
|
* mesh.collab.reflect — Node submits a reflection for the current round.
|
|
531
854
|
* Expects: { session_id, node_id, summary, learnings, artifacts, confidence, vote }
|
|
@@ -546,8 +869,83 @@ async function handleCollabReflect(msg) {
|
|
|
546
869
|
});
|
|
547
870
|
publishCollabEvent('reflection_received', session);
|
|
548
871
|
|
|
549
|
-
//
|
|
550
|
-
if (
|
|
872
|
+
// Circling Strategy: handle two-step barrier, artifact storage, directed handoffs
|
|
873
|
+
if (session.mode === 'circling_strategy' && session.circling) {
|
|
874
|
+
// Store circling artifacts
|
|
875
|
+
if (reflection.circling_artifacts && reflection.circling_artifacts.length > 0) {
|
|
876
|
+
const { current_subround, current_step } = session.circling;
|
|
877
|
+
const isWorker = reflection.node_id === session.circling.worker_node_id;
|
|
878
|
+
// Use stored reviewer IDs for stable identity (falls back to array-index if not set)
|
|
879
|
+
let nodeRole;
|
|
880
|
+
if (isWorker) {
|
|
881
|
+
nodeRole = 'worker';
|
|
882
|
+
} else if (session.circling.reviewerA_node_id && session.circling.reviewerB_node_id) {
|
|
883
|
+
nodeRole = reflection.node_id === session.circling.reviewerA_node_id ? 'reviewerA' : 'reviewerB';
|
|
884
|
+
} else {
|
|
885
|
+
const reviewerNodes = session.nodes.filter(n => n.node_id !== session.circling.worker_node_id);
|
|
886
|
+
nodeRole = reviewerNodes[0]?.node_id === reflection.node_id ? 'reviewerA' : 'reviewerB';
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
for (const art of reflection.circling_artifacts) {
|
|
890
|
+
const key = `sr${current_subround}_step${current_step}_${nodeRole}_${art.type}`;
|
|
891
|
+
await collabStore.storeArtifact(session_id, key, art.content);
|
|
892
|
+
log(`CIRCLING ARTIFACT: ${key} stored (${(art.content || '').length} chars)`);
|
|
893
|
+
}
|
|
894
|
+
} else if (reflection.parse_failed) {
|
|
895
|
+
// Parse failure: record and check retry threshold.
|
|
896
|
+
// If a node consistently fails, the barrier still advances (the reflection counts)
|
|
897
|
+
// but downstream nodes get [UNAVAILABLE] placeholders. After 3 failures for the
|
|
898
|
+
// same node+step, log a critical warning — the only full recovery is the daemon's
|
|
899
|
+
// global stall timeout. See: mesh-collab.js recordArtifactFailure / getArtifactFailureCount
|
|
900
|
+
const failCount = await collabStore.recordArtifactFailure(session_id, reflection.node_id);
|
|
901
|
+
log(`CIRCLING PARSE FAILURE: ${reflection.node_id} in ${session_id} (attempt ${failCount})`);
|
|
902
|
+
await collabStore.appendAudit(session_id, 'artifact_parse_failed', {
|
|
903
|
+
node_id: reflection.node_id,
|
|
904
|
+
step: session.circling.current_step,
|
|
905
|
+
subround: session.circling.current_subround,
|
|
906
|
+
failure_count: failCount,
|
|
907
|
+
});
|
|
908
|
+
if (failCount >= 3) {
|
|
909
|
+
log(`CIRCLING CRITICAL: ${reflection.node_id} failed ${failCount}x at SR${session.circling.current_subround}/Step${session.circling.current_step} — no artifacts will be available for downstream nodes`);
|
|
910
|
+
}
|
|
911
|
+
} else {
|
|
912
|
+
// No artifacts but not a parse failure — unexpected
|
|
913
|
+
log(`CIRCLING WARNING: ${reflection.node_id} submitted reflection without artifacts in ${session_id}`);
|
|
914
|
+
}
|
|
915
|
+
|
|
916
|
+
// Check if current circling step is complete (all 3 nodes submitted)
|
|
917
|
+
const freshSession = await collabStore.get(session_id);
|
|
918
|
+
if (collabStore.isCirclingStepComplete(freshSession)) {
|
|
919
|
+
clearCirclingStepTimer(session_id);
|
|
920
|
+
const nextState = await collabStore.advanceCirclingStep(session_id);
|
|
921
|
+
if (!nextState) {
|
|
922
|
+
log(`CIRCLING ERROR: advanceCirclingStep returned null for ${session_id}`);
|
|
923
|
+
} else if (nextState.phase === 'complete') {
|
|
924
|
+
// Finalization done — complete the session
|
|
925
|
+
await completeCirclingSession(session_id);
|
|
926
|
+
} else if (nextState.needsGate) {
|
|
927
|
+
// Automation tier gate — wait for human approval
|
|
928
|
+
log(`CIRCLING GATE: ${session_id} SR${nextState.subround} — waiting for human approval (tier ${freshSession.circling.automation_tier})`);
|
|
929
|
+
publishCollabEvent('circling_gate', freshSession);
|
|
930
|
+
} else {
|
|
931
|
+
// Auto-advance to next step
|
|
932
|
+
await startCirclingStep(session_id);
|
|
933
|
+
}
|
|
934
|
+
}
|
|
935
|
+
// Sequential mode: advance turn, notify next node or evaluate round
|
|
936
|
+
// Parallel mode: check if all reflections are in → evaluate convergence
|
|
937
|
+
// NOTE: Node.js single-threaded event loop prevents concurrent execution of this
|
|
938
|
+
// handler — no mutex needed. advanceTurn() is safe without CAS here.
|
|
939
|
+
} else if (session.mode === 'sequential') {
|
|
940
|
+
const nextNodeId = await collabStore.advanceTurn(session_id);
|
|
941
|
+
if (nextNodeId) {
|
|
942
|
+
// Notify only the next-turn node with accumulated intra-round intel
|
|
943
|
+
await notifySequentialTurn(session_id, nextNodeId);
|
|
944
|
+
} else {
|
|
945
|
+
// All turns done → evaluate round
|
|
946
|
+
await evaluateRound(session_id);
|
|
947
|
+
}
|
|
948
|
+
} else if (collabStore.isRoundComplete(session)) {
|
|
551
949
|
await evaluateRound(session_id);
|
|
552
950
|
}
|
|
553
951
|
|
|
@@ -657,8 +1055,14 @@ async function startCollabRound(sessionId) {
|
|
|
657
1055
|
const scopeStrategy = session.scope_strategy || 'shared';
|
|
658
1056
|
const nodeScopes = computeNodeScopes(session.nodes, taskScope, scopeStrategy);
|
|
659
1057
|
|
|
660
|
-
//
|
|
661
|
-
|
|
1058
|
+
// Sequential mode: only notify the current_turn node.
|
|
1059
|
+
// Other nodes get notified via notifySequentialTurn() as turns advance.
|
|
1060
|
+
// Parallel mode: notify all nodes at once.
|
|
1061
|
+
const nodesToNotify = session.mode === 'sequential' && session.current_turn
|
|
1062
|
+
? session.nodes.filter(n => n.node_id === session.current_turn)
|
|
1063
|
+
: session.nodes;
|
|
1064
|
+
|
|
1065
|
+
for (const node of nodesToNotify) {
|
|
662
1066
|
const effectiveScope = nodeScopes[node.node_id] || node.scope;
|
|
663
1067
|
nc.publish(`mesh.collab.${sessionId}.node.${node.node_id}.round`, sc.encode(JSON.stringify({
|
|
664
1068
|
session_id: sessionId,
|
|
@@ -674,6 +1078,57 @@ async function startCollabRound(sessionId) {
|
|
|
674
1078
|
}
|
|
675
1079
|
}
|
|
676
1080
|
|
|
1081
|
+
/**
|
|
1082
|
+
* Notify the next node in a sequential turn.
|
|
1083
|
+
* Includes intra-round reflections so far as additional shared intel.
|
|
1084
|
+
*/
|
|
1085
|
+
async function notifySequentialTurn(sessionId, nextNodeId) {
|
|
1086
|
+
const session = await collabStore.get(sessionId);
|
|
1087
|
+
if (!session) return;
|
|
1088
|
+
|
|
1089
|
+
const currentRound = session.rounds[session.rounds.length - 1];
|
|
1090
|
+
if (!currentRound) return;
|
|
1091
|
+
|
|
1092
|
+
// Compile intra-round intel from reflections already submitted this round
|
|
1093
|
+
const intraLines = [`=== INTRA-ROUND ${currentRound.round_number} (turns so far) ===\n`];
|
|
1094
|
+
for (const r of currentRound.reflections) {
|
|
1095
|
+
intraLines.push(`## Turn: ${r.node_id}${r.parse_failed ? ' [PARSE FAILED]' : ''}`);
|
|
1096
|
+
if (r.summary) intraLines.push(`Summary: ${r.summary}`);
|
|
1097
|
+
if (r.learnings) intraLines.push(`Learnings: ${r.learnings}`);
|
|
1098
|
+
if (r.artifacts.length > 0) intraLines.push(`Artifacts: ${r.artifacts.join(', ')}`);
|
|
1099
|
+
intraLines.push(`Confidence: ${r.confidence} | Vote: ${r.vote}`);
|
|
1100
|
+
intraLines.push('');
|
|
1101
|
+
}
|
|
1102
|
+
const intraRoundIntel = intraLines.join('\n');
|
|
1103
|
+
const combinedIntel = currentRound.shared_intel
|
|
1104
|
+
? currentRound.shared_intel + '\n\n' + intraRoundIntel
|
|
1105
|
+
: intraRoundIntel;
|
|
1106
|
+
|
|
1107
|
+
const parentTask = await store.get(session.task_id);
|
|
1108
|
+
const taskScope = parentTask?.scope || [];
|
|
1109
|
+
const scopeStrategy = session.scope_strategy || 'shared';
|
|
1110
|
+
const nodeScopes = computeNodeScopes(session.nodes, taskScope, scopeStrategy);
|
|
1111
|
+
const nextNode = session.nodes.find(n => n.node_id === nextNodeId);
|
|
1112
|
+
|
|
1113
|
+
nc.publish(`mesh.collab.${sessionId}.node.${nextNodeId}.round`, sc.encode(JSON.stringify({
|
|
1114
|
+
session_id: sessionId,
|
|
1115
|
+
task_id: session.task_id,
|
|
1116
|
+
round_number: currentRound.round_number,
|
|
1117
|
+
shared_intel: combinedIntel,
|
|
1118
|
+
my_scope: nodeScopes[nextNodeId] || nextNode?.scope || ['*'],
|
|
1119
|
+
my_role: nextNode?.role || 'worker',
|
|
1120
|
+
mode: 'sequential',
|
|
1121
|
+
current_turn: nextNodeId,
|
|
1122
|
+
scope_strategy: scopeStrategy,
|
|
1123
|
+
})));
|
|
1124
|
+
|
|
1125
|
+
log(`COLLAB SEQ ${sessionId} R${currentRound.round_number}: Turn advanced to ${nextNodeId}`);
|
|
1126
|
+
await collabStore.appendAudit(sessionId, 'turn_advanced', {
|
|
1127
|
+
round: currentRound.round_number, next_node: nextNodeId,
|
|
1128
|
+
reflections_so_far: currentRound.reflections.length,
|
|
1129
|
+
});
|
|
1130
|
+
}
|
|
1131
|
+
|
|
677
1132
|
/**
|
|
678
1133
|
* Evaluate the current round: check convergence, advance or complete.
|
|
679
1134
|
*/
|
|
@@ -702,10 +1157,11 @@ async function evaluateRound(sessionId) {
|
|
|
702
1157
|
await collabStore.markConverged(sessionId);
|
|
703
1158
|
publishCollabEvent('converged', session);
|
|
704
1159
|
|
|
705
|
-
//
|
|
1160
|
+
// Re-fetch after markConverged to ensure fresh state
|
|
1161
|
+
const freshSession = await collabStore.get(sessionId);
|
|
706
1162
|
const allArtifacts = [];
|
|
707
1163
|
const contributions = {};
|
|
708
|
-
for (const round of
|
|
1164
|
+
for (const round of freshSession.rounds) {
|
|
709
1165
|
for (const r of round.reflections) {
|
|
710
1166
|
allArtifacts.push(...r.artifacts);
|
|
711
1167
|
contributions[r.node_id] = r.summary;
|
|
@@ -714,20 +1170,20 @@ async function evaluateRound(sessionId) {
|
|
|
714
1170
|
|
|
715
1171
|
await collabStore.markCompleted(sessionId, {
|
|
716
1172
|
artifacts: [...new Set(allArtifacts)],
|
|
717
|
-
summary: `Converged after ${
|
|
1173
|
+
summary: `Converged after ${freshSession.current_round} rounds with ${freshSession.nodes.length} nodes`,
|
|
718
1174
|
node_contributions: contributions,
|
|
719
1175
|
});
|
|
720
1176
|
await collabStore.appendAudit(sessionId, 'session_completed', {
|
|
721
|
-
outcome: 'converged', rounds:
|
|
1177
|
+
outcome: 'converged', rounds: freshSession.current_round,
|
|
722
1178
|
artifacts: [...new Set(allArtifacts)].length,
|
|
723
|
-
node_count:
|
|
1179
|
+
node_count: freshSession.nodes.length, recruited_count: freshSession.recruited_count,
|
|
724
1180
|
});
|
|
725
1181
|
|
|
726
1182
|
// Complete the parent task
|
|
727
|
-
const
|
|
728
|
-
await store.markCompleted(
|
|
729
|
-
publishEvent('completed', await store.get(
|
|
730
|
-
publishCollabEvent('completed',
|
|
1183
|
+
const completedSession = await collabStore.get(sessionId);
|
|
1184
|
+
await store.markCompleted(freshSession.task_id, completedSession.result);
|
|
1185
|
+
publishEvent('completed', await store.get(freshSession.task_id));
|
|
1186
|
+
publishCollabEvent('completed', completedSession);
|
|
731
1187
|
|
|
732
1188
|
} else if (maxReached) {
|
|
733
1189
|
log(`COLLAB MAX ROUNDS ${sessionId}: ${session.current_round}/${session.max_rounds}. Completing with current artifacts.`);
|
|
@@ -768,6 +1224,275 @@ async function evaluateRound(sessionId) {
|
|
|
768
1224
|
}
|
|
769
1225
|
}
|
|
770
1226
|
|
|
1227
|
+
// ── Circling Strategy Functions ──────────────────────
|
|
1228
|
+
|
|
1229
|
+
/**
|
|
1230
|
+
* Start a circling step: compile directed inputs and notify each node.
|
|
1231
|
+
* Called after advanceCirclingStep transitions the state machine.
|
|
1232
|
+
* Also creates a new round in the session (for reflection storage).
|
|
1233
|
+
*/
|
|
1234
|
+
async function startCirclingStep(sessionId) {
|
|
1235
|
+
const session = await collabStore.get(sessionId);
|
|
1236
|
+
if (!session || !session.circling) return;
|
|
1237
|
+
|
|
1238
|
+
const { phase, current_subround, current_step } = session.circling;
|
|
1239
|
+
|
|
1240
|
+
// Record step start time for timeout rehydration after daemon restart
|
|
1241
|
+
session.circling.step_started_at = new Date().toISOString();
|
|
1242
|
+
await collabStore.put(session);
|
|
1243
|
+
|
|
1244
|
+
// Start a new round in the session for reflection storage
|
|
1245
|
+
// (each step gets its own round to keep reflections organized)
|
|
1246
|
+
const round = await collabStore.startRound(sessionId);
|
|
1247
|
+
if (!round) {
|
|
1248
|
+
log(`CIRCLING ERROR: startRound failed for ${sessionId} (aborted?)`);
|
|
1249
|
+
return;
|
|
1250
|
+
}
|
|
1251
|
+
|
|
1252
|
+
const freshSession = await collabStore.get(sessionId);
|
|
1253
|
+
const parentTask = await store.get(freshSession.task_id);
|
|
1254
|
+
const taskDescription = parentTask?.description || '';
|
|
1255
|
+
|
|
1256
|
+
const stepLabel = phase === 'init' ? 'Init'
|
|
1257
|
+
: phase === 'finalization' ? 'Finalization'
|
|
1258
|
+
: `SR${current_subround} Step${current_step}`;
|
|
1259
|
+
log(`CIRCLING ${sessionId} ${stepLabel} START (${freshSession.nodes.length} nodes)`);
|
|
1260
|
+
|
|
1261
|
+
await collabStore.appendAudit(sessionId, 'circling_step_started', {
|
|
1262
|
+
phase, subround: current_subround, step: current_step,
|
|
1263
|
+
nodes: freshSession.nodes.map(n => n.node_id),
|
|
1264
|
+
});
|
|
1265
|
+
publishCollabEvent('circling_step_started', freshSession);
|
|
1266
|
+
|
|
1267
|
+
// Notify each node with their directed input
|
|
1268
|
+
for (const node of freshSession.nodes) {
|
|
1269
|
+
const directedInput = collabStore.compileDirectedInput(freshSession, node.node_id, taskDescription);
|
|
1270
|
+
|
|
1271
|
+
nc.publish(`mesh.collab.${sessionId}.node.${node.node_id}.round`, sc.encode(JSON.stringify({
|
|
1272
|
+
session_id: sessionId,
|
|
1273
|
+
task_id: freshSession.task_id,
|
|
1274
|
+
round_number: freshSession.current_round,
|
|
1275
|
+
directed_input: directedInput,
|
|
1276
|
+
shared_intel: '', // empty for circling — uses directed_input instead
|
|
1277
|
+
my_scope: node.scope,
|
|
1278
|
+
my_role: node.role,
|
|
1279
|
+
mode: 'circling_strategy',
|
|
1280
|
+
circling_phase: phase,
|
|
1281
|
+
circling_step: current_step,
|
|
1282
|
+
circling_subround: current_subround,
|
|
1283
|
+
})));
|
|
1284
|
+
}
|
|
1285
|
+
|
|
1286
|
+
// Set step-level timeout. If the barrier isn't met within CIRCLING_STEP_TIMEOUT_MS,
|
|
1287
|
+
// mark unresponsive nodes as dead and force-advance with degraded input.
|
|
1288
|
+
clearCirclingStepTimer(sessionId);
|
|
1289
|
+
const stepSnapshot = { phase, subround: current_subround, step: current_step };
|
|
1290
|
+
const timer = setTimeout(() => handleCirclingStepTimeout(sessionId, stepSnapshot), CIRCLING_STEP_TIMEOUT_MS);
|
|
1291
|
+
circlingStepTimers.set(sessionId, timer);
|
|
1292
|
+
}
|
|
1293
|
+
|
|
1294
|
+
/**
|
|
1295
|
+
* Handle a circling step timeout. If the step hasn't advanced since the timer was set,
|
|
1296
|
+
* mark nodes that haven't submitted as dead and force-advance.
|
|
1297
|
+
*/
|
|
1298
|
+
async function handleCirclingStepTimeout(sessionId, stepSnapshot) {
|
|
1299
|
+
circlingStepTimers.delete(sessionId);
|
|
1300
|
+
|
|
1301
|
+
const session = await collabStore.get(sessionId);
|
|
1302
|
+
if (!session || !session.circling) return;
|
|
1303
|
+
|
|
1304
|
+
const { phase, current_subround, current_step } = session.circling;
|
|
1305
|
+
|
|
1306
|
+
// Check if the step already advanced (timer is stale)
|
|
1307
|
+
if (phase !== stepSnapshot.phase ||
|
|
1308
|
+
current_subround !== stepSnapshot.subround ||
|
|
1309
|
+
current_step !== stepSnapshot.step) {
|
|
1310
|
+
return; // Step already moved on — nothing to do
|
|
1311
|
+
}
|
|
1312
|
+
|
|
1313
|
+
log(`CIRCLING STEP TIMEOUT: ${sessionId} ${phase}/SR${current_subround}/Step${current_step} — forcing advance`);
|
|
1314
|
+
|
|
1315
|
+
const currentRound = session.rounds[session.rounds.length - 1];
|
|
1316
|
+
if (!currentRound) return;
|
|
1317
|
+
|
|
1318
|
+
const submittedNodeIds = new Set(
|
|
1319
|
+
currentRound.reflections
|
|
1320
|
+
.filter(r => r.circling_step === current_step)
|
|
1321
|
+
.map(r => r.node_id)
|
|
1322
|
+
);
|
|
1323
|
+
|
|
1324
|
+
// Mark nodes that haven't submitted as dead
|
|
1325
|
+
for (const node of session.nodes) {
|
|
1326
|
+
if (node.status !== 'dead' && !submittedNodeIds.has(node.node_id)) {
|
|
1327
|
+
await collabStore.setNodeStatus(sessionId, node.node_id, 'dead');
|
|
1328
|
+
log(`CIRCLING STEP TIMEOUT: marked ${node.node_id} as dead (no submission within ${CIRCLING_STEP_TIMEOUT_MS / 60000}m)`);
|
|
1329
|
+
await collabStore.appendAudit(sessionId, 'node_marked_dead', {
|
|
1330
|
+
node_id: node.node_id,
|
|
1331
|
+
reason: `Circling step timeout: no reflection for ${phase}/SR${current_subround}/Step${current_step}`,
|
|
1332
|
+
});
|
|
1333
|
+
}
|
|
1334
|
+
}
|
|
1335
|
+
|
|
1336
|
+
// Re-check barrier with dead nodes excluded
|
|
1337
|
+
const freshSession = await collabStore.get(sessionId);
|
|
1338
|
+
if (collabStore.isCirclingStepComplete(freshSession)) {
|
|
1339
|
+
const nextState = await collabStore.advanceCirclingStep(sessionId);
|
|
1340
|
+
if (!nextState) {
|
|
1341
|
+
log(`CIRCLING STEP TIMEOUT ERROR: advanceCirclingStep returned null for ${sessionId}`);
|
|
1342
|
+
} else if (nextState.phase === 'complete') {
|
|
1343
|
+
await completeCirclingSession(sessionId);
|
|
1344
|
+
} else if (nextState.needsGate) {
|
|
1345
|
+
log(`CIRCLING GATE: ${sessionId} SR${nextState.subround} — waiting for human approval (timeout-forced)`);
|
|
1346
|
+
publishCollabEvent('circling_gate', freshSession);
|
|
1347
|
+
} else {
|
|
1348
|
+
await startCirclingStep(sessionId);
|
|
1349
|
+
}
|
|
1350
|
+
} else {
|
|
1351
|
+
// Still not enough submissions even after marking dead nodes.
|
|
1352
|
+
// All active nodes are dead — abort the session.
|
|
1353
|
+
log(`CIRCLING STEP TIMEOUT: ${sessionId} — no active nodes remain. Aborting.`);
|
|
1354
|
+
await collabStore.markAborted(sessionId, `All nodes timed out at ${phase}/SR${current_subround}/Step${current_step}`);
|
|
1355
|
+
publishCollabEvent('aborted', await collabStore.get(sessionId));
|
|
1356
|
+
await store.markReleased(session.task_id, `Circling session aborted: all nodes timed out`);
|
|
1357
|
+
}
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
function clearCirclingStepTimer(sessionId) {
|
|
1361
|
+
const existing = circlingStepTimers.get(sessionId);
|
|
1362
|
+
if (existing) {
|
|
1363
|
+
clearTimeout(existing);
|
|
1364
|
+
circlingStepTimers.delete(sessionId);
|
|
1365
|
+
}
|
|
1366
|
+
}
|
|
1367
|
+
|
|
1368
|
+
/**
|
|
1369
|
+
* Complete a circling session after finalization.
|
|
1370
|
+
* Checks finalization votes: Worker converged + both Reviewers converged → COMPLETE.
|
|
1371
|
+
* Any blocked vote → escalation gate (all tiers gate on finalization).
|
|
1372
|
+
*/
|
|
1373
|
+
async function completeCirclingSession(sessionId) {
|
|
1374
|
+
clearCirclingStepTimer(sessionId);
|
|
1375
|
+
const session = await collabStore.get(sessionId);
|
|
1376
|
+
if (!session || !session.circling) return;
|
|
1377
|
+
|
|
1378
|
+
const lastRound = session.rounds[session.rounds.length - 1];
|
|
1379
|
+
if (!lastRound) return;
|
|
1380
|
+
|
|
1381
|
+
// Check finalization votes
|
|
1382
|
+
const blockedVotes = lastRound.reflections.filter(r => r.vote === 'blocked');
|
|
1383
|
+
|
|
1384
|
+
if (blockedVotes.length > 0) {
|
|
1385
|
+
// Escalation: reviewer flagged critical concern
|
|
1386
|
+
log(`CIRCLING ESCALATION ${sessionId}: ${blockedVotes.length} blocked vote(s) in finalization`);
|
|
1387
|
+
await collabStore.appendAudit(sessionId, 'circling_escalation', {
|
|
1388
|
+
blocked_nodes: blockedVotes.map(r => r.node_id),
|
|
1389
|
+
summaries: blockedVotes.map(r => r.summary),
|
|
1390
|
+
});
|
|
1391
|
+
// Gate on finalization (all tiers)
|
|
1392
|
+
publishCollabEvent('circling_gate', session);
|
|
1393
|
+
return;
|
|
1394
|
+
}
|
|
1395
|
+
|
|
1396
|
+
// All converged → complete
|
|
1397
|
+
const finalArtifact = collabStore.getLatestArtifact(session, 'worker', 'workArtifact');
|
|
1398
|
+
const completionDiff = collabStore.getLatestArtifact(session, 'worker', 'completionDiff');
|
|
1399
|
+
|
|
1400
|
+
log(`CIRCLING COMPLETED ${sessionId}: ${session.circling.current_subround} sub-rounds`);
|
|
1401
|
+
await collabStore.markConverged(sessionId);
|
|
1402
|
+
|
|
1403
|
+
await collabStore.markCompleted(sessionId, {
|
|
1404
|
+
artifacts: finalArtifact ? ['workArtifact'] : [],
|
|
1405
|
+
summary: `Circling Strategy completed: ${session.circling.current_subround} sub-rounds, ${session.nodes.length} nodes. ${completionDiff ? 'CompletionDiff available.' : ''}`,
|
|
1406
|
+
node_contributions: Object.fromEntries(
|
|
1407
|
+
lastRound.reflections.map(r => [r.node_id, r.summary])
|
|
1408
|
+
),
|
|
1409
|
+
circling_final_artifact: finalArtifact,
|
|
1410
|
+
circling_completion_diff: completionDiff,
|
|
1411
|
+
});
|
|
1412
|
+
await collabStore.appendAudit(sessionId, 'session_completed', {
|
|
1413
|
+
outcome: 'circling_finalized',
|
|
1414
|
+
subrounds: session.circling.current_subround,
|
|
1415
|
+
node_count: session.nodes.length,
|
|
1416
|
+
});
|
|
1417
|
+
|
|
1418
|
+
// Complete parent task
|
|
1419
|
+
const completedSession = await collabStore.get(sessionId);
|
|
1420
|
+
await store.markCompleted(session.task_id, completedSession.result);
|
|
1421
|
+
publishEvent('completed', await store.get(session.task_id));
|
|
1422
|
+
publishCollabEvent('completed', completedSession);
|
|
1423
|
+
}
|
|
1424
|
+
|
|
1425
|
+
/**
|
|
1426
|
+
* mesh.collab.gate.approve — Human approves a circling tier gate.
|
|
1427
|
+
* Resumes the circling protocol after a gate point.
|
|
1428
|
+
*/
|
|
1429
|
+
async function handleCirclingGateApprove(msg) {
|
|
1430
|
+
const { session_id } = parseRequest(msg);
|
|
1431
|
+
if (!session_id) return respondError(msg, 'session_id required');
|
|
1432
|
+
|
|
1433
|
+
const session = await collabStore.get(session_id);
|
|
1434
|
+
if (!session || !session.circling) return respondError(msg, 'Not a circling session');
|
|
1435
|
+
|
|
1436
|
+
log(`CIRCLING GATE APPROVED: ${session_id} — resuming`);
|
|
1437
|
+
await collabStore.appendAudit(session_id, 'gate_approved', {
|
|
1438
|
+
phase: session.circling.phase,
|
|
1439
|
+
subround: session.circling.current_subround,
|
|
1440
|
+
});
|
|
1441
|
+
|
|
1442
|
+
// If finalization phase with blocked votes, the gate approve means "accept anyway"
|
|
1443
|
+
if (session.circling.phase === 'complete' || session.circling.phase === 'finalization') {
|
|
1444
|
+
// Force complete
|
|
1445
|
+
const lastRound = session.rounds[session.rounds.length - 1];
|
|
1446
|
+
const finalArtifact = collabStore.getLatestArtifact(session, 'worker', 'workArtifact');
|
|
1447
|
+
await collabStore.markConverged(session_id);
|
|
1448
|
+
await collabStore.markCompleted(session_id, {
|
|
1449
|
+
artifacts: finalArtifact ? ['workArtifact'] : [],
|
|
1450
|
+
summary: `Circling completed via gate approval after ${session.circling.current_subround} sub-rounds`,
|
|
1451
|
+
node_contributions: Object.fromEntries(
|
|
1452
|
+
(lastRound?.reflections || []).map(r => [r.node_id, r.summary])
|
|
1453
|
+
),
|
|
1454
|
+
circling_final_artifact: finalArtifact,
|
|
1455
|
+
});
|
|
1456
|
+
const completedSession = await collabStore.get(session_id);
|
|
1457
|
+
await store.markCompleted(session.task_id, completedSession.result);
|
|
1458
|
+
publishEvent('completed', await store.get(session.task_id));
|
|
1459
|
+
publishCollabEvent('completed', completedSession);
|
|
1460
|
+
} else {
|
|
1461
|
+
// Mid-protocol gate (tier 3) — resume next step
|
|
1462
|
+
await startCirclingStep(session_id);
|
|
1463
|
+
}
|
|
1464
|
+
|
|
1465
|
+
respond(msg, { approved: true });
|
|
1466
|
+
}
|
|
1467
|
+
|
|
1468
|
+
/**
|
|
1469
|
+
* mesh.collab.gate.reject — Human rejects a circling tier gate.
|
|
1470
|
+
* Forces another sub-round.
|
|
1471
|
+
*/
|
|
1472
|
+
async function handleCirclingGateReject(msg) {
|
|
1473
|
+
const { session_id } = parseRequest(msg);
|
|
1474
|
+
if (!session_id) return respondError(msg, 'session_id required');
|
|
1475
|
+
|
|
1476
|
+
const session = await collabStore.get(session_id);
|
|
1477
|
+
if (!session || !session.circling) return respondError(msg, 'Not a circling session');
|
|
1478
|
+
|
|
1479
|
+
log(`CIRCLING GATE REJECTED: ${session_id} — forcing another sub-round`);
|
|
1480
|
+
await collabStore.appendAudit(session_id, 'gate_rejected', {
|
|
1481
|
+
phase: session.circling.phase,
|
|
1482
|
+
subround: session.circling.current_subround,
|
|
1483
|
+
});
|
|
1484
|
+
|
|
1485
|
+
// Reset to circling phase, increment subround, step 1
|
|
1486
|
+
session.circling.phase = 'circling';
|
|
1487
|
+
session.circling.max_subrounds++; // allow one more
|
|
1488
|
+
session.circling.current_step = 1;
|
|
1489
|
+
session.circling.current_subround++;
|
|
1490
|
+
await collabStore.put(session);
|
|
1491
|
+
|
|
1492
|
+
await startCirclingStep(session_id);
|
|
1493
|
+
respond(msg, { rejected: true, new_subround: session.circling.current_subround });
|
|
1494
|
+
}
|
|
1495
|
+
|
|
771
1496
|
// ── Collab Recruiting Timer ─────────────────────────
|
|
772
1497
|
|
|
773
1498
|
/**
|
|
@@ -781,7 +1506,31 @@ async function checkRecruitingDeadlines() {
|
|
|
781
1506
|
|
|
782
1507
|
if (session.nodes.length >= session.min_nodes) {
|
|
783
1508
|
log(`COLLAB RECRUIT DONE ${session.session_id}: ${session.nodes.length} nodes joined. Starting round 1.`);
|
|
784
|
-
|
|
1509
|
+
if (session.mode === 'circling_strategy' && session.circling) {
|
|
1510
|
+
// Circling requires exactly 3 nodes (1 worker + 2 reviewers).
|
|
1511
|
+
// Even if min_nodes was misconfigured, refuse to start with <3.
|
|
1512
|
+
const hasWorker = session.nodes.some(n => n.role === 'worker');
|
|
1513
|
+
const reviewerCount = session.nodes.filter(n => n.role === 'reviewer').length;
|
|
1514
|
+
if (session.nodes.length < 3 || !hasWorker || reviewerCount < 2) {
|
|
1515
|
+
log(`COLLAB RECRUIT FAILED ${session.session_id}: circling requires 1 worker + 2 reviewers, got ${session.nodes.length} nodes (worker: ${hasWorker}, reviewers: ${reviewerCount}). Aborting.`);
|
|
1516
|
+
await collabStore.markAborted(session.session_id, `Circling requires 1 worker + 2 reviewers; got ${session.nodes.length} nodes`);
|
|
1517
|
+
publishCollabEvent('aborted', await collabStore.get(session.session_id));
|
|
1518
|
+
await store.markReleased(session.task_id, `Circling session failed: insufficient role distribution`);
|
|
1519
|
+
continue;
|
|
1520
|
+
}
|
|
1521
|
+
// Assign all role IDs if not yet assigned
|
|
1522
|
+
if (!session.circling.worker_node_id) {
|
|
1523
|
+
const workerNode = session.nodes.find(n => n.role === 'worker') || session.nodes[0];
|
|
1524
|
+
session.circling.worker_node_id = workerNode.node_id;
|
|
1525
|
+
const reviewers = session.nodes.filter(n => n.node_id !== workerNode.node_id);
|
|
1526
|
+
session.circling.reviewerA_node_id = reviewers[0]?.node_id || null;
|
|
1527
|
+
session.circling.reviewerB_node_id = reviewers[1]?.node_id || null;
|
|
1528
|
+
await collabStore.put(session);
|
|
1529
|
+
}
|
|
1530
|
+
await startCirclingStep(session.session_id);
|
|
1531
|
+
} else {
|
|
1532
|
+
await startCollabRound(session.session_id);
|
|
1533
|
+
}
|
|
785
1534
|
} else {
|
|
786
1535
|
log(`COLLAB RECRUIT FAILED ${session.session_id}: only ${session.nodes.length}/${session.min_nodes} nodes. Aborting.`);
|
|
787
1536
|
await collabStore.markAborted(session.session_id, `Not enough nodes: ${session.nodes.length} < ${session.min_nodes}`);
|
|
@@ -792,6 +1541,46 @@ async function checkRecruitingDeadlines() {
|
|
|
792
1541
|
}
|
|
793
1542
|
}
|
|
794
1543
|
|
|
1544
|
+
// ── Circling Step Timeout Sweep ──────────────────────
|
|
1545
|
+
|
|
1546
|
+
/**
|
|
1547
|
+
* Periodic sweep for stale circling steps. Handles timer rehydration after
|
|
1548
|
+
* daemon restart — in-memory timers are lost on crash, but step_started_at
|
|
1549
|
+
* in the session survives in JetStream KV.
|
|
1550
|
+
*
|
|
1551
|
+
* Runs every 60s. For each active circling session, checks if the current
|
|
1552
|
+
* step has been running longer than CIRCLING_STEP_TIMEOUT_MS. If so, fires
|
|
1553
|
+
* the timeout handler (which marks dead nodes and force-advances).
|
|
1554
|
+
*
|
|
1555
|
+
* Also serves as a safety net for timer drift or missed clearTimeout calls.
|
|
1556
|
+
*/
|
|
1557
|
+
async function sweepCirclingStepTimeouts() {
|
|
1558
|
+
try {
|
|
1559
|
+
const active = await collabStore.list({ status: COLLAB_STATUS.ACTIVE });
|
|
1560
|
+
for (const session of active) {
|
|
1561
|
+
if (session.mode !== 'circling_strategy' || !session.circling) continue;
|
|
1562
|
+
if (session.circling.phase === 'complete') continue;
|
|
1563
|
+
if (!session.circling.step_started_at) continue;
|
|
1564
|
+
|
|
1565
|
+
// Skip if an in-memory timer is already tracking this session
|
|
1566
|
+
if (circlingStepTimers.has(session.session_id)) continue;
|
|
1567
|
+
|
|
1568
|
+
const elapsed = Date.now() - new Date(session.circling.step_started_at).getTime();
|
|
1569
|
+
if (elapsed > CIRCLING_STEP_TIMEOUT_MS) {
|
|
1570
|
+
log(`CIRCLING SWEEP: ${session.session_id} step stale (${(elapsed / 60000).toFixed(1)}m elapsed). Firing timeout handler.`);
|
|
1571
|
+
const stepSnapshot = {
|
|
1572
|
+
phase: session.circling.phase,
|
|
1573
|
+
subround: session.circling.current_subround,
|
|
1574
|
+
step: session.circling.current_step,
|
|
1575
|
+
};
|
|
1576
|
+
await handleCirclingStepTimeout(session.session_id, stepSnapshot);
|
|
1577
|
+
}
|
|
1578
|
+
}
|
|
1579
|
+
} catch (err) {
|
|
1580
|
+
log(`CIRCLING SWEEP ERROR: ${err.message}`);
|
|
1581
|
+
}
|
|
1582
|
+
}
|
|
1583
|
+
|
|
795
1584
|
// ── Plan Event Publishing ───────────────────────────
|
|
796
1585
|
|
|
797
1586
|
function publishPlanEvent(eventType, plan) {
|
|
@@ -955,6 +1744,19 @@ async function advancePlanWave(planId) {
|
|
|
955
1744
|
const waveNum = ready[0].wave;
|
|
956
1745
|
log(`PLAN WAVE ${planId} W${waveNum}: dispatching ${ready.length} subtasks`);
|
|
957
1746
|
|
|
1747
|
+
// Inherit routing fields from parent task so subtasks use the same LLM/node preferences.
|
|
1748
|
+
// CONSTRAINT: Subtasks cannot override routing independently — they always inherit from the
|
|
1749
|
+
// parent task. If per-subtask routing is needed, extend the subtask schema in mesh-plans.js
|
|
1750
|
+
// (e.g. subtask.llm_provider) and merge here with subtask fields taking priority.
|
|
1751
|
+
const parentTask = await store.get(plan.parent_task_id);
|
|
1752
|
+
const inheritedRouting = {};
|
|
1753
|
+
if (parentTask) {
|
|
1754
|
+
if (parentTask.llm_provider) inheritedRouting.llm_provider = parentTask.llm_provider;
|
|
1755
|
+
if (parentTask.llm_model) inheritedRouting.llm_model = parentTask.llm_model;
|
|
1756
|
+
if (parentTask.preferred_nodes) inheritedRouting.preferred_nodes = parentTask.preferred_nodes;
|
|
1757
|
+
if (parentTask.exclude_nodes) inheritedRouting.exclude_nodes = parentTask.exclude_nodes;
|
|
1758
|
+
}
|
|
1759
|
+
|
|
958
1760
|
for (const st of ready) {
|
|
959
1761
|
st.status = SUBTASK_STATUS.QUEUED;
|
|
960
1762
|
|
|
@@ -962,7 +1764,12 @@ async function advancePlanWave(planId) {
|
|
|
962
1764
|
switch (st.delegation.mode) {
|
|
963
1765
|
case 'solo_mesh':
|
|
964
1766
|
case 'collab_mesh': {
|
|
965
|
-
// Submit as mesh task
|
|
1767
|
+
// Submit as mesh task — inherit routing fields from parent task
|
|
1768
|
+
// Auto-assign role from scope if subtask doesn't specify one
|
|
1769
|
+
const subtaskRole = st.role || (st.scope && st.scope.length > 0
|
|
1770
|
+
? (findRoleByScope(st.scope, ROLE_DIRS)?.id || null)
|
|
1771
|
+
: null);
|
|
1772
|
+
|
|
966
1773
|
const meshTask = createTask({
|
|
967
1774
|
task_id: st.subtask_id,
|
|
968
1775
|
title: st.title,
|
|
@@ -973,7 +1780,12 @@ async function advancePlanWave(planId) {
|
|
|
973
1780
|
success_criteria: st.success_criteria,
|
|
974
1781
|
tags: ['plan', planId],
|
|
975
1782
|
collaboration: st.delegation.collaboration || undefined,
|
|
1783
|
+
plan_id: planId,
|
|
1784
|
+
subtask_id: st.subtask_id,
|
|
1785
|
+
role: subtaskRole,
|
|
1786
|
+
...inheritedRouting,
|
|
976
1787
|
});
|
|
1788
|
+
if (subtaskRole) log(` → AUTO-ROLE ${st.subtask_id}: ${subtaskRole} (matched from scope)`);
|
|
977
1789
|
await store.put(meshTask);
|
|
978
1790
|
st.mesh_task_id = meshTask.task_id;
|
|
979
1791
|
publishEvent('submitted', meshTask);
|
|
@@ -1027,33 +1839,175 @@ async function advancePlanWave(planId) {
|
|
|
1027
1839
|
publishPlanEvent('wave_started', plan);
|
|
1028
1840
|
}
|
|
1029
1841
|
|
|
1842
|
+
/**
|
|
1843
|
+
* Update a plan subtask's status without triggering wave advancement.
|
|
1844
|
+
* Used for intermediate states like pending_review.
|
|
1845
|
+
*/
|
|
1846
|
+
async function updatePlanSubtaskStatus(taskId, newStatus) {
|
|
1847
|
+
const task = await store.get(taskId);
|
|
1848
|
+
if (!task || !task.plan_id) return;
|
|
1849
|
+
const plan = await planStore.get(task.plan_id);
|
|
1850
|
+
if (!plan) return;
|
|
1851
|
+
const st = plan.subtasks.find(s => s.mesh_task_id === taskId || s.subtask_id === taskId);
|
|
1852
|
+
if (!st) return;
|
|
1853
|
+
st.status = newStatus;
|
|
1854
|
+
await planStore.put(plan);
|
|
1855
|
+
log(`PLAN SUBTASK ${st.subtask_id} → ${newStatus} (no wave advance)`);
|
|
1856
|
+
}
|
|
1857
|
+
|
|
1030
1858
|
// ── Plan Progress on Task Completion ────────────────
|
|
1031
1859
|
|
|
1032
1860
|
/**
 * When a mesh task completes, check if it belongs to a plan and update accordingly.
 * Called after handleComplete/handleFail and from detectStalls/enforceBudgets.
 *
 * Any `status` other than the literal string 'completed' is treated as a failure
 * (see the ternary below). On failure the plan's failure_policy decides whether
 * the whole plan aborts or independent branches continue.
 *
 * @param {string} taskId - Mesh task id (or, for legacy records, a subtask id).
 * @param {string} status - Terminal task status; 'completed' → COMPLETED, anything else → FAILED.
 */
async function checkPlanProgress(taskId, status) {
  let plan = null;
  let st = null;

  // Fast path: O(1) lookup via plan_id back-reference on the task
  const task = await store.get(taskId);
  if (task && task.plan_id) {
    plan = await planStore.get(task.plan_id);
    if (plan) {
      // Match by mesh_task_id, subtask_id, OR the task's subtask_id field
      // (escalation tasks carry the original subtask_id for plan recovery)
      st = plan.subtasks.find(s =>
        s.mesh_task_id === taskId ||
        s.subtask_id === taskId ||
        (task.subtask_id && s.subtask_id === task.subtask_id)
      );
    }
  }

  // LEGACY: Remove after 2026-06-01. O(n*m) fallback for tasks created before
  // plan_id back-reference was added. Track invocations to know when safe to delete.
  if (!st) {
    const allPlans = await planStore.list({ status: PLAN_STATUS.EXECUTING });
    for (const p of allPlans) {
      const found = p.subtasks.find(s => s.mesh_task_id === taskId || s.subtask_id === taskId);
      if (found) {
        plan = p;
        st = found;
        break;
      }
    }
  }

  // Not a plan task (or plan already gone) — nothing to do.
  if (!plan || !st) return;

  // Escalation recovery: if a subtask was FAILED/BLOCKED but an escalation task
  // completes successfully for it, override status to COMPLETED and unblock dependents.
  // NOTE: `task` may be null here when only the legacy fallback matched; the
  // `task && task.tags` guard below makes recovery require the fast-path record.
  const isEscalationRecovery = (
    status === 'completed' &&
    (st.status === SUBTASK_STATUS.FAILED || st.status === SUBTASK_STATUS.BLOCKED) &&
    task && task.tags && task.tags.includes('escalation')
  );

  if (isEscalationRecovery) {
    log(`ESCALATION RECOVERY ${plan.plan_id}: subtask ${st.subtask_id} recovered by ${taskId}`);
    st.status = SUBTASK_STATUS.COMPLETED;
    st.result = { success: true, summary: `Recovered by escalation task ${taskId}` };
    // Unblock any dependents that were blocked by the original failure
    for (const dep of plan.subtasks) {
      if (dep.status === SUBTASK_STATUS.BLOCKED && dep.depends_on.includes(st.subtask_id)) {
        dep.status = SUBTASK_STATUS.PENDING;
        dep.result = null;
        log(`  UNBLOCKED: ${dep.subtask_id} (dependency ${st.subtask_id} recovered)`);
      }
    }
    await planStore.put(plan);
    publishPlanEvent('subtask_recovered', plan);
    await advancePlanWave(plan.plan_id);
    return;
  }

  st.status = status === 'completed' ? SUBTASK_STATUS.COMPLETED : SUBTASK_STATUS.FAILED;
  // Persist the status change before any event/wave side effects.
  await planStore.put(plan);

  log(`PLAN PROGRESS ${plan.plan_id}: subtask ${st.subtask_id} → ${st.status}`);

  if (st.status === SUBTASK_STATUS.COMPLETED) {
    publishPlanEvent('subtask_completed', plan);
    await advancePlanWave(plan.plan_id);
    return;
  }

  // Subtask failed — apply failure policy
  if (st.status === SUBTASK_STATUS.FAILED) {
    publishPlanEvent('subtask_failed', plan);

    // Cascade: block all transitive dependents
    const blockedIds = cascadeFailure(plan, st.subtask_id);
    // Second put: cascadeFailure mutates plan.subtasks in place.
    await planStore.put(plan);

    const policy = plan.failure_policy || 'continue_best_effort';

    if (policy === 'abort_on_first_fail') {
      await planStore.markAborted(plan.plan_id, `Subtask ${st.subtask_id} failed (abort_on_first_fail)`);
      // Re-fetch so the published event carries the aborted plan state.
      publishPlanEvent('aborted', await planStore.get(plan.plan_id));
      log(`PLAN ABORTED ${plan.plan_id}: ${st.subtask_id} failed (abort_on_first_fail policy)`);
      return;
    }

    if (policy === 'abort_on_critical_fail') {
      // Check direct failure
      if (st.critical) {
        await planStore.markAborted(plan.plan_id, `Critical subtask ${st.subtask_id} failed (abort_on_critical_fail)`);
        publishPlanEvent('aborted', await planStore.get(plan.plan_id));
        log(`PLAN ABORTED ${plan.plan_id}: critical subtask ${st.subtask_id} failed`);
        return;
      }

      // Check if cascade blocked any critical subtasks — a blocked critical is
      // functionally equivalent to a failed critical (the plan can't achieve its goal)
      const blockedCritical = plan.subtasks.filter(
        s => blockedIds.has(s.subtask_id) && s.critical
      );
      if (blockedCritical.length > 0) {
        const ids = blockedCritical.map(s => s.subtask_id).join(', ');
        await planStore.markAborted(
          plan.plan_id,
          `Critical subtask(s) ${ids} blocked by failed dependency ${st.subtask_id} (abort_on_critical_fail)`
        );
        publishPlanEvent('aborted', await planStore.get(plan.plan_id));
        log(`PLAN ABORTED ${plan.plan_id}: critical subtask(s) [${ids}] blocked by ${st.subtask_id}`);
        return;
      }
    }

    // continue_best_effort: try to advance independent branches
    await advancePlanWave(plan.plan_id);
  }
}
|
|
1056
1983
|
|
|
1984
|
+
/**
 * Cascade failure: breadth-first walk from a failed subtask, marking every
 * transitive dependent that is still waiting (PENDING/QUEUED) as BLOCKED.
 * Mutates plan.subtasks in place; the caller is responsible for persisting.
 *
 * @param {object} plan - Plan whose subtasks are scanned and mutated.
 * @param {string} failedSubtaskId - Root of the failure cascade.
 * @returns {Set<string>} IDs of all newly-blocked subtasks
 */
function cascadeFailure(plan, failedSubtaskId) {
  const blocked = new Set();
  const frontier = [failedSubtaskId];

  while (frontier.length > 0) {
    const current = frontier.shift();

    for (const st of plan.subtasks) {
      // Skip non-dependents and anything we already blocked this pass.
      if (!st.depends_on.includes(current)) continue;
      if (blocked.has(st.subtask_id)) continue;

      // Only waiting subtasks get blocked; running/finished ones are left alone.
      const isWaiting =
        st.status === SUBTASK_STATUS.PENDING || st.status === SUBTASK_STATUS.QUEUED;
      if (!isWaiting) continue;

      st.status = SUBTASK_STATUS.BLOCKED;
      st.result = { success: false, summary: `Blocked by failed dependency: ${failedSubtaskId}` };
      blocked.add(st.subtask_id);
      frontier.push(st.subtask_id);
      log(`  CASCADE: ${st.subtask_id} blocked by ${failedSubtaskId}`);
    }
  }

  return blocked;
}
|
|
2010
|
+
|
|
1057
2011
|
// ── Main ────────────────────────────────────────────
|
|
1058
2012
|
|
|
1059
2013
|
async function main() {
|
|
@@ -1091,6 +2045,8 @@ async function main() {
|
|
|
1091
2045
|
'mesh.tasks.list': handleList,
|
|
1092
2046
|
'mesh.tasks.get': handleGet,
|
|
1093
2047
|
'mesh.tasks.cancel': handleCancel,
|
|
2048
|
+
'mesh.tasks.approve': handleTaskApprove,
|
|
2049
|
+
'mesh.tasks.reject': handleTaskReject,
|
|
1094
2050
|
// Collab handlers
|
|
1095
2051
|
'mesh.collab.create': handleCollabCreate,
|
|
1096
2052
|
'mesh.collab.join': handleCollabJoin,
|
|
@@ -1098,6 +2054,10 @@ async function main() {
|
|
|
1098
2054
|
'mesh.collab.status': handleCollabStatus,
|
|
1099
2055
|
'mesh.collab.find': handleCollabFind,
|
|
1100
2056
|
'mesh.collab.reflect': handleCollabReflect,
|
|
2057
|
+
'mesh.collab.recruiting': handleCollabRecruiting,
|
|
2058
|
+
// Circling Strategy gate handlers
|
|
2059
|
+
'mesh.collab.gate.approve': handleCirclingGateApprove,
|
|
2060
|
+
'mesh.collab.gate.reject': handleCirclingGateReject,
|
|
1101
2061
|
// Plan handlers
|
|
1102
2062
|
'mesh.plans.create': handlePlanCreate,
|
|
1103
2063
|
'mesh.plans.get': handlePlanGet,
|
|
@@ -1116,7 +2076,7 @@ async function main() {
|
|
|
1116
2076
|
try {
|
|
1117
2077
|
await handler(msg);
|
|
1118
2078
|
} catch (err) {
|
|
1119
|
-
log(`ERROR handling ${subject}: ${err.message}`);
|
|
2079
|
+
log(`ERROR handling ${subject}: ${err.message}\n${err.stack}`);
|
|
1120
2080
|
try { respondError(msg, err.message); } catch {}
|
|
1121
2081
|
}
|
|
1122
2082
|
}
|
|
@@ -1125,12 +2085,16 @@ async function main() {
|
|
|
1125
2085
|
}
|
|
1126
2086
|
|
|
1127
2087
|
// Start enforcement loops
|
|
2088
|
+
const proposalTimer = setInterval(processProposals, BUDGET_CHECK_INTERVAL);
|
|
1128
2089
|
const budgetTimer = setInterval(enforceBudgets, BUDGET_CHECK_INTERVAL);
|
|
1129
2090
|
const stallTimer = setInterval(detectStalls, BUDGET_CHECK_INTERVAL);
|
|
1130
2091
|
const recruitTimer = setInterval(checkRecruitingDeadlines, 5000); // check every 5s
|
|
2092
|
+
const circlingStepSweepTimer = setInterval(sweepCirclingStepTimeouts, 60000); // every 60s
|
|
2093
|
+
log(`Proposal processing: every ${BUDGET_CHECK_INTERVAL / 1000}s`);
|
|
1131
2094
|
log(`Budget enforcement: every ${BUDGET_CHECK_INTERVAL / 1000}s`);
|
|
1132
2095
|
log(`Stall detection: every ${BUDGET_CHECK_INTERVAL / 1000}s (threshold: ${STALL_MINUTES}m)`);
|
|
1133
2096
|
log(`Collab recruiting check: every 5s`);
|
|
2097
|
+
log(`Circling step timeout sweep: every 60s (threshold: ${CIRCLING_STEP_TIMEOUT_MS / 60000}m)`);
|
|
1134
2098
|
|
|
1135
2099
|
|
|
1136
2100
|
log('Task daemon ready.');
|
|
@@ -1138,6 +2102,7 @@ async function main() {
|
|
|
1138
2102
|
// Shutdown handler
|
|
1139
2103
|
const shutdown = async () => {
|
|
1140
2104
|
log('Shutting down...');
|
|
2105
|
+
clearInterval(proposalTimer);
|
|
1141
2106
|
clearInterval(budgetTimer);
|
|
1142
2107
|
clearInterval(stallTimer);
|
|
1143
2108
|
clearInterval(recruitTimer);
|