openclaw-node-harness 2.0.3 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/mesh-agent.js +214 -81
- package/bin/mesh-bridge.js +274 -10
- package/bin/mesh-deploy-listener.js +119 -97
- package/bin/mesh-deploy.js +8 -0
- package/bin/mesh-task-daemon.js +190 -15
- package/bin/mesh.js +20 -5
- package/install.sh +7 -0
- package/lib/kanban-io.js +50 -10
- package/lib/mesh-collab.js +53 -3
- package/lib/mesh-registry.js +11 -2
- package/package.json +1 -1
package/bin/mesh-task-daemon.js
CHANGED
|
@@ -131,6 +131,36 @@ async function handleSubmit(msg) {
|
|
|
131
131
|
respond(msg, task);
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
+
/**
 * Abort the collab session attached to a task that is being terminated.
 *
 * Called from handleFail, handleRelease and handleCancel. It is deliberately
 * NOT called from handleComplete: that path finishes the session through
 * evaluateRound, which calls collabStore.markCompleted() itself.
 *
 * collabStore.markAborted() resolves to null when the session does not exist
 * or is already completed/aborted, so calling this twice for the same task
 * (e.g. stall detection racing a release) is harmless — the audit entry,
 * event and log line are only produced by the call that actually performed
 * the transition.
 *
 * Errors are logged as warnings and never propagated to the caller.
 *
 * @param {object} task   Task record; only `collab_session_id` is read.
 * @param {string} reason Human-readable abort reason stored on the session.
 * @returns {Promise<void>}
 */
async function cleanupTaskCollabSession(task, reason) {
  const sessionId = task.collab_session_id;
  if (!(sessionId && collabStore)) {
    return;
  }

  try {
    // Truthy only when this call moved the session into the aborted state.
    const abortedSession = await collabStore.markAborted(sessionId, reason);
    if (abortedSession) {
      await collabStore.appendAudit(sessionId, 'session_aborted', { reason });
      publishCollabEvent('aborted', abortedSession);
      log(`COLLAB ABORTED ${sessionId}: ${reason}`);
    }

    // Release the audit-error rate-limit counter for this session.
    // NOTE: sessions that expire via KV TTL never reach this line, so their
    // counter entry stays in the Map — negligible for a homelab mesh.
    collabStore.clearAuditErrorCount(sessionId);
  } catch (err) {
    log(`COLLAB CLEANUP WARN: could not abort session ${sessionId}: ${err.message}`);
  }
}
|
|
163
|
+
|
|
134
164
|
/**
|
|
135
165
|
* mesh.tasks.claim — Agent requests the next available task.
|
|
136
166
|
* Expects: { node_id }
|
|
@@ -203,6 +233,14 @@ async function handleComplete(msg) {
|
|
|
203
233
|
log(`COMPLETE ${task_id} in ${elapsed}m: ${result?.summary || 'no summary'}`);
|
|
204
234
|
publishEvent('completed', task);
|
|
205
235
|
|
|
236
|
+
// NOTE: no cleanupTaskCollabSession here — collab tasks complete via
|
|
237
|
+
// evaluateRound → markCompleted on the session, then store.markCompleted
|
|
238
|
+
// on the parent task. Calling cleanupTaskCollabSession would markAborted
|
|
239
|
+
// on an already-completed session. Clean up audit counter only.
|
|
240
|
+
if (task.collab_session_id && collabStore) {
|
|
241
|
+
collabStore.clearAuditErrorCount(task.collab_session_id);
|
|
242
|
+
}
|
|
243
|
+
|
|
206
244
|
// Check if this task belongs to a plan
|
|
207
245
|
await checkPlanProgress(task_id, 'completed');
|
|
208
246
|
|
|
@@ -222,6 +260,7 @@ async function handleFail(msg) {
|
|
|
222
260
|
|
|
223
261
|
log(`FAIL ${task_id}: ${reason}`);
|
|
224
262
|
publishEvent('failed', task);
|
|
263
|
+
await cleanupTaskCollabSession(task, `Parent task ${task_id} failed: ${reason}`);
|
|
225
264
|
|
|
226
265
|
// Check if this task belongs to a plan
|
|
227
266
|
await checkPlanProgress(task_id, 'failed');
|
|
@@ -302,6 +341,7 @@ async function handleRelease(msg) {
|
|
|
302
341
|
|
|
303
342
|
log(`RELEASED ${task_id}: ${reason || 'no reason'} (needs human triage)`);
|
|
304
343
|
publishEvent('released', task);
|
|
344
|
+
await cleanupTaskCollabSession(task, `Parent task ${task_id} released: ${reason || 'human triage'}`);
|
|
305
345
|
respond(msg, task);
|
|
306
346
|
}
|
|
307
347
|
|
|
@@ -323,6 +363,7 @@ async function handleCancel(msg) {
|
|
|
323
363
|
|
|
324
364
|
log(`CANCEL ${task_id}: ${reason || 'no reason'}`);
|
|
325
365
|
publishEvent('cancelled', task);
|
|
366
|
+
await cleanupTaskCollabSession(task, `Parent task ${task_id} cancelled: ${reason || 'no reason'}`);
|
|
326
367
|
respond(msg, task);
|
|
327
368
|
}
|
|
328
369
|
|
|
@@ -358,6 +399,35 @@ async function detectStalls() {
|
|
|
358
399
|
}
|
|
359
400
|
}
|
|
360
401
|
|
|
402
|
+
// Mark stalled node as dead in any collab sessions it belongs to.
|
|
403
|
+
// This unblocks isRoundComplete() which otherwise waits forever for
|
|
404
|
+
// a reflection from a crashed node.
|
|
405
|
+
// Uses findActiveSessionsByNode() — O(sessions) single pass instead of
|
|
406
|
+
// the previous O(sessions × nodes) list-then-find pattern.
|
|
407
|
+
if (task.owner && collabStore) {
|
|
408
|
+
try {
|
|
409
|
+
const sessions = await collabStore.findActiveSessionsByNode(task.owner);
|
|
410
|
+
for (const session of sessions) {
|
|
411
|
+
const node = session.nodes.find(n => n.node_id === task.owner);
|
|
412
|
+
if (node && node.status !== 'dead') {
|
|
413
|
+
await collabStore.setNodeStatus(session.session_id, task.owner, 'dead');
|
|
414
|
+
log(`STALL → COLLAB: marked ${task.owner} as dead in session ${session.session_id}`);
|
|
415
|
+
await collabStore.appendAudit(session.session_id, 'node_marked_dead', {
|
|
416
|
+
node_id: task.owner, reason: `Stall detected: no heartbeat for ${silentMin}m`,
|
|
417
|
+
});
|
|
418
|
+
|
|
419
|
+
// Re-check if the round is now complete (dead nodes excluded)
|
|
420
|
+
const updated = await collabStore.get(session.session_id);
|
|
421
|
+
if (updated && collabStore.isRoundComplete(updated)) {
|
|
422
|
+
await evaluateRound(session.session_id);
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
} catch (err) {
|
|
427
|
+
log(`STALL → COLLAB ERROR: ${err.message}`);
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
|
|
361
431
|
const releasedTask = await store.markReleased(
|
|
362
432
|
task.task_id,
|
|
363
433
|
`Stall detected: no agent heartbeat for ${silentMin}m, alive check failed`,
|
|
@@ -526,6 +596,26 @@ async function handleCollabFind(msg) {
|
|
|
526
596
|
respond(msg, session);
|
|
527
597
|
}
|
|
528
598
|
|
|
599
|
+
/**
 * mesh.collab.recruiting — list every collab session that is still recruiting.
 * Agents use this to discover sessions they should join.
 *
 * Responds with an array of
 *   { session_id, task_id, mode, min_nodes, max_nodes,
 *     current_nodes, node_ids, recruiting_deadline }
 * where `current_nodes` is the number of joined nodes and `node_ids` lists
 * their ids (falling back to `id` for entries without `node_id`).
 *
 * @param {object} msg Incoming request message, passed through to respond().
 * @returns {Promise<void>}
 */
async function handleCollabRecruiting(msg) {
  const openSessions = await collabStore.list({ status: COLLAB_STATUS.RECRUITING });

  const summaries = [];
  for (const openSession of openSessions) {
    const {
      session_id,
      task_id,
      mode,
      min_nodes,
      max_nodes,
      nodes,
      recruiting_deadline,
    } = openSession;

    summaries.push({
      session_id,
      task_id,
      mode,
      min_nodes,
      max_nodes,
      current_nodes: nodes.length,
      node_ids: nodes.map((member) => member.node_id || member.id),
      recruiting_deadline,
    });
  }

  respond(msg, summaries);
}
|
|
618
|
+
|
|
529
619
|
/**
|
|
530
620
|
* mesh.collab.reflect — Node submits a reflection for the current round.
|
|
531
621
|
* Expects: { session_id, node_id, summary, learnings, artifacts, confidence, vote }
|
|
@@ -546,8 +636,20 @@ async function handleCollabReflect(msg) {
|
|
|
546
636
|
});
|
|
547
637
|
publishCollabEvent('reflection_received', session);
|
|
548
638
|
|
|
549
|
-
//
|
|
550
|
-
if
|
|
639
|
+
// Sequential mode: advance turn, notify next node or evaluate round
|
|
640
|
+
// Parallel mode: check if all reflections are in → evaluate convergence
|
|
641
|
+
// NOTE: Node.js single-threaded event loop prevents concurrent execution of this
|
|
642
|
+
// handler's synchronous sections only; it awaits below, so NOTE(review): confirm the dispatcher serializes messages before relying on advanceTurn() without CAS.
|
|
643
|
+
if (session.mode === 'sequential') {
|
|
644
|
+
const nextNodeId = await collabStore.advanceTurn(session_id);
|
|
645
|
+
if (nextNodeId) {
|
|
646
|
+
// Notify only the next-turn node with accumulated intra-round intel
|
|
647
|
+
await notifySequentialTurn(session_id, nextNodeId);
|
|
648
|
+
} else {
|
|
649
|
+
// All turns done → evaluate round
|
|
650
|
+
await evaluateRound(session_id);
|
|
651
|
+
}
|
|
652
|
+
} else if (collabStore.isRoundComplete(session)) {
|
|
551
653
|
await evaluateRound(session_id);
|
|
552
654
|
}
|
|
553
655
|
|
|
@@ -657,8 +759,14 @@ async function startCollabRound(sessionId) {
|
|
|
657
759
|
const scopeStrategy = session.scope_strategy || 'shared';
|
|
658
760
|
const nodeScopes = computeNodeScopes(session.nodes, taskScope, scopeStrategy);
|
|
659
761
|
|
|
660
|
-
//
|
|
661
|
-
|
|
762
|
+
// Sequential mode: only notify the current_turn node.
|
|
763
|
+
// Other nodes get notified via notifySequentialTurn() as turns advance.
|
|
764
|
+
// Parallel mode: notify all nodes at once.
|
|
765
|
+
const nodesToNotify = session.mode === 'sequential' && session.current_turn
|
|
766
|
+
? session.nodes.filter(n => n.node_id === session.current_turn)
|
|
767
|
+
: session.nodes;
|
|
768
|
+
|
|
769
|
+
for (const node of nodesToNotify) {
|
|
662
770
|
const effectiveScope = nodeScopes[node.node_id] || node.scope;
|
|
663
771
|
nc.publish(`mesh.collab.${sessionId}.node.${node.node_id}.round`, sc.encode(JSON.stringify({
|
|
664
772
|
session_id: sessionId,
|
|
@@ -674,6 +782,57 @@ async function startCollabRound(sessionId) {
|
|
|
674
782
|
}
|
|
675
783
|
}
|
|
676
784
|
|
|
785
|
+
/**
 * Render the reflections submitted so far in a round as a plain-text block.
 * Tolerates a round without a `reflections` array and reflections without
 * an `artifacts` array (both are treated as empty).
 *
 * @param {object} round Round record (`round_number`, `reflections`).
 * @returns {string} Intel text, one section per reflection.
 */
function formatIntraRoundIntel(round) {
  const reflections = Array.isArray(round.reflections) ? round.reflections : [];
  const lines = [`=== INTRA-ROUND ${round.round_number} (turns so far) ===\n`];

  for (const r of reflections) {
    lines.push(`## Turn: ${r.node_id}${r.parse_failed ? ' [PARSE FAILED]' : ''}`);
    if (r.summary) lines.push(`Summary: ${r.summary}`);
    if (r.learnings) lines.push(`Learnings: ${r.learnings}`);
    // A reflection may arrive without artifacts; that must not abort the hand-off.
    const artifacts = Array.isArray(r.artifacts) ? r.artifacts : [];
    if (artifacts.length > 0) lines.push(`Artifacts: ${artifacts.join(', ')}`);
    lines.push(`Confidence: ${r.confidence} | Vote: ${r.vote}`);
    lines.push('');
  }

  return lines.join('\n');
}

/**
 * Notify the next node in a sequential turn.
 *
 * Publishes a round message on `mesh.collab.<session>.node.<node>.round`
 * whose `shared_intel` is the round's shared intel followed by the
 * reflections already submitted this round, then logs the hand-off and
 * appends a `turn_advanced` audit entry.
 *
 * Does nothing when the session cannot be loaded or has no rounds yet.
 *
 * @param {string} sessionId  Collab session id.
 * @param {string} nextNodeId Node whose turn is starting.
 * @returns {Promise<void>}
 */
async function notifySequentialTurn(sessionId, nextNodeId) {
  const session = await collabStore.get(sessionId);
  if (!session) return;

  const rounds = Array.isArray(session.rounds) ? session.rounds : [];
  const currentRound = rounds[rounds.length - 1];
  if (!currentRound) return;

  // Compile intra-round intel from reflections already submitted this round.
  const intraRoundIntel = formatIntraRoundIntel(currentRound);
  const combinedIntel = currentRound.shared_intel
    ? `${currentRound.shared_intel}\n\n${intraRoundIntel}`
    : intraRoundIntel;

  const parentTask = await store.get(session.task_id);
  const taskScope = parentTask?.scope || [];
  const scopeStrategy = session.scope_strategy || 'shared';
  const nodeScopes = computeNodeScopes(session.nodes, taskScope, scopeStrategy);
  const nextNode = session.nodes.find((n) => n.node_id === nextNodeId);

  nc.publish(`mesh.collab.${sessionId}.node.${nextNodeId}.round`, sc.encode(JSON.stringify({
    session_id: sessionId,
    task_id: session.task_id,
    round_number: currentRound.round_number,
    shared_intel: combinedIntel,
    // Computed scope first, then the node's own scope, then unrestricted.
    my_scope: nodeScopes[nextNodeId] || nextNode?.scope || ['*'],
    my_role: nextNode?.role || 'worker',
    mode: 'sequential',
    current_turn: nextNodeId,
    scope_strategy: scopeStrategy,
  })));

  log(`COLLAB SEQ ${sessionId} R${currentRound.round_number}: Turn advanced to ${nextNodeId}`);
  await collabStore.appendAudit(sessionId, 'turn_advanced', {
    round: currentRound.round_number,
    next_node: nextNodeId,
    reflections_so_far: Array.isArray(currentRound.reflections) ? currentRound.reflections.length : 0,
  });
}
|
|
835
|
+
|
|
677
836
|
/**
|
|
678
837
|
* Evaluate the current round: check convergence, advance or complete.
|
|
679
838
|
*/
|
|
@@ -702,10 +861,11 @@ async function evaluateRound(sessionId) {
|
|
|
702
861
|
await collabStore.markConverged(sessionId);
|
|
703
862
|
publishCollabEvent('converged', session);
|
|
704
863
|
|
|
705
|
-
//
|
|
864
|
+
// Re-fetch after markConverged to ensure fresh state
|
|
865
|
+
const freshSession = await collabStore.get(sessionId);
|
|
706
866
|
const allArtifacts = [];
|
|
707
867
|
const contributions = {};
|
|
708
|
-
for (const round of
|
|
868
|
+
for (const round of freshSession.rounds) {
|
|
709
869
|
for (const r of round.reflections) {
|
|
710
870
|
allArtifacts.push(...r.artifacts);
|
|
711
871
|
contributions[r.node_id] = r.summary;
|
|
@@ -714,20 +874,20 @@ async function evaluateRound(sessionId) {
|
|
|
714
874
|
|
|
715
875
|
await collabStore.markCompleted(sessionId, {
|
|
716
876
|
artifacts: [...new Set(allArtifacts)],
|
|
717
|
-
summary: `Converged after ${
|
|
877
|
+
summary: `Converged after ${freshSession.current_round} rounds with ${freshSession.nodes.length} nodes`,
|
|
718
878
|
node_contributions: contributions,
|
|
719
879
|
});
|
|
720
880
|
await collabStore.appendAudit(sessionId, 'session_completed', {
|
|
721
|
-
outcome: 'converged', rounds:
|
|
881
|
+
outcome: 'converged', rounds: freshSession.current_round,
|
|
722
882
|
artifacts: [...new Set(allArtifacts)].length,
|
|
723
|
-
node_count:
|
|
883
|
+
node_count: freshSession.nodes.length, recruited_count: freshSession.recruited_count,
|
|
724
884
|
});
|
|
725
885
|
|
|
726
886
|
// Complete the parent task
|
|
727
|
-
const
|
|
728
|
-
await store.markCompleted(
|
|
729
|
-
publishEvent('completed', await store.get(
|
|
730
|
-
publishCollabEvent('completed',
|
|
887
|
+
const completedSession = await collabStore.get(sessionId);
|
|
888
|
+
await store.markCompleted(freshSession.task_id, completedSession.result);
|
|
889
|
+
publishEvent('completed', await store.get(freshSession.task_id));
|
|
890
|
+
publishCollabEvent('completed', completedSession);
|
|
731
891
|
|
|
732
892
|
} else if (maxReached) {
|
|
733
893
|
log(`COLLAB MAX ROUNDS ${sessionId}: ${session.current_round}/${session.max_rounds}. Completing with current artifacts.`);
|
|
@@ -955,6 +1115,19 @@ async function advancePlanWave(planId) {
|
|
|
955
1115
|
const waveNum = ready[0].wave;
|
|
956
1116
|
log(`PLAN WAVE ${planId} W${waveNum}: dispatching ${ready.length} subtasks`);
|
|
957
1117
|
|
|
1118
|
+
// Inherit routing fields from parent task so subtasks use the same LLM/node preferences.
|
|
1119
|
+
// CONSTRAINT: Subtasks cannot override routing independently — they always inherit from the
|
|
1120
|
+
// parent task. If per-subtask routing is needed, extend the subtask schema in mesh-plans.js
|
|
1121
|
+
// (e.g. subtask.llm_provider) and merge here with subtask fields taking priority.
|
|
1122
|
+
const parentTask = await store.get(plan.parent_task_id);
|
|
1123
|
+
const inheritedRouting = {};
|
|
1124
|
+
if (parentTask) {
|
|
1125
|
+
if (parentTask.llm_provider) inheritedRouting.llm_provider = parentTask.llm_provider;
|
|
1126
|
+
if (parentTask.llm_model) inheritedRouting.llm_model = parentTask.llm_model;
|
|
1127
|
+
if (parentTask.preferred_nodes) inheritedRouting.preferred_nodes = parentTask.preferred_nodes;
|
|
1128
|
+
if (parentTask.exclude_nodes) inheritedRouting.exclude_nodes = parentTask.exclude_nodes;
|
|
1129
|
+
}
|
|
1130
|
+
|
|
958
1131
|
for (const st of ready) {
|
|
959
1132
|
st.status = SUBTASK_STATUS.QUEUED;
|
|
960
1133
|
|
|
@@ -962,7 +1135,7 @@ async function advancePlanWave(planId) {
|
|
|
962
1135
|
switch (st.delegation.mode) {
|
|
963
1136
|
case 'solo_mesh':
|
|
964
1137
|
case 'collab_mesh': {
|
|
965
|
-
// Submit as mesh task
|
|
1138
|
+
// Submit as mesh task — inherit routing fields from parent task
|
|
966
1139
|
const meshTask = createTask({
|
|
967
1140
|
task_id: st.subtask_id,
|
|
968
1141
|
title: st.title,
|
|
@@ -973,6 +1146,7 @@ async function advancePlanWave(planId) {
|
|
|
973
1146
|
success_criteria: st.success_criteria,
|
|
974
1147
|
tags: ['plan', planId],
|
|
975
1148
|
collaboration: st.delegation.collaboration || undefined,
|
|
1149
|
+
...inheritedRouting,
|
|
976
1150
|
});
|
|
977
1151
|
await store.put(meshTask);
|
|
978
1152
|
st.mesh_task_id = meshTask.task_id;
|
|
@@ -1098,6 +1272,7 @@ async function main() {
|
|
|
1098
1272
|
'mesh.collab.status': handleCollabStatus,
|
|
1099
1273
|
'mesh.collab.find': handleCollabFind,
|
|
1100
1274
|
'mesh.collab.reflect': handleCollabReflect,
|
|
1275
|
+
'mesh.collab.recruiting': handleCollabRecruiting,
|
|
1101
1276
|
// Plan handlers
|
|
1102
1277
|
'mesh.plans.create': handlePlanCreate,
|
|
1103
1278
|
'mesh.plans.get': handlePlanGet,
|
|
@@ -1116,7 +1291,7 @@ async function main() {
|
|
|
1116
1291
|
try {
|
|
1117
1292
|
await handler(msg);
|
|
1118
1293
|
} catch (err) {
|
|
1119
|
-
log(`ERROR handling ${subject}: ${err.message}`);
|
|
1294
|
+
log(`ERROR handling ${subject}: ${err.message}\n${err.stack}`);
|
|
1120
1295
|
try { respondError(msg, err.message); } catch {}
|
|
1121
1296
|
}
|
|
1122
1297
|
}
|
package/bin/mesh.js
CHANGED
|
@@ -397,6 +397,10 @@ async function cmdSubmit(args) {
|
|
|
397
397
|
success_criteria: task.success_criteria || [],
|
|
398
398
|
scope: task.scope || [],
|
|
399
399
|
priority: task.auto_priority || 0,
|
|
400
|
+
llm_provider: task.provider || task.llm_provider || null,
|
|
401
|
+
llm_model: task.model || task.llm_model || null,
|
|
402
|
+
preferred_nodes: task.preferred_nodes || [],
|
|
403
|
+
exclude_nodes: task.exclude_nodes || [],
|
|
400
404
|
});
|
|
401
405
|
console.log(`Submitted: ${result.data.task_id} [${result.data.status}]`);
|
|
402
406
|
// Mark as 'submitted' — NOT 'running'. The card reflects actual mesh state.
|
|
@@ -451,12 +455,18 @@ async function cmdSubmit(args) {
|
|
|
451
455
|
scope: task.scope || [],
|
|
452
456
|
priority: task.priority || 0,
|
|
453
457
|
tags: task.tags || [],
|
|
458
|
+
llm_provider: task.provider || task.llm_provider || null,
|
|
459
|
+
llm_model: task.model || task.llm_model || null,
|
|
460
|
+
preferred_nodes: task.preferred_nodes || [],
|
|
461
|
+
exclude_nodes: task.exclude_nodes || [],
|
|
462
|
+
collaboration: task.collaboration || undefined,
|
|
454
463
|
});
|
|
455
464
|
|
|
456
465
|
console.log(`Submitted: ${result.data.task_id} "${result.data.title}"`);
|
|
457
466
|
console.log(` Status: ${result.data.status}`);
|
|
458
467
|
console.log(` Budget: ${result.data.budget_minutes}m`);
|
|
459
468
|
console.log(` Metric: ${result.data.metric || 'none'}`);
|
|
469
|
+
if (result.data.llm_provider) console.log(` Provider: ${result.data.llm_provider}`);
|
|
460
470
|
await nc.close();
|
|
461
471
|
}
|
|
462
472
|
|
|
@@ -598,7 +608,11 @@ async function cmdRepair(args) {
|
|
|
598
608
|
*/
|
|
599
609
|
async function cmdDeploy(args) {
|
|
600
610
|
const { execSync } = require('child_process');
|
|
601
|
-
|
|
611
|
+
// Prefer openclaw-node (git repo) over openclaw (runtime)
|
|
612
|
+
const defaultRepo = fs.existsSync(path.join(os.homedir(), 'openclaw-node', '.git'))
|
|
613
|
+
? path.join(os.homedir(), 'openclaw-node')
|
|
614
|
+
: path.join(os.homedir(), 'openclaw');
|
|
615
|
+
const repoDir = process.env.OPENCLAW_REPO_DIR || defaultRepo;
|
|
602
616
|
const force = args.includes('--force');
|
|
603
617
|
|
|
604
618
|
// Parse --component flags
|
|
@@ -658,7 +672,7 @@ async function cmdDeploy(args) {
|
|
|
658
672
|
await nc.flush();
|
|
659
673
|
console.log('Deploy trigger sent.\n');
|
|
660
674
|
|
|
661
|
-
// Poll for results (
|
|
675
|
+
// Poll for results (15s timeout)
|
|
662
676
|
console.log('Waiting for node responses...');
|
|
663
677
|
const deadline = Date.now() + 15000;
|
|
664
678
|
const seen = new Set();
|
|
@@ -668,6 +682,7 @@ async function cmdDeploy(args) {
|
|
|
668
682
|
const resultsKv = await js.views.kv('MESH_DEPLOY_RESULTS');
|
|
669
683
|
|
|
670
684
|
while (Date.now() < deadline) {
|
|
685
|
+
// Check all nodes
|
|
671
686
|
const allAliasNodes = [...new Set(Object.values(NODE_ALIASES))];
|
|
672
687
|
const checkNodes = targetNodes.length > 0 ? targetNodes : allAliasNodes;
|
|
673
688
|
|
|
@@ -731,9 +746,9 @@ function cmdHelp() {
|
|
|
731
746
|
' mesh repair Self-repair this node',
|
|
732
747
|
' mesh repair --all Self-repair ALL nodes',
|
|
733
748
|
' mesh deploy Deploy to all nodes',
|
|
734
|
-
' mesh deploy --force Force deploy (
|
|
735
|
-
' mesh deploy --
|
|
736
|
-
' mesh deploy --
|
|
749
|
+
' mesh deploy --force Force deploy (skip cache)',
|
|
750
|
+
' mesh deploy --node ubuntu Deploy to specific node',
|
|
751
|
+
' mesh deploy --component mesh-daemons Deploy specific component',
|
|
737
752
|
'',
|
|
738
753
|
'NODE ALIASES:',
|
|
739
754
|
' ubuntu, linux = Ubuntu VM (calos-vmware-virtual-platform)',
|
package/install.sh
CHANGED
|
@@ -230,6 +230,10 @@ if [ -z "$NODE_ROLE" ]; then
|
|
|
230
230
|
NODE_ROLE="worker"
|
|
231
231
|
fi
|
|
232
232
|
fi
|
|
233
|
+
if [ "$NODE_ROLE" != "lead" ] && [ "$NODE_ROLE" != "worker" ]; then
|
|
234
|
+
error "Invalid role: $NODE_ROLE (must be 'lead' or 'worker')"
|
|
235
|
+
exit 1
|
|
236
|
+
fi
|
|
233
237
|
export OPENCLAW_NODE_ROLE="$NODE_ROLE"
|
|
234
238
|
info "Node role: $NODE_ROLE"
|
|
235
239
|
|
|
@@ -692,6 +696,9 @@ else
|
|
|
692
696
|
if command -v envsubst >/dev/null 2>&1; then
|
|
693
697
|
envsubst < "$TEMPLATE" > "$DEST"
|
|
694
698
|
else
|
|
699
|
+
# NOTE: sed delimiter is |. If OPENCLAW_NATS_TOKEN ever contains |
|
|
700
|
+
# (unlikely — tokens are hex/base64), this substitution will break.
|
|
701
|
+
# Prefer envsubst (above) when available; it has no delimiter issue.
|
|
695
702
|
sed \
|
|
696
703
|
-e "s|\${HOME}|$HOME|g" \
|
|
697
704
|
-e "s|\${NODE_BIN}|$NODE_BIN|g" \
|
package/lib/kanban-io.js
CHANGED
|
@@ -53,8 +53,14 @@ function withMkdirLock(filePath, fn) {
|
|
|
53
53
|
if (Date.now() - start > maxWait) {
|
|
54
54
|
throw new Error(`kanban-io: lock timeout after ${maxWait}ms on ${filePath}`);
|
|
55
55
|
}
|
|
56
|
-
//
|
|
57
|
-
|
|
56
|
+
// Sleep ~10ms — Atomics.wait is precise but throws on main thread
|
|
57
|
+
// in some Node.js builds; fall back to busy-spin (rare contention path)
|
|
58
|
+
try {
|
|
59
|
+
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 10);
|
|
60
|
+
} catch {
|
|
61
|
+
const end = Date.now() + 10;
|
|
62
|
+
while (Date.now() < end) { /* busy-wait fallback */ }
|
|
63
|
+
}
|
|
58
64
|
}
|
|
59
65
|
}
|
|
60
66
|
|
|
@@ -81,9 +87,7 @@ function parseTasks(content) {
|
|
|
81
87
|
const liveIdx = content.indexOf('## Live Tasks');
|
|
82
88
|
if (liveIdx === -1) return tasks;
|
|
83
89
|
|
|
84
|
-
const
|
|
85
|
-
const nextSectionIdx = afterLive.indexOf('\n## ', 1); // skip the current ## Live Tasks
|
|
86
|
-
const liveSection = nextSectionIdx >= 0 ? afterLive.slice(0, nextSectionIdx) : afterLive;
|
|
90
|
+
const liveSection = content.slice(liveIdx);
|
|
87
91
|
const lines = liveSection.split('\n');
|
|
88
92
|
|
|
89
93
|
let current = null;
|
|
@@ -110,6 +114,13 @@ function parseTasks(content) {
|
|
|
110
114
|
budget_minutes: current.budget_minutes || 30,
|
|
111
115
|
scope: current.scope || [],
|
|
112
116
|
updated_at: current.updated_at || '',
|
|
117
|
+
// Mesh routing
|
|
118
|
+
llm_provider: current.llm_provider || null,
|
|
119
|
+
llm_model: current.llm_model || null,
|
|
120
|
+
preferred_nodes: current.preferred_nodes || [],
|
|
121
|
+
exclude_nodes: current.exclude_nodes || [],
|
|
122
|
+
collaboration: current.collaboration || null,
|
|
123
|
+
collab_result: current.collab_result || null,
|
|
113
124
|
});
|
|
114
125
|
}
|
|
115
126
|
}
|
|
@@ -119,7 +130,7 @@ function parseTasks(content) {
|
|
|
119
130
|
const taskIdMatch = line.match(/^- task_id:\s*(.+)$/);
|
|
120
131
|
if (taskIdMatch) {
|
|
121
132
|
flush();
|
|
122
|
-
current = { task_id: taskIdMatch[1].trim(), success_criteria: [], artifacts: [], scope: [] };
|
|
133
|
+
current = { task_id: taskIdMatch[1].trim(), success_criteria: [], artifacts: [], scope: [], preferred_nodes: [], exclude_nodes: [] };
|
|
123
134
|
currentArrayKey = null;
|
|
124
135
|
continue;
|
|
125
136
|
}
|
|
@@ -176,6 +187,31 @@ function parseTasks(content) {
|
|
|
176
187
|
current.scope = [];
|
|
177
188
|
currentArrayKey = 'scope';
|
|
178
189
|
break;
|
|
190
|
+
// Mesh routing fields
|
|
191
|
+
case 'llm_provider':
|
|
192
|
+
case 'provider':
|
|
193
|
+
current.llm_provider = value || null; currentArrayKey = null; break;
|
|
194
|
+
case 'llm_model':
|
|
195
|
+
case 'model':
|
|
196
|
+
current.llm_model = value || null; currentArrayKey = null; break;
|
|
197
|
+
case 'preferred_nodes':
|
|
198
|
+
current.preferred_nodes = [];
|
|
199
|
+
currentArrayKey = 'preferred_nodes';
|
|
200
|
+
break;
|
|
201
|
+
case 'exclude_nodes':
|
|
202
|
+
current.exclude_nodes = [];
|
|
203
|
+
currentArrayKey = 'exclude_nodes';
|
|
204
|
+
break;
|
|
205
|
+
case 'collaboration':
|
|
206
|
+
try { current.collaboration = value ? JSON.parse(value) : null; }
|
|
207
|
+
catch { current.collaboration = null; }
|
|
208
|
+
currentArrayKey = null;
|
|
209
|
+
break;
|
|
210
|
+
case 'collab_result':
|
|
211
|
+
try { current.collab_result = value ? JSON.parse(value) : null; }
|
|
212
|
+
catch { current.collab_result = null; }
|
|
213
|
+
currentArrayKey = null;
|
|
214
|
+
break;
|
|
179
215
|
default:
|
|
180
216
|
currentArrayKey = null;
|
|
181
217
|
break;
|
|
@@ -236,7 +272,11 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
|
|
|
236
272
|
const blockLines = lines.slice(blockStart, blockEnd);
|
|
237
273
|
|
|
238
274
|
// Update scalar fields
|
|
239
|
-
for (const [key,
|
|
275
|
+
for (const [key, rawValue] of Object.entries(fieldUpdates)) {
|
|
276
|
+
// Serialize objects/arrays as JSON so the parser can read them back
|
|
277
|
+
const value = (rawValue !== null && typeof rawValue === 'object')
|
|
278
|
+
? JSON.stringify(rawValue)
|
|
279
|
+
: rawValue;
|
|
240
280
|
const fieldRegex = new RegExp(`^ ${key}:\\s*.*$`);
|
|
241
281
|
let found = false;
|
|
242
282
|
for (let i = 1; i < blockLines.length; i++) {
|
|
@@ -249,7 +289,7 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
|
|
|
249
289
|
if (!found) {
|
|
250
290
|
// Insert before updated_at if it exists, otherwise at end of block
|
|
251
291
|
const updatedAtIdx = blockLines.findIndex(l => l.match(/^ updated_at:/));
|
|
252
|
-
const insertIdx = updatedAtIdx
|
|
292
|
+
const insertIdx = updatedAtIdx > 0 ? updatedAtIdx : blockLines.length;
|
|
253
293
|
blockLines.splice(insertIdx, 0, ` ${key}: ${value}`);
|
|
254
294
|
}
|
|
255
295
|
}
|
|
@@ -262,7 +302,7 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
|
|
|
262
302
|
if (headerIdx === -1) {
|
|
263
303
|
// Insert the array before updated_at
|
|
264
304
|
const updatedAtIdx = blockLines.findIndex(l => l.match(/^ updated_at:/));
|
|
265
|
-
const insertIdx = updatedAtIdx
|
|
305
|
+
const insertIdx = updatedAtIdx > 0 ? updatedAtIdx : blockLines.length;
|
|
266
306
|
const newLines = [` ${key}:`];
|
|
267
307
|
for (const item of items) {
|
|
268
308
|
newLines.push(` - ${item}`);
|
|
@@ -290,7 +330,7 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
|
|
|
290
330
|
if (headerIdx === -1) {
|
|
291
331
|
// Insert the array before updated_at
|
|
292
332
|
const updatedAtIdx = blockLines.findIndex(l => l.match(/^ updated_at:/));
|
|
293
|
-
const insertIdx = updatedAtIdx
|
|
333
|
+
const insertIdx = updatedAtIdx > 0 ? updatedAtIdx : blockLines.length;
|
|
294
334
|
const newLines = [` ${key}:`];
|
|
295
335
|
for (const item of items) {
|
|
296
336
|
newLines.push(` - ${item}`);
|
package/lib/mesh-collab.js
CHANGED
|
@@ -105,6 +105,10 @@ function createSession(taskId, collabSpec) {
|
|
|
105
105
|
|
|
106
106
|
// ── CollabStore (KV-backed) ─────────────────────────
|
|
107
107
|
|
|
108
|
+
// Rate-limit audit error logs: max 3 per session, then go silent
|
|
109
|
+
const _auditErrorCounts = new Map();
|
|
110
|
+
const AUDIT_ERROR_LOG_LIMIT = 3;
|
|
111
|
+
|
|
108
112
|
class CollabStore {
|
|
109
113
|
constructor(kv) {
|
|
110
114
|
this.kv = kv;
|
|
@@ -139,7 +143,14 @@ class CollabStore {
|
|
|
139
143
|
...detail,
|
|
140
144
|
});
|
|
141
145
|
await this.put(session);
|
|
142
|
-
} catch {
|
|
146
|
+
} catch (err) {
|
|
147
|
+
// Best-effort — never block on audit, but log first N failures per session
|
|
148
|
+
const count = (_auditErrorCounts.get(sessionId) || 0) + 1;
|
|
149
|
+
_auditErrorCounts.set(sessionId, count);
|
|
150
|
+
if (count <= AUDIT_ERROR_LOG_LIMIT) {
|
|
151
|
+
console.error(`[collab] audit append failed for ${sessionId}/${event}: ${err.message}${count === AUDIT_ERROR_LOG_LIMIT ? ' (suppressing further audit errors for this session)' : ''}`);
|
|
152
|
+
}
|
|
153
|
+
}
|
|
143
154
|
}
|
|
144
155
|
|
|
145
156
|
/**
|
|
@@ -176,6 +187,30 @@ class CollabStore {
|
|
|
176
187
|
return sessions[0] || null;
|
|
177
188
|
}
|
|
178
189
|
|
|
190
|
+
/**
|
|
191
|
+
* Find active sessions that contain a given node.
|
|
192
|
+
* O(sessions) single pass — avoids the O(sessions × nodes) scan
|
|
193
|
+
* that detectStalls() previously used with list() + inner find().
|
|
194
|
+
*/
|
|
195
|
+
async findActiveSessionsByNode(nodeId) {
|
|
196
|
+
const results = [];
|
|
197
|
+
const allKeys = [];
|
|
198
|
+
const keys = await this.kv.keys();
|
|
199
|
+
for await (const key of keys) {
|
|
200
|
+
allKeys.push(key);
|
|
201
|
+
}
|
|
202
|
+
for (const key of allKeys) {
|
|
203
|
+
const entry = await this.kv.get(key);
|
|
204
|
+
if (!entry || !entry.value) continue;
|
|
205
|
+
const session = JSON.parse(sc.decode(entry.value));
|
|
206
|
+
if (session.status !== COLLAB_STATUS.ACTIVE) continue;
|
|
207
|
+
if (session.nodes.some(n => n.node_id === nodeId)) {
|
|
208
|
+
results.push(session);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
return results;
|
|
212
|
+
}
|
|
213
|
+
|
|
179
214
|
// ── Node Management ────────────────────────────────
|
|
180
215
|
|
|
181
216
|
/**
|
|
@@ -190,7 +225,8 @@ class CollabStore {
|
|
|
190
225
|
// Check max_nodes
|
|
191
226
|
if (session.max_nodes && session.nodes.length >= session.max_nodes) return null;
|
|
192
227
|
|
|
193
|
-
// Check duplicate
|
|
228
|
+
// Check duplicate — single-threaded event loop prevents concurrent joins
|
|
229
|
+
// from interleaving between find() and push(). No mutex needed.
|
|
194
230
|
if (session.nodes.find(n => n.node_id === nodeId)) return null;
|
|
195
231
|
|
|
196
232
|
session.nodes.push({
|
|
@@ -320,6 +356,9 @@ class CollabStore {
|
|
|
320
356
|
const session = await this.get(sessionId);
|
|
321
357
|
if (!session) return null;
|
|
322
358
|
|
|
359
|
+
// Only accept reflections on active sessions
|
|
360
|
+
if (session.status !== COLLAB_STATUS.ACTIVE) return null;
|
|
361
|
+
|
|
323
362
|
const currentRound = session.rounds[session.rounds.length - 1];
|
|
324
363
|
if (!currentRound) return null;
|
|
325
364
|
|
|
@@ -506,11 +545,14 @@ class CollabStore {
|
|
|
506
545
|
}
|
|
507
546
|
|
|
508
547
|
/**
|
|
509
|
-
* Mark session as aborted.
|
|
548
|
+
* Mark session as aborted. Returns null (no-op) if already completed or aborted.
|
|
549
|
+
* Callers can use truthiness to detect whether the abort actually happened.
|
|
510
550
|
*/
|
|
511
551
|
async markAborted(sessionId, reason) {
|
|
512
552
|
const session = await this.get(sessionId);
|
|
513
553
|
if (!session) return null;
|
|
554
|
+
// Guard: don't corrupt completed/aborted sessions
|
|
555
|
+
if (['completed', 'aborted'].includes(session.status)) return null;
|
|
514
556
|
session.status = COLLAB_STATUS.ABORTED;
|
|
515
557
|
session.completed_at = new Date().toISOString();
|
|
516
558
|
session.result = { success: false, summary: reason, aborted: true };
|
|
@@ -518,6 +560,14 @@ class CollabStore {
|
|
|
518
560
|
return session;
|
|
519
561
|
}
|
|
520
562
|
|
|
563
|
+
/**
|
|
564
|
+
* Clear the audit error rate-limit counter for a session.
|
|
565
|
+
* Call when a session is finalized (completed/aborted) to prevent Map leak.
|
|
566
|
+
*/
|
|
567
|
+
clearAuditErrorCount(sessionId) {
|
|
568
|
+
_auditErrorCounts.delete(sessionId);
|
|
569
|
+
}
|
|
570
|
+
|
|
521
571
|
/**
|
|
522
572
|
* Get a summary of the session for reporting.
|
|
523
573
|
*/
|