openclaw-node-harness 2.0.2 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fleet-deploy.js +1 -1
- package/bin/mesh-agent.js +217 -84
- package/bin/mesh-bridge.js +274 -10
- package/bin/mesh-deploy-listener.js +120 -98
- package/bin/mesh-deploy.js +11 -3
- package/bin/mesh-health-publisher.js +1 -1
- package/bin/mesh-task-daemon.js +190 -15
- package/bin/mesh.js +170 -22
- package/bin/openclaw-node-init.js +147 -3
- package/install.sh +7 -0
- package/lib/kanban-io.js +50 -10
- package/lib/mesh-collab.js +53 -3
- package/lib/mesh-registry.js +11 -2
- package/lib/mesh-tasks.js +6 -7
- package/package.json +1 -1
package/bin/mesh-task-daemon.js
CHANGED
|
@@ -131,6 +131,36 @@ async function handleSubmit(msg) {
|
|
|
131
131
|
respond(msg, task);
|
|
132
132
|
}
|
|
133
133
|
|
|
134
|
+
/**
 * Abort the collab session attached to a task that is being terminated.
 *
 * Shared by handleFail, handleRelease and handleCancel. handleComplete does
 * NOT use it: that path goes through evaluateRound, which already calls
 * collabStore.markCompleted() on the session.
 *
 * Relies on markAborted() returning null when the session does not exist or
 * is already completed/aborted, which makes a double abort (e.g. stall
 * detection racing a release) a harmless no-op.
 *
 * @param {object} task   Task record; only `collab_session_id` is read.
 * @param {string} reason Human-readable abort reason (stored and logged).
 * @returns {Promise<void>} Never rejects for store errors; they are logged.
 */
async function cleanupTaskCollabSession(task, reason) {
  const sessionId = task.collab_session_id;
  if (!sessionId || !collabStore) return;

  try {
    // A non-null result means this call performed the transition to aborted.
    const abortedSession = await collabStore.markAborted(sessionId, reason);
    if (abortedSession) {
      await collabStore.appendAudit(sessionId, 'session_aborted', { reason });
      publishCollabEvent('aborted', abortedSession);
      log(`COLLAB ABORTED ${sessionId}: ${reason}`);
    }

    // Drop the audit error rate-limit counter for this session.
    // NOTE: sessions that expire via KV TTL never reach this point, so their
    // counter entry lingers; negligible for a homelab mesh but worth noting.
    collabStore.clearAuditErrorCount(sessionId);
  } catch (err) {
    log(`COLLAB CLEANUP WARN: could not abort session ${sessionId}: ${err.message}`);
  }
}
|
|
163
|
+
|
|
134
164
|
/**
|
|
135
165
|
* mesh.tasks.claim — Agent requests the next available task.
|
|
136
166
|
* Expects: { node_id }
|
|
@@ -203,6 +233,14 @@ async function handleComplete(msg) {
|
|
|
203
233
|
log(`COMPLETE ${task_id} in ${elapsed}m: ${result?.summary || 'no summary'}`);
|
|
204
234
|
publishEvent('completed', task);
|
|
205
235
|
|
|
236
|
+
// NOTE: no cleanupTaskCollabSession here — collab tasks complete via
|
|
237
|
+
// evaluateRound → markCompleted on the session, then store.markCompleted
|
|
238
|
+
// on the parent task. Calling cleanupTaskCollabSession would markAborted
|
|
239
|
+
// on an already-completed session. Clean up audit counter only.
|
|
240
|
+
if (task.collab_session_id && collabStore) {
|
|
241
|
+
collabStore.clearAuditErrorCount(task.collab_session_id);
|
|
242
|
+
}
|
|
243
|
+
|
|
206
244
|
// Check if this task belongs to a plan
|
|
207
245
|
await checkPlanProgress(task_id, 'completed');
|
|
208
246
|
|
|
@@ -222,6 +260,7 @@ async function handleFail(msg) {
|
|
|
222
260
|
|
|
223
261
|
log(`FAIL ${task_id}: ${reason}`);
|
|
224
262
|
publishEvent('failed', task);
|
|
263
|
+
await cleanupTaskCollabSession(task, `Parent task ${task_id} failed: ${reason}`);
|
|
225
264
|
|
|
226
265
|
// Check if this task belongs to a plan
|
|
227
266
|
await checkPlanProgress(task_id, 'failed');
|
|
@@ -302,6 +341,7 @@ async function handleRelease(msg) {
|
|
|
302
341
|
|
|
303
342
|
log(`RELEASED ${task_id}: ${reason || 'no reason'} (needs human triage)`);
|
|
304
343
|
publishEvent('released', task);
|
|
344
|
+
await cleanupTaskCollabSession(task, `Parent task ${task_id} released: ${reason || 'human triage'}`);
|
|
305
345
|
respond(msg, task);
|
|
306
346
|
}
|
|
307
347
|
|
|
@@ -323,6 +363,7 @@ async function handleCancel(msg) {
|
|
|
323
363
|
|
|
324
364
|
log(`CANCEL ${task_id}: ${reason || 'no reason'}`);
|
|
325
365
|
publishEvent('cancelled', task);
|
|
366
|
+
await cleanupTaskCollabSession(task, `Parent task ${task_id} cancelled: ${reason || 'no reason'}`);
|
|
326
367
|
respond(msg, task);
|
|
327
368
|
}
|
|
328
369
|
|
|
@@ -358,6 +399,35 @@ async function detectStalls() {
|
|
|
358
399
|
}
|
|
359
400
|
}
|
|
360
401
|
|
|
402
|
+
// Mark stalled node as dead in any collab sessions it belongs to.
|
|
403
|
+
// This unblocks isRoundComplete() which otherwise waits forever for
|
|
404
|
+
// a reflection from a crashed node.
|
|
405
|
+
// Uses findActiveSessionsByNode() — O(sessions) single pass instead of
|
|
406
|
+
// the previous O(sessions × nodes) list-then-find pattern.
|
|
407
|
+
if (task.owner && collabStore) {
|
|
408
|
+
try {
|
|
409
|
+
const sessions = await collabStore.findActiveSessionsByNode(task.owner);
|
|
410
|
+
for (const session of sessions) {
|
|
411
|
+
const node = session.nodes.find(n => n.node_id === task.owner);
|
|
412
|
+
if (node && node.status !== 'dead') {
|
|
413
|
+
await collabStore.setNodeStatus(session.session_id, task.owner, 'dead');
|
|
414
|
+
log(`STALL → COLLAB: marked ${task.owner} as dead in session ${session.session_id}`);
|
|
415
|
+
await collabStore.appendAudit(session.session_id, 'node_marked_dead', {
|
|
416
|
+
node_id: task.owner, reason: `Stall detected: no heartbeat for ${silentMin}m`,
|
|
417
|
+
});
|
|
418
|
+
|
|
419
|
+
// Re-check if the round is now complete (dead nodes excluded)
|
|
420
|
+
const updated = await collabStore.get(session.session_id);
|
|
421
|
+
if (updated && collabStore.isRoundComplete(updated)) {
|
|
422
|
+
await evaluateRound(session.session_id);
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
} catch (err) {
|
|
427
|
+
log(`STALL → COLLAB ERROR: ${err.message}`);
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
|
|
361
431
|
const releasedTask = await store.markReleased(
|
|
362
432
|
task.task_id,
|
|
363
433
|
`Stall detected: no agent heartbeat for ${silentMin}m, alive check failed`,
|
|
@@ -526,6 +596,26 @@ async function handleCollabFind(msg) {
|
|
|
526
596
|
respond(msg, session);
|
|
527
597
|
}
|
|
528
598
|
|
|
599
|
+
/**
 * mesh.collab.recruiting — List all sessions currently recruiting nodes.
 * Used by agents to discover collab sessions they should join.
 *
 * Responds with an array of
 * { session_id, task_id, mode, min_nodes, max_nodes, current_nodes, node_ids, recruiting_deadline }
 * where `current_nodes` is the number of joined nodes and `node_ids` lists
 * each member's `node_id` (falling back to `id`).
 *
 * @param {object} msg Incoming request message, passed through to respond().
 */
async function handleCollabRecruiting(msg) {
  const openSessions = await collabStore.list({ status: COLLAB_STATUS.RECRUITING });

  // Project each session down to the fields an agent needs to decide on joining.
  const toSummary = (entry) => {
    const members = entry.nodes;
    return {
      session_id: entry.session_id,
      task_id: entry.task_id,
      mode: entry.mode,
      min_nodes: entry.min_nodes,
      max_nodes: entry.max_nodes,
      current_nodes: members.length,
      node_ids: members.map((member) => member.node_id || member.id),
      recruiting_deadline: entry.recruiting_deadline,
    };
  };

  respond(msg, openSessions.map((entry) => toSummary(entry)));
}
|
|
618
|
+
|
|
529
619
|
/**
|
|
530
620
|
* mesh.collab.reflect — Node submits a reflection for the current round.
|
|
531
621
|
* Expects: { session_id, node_id, summary, learnings, artifacts, confidence, vote }
|
|
@@ -546,8 +636,20 @@ async function handleCollabReflect(msg) {
|
|
|
546
636
|
});
|
|
547
637
|
publishCollabEvent('reflection_received', session);
|
|
548
638
|
|
|
549
|
-
//
|
|
550
|
-
if
|
|
639
|
+
// Sequential mode: advance turn, notify next node or evaluate round
|
|
640
|
+
// Parallel mode: check if all reflections are in → evaluate convergence
|
|
641
|
+
// NOTE: Node.js single-threaded event loop prevents concurrent execution of this
|
|
642
|
+
// handler — no mutex needed. advanceTurn() is safe without CAS here.
|
|
643
|
+
if (session.mode === 'sequential') {
|
|
644
|
+
const nextNodeId = await collabStore.advanceTurn(session_id);
|
|
645
|
+
if (nextNodeId) {
|
|
646
|
+
// Notify only the next-turn node with accumulated intra-round intel
|
|
647
|
+
await notifySequentialTurn(session_id, nextNodeId);
|
|
648
|
+
} else {
|
|
649
|
+
// All turns done → evaluate round
|
|
650
|
+
await evaluateRound(session_id);
|
|
651
|
+
}
|
|
652
|
+
} else if (collabStore.isRoundComplete(session)) {
|
|
551
653
|
await evaluateRound(session_id);
|
|
552
654
|
}
|
|
553
655
|
|
|
@@ -657,8 +759,14 @@ async function startCollabRound(sessionId) {
|
|
|
657
759
|
const scopeStrategy = session.scope_strategy || 'shared';
|
|
658
760
|
const nodeScopes = computeNodeScopes(session.nodes, taskScope, scopeStrategy);
|
|
659
761
|
|
|
660
|
-
//
|
|
661
|
-
|
|
762
|
+
// Sequential mode: only notify the current_turn node.
|
|
763
|
+
// Other nodes get notified via notifySequentialTurn() as turns advance.
|
|
764
|
+
// Parallel mode: notify all nodes at once.
|
|
765
|
+
const nodesToNotify = session.mode === 'sequential' && session.current_turn
|
|
766
|
+
? session.nodes.filter(n => n.node_id === session.current_turn)
|
|
767
|
+
: session.nodes;
|
|
768
|
+
|
|
769
|
+
for (const node of nodesToNotify) {
|
|
662
770
|
const effectiveScope = nodeScopes[node.node_id] || node.scope;
|
|
663
771
|
nc.publish(`mesh.collab.${sessionId}.node.${node.node_id}.round`, sc.encode(JSON.stringify({
|
|
664
772
|
session_id: sessionId,
|
|
@@ -674,6 +782,57 @@ async function startCollabRound(sessionId) {
|
|
|
674
782
|
}
|
|
675
783
|
}
|
|
676
784
|
|
|
785
|
+
/**
 * Notify the next node in a sequential turn.
 *
 * Publishes a round message on `mesh.collab.<session>.node.<node>.round`
 * whose `shared_intel` is the round's existing shared intel followed by a
 * digest of the reflections already submitted this round, then records a
 * `turn_advanced` audit entry.
 *
 * Reflections with missing fields (for example a parse-failed reflection
 * without an `artifacts` array) are tolerated: this runs after the turn has
 * already been advanced, so throwing here would leave the next node without
 * its notification.
 *
 * @param {string} sessionId  Collab session id.
 * @param {string} nextNodeId Node whose turn it now is.
 * @returns {Promise<void>} Resolves without publishing when the session or
 *   its current round cannot be found.
 */
async function notifySequentialTurn(sessionId, nextNodeId) {
  const session = await collabStore.get(sessionId);
  if (!session) return;

  const rounds = session.rounds || [];
  const currentRound = rounds[rounds.length - 1];
  if (!currentRound) return;

  const reflections = currentRound.reflections || [];

  // Compile intra-round intel from reflections already submitted this round.
  const intraLines = [`=== INTRA-ROUND ${currentRound.round_number} (turns so far) ===\n`];
  for (const r of reflections) {
    intraLines.push(`## Turn: ${r.node_id}${r.parse_failed ? ' [PARSE FAILED]' : ''}`);
    if (r.summary) intraLines.push(`Summary: ${r.summary}`);
    if (r.learnings) intraLines.push(`Learnings: ${r.learnings}`);
    // artifacts may be absent on an incomplete reflection; skip the line then.
    if (Array.isArray(r.artifacts) && r.artifacts.length > 0) {
      intraLines.push(`Artifacts: ${r.artifacts.join(', ')}`);
    }
    intraLines.push(`Confidence: ${r.confidence} | Vote: ${r.vote}`);
    intraLines.push('');
  }
  const intraRoundIntel = intraLines.join('\n');
  const combinedIntel = currentRound.shared_intel
    ? `${currentRound.shared_intel}\n\n${intraRoundIntel}`
    : intraRoundIntel;

  const parentTask = await store.get(session.task_id);
  const taskScope = parentTask?.scope || [];
  const scopeStrategy = session.scope_strategy || 'shared';
  const nodeScopes = computeNodeScopes(session.nodes, taskScope, scopeStrategy);
  const nextNode = session.nodes.find((n) => n.node_id === nextNodeId);

  nc.publish(`mesh.collab.${sessionId}.node.${nextNodeId}.round`, sc.encode(JSON.stringify({
    session_id: sessionId,
    task_id: session.task_id,
    round_number: currentRound.round_number,
    shared_intel: combinedIntel,
    // Computed scope wins, then the node's own scope, then unrestricted.
    my_scope: nodeScopes[nextNodeId] || nextNode?.scope || ['*'],
    my_role: nextNode?.role || 'worker',
    mode: 'sequential',
    current_turn: nextNodeId,
    scope_strategy: scopeStrategy,
  })));

  log(`COLLAB SEQ ${sessionId} R${currentRound.round_number}: Turn advanced to ${nextNodeId}`);
  await collabStore.appendAudit(sessionId, 'turn_advanced', {
    round: currentRound.round_number,
    next_node: nextNodeId,
    reflections_so_far: reflections.length,
  });
}
|
|
835
|
+
|
|
677
836
|
/**
|
|
678
837
|
* Evaluate the current round: check convergence, advance or complete.
|
|
679
838
|
*/
|
|
@@ -702,10 +861,11 @@ async function evaluateRound(sessionId) {
|
|
|
702
861
|
await collabStore.markConverged(sessionId);
|
|
703
862
|
publishCollabEvent('converged', session);
|
|
704
863
|
|
|
705
|
-
//
|
|
864
|
+
// Re-fetch after markConverged to ensure fresh state
|
|
865
|
+
const freshSession = await collabStore.get(sessionId);
|
|
706
866
|
const allArtifacts = [];
|
|
707
867
|
const contributions = {};
|
|
708
|
-
for (const round of
|
|
868
|
+
for (const round of freshSession.rounds) {
|
|
709
869
|
for (const r of round.reflections) {
|
|
710
870
|
allArtifacts.push(...r.artifacts);
|
|
711
871
|
contributions[r.node_id] = r.summary;
|
|
@@ -714,20 +874,20 @@ async function evaluateRound(sessionId) {
|
|
|
714
874
|
|
|
715
875
|
await collabStore.markCompleted(sessionId, {
|
|
716
876
|
artifacts: [...new Set(allArtifacts)],
|
|
717
|
-
summary: `Converged after ${
|
|
877
|
+
summary: `Converged after ${freshSession.current_round} rounds with ${freshSession.nodes.length} nodes`,
|
|
718
878
|
node_contributions: contributions,
|
|
719
879
|
});
|
|
720
880
|
await collabStore.appendAudit(sessionId, 'session_completed', {
|
|
721
|
-
outcome: 'converged', rounds:
|
|
881
|
+
outcome: 'converged', rounds: freshSession.current_round,
|
|
722
882
|
artifacts: [...new Set(allArtifacts)].length,
|
|
723
|
-
node_count:
|
|
883
|
+
node_count: freshSession.nodes.length, recruited_count: freshSession.recruited_count,
|
|
724
884
|
});
|
|
725
885
|
|
|
726
886
|
// Complete the parent task
|
|
727
|
-
const
|
|
728
|
-
await store.markCompleted(
|
|
729
|
-
publishEvent('completed', await store.get(
|
|
730
|
-
publishCollabEvent('completed',
|
|
887
|
+
const completedSession = await collabStore.get(sessionId);
|
|
888
|
+
await store.markCompleted(freshSession.task_id, completedSession.result);
|
|
889
|
+
publishEvent('completed', await store.get(freshSession.task_id));
|
|
890
|
+
publishCollabEvent('completed', completedSession);
|
|
731
891
|
|
|
732
892
|
} else if (maxReached) {
|
|
733
893
|
log(`COLLAB MAX ROUNDS ${sessionId}: ${session.current_round}/${session.max_rounds}. Completing with current artifacts.`);
|
|
@@ -955,6 +1115,19 @@ async function advancePlanWave(planId) {
|
|
|
955
1115
|
const waveNum = ready[0].wave;
|
|
956
1116
|
log(`PLAN WAVE ${planId} W${waveNum}: dispatching ${ready.length} subtasks`);
|
|
957
1117
|
|
|
1118
|
+
// Inherit routing fields from parent task so subtasks use the same LLM/node preferences.
|
|
1119
|
+
// CONSTRAINT: Subtasks cannot override routing independently — they always inherit from the
|
|
1120
|
+
// parent task. If per-subtask routing is needed, extend the subtask schema in mesh-plans.js
|
|
1121
|
+
// (e.g. subtask.llm_provider) and merge here with subtask fields taking priority.
|
|
1122
|
+
const parentTask = await store.get(plan.parent_task_id);
|
|
1123
|
+
const inheritedRouting = {};
|
|
1124
|
+
if (parentTask) {
|
|
1125
|
+
if (parentTask.llm_provider) inheritedRouting.llm_provider = parentTask.llm_provider;
|
|
1126
|
+
if (parentTask.llm_model) inheritedRouting.llm_model = parentTask.llm_model;
|
|
1127
|
+
if (parentTask.preferred_nodes) inheritedRouting.preferred_nodes = parentTask.preferred_nodes;
|
|
1128
|
+
if (parentTask.exclude_nodes) inheritedRouting.exclude_nodes = parentTask.exclude_nodes;
|
|
1129
|
+
}
|
|
1130
|
+
|
|
958
1131
|
for (const st of ready) {
|
|
959
1132
|
st.status = SUBTASK_STATUS.QUEUED;
|
|
960
1133
|
|
|
@@ -962,7 +1135,7 @@ async function advancePlanWave(planId) {
|
|
|
962
1135
|
switch (st.delegation.mode) {
|
|
963
1136
|
case 'solo_mesh':
|
|
964
1137
|
case 'collab_mesh': {
|
|
965
|
-
// Submit as mesh task
|
|
1138
|
+
// Submit as mesh task — inherit routing fields from parent task
|
|
966
1139
|
const meshTask = createTask({
|
|
967
1140
|
task_id: st.subtask_id,
|
|
968
1141
|
title: st.title,
|
|
@@ -973,6 +1146,7 @@ async function advancePlanWave(planId) {
|
|
|
973
1146
|
success_criteria: st.success_criteria,
|
|
974
1147
|
tags: ['plan', planId],
|
|
975
1148
|
collaboration: st.delegation.collaboration || undefined,
|
|
1149
|
+
...inheritedRouting,
|
|
976
1150
|
});
|
|
977
1151
|
await store.put(meshTask);
|
|
978
1152
|
st.mesh_task_id = meshTask.task_id;
|
|
@@ -1098,6 +1272,7 @@ async function main() {
|
|
|
1098
1272
|
'mesh.collab.status': handleCollabStatus,
|
|
1099
1273
|
'mesh.collab.find': handleCollabFind,
|
|
1100
1274
|
'mesh.collab.reflect': handleCollabReflect,
|
|
1275
|
+
'mesh.collab.recruiting': handleCollabRecruiting,
|
|
1101
1276
|
// Plan handlers
|
|
1102
1277
|
'mesh.plans.create': handlePlanCreate,
|
|
1103
1278
|
'mesh.plans.get': handlePlanGet,
|
|
@@ -1116,7 +1291,7 @@ async function main() {
|
|
|
1116
1291
|
try {
|
|
1117
1292
|
await handler(msg);
|
|
1118
1293
|
} catch (err) {
|
|
1119
|
-
log(`ERROR handling ${subject}: ${err.message}`);
|
|
1294
|
+
log(`ERROR handling ${subject}: ${err.message}\n${err.stack}`);
|
|
1120
1295
|
try { respondError(msg, err.message); } catch {}
|
|
1121
1296
|
}
|
|
1122
1297
|
}
|
package/bin/mesh.js
CHANGED
|
@@ -29,23 +29,44 @@ const path = require('path');
|
|
|
29
29
|
const os = require('os');
|
|
30
30
|
|
|
31
31
|
// ─── Config ──────────────────────────────────────────
|
|
32
|
-
// NATS URL
|
|
33
|
-
const
|
|
32
|
+
// ── NATS URL resolution: env var → ~/.openclaw/openclaw.env → fallback IP ──
|
|
33
|
+
const NATS_FALLBACK = 'nats://100.91.131.61:4222';
|
|
34
|
+
/**
 * Resolve the NATS server URL.
 *
 * Resolution order:
 *   1. the OPENCLAW_NATS environment variable, if non-empty;
 *   2. the first `OPENCLAW_NATS=<value>` line in ~/.openclaw/openclaw.env
 *      (surrounding single or double quotes around the value are stripped);
 *   3. NATS_FALLBACK.
 *
 * @returns {string} The URL to connect to.
 */
function resolveNatsUrl() {
  if (process.env.OPENCLAW_NATS) return process.env.OPENCLAW_NATS;

  let content = '';
  try {
    const envFile = path.join(os.homedir(), '.openclaw', 'openclaw.env');
    content = fs.readFileSync(envFile, 'utf8');
  } catch {
    // Best effort: a missing or unreadable env file just means "use the fallback".
    return NATS_FALLBACK;
  }

  const match = content.match(/^\s*OPENCLAW_NATS\s*=\s*(.+)/m);
  if (match) {
    // Env files are commonly written as KEY="value"; the quotes are not part of the URL.
    const value = match[1].trim().replace(/^(["'])(.*)\1$/, '$2').trim();
    if (value) return value;
  }
  return NATS_FALLBACK;
}
|
|
46
|
+
const NATS_URL = resolveNatsUrl();
|
|
34
47
|
const SHARED_DIR = path.join(os.homedir(), 'openclaw', 'shared');
|
|
35
48
|
const LOCAL_NODE = os.hostname().toLowerCase().replace(/[^a-z0-9-]/g, '-');
|
|
36
49
|
const sc = StringCodec();
|
|
37
50
|
|
|
38
51
|
// ─── Known nodes (for --node shortcuts) ──────────────
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
52
|
+
const NODE_ALIASES_DEFAULTS = {
|
|
53
|
+
'ubuntu': 'calos-vmware-virtual-platform',
|
|
54
|
+
'linux': 'calos-vmware-virtual-platform',
|
|
55
|
+
'mac': 'moltymacs-virtual-machine-local',
|
|
56
|
+
'macos': 'moltymacs-virtual-machine-local',
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
/**
 * Build the node alias table used for --node shortcuts.
 *
 * Starts from NODE_ALIASES_DEFAULTS and overlays the entries of
 * ~/.openclaw/mesh-aliases.json when that file exists and contains a JSON
 * object. A file that cannot be parsed, or that holds something other than
 * an object (array, string, null, ...), is ignored with a warning on stderr
 * so the user can tell why their aliases are not applied.
 *
 * @returns {Object<string, string>} Alias -> node id map. When no custom
 *   aliases are applied this is NODE_ALIASES_DEFAULTS itself.
 */
function loadNodeAliases() {
  const aliasPath = path.join(os.homedir(), '.openclaw', 'mesh-aliases.json');

  let raw;
  try {
    raw = fs.readFileSync(aliasPath, 'utf8');
  } catch {
    // No alias file (or unreadable): custom aliases are optional.
    return NODE_ALIASES_DEFAULTS;
  }

  let custom;
  try {
    custom = JSON.parse(raw);
  } catch (err) {
    console.error(`Warning: ignoring ${aliasPath}: invalid JSON (${err.message})`);
    return NODE_ALIASES_DEFAULTS;
  }

  // Spreading an array or string would create index-keyed junk aliases.
  if (custom === null || typeof custom !== 'object' || Array.isArray(custom)) {
    console.error(`Warning: ignoring ${aliasPath}: expected a JSON object of alias -> node id`);
    return NODE_ALIASES_DEFAULTS;
  }

  return { ...NODE_ALIASES_DEFAULTS, ...custom };
}
|
|
69
|
+
const NODE_ALIASES = loadNodeAliases();
|
|
49
70
|
|
|
50
71
|
/**
|
|
51
72
|
* Resolve a node name — accepts aliases, full IDs, or "self"/"local"
|
|
@@ -98,7 +119,7 @@ function checkExecSafety(command) {
|
|
|
98
119
|
*/
|
|
99
120
|
async function natsConnect() {
|
|
100
121
|
try {
|
|
101
|
-
return await connect(
|
|
122
|
+
return await connect({ servers: NATS_URL, timeout: 5000 });
|
|
102
123
|
} catch (err) {
|
|
103
124
|
console.error(`Error: Cannot connect to NATS at ${NATS_URL}`);
|
|
104
125
|
console.error(`Is the NATS server running? Is Tailscale connected?`);
|
|
@@ -140,21 +161,15 @@ async function collectHeartbeats(nc, waitMs = 3000) {
|
|
|
140
161
|
uptime: os.uptime(),
|
|
141
162
|
};
|
|
142
163
|
|
|
143
|
-
// Force-unsubscribe after deadline to prevent hanging if no messages arrive
|
|
144
|
-
const timer = setTimeout(() => sub.unsubscribe(), waitMs);
|
|
145
|
-
|
|
146
164
|
// Listen for heartbeats for a few seconds
|
|
147
165
|
const deadline = Date.now() + waitMs;
|
|
148
166
|
for await (const msg of sub) {
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
}
|
|
154
|
-
} catch {}
|
|
167
|
+
const s = JSON.parse(sc.decode(msg.data));
|
|
168
|
+
if (s.node !== LOCAL_NODE) {
|
|
169
|
+
nodes[s.node] = s;
|
|
170
|
+
}
|
|
155
171
|
if (Date.now() >= deadline) break;
|
|
156
172
|
}
|
|
157
|
-
clearTimeout(timer);
|
|
158
173
|
sub.unsubscribe();
|
|
159
174
|
return nodes;
|
|
160
175
|
}
|
|
@@ -382,6 +397,10 @@ async function cmdSubmit(args) {
|
|
|
382
397
|
success_criteria: task.success_criteria || [],
|
|
383
398
|
scope: task.scope || [],
|
|
384
399
|
priority: task.auto_priority || 0,
|
|
400
|
+
llm_provider: task.provider || task.llm_provider || null,
|
|
401
|
+
llm_model: task.model || task.llm_model || null,
|
|
402
|
+
preferred_nodes: task.preferred_nodes || [],
|
|
403
|
+
exclude_nodes: task.exclude_nodes || [],
|
|
385
404
|
});
|
|
386
405
|
console.log(`Submitted: ${result.data.task_id} [${result.data.status}]`);
|
|
387
406
|
// Mark as 'submitted' — NOT 'running'. The card reflects actual mesh state.
|
|
@@ -436,12 +455,18 @@ async function cmdSubmit(args) {
|
|
|
436
455
|
scope: task.scope || [],
|
|
437
456
|
priority: task.priority || 0,
|
|
438
457
|
tags: task.tags || [],
|
|
458
|
+
llm_provider: task.provider || task.llm_provider || null,
|
|
459
|
+
llm_model: task.model || task.llm_model || null,
|
|
460
|
+
preferred_nodes: task.preferred_nodes || [],
|
|
461
|
+
exclude_nodes: task.exclude_nodes || [],
|
|
462
|
+
collaboration: task.collaboration || undefined,
|
|
439
463
|
});
|
|
440
464
|
|
|
441
465
|
console.log(`Submitted: ${result.data.task_id} "${result.data.title}"`);
|
|
442
466
|
console.log(` Status: ${result.data.status}`);
|
|
443
467
|
console.log(` Budget: ${result.data.budget_minutes}m`);
|
|
444
468
|
console.log(` Metric: ${result.data.metric || 'none'}`);
|
|
469
|
+
if (result.data.llm_provider) console.log(` Provider: ${result.data.llm_provider}`);
|
|
445
470
|
await nc.close();
|
|
446
471
|
}
|
|
447
472
|
|
|
@@ -575,6 +600,124 @@ async function cmdRepair(args) {
|
|
|
575
600
|
}
|
|
576
601
|
}
|
|
577
602
|
|
|
603
|
+
/**
 * mesh deploy [--force] [--component <name>] [--node <name>] — trigger fleet deploy.
 *
 * Publishes mesh.deploy.trigger to NATS. All nodes with mesh-deploy-listener
 * will pull from git and self-deploy. Polls MESH_DEPLOY_RESULTS for status.
 *
 * Flow:
 *   1. Read the short SHA and branch of the local repo (OPENCLAW_REPO_DIR,
 *      else ~/openclaw-node if it is a git checkout, else ~/openclaw).
 *   2. Store a "latest" marker in the MESH_DEPLOY_RESULTS KV bucket.
 *   3. Publish the trigger and poll the bucket for `<sha>-<nodeId>` results
 *      for up to 15 seconds.
 *
 * Exits the process with code 1 when the git state cannot be read.
 *
 * @param {string[]} args CLI arguments following the `deploy` command.
 *   `--component` and `--node` may be repeated.
 * @returns {Promise<void>}
 */
async function cmdDeploy(args) {
  const { execSync } = require('child_process');

  // Prefer openclaw-node (git repo) over openclaw (runtime).
  const nodeRepo = path.join(os.homedir(), 'openclaw-node');
  const defaultRepo = fs.existsSync(path.join(nodeRepo, '.git'))
    ? nodeRepo
    : path.join(os.homedir(), 'openclaw');
  const repoDir = process.env.OPENCLAW_REPO_DIR || defaultRepo;
  const force = args.includes('--force');

  // Parse --component flags.
  const components = [];
  for (let i = 0; i < args.length; i++) {
    if (args[i] === '--component' && args[i + 1]) {
      components.push(args[i + 1]);
      i++;
    }
  }

  // Parse --node flags (target specific nodes, default: all).
  const requestedNodes = [];
  for (let i = 0; i < args.length; i++) {
    if (args[i] === '--node' && args[i + 1]) {
      requestedNodes.push(resolveNode(args[i + 1]));
      i++;
    }
  }
  // Different aliases can resolve to the same node (e.g. ubuntu / linux).
  // Deduplicate so the "all targets reported" check below can be satisfied.
  const targetNodes = [...new Set(requestedNodes)];

  // Get current SHA and branch.
  let sha;
  let branch;
  try {
    sha = execSync('git rev-parse --short HEAD', { cwd: repoDir, encoding: 'utf8' }).trim();
    branch = execSync('git rev-parse --abbrev-ref HEAD', { cwd: repoDir, encoding: 'utf8' }).trim();
  } catch {
    console.error(`Error: Cannot read git state from ${repoDir}`);
    process.exit(1);
  }

  console.log(`Deploying ${sha} (${branch})${force ? ' [FORCE]' : ''}`);
  if (components.length > 0) console.log(`  Components: ${components.join(', ')}`);
  if (targetNodes.length > 0) console.log(`  Targets: ${targetNodes.join(', ')}`);
  else console.log('  Targets: all nodes');

  const nc = await natsConnect();

  // Always release the connection, even if publishing or polling throws.
  try {
    const trigger = {
      sha,
      branch,
      components: components.length > 0 ? components : ['all'],
      nodes: targetNodes.length > 0 ? targetNodes : ['all'],
      force,
      initiator: LOCAL_NODE,
      timestamp: new Date().toISOString(),
    };

    const js = nc.jetstream();

    // Write "latest" marker so offline nodes can catch up. Best effort: the
    // deploy itself does not depend on it, so a failure is only reported.
    try {
      const markerKv = await js.views.kv('MESH_DEPLOY_RESULTS', { history: 5, ttl: 7 * 24 * 60 * 60 * 1000 });
      await markerKv.put('latest', sc.encode(JSON.stringify({ sha, branch })));
    } catch (err) {
      console.error(`Warning: could not record latest deploy marker: ${err.message}`);
    }

    // Publish trigger.
    nc.publish('mesh.deploy.trigger', sc.encode(JSON.stringify(trigger)));
    await nc.flush();
    console.log('Deploy trigger sent.\n');

    // Poll for results (15s timeout).
    console.log('Waiting for node responses...');
    const deadline = Date.now() + 15000;
    const seen = new Set();

    // Without explicit targets, expect a result from every known alias target.
    const checkNodes = targetNodes.length > 0
      ? targetNodes
      : [...new Set(Object.values(NODE_ALIASES))];

    try {
      const resultsKv = await js.views.kv('MESH_DEPLOY_RESULTS');

      while (Date.now() < deadline) {
        for (const nodeId of checkNodes) {
          if (seen.has(nodeId)) continue;
          const key = `${sha}-${nodeId}`;
          try {
            const entry = await resultsKv.get(key);
            if (entry && entry.value) {
              const result = JSON.parse(sc.decode(entry.value));
              if (result.status === 'success' || result.status === 'failed' || result.status === 'skipped') {
                const icon = result.status === 'success' ? '\x1b[32m✓\x1b[0m' : result.status === 'skipped' ? '\x1b[33m-\x1b[0m' : '\x1b[31m✗\x1b[0m';
                console.log(`  ${icon} ${nodeId}: ${result.status} (${result.durationSeconds || 0}s)`);
                if (result.errors && result.errors.length > 0) {
                  for (const e of result.errors) console.log(`    Error: ${e}`);
                }
                seen.add(nodeId);
              }
            }
          } catch {
            // A missing or unparsable entry is treated as "not reported yet";
            // the node is checked again on the next poll.
          }
        }

        if (seen.size >= checkNodes.length) break;
        await new Promise((r) => setTimeout(r, 2000));
      }
    } catch (err) {
      console.error(`Warning: could not read deploy results: ${err.message}`);
    }

    if (seen.size === 0) {
      console.log('  (no responses yet — nodes may still be deploying)');
    }

    console.log('');
  } finally {
    await nc.close();
  }
}
|
|
720
|
+
|
|
578
721
|
/**
|
|
579
722
|
* mesh help — show usage.
|
|
580
723
|
*/
|
|
@@ -602,6 +745,10 @@ function cmdHelp() {
|
|
|
602
745
|
' mesh health --json Health check (JSON output)',
|
|
603
746
|
' mesh repair Self-repair this node',
|
|
604
747
|
' mesh repair --all Self-repair ALL nodes',
|
|
748
|
+
' mesh deploy Deploy to all nodes',
|
|
749
|
+
' mesh deploy --force Force deploy (skip cache)',
|
|
750
|
+
' mesh deploy --node ubuntu Deploy to specific node',
|
|
751
|
+
' mesh deploy --component mesh-daemons Deploy specific component',
|
|
605
752
|
'',
|
|
606
753
|
'NODE ALIASES:',
|
|
607
754
|
' ubuntu, linux = Ubuntu VM (calos-vmware-virtual-platform)',
|
|
@@ -632,6 +779,7 @@ async function main() {
|
|
|
632
779
|
case 'tasks': return cmdTasks(args);
|
|
633
780
|
case 'health': return cmdHealth(args);
|
|
634
781
|
case 'repair': return cmdRepair(args);
|
|
782
|
+
case 'deploy': return cmdDeploy(args);
|
|
635
783
|
case 'help':
|
|
636
784
|
case '--help':
|
|
637
785
|
case '-h': return cmdHelp();
|