openclaw-node-harness 2.0.2 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -131,6 +131,36 @@ async function handleSubmit(msg) {
131
131
  respond(msg, task);
132
132
  }
133
133
 
/**
 * Abort any collab session tied to a task that is being terminated.
 * Shared by handleFail, handleRelease, handleCancel.
 *
 * NOT called from handleComplete — that path goes through evaluateRound,
 * which already calls collabStore.markCompleted() on the session.
 *
 * Best-effort: every failure is logged and swallowed so that terminating the
 * parent task is never blocked by collab bookkeeping.
 *
 * @param {object} task - Task record; only `collab_session_id` is read.
 * @param {string} reason - Human-readable reason stored on the session and audit entry.
 * @returns {Promise<void>}
 */
async function cleanupTaskCollabSession(task, reason) {
  const sessionId = task.collab_session_id;
  if (!sessionId || !collabStore) return;

  try {
    // A falsy result means nothing was transitioned (per the store's documented
    // contract: session missing or already completed/aborted), so the audit
    // entry and event fire only for a real transition. This keeps double-abort safe.
    const session = await collabStore.markAborted(sessionId, reason);
    if (session) {
      await collabStore.appendAudit(sessionId, 'session_aborted', { reason });
      publishCollabEvent('aborted', session);
      log(`COLLAB ABORTED ${sessionId}: ${reason}`);
    }
  } catch (err) {
    log(`COLLAB CLEANUP WARN: could not abort session ${sessionId}: ${err.message}`);
  } finally {
    // Always drop the audit error rate-limit counter, even when the abort or the
    // audit write above threw; otherwise the entry lingers for a dead session.
    // NOTE: sessions expiring via KV TTL bypass this function entirely.
    try {
      collabStore.clearAuditErrorCount(sessionId);
    } catch (err) {
      log(`COLLAB CLEANUP WARN: could not clear audit counter for ${sessionId}: ${err.message}`);
    }
  }
}
163
+
134
164
  /**
135
165
  * mesh.tasks.claim — Agent requests the next available task.
136
166
  * Expects: { node_id }
@@ -203,6 +233,14 @@ async function handleComplete(msg) {
203
233
  log(`COMPLETE ${task_id} in ${elapsed}m: ${result?.summary || 'no summary'}`);
204
234
  publishEvent('completed', task);
205
235
 
236
+ // NOTE: no cleanupTaskCollabSession here — collab tasks complete via
237
+ // evaluateRound → markCompleted on the session, then store.markCompleted
238
+ // on the parent task. Calling cleanupTaskCollabSession would markAborted
239
+ // on an already-completed session. Clean up audit counter only.
240
+ if (task.collab_session_id && collabStore) {
241
+ collabStore.clearAuditErrorCount(task.collab_session_id);
242
+ }
243
+
206
244
  // Check if this task belongs to a plan
207
245
  await checkPlanProgress(task_id, 'completed');
208
246
 
@@ -222,6 +260,7 @@ async function handleFail(msg) {
222
260
 
223
261
  log(`FAIL ${task_id}: ${reason}`);
224
262
  publishEvent('failed', task);
263
+ await cleanupTaskCollabSession(task, `Parent task ${task_id} failed: ${reason}`);
225
264
 
226
265
  // Check if this task belongs to a plan
227
266
  await checkPlanProgress(task_id, 'failed');
@@ -302,6 +341,7 @@ async function handleRelease(msg) {
302
341
 
303
342
  log(`RELEASED ${task_id}: ${reason || 'no reason'} (needs human triage)`);
304
343
  publishEvent('released', task);
344
+ await cleanupTaskCollabSession(task, `Parent task ${task_id} released: ${reason || 'human triage'}`);
305
345
  respond(msg, task);
306
346
  }
307
347
 
@@ -323,6 +363,7 @@ async function handleCancel(msg) {
323
363
 
324
364
  log(`CANCEL ${task_id}: ${reason || 'no reason'}`);
325
365
  publishEvent('cancelled', task);
366
+ await cleanupTaskCollabSession(task, `Parent task ${task_id} cancelled: ${reason || 'no reason'}`);
326
367
  respond(msg, task);
327
368
  }
328
369
 
@@ -358,6 +399,35 @@ async function detectStalls() {
358
399
  }
359
400
  }
360
401
 
402
+ // Mark stalled node as dead in any collab sessions it belongs to.
403
+ // This unblocks isRoundComplete() which otherwise waits forever for
404
+ // a reflection from a crashed node.
405
+ // Uses findActiveSessionsByNode() — O(sessions) single pass instead of
406
+ // the previous O(sessions × nodes) list-then-find pattern.
407
+ if (task.owner && collabStore) {
408
+ try {
409
+ const sessions = await collabStore.findActiveSessionsByNode(task.owner);
410
+ for (const session of sessions) {
411
+ const node = session.nodes.find(n => n.node_id === task.owner);
412
+ if (node && node.status !== 'dead') {
413
+ await collabStore.setNodeStatus(session.session_id, task.owner, 'dead');
414
+ log(`STALL → COLLAB: marked ${task.owner} as dead in session ${session.session_id}`);
415
+ await collabStore.appendAudit(session.session_id, 'node_marked_dead', {
416
+ node_id: task.owner, reason: `Stall detected: no heartbeat for ${silentMin}m`,
417
+ });
418
+
419
+ // Re-check if the round is now complete (dead nodes excluded)
420
+ const updated = await collabStore.get(session.session_id);
421
+ if (updated && collabStore.isRoundComplete(updated)) {
422
+ await evaluateRound(session.session_id);
423
+ }
424
+ }
425
+ }
426
+ } catch (err) {
427
+ log(`STALL → COLLAB ERROR: ${err.message}`);
428
+ }
429
+ }
430
+
361
431
  const releasedTask = await store.markReleased(
362
432
  task.task_id,
363
433
  `Stall detected: no agent heartbeat for ${silentMin}m, alive check failed`,
@@ -526,6 +596,26 @@ async function handleCollabFind(msg) {
526
596
  respond(msg, session);
527
597
  }
528
598
 
/**
 * mesh.collab.recruiting — List all sessions currently recruiting nodes.
 * Used by agents to discover collab sessions they should join.
 * Returns: array of { session_id, task_id, mode, min_nodes, max_nodes,
 *   current_nodes, node_ids, recruiting_deadline }
 *
 * @param {object} msg - Incoming request message; passed through to respond().
 * @returns {Promise<void>}
 * @throws {Error} When the collab store is not initialized. The message
 *   dispatcher is expected to turn thrown errors into error replies.
 */
async function handleCollabRecruiting(msg) {
  if (!collabStore) {
    // Explicit error instead of "Cannot read properties of null (reading 'list')".
    throw new Error('Collab store is not initialized');
  }

  const recruiting = (await collabStore.list({ status: COLLAB_STATUS.RECRUITING })) ?? [];
  const summaries = recruiting.map((s) => {
    // A single record without a nodes array must not break discovery for everyone.
    const nodes = Array.isArray(s.nodes) ? s.nodes : [];
    return {
      session_id: s.session_id,
      task_id: s.task_id,
      mode: s.mode,
      min_nodes: s.min_nodes,
      max_nodes: s.max_nodes,
      current_nodes: nodes.length,
      node_ids: nodes.map((n) => n.node_id || n.id),
      recruiting_deadline: s.recruiting_deadline,
    };
  });
  respond(msg, summaries);
}
618
+
529
619
  /**
530
620
  * mesh.collab.reflect — Node submits a reflection for the current round.
531
621
  * Expects: { session_id, node_id, summary, learnings, artifacts, confidence, vote }
@@ -546,8 +636,20 @@ async function handleCollabReflect(msg) {
546
636
  });
547
637
  publishCollabEvent('reflection_received', session);
548
638
 
549
- // Check if all reflections are in evaluate convergence
550
- if (collabStore.isRoundComplete(session)) {
639
+ // Sequential mode: advance turn, notify next node or evaluate round
640
+ // Parallel mode: check if all reflections are in → evaluate convergence
641
+ // NOTE: Node.js single-threaded event loop prevents concurrent execution of this
642
+ // handler — no mutex needed. advanceTurn() is safe without CAS here.
643
+ if (session.mode === 'sequential') {
644
+ const nextNodeId = await collabStore.advanceTurn(session_id);
645
+ if (nextNodeId) {
646
+ // Notify only the next-turn node with accumulated intra-round intel
647
+ await notifySequentialTurn(session_id, nextNodeId);
648
+ } else {
649
+ // All turns done → evaluate round
650
+ await evaluateRound(session_id);
651
+ }
652
+ } else if (collabStore.isRoundComplete(session)) {
551
653
  await evaluateRound(session_id);
552
654
  }
553
655
 
@@ -657,8 +759,14 @@ async function startCollabRound(sessionId) {
657
759
  const scopeStrategy = session.scope_strategy || 'shared';
658
760
  const nodeScopes = computeNodeScopes(session.nodes, taskScope, scopeStrategy);
659
761
 
660
- // Notify each node with their enforced scope
661
- for (const node of session.nodes) {
762
+ // Sequential mode: only notify the current_turn node.
763
+ // Other nodes get notified via notifySequentialTurn() as turns advance.
764
+ // Parallel mode: notify all nodes at once.
765
+ const nodesToNotify = session.mode === 'sequential' && session.current_turn
766
+ ? session.nodes.filter(n => n.node_id === session.current_turn)
767
+ : session.nodes;
768
+
769
+ for (const node of nodesToNotify) {
662
770
  const effectiveScope = nodeScopes[node.node_id] || node.scope;
663
771
  nc.publish(`mesh.collab.${sessionId}.node.${node.node_id}.round`, sc.encode(JSON.stringify({
664
772
  session_id: sessionId,
@@ -674,6 +782,57 @@ async function startCollabRound(sessionId) {
674
782
  }
675
783
  }
676
784
 
/**
 * Notify the next node in a sequential turn.
 * Includes intra-round reflections so far as additional shared intel.
 *
 * @param {string} sessionId - Collab session whose turn just advanced.
 * @param {string} nextNodeId - Node that now holds the turn.
 * @returns {Promise<void>} Resolves after the round notification is published
 *   and audited; resolves without publishing when the session or its current
 *   round cannot be found.
 */
async function notifySequentialTurn(sessionId, nextNodeId) {
  const session = await collabStore.get(sessionId);
  if (!session) return;

  const rounds = session.rounds ?? [];
  const currentRound = rounds[rounds.length - 1];
  if (!currentRound) return;

  // Compile intra-round intel from reflections already submitted this round.
  const reflections = currentRound.reflections ?? [];
  const intraLines = [`=== INTRA-ROUND ${currentRound.round_number} (turns so far) ===\n`];
  for (const r of reflections) {
    intraLines.push(`## Turn: ${r.node_id}${r.parse_failed ? ' [PARSE FAILED]' : ''}`);
    if (r.summary) intraLines.push(`Summary: ${r.summary}`);
    if (r.learnings) intraLines.push(`Learnings: ${r.learnings}`);
    // summary/learnings are optional above; treat artifacts the same way so a
    // reflection without an artifacts array cannot abort the turn hand-off.
    const artifacts = Array.isArray(r.artifacts) ? r.artifacts : [];
    if (artifacts.length > 0) intraLines.push(`Artifacts: ${artifacts.join(', ')}`);
    intraLines.push(`Confidence: ${r.confidence} | Vote: ${r.vote}`);
    intraLines.push('');
  }
  const intraRoundIntel = intraLines.join('\n');
  const combinedIntel = currentRound.shared_intel
    ? currentRound.shared_intel + '\n\n' + intraRoundIntel
    : intraRoundIntel;

  // Scope resolution uses the same inputs as the round-start notification:
  // the parent task's scope plus the session's scope strategy.
  const parentTask = await store.get(session.task_id);
  const taskScope = parentTask?.scope || [];
  const scopeStrategy = session.scope_strategy || 'shared';
  const nodeScopes = computeNodeScopes(session.nodes, taskScope, scopeStrategy);
  const nextNode = session.nodes.find((n) => n.node_id === nextNodeId);

  nc.publish(`mesh.collab.${sessionId}.node.${nextNodeId}.round`, sc.encode(JSON.stringify({
    session_id: sessionId,
    task_id: session.task_id,
    round_number: currentRound.round_number,
    shared_intel: combinedIntel,
    my_scope: nodeScopes[nextNodeId] || nextNode?.scope || ['*'],
    my_role: nextNode?.role || 'worker',
    mode: 'sequential',
    current_turn: nextNodeId,
    scope_strategy: scopeStrategy,
  })));

  log(`COLLAB SEQ ${sessionId} R${currentRound.round_number}: Turn advanced to ${nextNodeId}`);
  await collabStore.appendAudit(sessionId, 'turn_advanced', {
    round: currentRound.round_number,
    next_node: nextNodeId,
    reflections_so_far: reflections.length,
  });
}
835
+
677
836
  /**
678
837
  * Evaluate the current round: check convergence, advance or complete.
679
838
  */
@@ -702,10 +861,11 @@ async function evaluateRound(sessionId) {
702
861
  await collabStore.markConverged(sessionId);
703
862
  publishCollabEvent('converged', session);
704
863
 
705
- // Collect artifacts from all reflections
864
+ // Re-fetch after markConverged to ensure fresh state
865
+ const freshSession = await collabStore.get(sessionId);
706
866
  const allArtifacts = [];
707
867
  const contributions = {};
708
- for (const round of session.rounds) {
868
+ for (const round of freshSession.rounds) {
709
869
  for (const r of round.reflections) {
710
870
  allArtifacts.push(...r.artifacts);
711
871
  contributions[r.node_id] = r.summary;
@@ -714,20 +874,20 @@ async function evaluateRound(sessionId) {
714
874
 
715
875
  await collabStore.markCompleted(sessionId, {
716
876
  artifacts: [...new Set(allArtifacts)],
717
- summary: `Converged after ${session.current_round} rounds with ${session.nodes.length} nodes`,
877
+ summary: `Converged after ${freshSession.current_round} rounds with ${freshSession.nodes.length} nodes`,
718
878
  node_contributions: contributions,
719
879
  });
720
880
  await collabStore.appendAudit(sessionId, 'session_completed', {
721
- outcome: 'converged', rounds: session.current_round,
881
+ outcome: 'converged', rounds: freshSession.current_round,
722
882
  artifacts: [...new Set(allArtifacts)].length,
723
- node_count: session.nodes.length, recruited_count: session.recruited_count,
883
+ node_count: freshSession.nodes.length, recruited_count: freshSession.recruited_count,
724
884
  });
725
885
 
726
886
  // Complete the parent task
727
- const updatedSession = await collabStore.get(sessionId);
728
- await store.markCompleted(session.task_id, updatedSession.result);
729
- publishEvent('completed', await store.get(session.task_id));
730
- publishCollabEvent('completed', updatedSession);
887
+ const completedSession = await collabStore.get(sessionId);
888
+ await store.markCompleted(freshSession.task_id, completedSession.result);
889
+ publishEvent('completed', await store.get(freshSession.task_id));
890
+ publishCollabEvent('completed', completedSession);
731
891
 
732
892
  } else if (maxReached) {
733
893
  log(`COLLAB MAX ROUNDS ${sessionId}: ${session.current_round}/${session.max_rounds}. Completing with current artifacts.`);
@@ -955,6 +1115,19 @@ async function advancePlanWave(planId) {
955
1115
  const waveNum = ready[0].wave;
956
1116
  log(`PLAN WAVE ${planId} W${waveNum}: dispatching ${ready.length} subtasks`);
957
1117
 
1118
+ // Inherit routing fields from parent task so subtasks use the same LLM/node preferences.
1119
+ // CONSTRAINT: Subtasks cannot override routing independently — they always inherit from the
1120
+ // parent task. If per-subtask routing is needed, extend the subtask schema in mesh-plans.js
1121
+ // (e.g. subtask.llm_provider) and merge here with subtask fields taking priority.
1122
+ const parentTask = await store.get(plan.parent_task_id);
1123
+ const inheritedRouting = {};
1124
+ if (parentTask) {
1125
+ if (parentTask.llm_provider) inheritedRouting.llm_provider = parentTask.llm_provider;
1126
+ if (parentTask.llm_model) inheritedRouting.llm_model = parentTask.llm_model;
1127
+ if (parentTask.preferred_nodes) inheritedRouting.preferred_nodes = parentTask.preferred_nodes;
1128
+ if (parentTask.exclude_nodes) inheritedRouting.exclude_nodes = parentTask.exclude_nodes;
1129
+ }
1130
+
958
1131
  for (const st of ready) {
959
1132
  st.status = SUBTASK_STATUS.QUEUED;
960
1133
 
@@ -962,7 +1135,7 @@ async function advancePlanWave(planId) {
962
1135
  switch (st.delegation.mode) {
963
1136
  case 'solo_mesh':
964
1137
  case 'collab_mesh': {
965
- // Submit as mesh task
1138
+ // Submit as mesh task — inherit routing fields from parent task
966
1139
  const meshTask = createTask({
967
1140
  task_id: st.subtask_id,
968
1141
  title: st.title,
@@ -973,6 +1146,7 @@ async function advancePlanWave(planId) {
973
1146
  success_criteria: st.success_criteria,
974
1147
  tags: ['plan', planId],
975
1148
  collaboration: st.delegation.collaboration || undefined,
1149
+ ...inheritedRouting,
976
1150
  });
977
1151
  await store.put(meshTask);
978
1152
  st.mesh_task_id = meshTask.task_id;
@@ -1098,6 +1272,7 @@ async function main() {
1098
1272
  'mesh.collab.status': handleCollabStatus,
1099
1273
  'mesh.collab.find': handleCollabFind,
1100
1274
  'mesh.collab.reflect': handleCollabReflect,
1275
+ 'mesh.collab.recruiting': handleCollabRecruiting,
1101
1276
  // Plan handlers
1102
1277
  'mesh.plans.create': handlePlanCreate,
1103
1278
  'mesh.plans.get': handlePlanGet,
@@ -1116,7 +1291,7 @@ async function main() {
1116
1291
  try {
1117
1292
  await handler(msg);
1118
1293
  } catch (err) {
1119
- log(`ERROR handling ${subject}: ${err.message}`);
1294
+ log(`ERROR handling ${subject}: ${err.message}\n${err.stack}`);
1120
1295
  try { respondError(msg, err.message); } catch {}
1121
1296
  }
1122
1297
  }
package/bin/mesh.js CHANGED
@@ -29,23 +29,44 @@ const path = require('path');
29
29
  const os = require('os');
30
30
 
31
31
  // ─── Config ──────────────────────────────────────────
32
- // NATS URL resolved via shared lib (env var → openclaw.env → .mesh-config localhost fallback)
33
- const { NATS_URL, natsConnectOpts } = require('../lib/nats-resolve');
32
+ // ── NATS URL resolution: env var → ~/.openclaw/openclaw.env → fallback IP ──
const NATS_FALLBACK = 'nats://100.91.131.61:4222';

/**
 * Resolve the NATS server URL.
 *
 * Precedence: OPENCLAW_NATS environment variable → OPENCLAW_NATS entry in
 * ~/.openclaw/openclaw.env → NATS_FALLBACK.
 *
 * @returns {string} URL to hand to the NATS client.
 */
function resolveNatsUrl() {
  if (process.env.OPENCLAW_NATS) return process.env.OPENCLAW_NATS;
  try {
    const envFile = path.join(os.homedir(), '.openclaw', 'openclaw.env');
    if (fs.existsSync(envFile)) {
      const content = fs.readFileSync(envFile, 'utf8');
      // Use [ \t]* rather than \s* around "=": \s also matches newlines, which
      // let an empty "OPENCLAW_NATS=" line capture the NEXT line as the URL.
      const match = content.match(/^[ \t]*OPENCLAW_NATS[ \t]*=[ \t]*(.*)$/m);
      if (match) {
        const raw = match[1].trim();
        // Env files commonly quote values; drop one pair of matching quotes.
        const isQuoted = raw.length >= 2
          && (raw[0] === '"' || raw[0] === "'")
          && raw[raw.length - 1] === raw[0];
        const value = isQuoted ? raw.slice(1, -1).trim() : raw;
        if (value) return value;
      }
    }
  } catch {
    // Best-effort: an unreadable env file must not stop the CLI; use the fallback.
  }
  return NATS_FALLBACK;
}
46
+ const NATS_URL = resolveNatsUrl();
34
47
  const SHARED_DIR = path.join(os.homedir(), 'openclaw', 'shared');
35
48
  const LOCAL_NODE = os.hostname().toLowerCase().replace(/[^a-z0-9-]/g, '-');
36
49
  const sc = StringCodec();
37
50
 
38
51
  // ─── Known nodes (for --node shortcuts) ──────────────
39
- // Load from ~/.openclaw/mesh-aliases.json if it exists, otherwise empty.
40
- let NODE_ALIASES = {};
41
- try {
42
- const aliasFile = path.join(os.homedir(), '.openclaw', 'mesh-aliases.json');
43
- if (fs.existsSync(aliasFile)) {
44
- NODE_ALIASES = JSON.parse(fs.readFileSync(aliasFile, 'utf8'));
45
- }
46
- } catch {
47
- // File missing or malformed — proceed with no aliases
const NODE_ALIASES_DEFAULTS = {
  'ubuntu': 'calos-vmware-virtual-platform',
  'linux': 'calos-vmware-virtual-platform',
  'mac': 'moltymacs-virtual-machine-local',
  'macos': 'moltymacs-virtual-machine-local',
};

/**
 * Build the alias → node ID table used by --node shortcuts.
 *
 * Starts from NODE_ALIASES_DEFAULTS and overlays string-valued entries from
 * ~/.openclaw/mesh-aliases.json when that file exists and holds a JSON object.
 * An unreadable or malformed file is reported on stderr and otherwise ignored.
 *
 * @returns {Object<string, string>} Fresh alias map; callers may mutate it
 *   without affecting NODE_ALIASES_DEFAULTS.
 */
function loadNodeAliases() {
  const aliasPath = path.join(os.homedir(), '.openclaw', 'mesh-aliases.json');
  // Copy so the defaults object is never handed out (and mutated) by reference.
  const aliases = { ...NODE_ALIASES_DEFAULTS };
  try {
    if (fs.existsSync(aliasPath)) {
      const custom = JSON.parse(fs.readFileSync(aliasPath, 'utf8'));
      // Spreading a string or array would inject index keys ("0", "1", …).
      if (custom === null || typeof custom !== 'object' || Array.isArray(custom)) {
        throw new TypeError('expected a JSON object mapping alias to node ID');
      }
      for (const [alias, nodeId] of Object.entries(custom)) {
        if (typeof nodeId === 'string' && nodeId) aliases[alias] = nodeId;
      }
    }
  } catch (err) {
    // Still best-effort, but tell the user why their aliases are not applied.
    console.error(`Warning: ignoring ${aliasPath}: ${err.message}`);
  }
  return aliases;
}
69
+ const NODE_ALIASES = loadNodeAliases();
49
70
 
50
71
  /**
51
72
  * Resolve a node name — accepts aliases, full IDs, or "self"/"local"
@@ -98,7 +119,7 @@ function checkExecSafety(command) {
98
119
  */
99
120
  async function natsConnect() {
100
121
  try {
101
- return await connect(natsConnectOpts({ timeout: 5000 }));
122
+ return await connect({ servers: NATS_URL, timeout: 5000 });
102
123
  } catch (err) {
103
124
  console.error(`Error: Cannot connect to NATS at ${NATS_URL}`);
104
125
  console.error(`Is the NATS server running? Is Tailscale connected?`);
@@ -140,21 +161,15 @@ async function collectHeartbeats(nc, waitMs = 3000) {
140
161
  uptime: os.uptime(),
141
162
  };
142
163
 
143
- // Force-unsubscribe after deadline to prevent hanging if no messages arrive
144
- const timer = setTimeout(() => sub.unsubscribe(), waitMs);
145
-
146
164
  // Listen for heartbeats for a few seconds
147
165
  const deadline = Date.now() + waitMs;
148
166
  for await (const msg of sub) {
149
- try {
150
- const s = JSON.parse(sc.decode(msg.data));
151
- if (s.node !== LOCAL_NODE) {
152
- nodes[s.node] = s;
153
- }
154
- } catch {}
167
+ const s = JSON.parse(sc.decode(msg.data));
168
+ if (s.node !== LOCAL_NODE) {
169
+ nodes[s.node] = s;
170
+ }
155
171
  if (Date.now() >= deadline) break;
156
172
  }
157
- clearTimeout(timer);
158
173
  sub.unsubscribe();
159
174
  return nodes;
160
175
  }
@@ -382,6 +397,10 @@ async function cmdSubmit(args) {
382
397
  success_criteria: task.success_criteria || [],
383
398
  scope: task.scope || [],
384
399
  priority: task.auto_priority || 0,
400
+ llm_provider: task.provider || task.llm_provider || null,
401
+ llm_model: task.model || task.llm_model || null,
402
+ preferred_nodes: task.preferred_nodes || [],
403
+ exclude_nodes: task.exclude_nodes || [],
385
404
  });
386
405
  console.log(`Submitted: ${result.data.task_id} [${result.data.status}]`);
387
406
  // Mark as 'submitted' — NOT 'running'. The card reflects actual mesh state.
@@ -436,12 +455,18 @@ async function cmdSubmit(args) {
436
455
  scope: task.scope || [],
437
456
  priority: task.priority || 0,
438
457
  tags: task.tags || [],
458
+ llm_provider: task.provider || task.llm_provider || null,
459
+ llm_model: task.model || task.llm_model || null,
460
+ preferred_nodes: task.preferred_nodes || [],
461
+ exclude_nodes: task.exclude_nodes || [],
462
+ collaboration: task.collaboration || undefined,
439
463
  });
440
464
 
441
465
  console.log(`Submitted: ${result.data.task_id} "${result.data.title}"`);
442
466
  console.log(` Status: ${result.data.status}`);
443
467
  console.log(` Budget: ${result.data.budget_minutes}m`);
444
468
  console.log(` Metric: ${result.data.metric || 'none'}`);
469
+ if (result.data.llm_provider) console.log(` Provider: ${result.data.llm_provider}`);
445
470
  await nc.close();
446
471
  }
447
472
 
@@ -575,6 +600,124 @@ async function cmdRepair(args) {
575
600
  }
576
601
  }
577
602
 
/**
 * mesh deploy [--force] [--component <name>] [--node <name>] — trigger fleet deploy.
 *
 * Reads the current git SHA/branch from the repo directory, publishes
 * mesh.deploy.trigger to NATS, then polls the MESH_DEPLOY_RESULTS KV bucket
 * (keys of the form `<sha>-<nodeId>`) and prints each node's result.
 *
 * Exits the process with code 1 when the git state cannot be read.
 *
 * @param {string[]} args - CLI arguments after the `deploy` command.
 *   `--component` and `--node` may be repeated; `--node` accepts aliases.
 * @returns {Promise<void>}
 */
async function cmdDeploy(args) {
  const { execSync } = require('child_process');
  const POLL_TIMEOUT_MS = 15000;
  const POLL_INTERVAL_MS = 2000;
  const STATUS_ICONS = new Map([
    ['success', '\x1b[32m✓\x1b[0m'],
    ['skipped', '\x1b[33m-\x1b[0m'],
    ['failed', '\x1b[31m✗\x1b[0m'],
  ]);

  // Collect the value following every occurrence of a repeatable flag.
  const flagValues = (flag) => {
    const values = [];
    for (let i = 0; i < args.length; i++) {
      if (args[i] === flag && args[i + 1]) {
        values.push(args[i + 1]);
        i++;
      }
    }
    return values;
  };

  // Prefer openclaw-node (git repo) over openclaw (runtime)
  const defaultRepo = fs.existsSync(path.join(os.homedir(), 'openclaw-node', '.git'))
    ? path.join(os.homedir(), 'openclaw-node')
    : path.join(os.homedir(), 'openclaw');
  const repoDir = process.env.OPENCLAW_REPO_DIR || defaultRepo;
  const force = args.includes('--force');
  const components = flagValues('--component');
  // De-duplicate: two aliases can resolve to the same node ID, and the poll
  // loop below finishes early only once every distinct node has answered.
  const targetNodes = [...new Set(flagValues('--node').map((name) => resolveNode(name)))];

  // Get current SHA and branch
  let sha;
  let branch;
  try {
    sha = execSync('git rev-parse --short HEAD', { cwd: repoDir, encoding: 'utf8' }).trim();
    branch = execSync('git rev-parse --abbrev-ref HEAD', { cwd: repoDir, encoding: 'utf8' }).trim();
  } catch {
    console.error(`Error: Cannot read git state from ${repoDir}`);
    process.exit(1);
  }

  console.log(`Deploying ${sha} (${branch})${force ? ' [FORCE]' : ''}`);
  if (components.length > 0) console.log(` Components: ${components.join(', ')}`);
  if (targetNodes.length > 0) console.log(` Targets: ${targetNodes.join(', ')}`);
  else console.log(' Targets: all nodes');

  const nc = await natsConnect();
  try {
    const trigger = {
      sha,
      branch,
      components: components.length > 0 ? components : ['all'],
      nodes: targetNodes.length > 0 ? targetNodes : ['all'],
      force,
      initiator: LOCAL_NODE,
      timestamp: new Date().toISOString(),
    };

    // Write "latest" marker so offline nodes can catch up
    try {
      const js = nc.jetstream();
      const resultsKv = await js.views.kv('MESH_DEPLOY_RESULTS', { history: 5, ttl: 7 * 24 * 60 * 60 * 1000 });
      await resultsKv.put('latest', sc.encode(JSON.stringify({ sha, branch })));
    } catch (err) {
      // Best-effort: the trigger below still reaches online nodes.
      console.error(`Warning: could not write deploy marker: ${err.message}`);
    }

    // Publish trigger
    nc.publish('mesh.deploy.trigger', sc.encode(JSON.stringify(trigger)));
    await nc.flush();
    console.log('Deploy trigger sent.\n');

    // Poll for results until every expected node answered or the timeout elapses
    console.log('Waiting for node responses...');
    const deadline = Date.now() + POLL_TIMEOUT_MS;
    const seen = new Set();
    // Without explicit targets, the known alias table defines which nodes to expect.
    const checkNodes = targetNodes.length > 0
      ? targetNodes
      : [...new Set(Object.values(NODE_ALIASES))];

    try {
      const js = nc.jetstream();
      const resultsKv = await js.views.kv('MESH_DEPLOY_RESULTS');

      while (Date.now() < deadline) {
        for (const nodeId of checkNodes) {
          if (seen.has(nodeId)) continue;
          const key = `${sha}-${nodeId}`;
          try {
            const entry = await resultsKv.get(key);
            if (!entry || !entry.value) continue;
            const result = JSON.parse(sc.decode(entry.value));
            // Only terminal states count; anything else is polled again.
            const icon = STATUS_ICONS.get(result.status);
            if (!icon) continue;
            console.log(` ${icon} ${nodeId}: ${result.status} (${result.durationSeconds || 0}s)`);
            if (result.errors && result.errors.length > 0) {
              for (const e of result.errors) console.log(` Error: ${e}`);
            }
            seen.add(nodeId);
          } catch {
            // Unreadable or half-written entry: retry on the next poll.
          }
        }

        if (seen.size >= checkNodes.length) break;
        await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
      }
    } catch (err) {
      console.error(`Warning: could not poll deploy results: ${err.message}`);
    }

    if (seen.size === 0) {
      console.log(' (no responses yet — nodes may still be deploying)');
    }

    console.log('');
  } finally {
    // Close the connection on every path so a failed publish cannot leave the CLI hanging.
    await nc.close();
  }
}
720
+
578
721
  /**
579
722
  * mesh help — show usage.
580
723
  */
@@ -602,6 +745,10 @@ function cmdHelp() {
602
745
  ' mesh health --json Health check (JSON output)',
603
746
  ' mesh repair Self-repair this node',
604
747
  ' mesh repair --all Self-repair ALL nodes',
748
+ ' mesh deploy Deploy to all nodes',
749
+ ' mesh deploy --force Force deploy (skip cache)',
750
+ ' mesh deploy --node ubuntu Deploy to specific node',
751
+ ' mesh deploy --component mesh-daemons Deploy specific component',
605
752
  '',
606
753
  'NODE ALIASES:',
607
754
  ' ubuntu, linux = Ubuntu VM (calos-vmware-virtual-platform)',
@@ -632,6 +779,7 @@ async function main() {
632
779
  case 'tasks': return cmdTasks(args);
633
780
  case 'health': return cmdHealth(args);
634
781
  case 'repair': return cmdRepair(args);
782
+ case 'deploy': return cmdDeploy(args);
635
783
  case 'help':
636
784
  case '--help':
637
785
  case '-h': return cmdHelp();