openclaw-node-harness 2.0.2 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,7 +23,7 @@ const os = require('os');
23
23
 
24
24
  const { NATS_URL, natsConnectOpts } = require('../lib/nats-resolve');
25
25
  const sc = StringCodec();
26
- const REPO_DIR = process.env.OPENCLAW_REPO_DIR || path.join(os.homedir(), 'openclaw-node');
26
+ const REPO_DIR = process.env.OPENCLAW_REPO_DIR || path.join(os.homedir(), 'openclaw');
27
27
  const NODE_ID = process.env.OPENCLAW_NODE_ID ||
28
28
  os.hostname().toLowerCase().replace(/[^a-z0-9-]/g, '-');
29
29
 
package/bin/mesh-agent.js CHANGED
@@ -117,7 +117,7 @@ function buildInitialPrompt(task) {
117
117
  parts.push('');
118
118
  }
119
119
 
120
- if (task.success_criteria.length > 0) {
120
+ if (task.success_criteria && task.success_criteria.length > 0) {
121
121
  parts.push('## Success Criteria');
122
122
  for (const c of task.success_criteria) {
123
123
  parts.push(`- ${c}`);
@@ -132,7 +132,7 @@ function buildInitialPrompt(task) {
132
132
  parts.push('');
133
133
  }
134
134
 
135
- if (task.scope.length > 0) {
135
+ if (task.scope && task.scope.length > 0) {
136
136
  parts.push('## Scope');
137
137
  parts.push('Only modify these files/paths:');
138
138
  for (const s of task.scope) {
@@ -192,7 +192,7 @@ function buildRetryPrompt(task, previousAttempts, attemptNumber) {
192
192
  parts.push('');
193
193
  }
194
194
 
195
- if (task.scope.length > 0) {
195
+ if (task.scope && task.scope.length > 0) {
196
196
  parts.push('## Scope');
197
197
  for (const s of task.scope) {
198
198
  parts.push(`- ${s}`);
@@ -294,18 +294,31 @@ function commitAndMergeWorktree(worktreePath, taskId, summary) {
294
294
 
295
295
  log(`Committed ${sha} on ${branch}: ${commitMsg}`);
296
296
 
297
- // Merge into main (from workspace)
298
- try {
299
- execSync(`git merge --no-ff "${branch}" -m "Merge ${branch}: ${taskId}"`, {
300
- cwd: WORKSPACE, timeout: 30000, stdio: 'pipe',
301
- });
302
- log(`Merged ${branch} into main`);
303
- return { committed: true, merged: true, sha };
304
- } catch (mergeErr) {
305
- // Merge conflict — abort and keep branch for human resolution
306
- execSync('git merge --abort', { cwd: WORKSPACE, timeout: 5000, stdio: 'ignore' });
307
- log(`MERGE CONFLICT on ${branch} branch kept for manual resolution`);
308
- return { committed: true, merged: false, sha };
297
+ // Merge into main (from workspace).
298
+ // Parallel collab: multiple nodes may merge concurrently. If the first attempt
299
+ // fails (e.g., another node merged first), retry once after pulling.
300
+ const mergeMsg = `Merge ${branch}: ${taskId}`;
301
+ for (let attempt = 0; attempt < 2; attempt++) {
302
+ try {
303
+ execSync(`git merge --no-ff "${branch}" -m "${mergeMsg.replace(/"/g, '\\"')}"`, {
304
+ cwd: WORKSPACE, timeout: 30000, stdio: 'pipe',
305
+ });
306
+ log(`Merged ${branch} into main${attempt > 0 ? ' (retry succeeded)' : ''}`);
307
+ return { committed: true, merged: true, sha };
308
+ } catch (mergeErr) {
309
+ execSync('git merge --abort', { cwd: WORKSPACE, timeout: 5000, stdio: 'ignore' });
310
+ if (attempt === 0) {
311
+ // First failure: pull and retry (handles race with parallel merge)
312
+ try {
313
+ log(`Merge attempt 1 failed for ${branch} — fast-forward pulling and retrying`);
314
+ execSync('git pull --ff-only', { cwd: WORKSPACE, timeout: 15000, stdio: 'pipe' });
315
+ } catch { /* best effort pull */ }
316
+ } else {
317
+ // Second failure: real conflict — keep branch for human resolution
318
+ log(`MERGE CONFLICT on ${branch} — branch kept for manual resolution`);
319
+ return { committed: true, merged: false, sha, conflict: true };
320
+ }
321
+ }
309
322
  }
310
323
  } catch (err) {
311
324
  log(`Commit/merge warning: ${err.message}`);
@@ -377,14 +390,23 @@ function runLLM(prompt, task, worktreePath) {
377
390
  timeout: (task.budget_minutes || 30) * 60 * 1000, // kill if exceeds budget
378
391
  });
379
392
 
380
- // Heartbeat: signal daemon with activity state
393
+ // Heartbeat: signal daemon with activity state.
394
+ // getActivityState reads Claude JSONL files — only useful for Claude provider.
395
+ // For other providers, send a basic heartbeat (process alive = active).
396
+ const isClaude = provider.name === 'claude';
381
397
  const heartbeatTimer = setInterval(async () => {
382
398
  try {
383
- const activity = await getActivityState(cleanCwd);
384
399
  const payload = { task_id: task.task_id };
385
- if (activity) {
386
- payload.activity_state = activity.state;
387
- payload.activity_timestamp = activity.timestamp?.toISOString();
400
+ if (isClaude) {
401
+ const activity = await getActivityState(cleanCwd);
402
+ if (activity) {
403
+ payload.activity_state = activity.state;
404
+ payload.activity_timestamp = activity.timestamp?.toISOString();
405
+ }
406
+ } else {
407
+ // Non-Claude: process is running → active
408
+ payload.activity_state = 'active';
409
+ payload.activity_timestamp = new Date().toISOString();
388
410
  }
389
411
  await natsRequest('mesh.tasks.heartbeat', payload);
390
412
  } catch {
@@ -649,6 +671,13 @@ async function executeCollabTask(task) {
649
671
  return;
650
672
  }
651
673
 
674
+ // Subscribe to round notifications BEFORE joining — prevents race condition
675
+ // where the daemon starts round 1 immediately upon last node joining,
676
+ // but the joining node hasn't subscribed yet and misses the notification.
677
+ const roundSub = nc.subscribe(`mesh.collab.${sessionId}.node.${NODE_ID}.round`);
678
+ let roundsDone = false;
679
+ let lastKnownSessionStatus = null; // tracks why rounds ended (completed/aborted/converged)
680
+
652
681
  // Join the session using the discovered session_id
653
682
  let session;
654
683
  try {
@@ -659,6 +688,7 @@ async function executeCollabTask(task) {
659
688
  session = joinResult;
660
689
  } catch (err) {
661
690
  log(`COLLAB JOIN FAILED: ${err.message} (session: ${sessionId})`);
691
+ roundSub.unsubscribe();
662
692
  await natsRequest('mesh.tasks.fail', {
663
693
  task_id: task.task_id,
664
694
  reason: `Failed to join collab session ${sessionId}: ${err.message}`,
@@ -669,6 +699,7 @@ async function executeCollabTask(task) {
669
699
 
670
700
  if (!session) {
671
701
  log(`COLLAB JOIN RETURNED NULL for session ${sessionId}`);
702
+ roundSub.unsubscribe();
672
703
  await natsRequest('mesh.tasks.fail', {
673
704
  task_id: task.task_id,
674
705
  reason: `Collab session ${sessionId} rejected join (full, closed, or duplicate node).`,
@@ -684,87 +715,114 @@ async function executeCollabTask(task) {
684
715
  const worktreePath = createWorktree(`${task.task_id}-${NODE_ID}`);
685
716
  const taskDir = worktreePath || WORKSPACE;
686
717
 
687
- // Subscribe to round notifications for this session and this node
688
- const roundSub = nc.subscribe(`mesh.collab.${sessionId}.node.${NODE_ID}.round`);
689
- let roundsDone = false;
718
+ // Periodic session heartbeat detects abort/completion while waiting for rounds
719
+ const sessionHeartbeat = setInterval(async () => {
720
+ try {
721
+ const status = await natsRequest('mesh.collab.status', { session_id: sessionId }, 5000);
722
+ if (['aborted', 'completed'].includes(status.status)) {
723
+ log(`COLLAB HEARTBEAT: Session ${sessionId} is ${status.status}. Unsubscribing.`);
724
+ lastKnownSessionStatus = status.status;
725
+ roundsDone = true;
726
+ roundSub.unsubscribe();
727
+ }
728
+ } catch { /* best effort */ }
729
+ }, 10000);
690
730
 
691
731
  // Signal start
692
732
  await natsRequest('mesh.tasks.start', { task_id: task.task_id }).catch(() => {});
693
733
 
694
- for await (const roundMsg of roundSub) {
695
- if (roundsDone) break;
734
+ try {
735
+ for await (const roundMsg of roundSub) {
736
+ if (roundsDone) break;
696
737
 
697
- const roundData = JSON.parse(sc.decode(roundMsg.data));
698
- const { round_number, shared_intel, my_scope, my_role, mode, current_turn } = roundData;
738
+ const roundData = JSON.parse(sc.decode(roundMsg.data));
739
+ const { round_number, shared_intel, my_scope, my_role, mode, current_turn } = roundData;
699
740
 
700
- // Sequential mode: skip if it's not our turn
701
- if (mode === 'sequential' && current_turn && current_turn !== NODE_ID) {
702
- log(`COLLAB R${round_number}: Not our turn (current: ${current_turn}). Waiting.`);
703
- continue;
704
- }
741
+ // Sequential mode safety guard: skip if it's not our turn.
742
+ // The daemon (notifySequentialTurn) only sends to the current-turn node,
743
+ // so this should not normally trigger. Kept as a defensive check.
744
+ if (mode === 'sequential' && current_turn && current_turn !== NODE_ID) {
745
+ log(`COLLAB R${round_number}: Not our turn (current: ${current_turn}). Waiting.`);
746
+ continue;
747
+ }
705
748
 
706
- log(`COLLAB R${round_number}: Starting work (role: ${my_role}, scope: ${JSON.stringify(my_scope)})`);
749
+ log(`COLLAB R${round_number}: Starting work (role: ${my_role}, scope: ${JSON.stringify(my_scope)})`);
707
750
 
708
- // Build round-specific prompt
709
- const prompt = buildCollabPrompt(task, round_number, shared_intel, my_scope, my_role);
751
+ // Build round-specific prompt
752
+ const prompt = buildCollabPrompt(task, round_number, shared_intel, my_scope, my_role);
710
753
 
711
- if (DRY_RUN) {
712
- log(`[DRY RUN] Collab prompt:\n${prompt}`);
713
- break;
714
- }
754
+ if (DRY_RUN) {
755
+ log(`[DRY RUN] Collab prompt:\n${prompt}`);
756
+ break;
757
+ }
715
758
 
716
- // Execute Claude
717
- const llmResult = await runLLM(prompt, task, worktreePath);
718
- const output = llmResult.stdout || '';
759
+ // Execute Claude
760
+ const llmResult = await runLLM(prompt, task, worktreePath);
761
+ const output = llmResult.stdout || '';
719
762
 
720
- // Parse reflection from output
721
- const reflection = parseReflection(output);
763
+ // Parse reflection from output
764
+ const reflection = parseReflection(output);
722
765
 
723
- // List modified files
724
- let artifacts = [];
725
- try {
726
- if (worktreePath) {
727
- const status = require('child_process').execSync('git status --porcelain', {
728
- cwd: worktreePath, timeout: 5000, encoding: 'utf-8',
729
- }).trim();
730
- artifacts = status.split('\n').filter(Boolean).map(line => line.slice(3));
766
+ // List modified files
767
+ let artifacts = [];
768
+ try {
769
+ if (worktreePath) {
770
+ const status = require('child_process').execSync('git status --porcelain', {
771
+ cwd: worktreePath, timeout: 5000, encoding: 'utf-8',
772
+ }).trim();
773
+ artifacts = status.split('\n').filter(Boolean).map(line => line.slice(3));
774
+ }
775
+ } catch { /* best effort */ }
776
+
777
+ // Submit reflection
778
+ try {
779
+ await natsRequest('mesh.collab.reflect', {
780
+ session_id: sessionId,
781
+ node_id: NODE_ID,
782
+ round: round_number,
783
+ summary: reflection.summary,
784
+ learnings: reflection.learnings,
785
+ artifacts,
786
+ confidence: reflection.confidence,
787
+ vote: reflection.vote,
788
+ parse_failed: reflection.parse_failed,
789
+ });
790
+ const parseTag = reflection.parse_failed ? ' [PARSE FAILED]' : '';
791
+ log(`COLLAB R${round_number}: Reflection submitted (vote: ${reflection.vote}, conf: ${reflection.confidence}${parseTag})`);
792
+ } catch (err) {
793
+ log(`COLLAB R${round_number}: Reflection submit failed: ${err.message}`);
731
794
  }
732
- } catch { /* best effort */ }
733
795
 
734
- // Submit reflection
735
- try {
736
- await natsRequest('mesh.collab.reflect', {
737
- session_id: sessionId,
738
- node_id: NODE_ID,
739
- round: round_number,
740
- summary: reflection.summary,
741
- learnings: reflection.learnings,
742
- artifacts,
743
- confidence: reflection.confidence,
744
- vote: reflection.vote,
745
- parse_failed: reflection.parse_failed,
746
- });
747
- const parseTag = reflection.parse_failed ? ' [PARSE FAILED]' : '';
748
- log(`COLLAB R${round_number}: Reflection submitted (vote: ${reflection.vote}, conf: ${reflection.confidence}${parseTag})`);
749
- } catch (err) {
750
- log(`COLLAB R${round_number}: Reflection submit failed: ${err.message}`);
796
+ // Check if session is done (converged/completed/aborted)
797
+ try {
798
+ const status = await natsRequest('mesh.collab.status', { session_id: sessionId });
799
+ if (['converged', 'completed', 'aborted'].includes(status.status)) {
800
+ log(`COLLAB: Session ${sessionId} is ${status.status}. Done.`);
801
+ lastKnownSessionStatus = status.status;
802
+ roundsDone = true;
803
+ }
804
+ } catch { /* continue listening */ }
751
805
  }
752
-
753
- // Check if session is done (converged/completed/aborted)
754
- try {
755
- const status = await natsRequest('mesh.collab.status', { session_id: sessionId });
756
- if (['converged', 'completed', 'aborted'].includes(status.status)) {
757
- log(`COLLAB: Session ${sessionId} is ${status.status}. Done.`);
758
- roundsDone = true;
759
- }
760
- } catch { /* continue listening */ }
806
+ } finally {
807
+ clearInterval(sessionHeartbeat);
808
+ roundSub.unsubscribe();
761
809
  }
762
810
 
763
- roundSub.unsubscribe();
764
-
765
- // Commit and merge worktree
766
- const mergeResult = commitAndMergeWorktree(worktreePath, `${task.task_id}-${NODE_ID}`, `collab contribution from ${NODE_ID}`);
767
- cleanupWorktree(worktreePath, mergeResult && !mergeResult?.merged);
811
+ // Commit and merge only on successful convergence — don't merge partial
812
+ // work from aborted/failed sessions into main.
813
+ // Uses lastKnownSessionStatus (set during round loop or heartbeat) instead of
814
+ // a fresh network read, which could see stale state due to NATS latency.
815
+ try {
816
+ if (['completed', 'converged'].includes(lastKnownSessionStatus)) {
817
+ const mergeResult = commitAndMergeWorktree(worktreePath, `${task.task_id}-${NODE_ID}`, `collab contribution from ${NODE_ID}`);
818
+ cleanupWorktree(worktreePath, mergeResult && !mergeResult?.merged);
819
+ } else {
820
+ log(`COLLAB: Session ${sessionId} ended as ${lastKnownSessionStatus || 'unknown'} — discarding worktree`);
821
+ cleanupWorktree(worktreePath, false);
822
+ }
823
+ } catch (err) {
824
+ log(`COLLAB WORKTREE CLEANUP FAILED: ${err.message}`);
825
+ }
768
826
 
769
827
  writeAgentState('idle', null);
770
828
  log(`COLLAB DONE: ${task.task_id} (node: ${NODE_ID})`);
@@ -986,8 +1044,82 @@ async function main() {
986
1044
  })();
987
1045
  log(` Listening: mesh.agent.${NODE_ID}.alive`);
988
1046
 
1047
+ // Subscribe to collab recruit broadcasts — allows this node to join
1048
+ // collab sessions without being the claiming node
1049
+ const recruitSub = nc.subscribe('mesh.collab.*.recruit');
1050
+ (async () => {
1051
+ for await (const msg of recruitSub) {
1052
+ try {
1053
+ const recruit = JSON.parse(sc.decode(msg.data));
1054
+ if (currentTaskId) continue; // busy
1055
+
1056
+ // Fetch task to check preferences and get collab spec
1057
+ const task = await natsRequest('mesh.tasks.get', { task_id: recruit.task_id }, 5000);
1058
+ if (!task || !task.collaboration) continue;
1059
+ if (task.owner === NODE_ID) continue; // we claimed it, already handling
1060
+
1061
+ // Check preferred_nodes
1062
+ if (task.preferred_nodes && task.preferred_nodes.length > 0) {
1063
+ if (!task.preferred_nodes.includes(NODE_ID)) continue;
1064
+ }
1065
+ // Check exclude_nodes
1066
+ if (task.exclude_nodes && task.exclude_nodes.length > 0) {
1067
+ if (task.exclude_nodes.includes(NODE_ID)) continue;
1068
+ }
1069
+
1070
+ log(`RECRUIT: Joining collab session ${recruit.session_id} for task ${recruit.task_id}`);
1071
+ currentTaskId = task.task_id;
1072
+ await executeCollabTask(task);
1073
+ currentTaskId = null;
1074
+ } catch (err) {
1075
+ log(`RECRUIT ERROR: ${err.message}`);
1076
+ currentTaskId = null;
1077
+ }
1078
+ }
1079
+ })();
1080
+ log(` Listening: mesh.collab.*.recruit (collab recruiting)`);
1081
+
1082
+ // Also poll for recruiting sessions on idle — catches recruits we missed
1083
/**
 * Poll for collab sessions that are still recruiting and join each one this
 * node is eligible for. Complements the recruit broadcast subscription by
 * catching recruit messages the node missed.
 *
 * Eligibility, as checked here: the node is not already listed in the
 * session's `node_ids`, the session is not full (`current_nodes` below
 * `max_nodes`), the task has a `collaboration` spec, and the task's
 * `preferred_nodes` / `exclude_nodes` lists allow this NODE_ID.
 *
 * Uses the enclosing scope's `currentTaskId` as the busy flag: it is set
 * while a collab task runs and is always cleared afterwards, even when the
 * task throws, so a failed join can never leave the node permanently busy.
 *
 * Discovery is best-effort: failures while listing sessions or fetching a
 * task are swallowed so the caller's claim loop keeps running.
 *
 * @returns {Promise<void>}
 */
async function checkRecruitingSessions() {
  if (currentTaskId) return; // busy
  try {
    const sessions = await natsRequest('mesh.collab.recruiting', {}, 5000);
    if (!Array.isArray(sessions) || sessions.length === 0) return;

    for (const s of sessions) {
      if (currentTaskId) break; // became busy
      // Skip if we already joined this session
      if (s.node_ids?.includes(NODE_ID)) continue;
      // Skip if session is full
      if (s.max_nodes && s.current_nodes >= s.max_nodes) continue;

      // Fetch task to check preferences
      const task = await natsRequest('mesh.tasks.get', { task_id: s.task_id }, 5000);
      // Another path may have started a task while we were awaiting the
      // fetch; never overwrite its busy flag.
      if (currentTaskId) break;
      if (!task || !task.collaboration) continue;
      if (task.preferred_nodes?.length > 0 && !task.preferred_nodes.includes(NODE_ID)) continue;
      if (task.exclude_nodes?.includes(NODE_ID)) continue;

      log(`RECRUIT POLL: Joining collab session ${s.session_id} for task ${s.task_id}`);
      currentTaskId = task.task_id;
      try {
        await executeCollabTask(task);
      } catch (err) {
        // A failed collab run is worth surfacing; stop polling this cycle,
        // matching the original behavior of leaving the loop on error.
        log(`RECRUIT POLL ERROR: ${err.message}`);
        return;
      } finally {
        // Always release the busy flag, on success and on failure.
        currentTaskId = null;
      }
    }
  } catch { /* silent — recruiting poll is best-effort */ }
}
1113
+
989
1114
  while (running) {
990
1115
  try {
1116
+ // Check for recruiting collab sessions before trying to claim
1117
+ await checkRecruitingSessions();
1118
+ if (currentTaskId) {
1119
+ await new Promise(r => setTimeout(r, POLL_INTERVAL));
1120
+ continue;
1121
+ }
1122
+
991
1123
  // Claim next available task (longer timeout — KV operations on remote NATS can be slow)
992
1124
  const task = await natsRequest('mesh.tasks.claim', { node_id: NODE_ID }, 60000);
993
1125
 
@@ -1035,3 +1167,4 @@ main().catch(err => {
1035
1167
  console.error(`[mesh-agent] Fatal: ${err.message}`);
1036
1168
  process.exit(1);
1037
1169
  });
1170
+ // deploy-v7f0130b