openclaw-node-harness 2.0.2 → 2.0.4
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/bin/fleet-deploy.js +1 -1
- package/bin/mesh-agent.js +217 -84
- package/bin/mesh-bridge.js +274 -10
- package/bin/mesh-deploy-listener.js +120 -98
- package/bin/mesh-deploy.js +11 -3
- package/bin/mesh-health-publisher.js +1 -1
- package/bin/mesh-task-daemon.js +190 -15
- package/bin/mesh.js +170 -22
- package/bin/openclaw-node-init.js +147 -3
- package/install.sh +7 -0
- package/lib/kanban-io.js +50 -10
- package/lib/mesh-collab.js +53 -3
- package/lib/mesh-registry.js +11 -2
- package/lib/mesh-tasks.js +6 -7
- package/package.json +1 -1
package/bin/fleet-deploy.js
CHANGED
|
@@ -23,7 +23,7 @@ const os = require('os');
|
|
|
23
23
|
|
|
24
24
|
const { NATS_URL, natsConnectOpts } = require('../lib/nats-resolve');
|
|
25
25
|
const sc = StringCodec();
|
|
26
|
-
const REPO_DIR = process.env.OPENCLAW_REPO_DIR || path.join(os.homedir(), 'openclaw
|
|
26
|
+
const REPO_DIR = process.env.OPENCLAW_REPO_DIR || path.join(os.homedir(), 'openclaw');
|
|
27
27
|
const NODE_ID = process.env.OPENCLAW_NODE_ID ||
|
|
28
28
|
os.hostname().toLowerCase().replace(/[^a-z0-9-]/g, '-');
|
|
29
29
|
|
package/bin/mesh-agent.js
CHANGED
|
@@ -117,7 +117,7 @@ function buildInitialPrompt(task) {
|
|
|
117
117
|
parts.push('');
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
-
if (task.success_criteria.length > 0) {
|
|
120
|
+
if (task.success_criteria && task.success_criteria.length > 0) {
|
|
121
121
|
parts.push('## Success Criteria');
|
|
122
122
|
for (const c of task.success_criteria) {
|
|
123
123
|
parts.push(`- ${c}`);
|
|
@@ -132,7 +132,7 @@ function buildInitialPrompt(task) {
|
|
|
132
132
|
parts.push('');
|
|
133
133
|
}
|
|
134
134
|
|
|
135
|
-
if (task.scope.length > 0) {
|
|
135
|
+
if (task.scope && task.scope.length > 0) {
|
|
136
136
|
parts.push('## Scope');
|
|
137
137
|
parts.push('Only modify these files/paths:');
|
|
138
138
|
for (const s of task.scope) {
|
|
@@ -192,7 +192,7 @@ function buildRetryPrompt(task, previousAttempts, attemptNumber) {
|
|
|
192
192
|
parts.push('');
|
|
193
193
|
}
|
|
194
194
|
|
|
195
|
-
if (task.scope.length > 0) {
|
|
195
|
+
if (task.scope && task.scope.length > 0) {
|
|
196
196
|
parts.push('## Scope');
|
|
197
197
|
for (const s of task.scope) {
|
|
198
198
|
parts.push(`- ${s}`);
|
|
@@ -294,18 +294,31 @@ function commitAndMergeWorktree(worktreePath, taskId, summary) {
|
|
|
294
294
|
|
|
295
295
|
log(`Committed ${sha} on ${branch}: ${commitMsg}`);
|
|
296
296
|
|
|
297
|
-
// Merge into main (from workspace)
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
297
|
+
// Merge into main (from workspace).
|
|
298
|
+
// Parallel collab: multiple nodes may merge concurrently. If the first attempt
|
|
299
|
+
// fails (e.g., another node merged first), retry once after pulling.
|
|
300
|
+
const mergeMsg = `Merge ${branch}: ${taskId}`;
|
|
301
|
+
for (let attempt = 0; attempt < 2; attempt++) {
|
|
302
|
+
try {
|
|
303
|
+
execSync(`git merge --no-ff "${branch}" -m "${mergeMsg.replace(/"/g, '\\"')}"`, {
|
|
304
|
+
cwd: WORKSPACE, timeout: 30000, stdio: 'pipe',
|
|
305
|
+
});
|
|
306
|
+
log(`Merged ${branch} into main${attempt > 0 ? ' (retry succeeded)' : ''}`);
|
|
307
|
+
return { committed: true, merged: true, sha };
|
|
308
|
+
} catch (mergeErr) {
|
|
309
|
+
execSync('git merge --abort', { cwd: WORKSPACE, timeout: 5000, stdio: 'ignore' });
|
|
310
|
+
if (attempt === 0) {
|
|
311
|
+
// First failure: pull and retry (handles race with parallel merge)
|
|
312
|
+
try {
|
|
313
|
+
log(`Merge attempt 1 failed for ${branch} — fast-forward pulling and retrying`);
|
|
314
|
+
execSync('git pull --ff-only', { cwd: WORKSPACE, timeout: 15000, stdio: 'pipe' });
|
|
315
|
+
} catch { /* best effort pull */ }
|
|
316
|
+
} else {
|
|
317
|
+
// Second failure: real conflict — keep branch for human resolution
|
|
318
|
+
log(`MERGE CONFLICT on ${branch} — branch kept for manual resolution`);
|
|
319
|
+
return { committed: true, merged: false, sha, conflict: true };
|
|
320
|
+
}
|
|
321
|
+
}
|
|
309
322
|
}
|
|
310
323
|
} catch (err) {
|
|
311
324
|
log(`Commit/merge warning: ${err.message}`);
|
|
@@ -377,14 +390,23 @@ function runLLM(prompt, task, worktreePath) {
|
|
|
377
390
|
timeout: (task.budget_minutes || 30) * 60 * 1000, // kill if exceeds budget
|
|
378
391
|
});
|
|
379
392
|
|
|
380
|
-
// Heartbeat: signal daemon with activity state
|
|
393
|
+
// Heartbeat: signal daemon with activity state.
|
|
394
|
+
// getActivityState reads Claude JSONL files — only useful for Claude provider.
|
|
395
|
+
// For other providers, send a basic heartbeat (process alive = active).
|
|
396
|
+
const isClaude = provider.name === 'claude';
|
|
381
397
|
const heartbeatTimer = setInterval(async () => {
|
|
382
398
|
try {
|
|
383
|
-
const activity = await getActivityState(cleanCwd);
|
|
384
399
|
const payload = { task_id: task.task_id };
|
|
385
|
-
if (
|
|
386
|
-
|
|
387
|
-
|
|
400
|
+
if (isClaude) {
|
|
401
|
+
const activity = await getActivityState(cleanCwd);
|
|
402
|
+
if (activity) {
|
|
403
|
+
payload.activity_state = activity.state;
|
|
404
|
+
payload.activity_timestamp = activity.timestamp?.toISOString();
|
|
405
|
+
}
|
|
406
|
+
} else {
|
|
407
|
+
// Non-Claude: process is running → active
|
|
408
|
+
payload.activity_state = 'active';
|
|
409
|
+
payload.activity_timestamp = new Date().toISOString();
|
|
388
410
|
}
|
|
389
411
|
await natsRequest('mesh.tasks.heartbeat', payload);
|
|
390
412
|
} catch {
|
|
@@ -649,6 +671,13 @@ async function executeCollabTask(task) {
|
|
|
649
671
|
return;
|
|
650
672
|
}
|
|
651
673
|
|
|
674
|
+
// Subscribe to round notifications BEFORE joining — prevents race condition
|
|
675
|
+
// where the daemon starts round 1 immediately upon last node joining,
|
|
676
|
+
// but the joining node hasn't subscribed yet and misses the notification.
|
|
677
|
+
const roundSub = nc.subscribe(`mesh.collab.${sessionId}.node.${NODE_ID}.round`);
|
|
678
|
+
let roundsDone = false;
|
|
679
|
+
let lastKnownSessionStatus = null; // tracks why rounds ended (completed/aborted/converged)
|
|
680
|
+
|
|
652
681
|
// Join the session using the discovered session_id
|
|
653
682
|
let session;
|
|
654
683
|
try {
|
|
@@ -659,6 +688,7 @@ async function executeCollabTask(task) {
|
|
|
659
688
|
session = joinResult;
|
|
660
689
|
} catch (err) {
|
|
661
690
|
log(`COLLAB JOIN FAILED: ${err.message} (session: ${sessionId})`);
|
|
691
|
+
roundSub.unsubscribe();
|
|
662
692
|
await natsRequest('mesh.tasks.fail', {
|
|
663
693
|
task_id: task.task_id,
|
|
664
694
|
reason: `Failed to join collab session ${sessionId}: ${err.message}`,
|
|
@@ -669,6 +699,7 @@ async function executeCollabTask(task) {
|
|
|
669
699
|
|
|
670
700
|
if (!session) {
|
|
671
701
|
log(`COLLAB JOIN RETURNED NULL for session ${sessionId}`);
|
|
702
|
+
roundSub.unsubscribe();
|
|
672
703
|
await natsRequest('mesh.tasks.fail', {
|
|
673
704
|
task_id: task.task_id,
|
|
674
705
|
reason: `Collab session ${sessionId} rejected join (full, closed, or duplicate node).`,
|
|
@@ -684,87 +715,114 @@ async function executeCollabTask(task) {
|
|
|
684
715
|
const worktreePath = createWorktree(`${task.task_id}-${NODE_ID}`);
|
|
685
716
|
const taskDir = worktreePath || WORKSPACE;
|
|
686
717
|
|
|
687
|
-
//
|
|
688
|
-
const
|
|
689
|
-
|
|
718
|
+
// Periodic session heartbeat — detects abort/completion while waiting for rounds
|
|
719
|
+
const sessionHeartbeat = setInterval(async () => {
|
|
720
|
+
try {
|
|
721
|
+
const status = await natsRequest('mesh.collab.status', { session_id: sessionId }, 5000);
|
|
722
|
+
if (['aborted', 'completed'].includes(status.status)) {
|
|
723
|
+
log(`COLLAB HEARTBEAT: Session ${sessionId} is ${status.status}. Unsubscribing.`);
|
|
724
|
+
lastKnownSessionStatus = status.status;
|
|
725
|
+
roundsDone = true;
|
|
726
|
+
roundSub.unsubscribe();
|
|
727
|
+
}
|
|
728
|
+
} catch { /* best effort */ }
|
|
729
|
+
}, 10000);
|
|
690
730
|
|
|
691
731
|
// Signal start
|
|
692
732
|
await natsRequest('mesh.tasks.start', { task_id: task.task_id }).catch(() => {});
|
|
693
733
|
|
|
694
|
-
|
|
695
|
-
|
|
734
|
+
try {
|
|
735
|
+
for await (const roundMsg of roundSub) {
|
|
736
|
+
if (roundsDone) break;
|
|
696
737
|
|
|
697
|
-
|
|
698
|
-
|
|
738
|
+
const roundData = JSON.parse(sc.decode(roundMsg.data));
|
|
739
|
+
const { round_number, shared_intel, my_scope, my_role, mode, current_turn } = roundData;
|
|
699
740
|
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
741
|
+
// Sequential mode safety guard: skip if it's not our turn.
|
|
742
|
+
// The daemon (notifySequentialTurn) only sends to the current-turn node,
|
|
743
|
+
// so this should not normally trigger. Kept as a defensive check.
|
|
744
|
+
if (mode === 'sequential' && current_turn && current_turn !== NODE_ID) {
|
|
745
|
+
log(`COLLAB R${round_number}: Not our turn (current: ${current_turn}). Waiting.`);
|
|
746
|
+
continue;
|
|
747
|
+
}
|
|
705
748
|
|
|
706
|
-
|
|
749
|
+
log(`COLLAB R${round_number}: Starting work (role: ${my_role}, scope: ${JSON.stringify(my_scope)})`);
|
|
707
750
|
|
|
708
|
-
|
|
709
|
-
|
|
751
|
+
// Build round-specific prompt
|
|
752
|
+
const prompt = buildCollabPrompt(task, round_number, shared_intel, my_scope, my_role);
|
|
710
753
|
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
754
|
+
if (DRY_RUN) {
|
|
755
|
+
log(`[DRY RUN] Collab prompt:\n${prompt}`);
|
|
756
|
+
break;
|
|
757
|
+
}
|
|
715
758
|
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
759
|
+
// Execute Claude
|
|
760
|
+
const llmResult = await runLLM(prompt, task, worktreePath);
|
|
761
|
+
const output = llmResult.stdout || '';
|
|
719
762
|
|
|
720
|
-
|
|
721
|
-
|
|
763
|
+
// Parse reflection from output
|
|
764
|
+
const reflection = parseReflection(output);
|
|
722
765
|
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
766
|
+
// List modified files
|
|
767
|
+
let artifacts = [];
|
|
768
|
+
try {
|
|
769
|
+
if (worktreePath) {
|
|
770
|
+
const status = require('child_process').execSync('git status --porcelain', {
|
|
771
|
+
cwd: worktreePath, timeout: 5000, encoding: 'utf-8',
|
|
772
|
+
}).trim();
|
|
773
|
+
artifacts = status.split('\n').filter(Boolean).map(line => line.slice(3));
|
|
774
|
+
}
|
|
775
|
+
} catch { /* best effort */ }
|
|
776
|
+
|
|
777
|
+
// Submit reflection
|
|
778
|
+
try {
|
|
779
|
+
await natsRequest('mesh.collab.reflect', {
|
|
780
|
+
session_id: sessionId,
|
|
781
|
+
node_id: NODE_ID,
|
|
782
|
+
round: round_number,
|
|
783
|
+
summary: reflection.summary,
|
|
784
|
+
learnings: reflection.learnings,
|
|
785
|
+
artifacts,
|
|
786
|
+
confidence: reflection.confidence,
|
|
787
|
+
vote: reflection.vote,
|
|
788
|
+
parse_failed: reflection.parse_failed,
|
|
789
|
+
});
|
|
790
|
+
const parseTag = reflection.parse_failed ? ' [PARSE FAILED]' : '';
|
|
791
|
+
log(`COLLAB R${round_number}: Reflection submitted (vote: ${reflection.vote}, conf: ${reflection.confidence}${parseTag})`);
|
|
792
|
+
} catch (err) {
|
|
793
|
+
log(`COLLAB R${round_number}: Reflection submit failed: ${err.message}`);
|
|
731
794
|
}
|
|
732
|
-
} catch { /* best effort */ }
|
|
733
795
|
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
confidence: reflection.confidence,
|
|
744
|
-
vote: reflection.vote,
|
|
745
|
-
parse_failed: reflection.parse_failed,
|
|
746
|
-
});
|
|
747
|
-
const parseTag = reflection.parse_failed ? ' [PARSE FAILED]' : '';
|
|
748
|
-
log(`COLLAB R${round_number}: Reflection submitted (vote: ${reflection.vote}, conf: ${reflection.confidence}${parseTag})`);
|
|
749
|
-
} catch (err) {
|
|
750
|
-
log(`COLLAB R${round_number}: Reflection submit failed: ${err.message}`);
|
|
796
|
+
// Check if session is done (converged/completed/aborted)
|
|
797
|
+
try {
|
|
798
|
+
const status = await natsRequest('mesh.collab.status', { session_id: sessionId });
|
|
799
|
+
if (['converged', 'completed', 'aborted'].includes(status.status)) {
|
|
800
|
+
log(`COLLAB: Session ${sessionId} is ${status.status}. Done.`);
|
|
801
|
+
lastKnownSessionStatus = status.status;
|
|
802
|
+
roundsDone = true;
|
|
803
|
+
}
|
|
804
|
+
} catch { /* continue listening */ }
|
|
751
805
|
}
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
const status = await natsRequest('mesh.collab.status', { session_id: sessionId });
|
|
756
|
-
if (['converged', 'completed', 'aborted'].includes(status.status)) {
|
|
757
|
-
log(`COLLAB: Session ${sessionId} is ${status.status}. Done.`);
|
|
758
|
-
roundsDone = true;
|
|
759
|
-
}
|
|
760
|
-
} catch { /* continue listening */ }
|
|
806
|
+
} finally {
|
|
807
|
+
clearInterval(sessionHeartbeat);
|
|
808
|
+
roundSub.unsubscribe();
|
|
761
809
|
}
|
|
762
810
|
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
//
|
|
766
|
-
|
|
767
|
-
|
|
811
|
+
// Commit and merge only on successful convergence — don't merge partial
|
|
812
|
+
// work from aborted/failed sessions into main.
|
|
813
|
+
// Uses lastKnownSessionStatus (set during round loop or heartbeat) instead of
|
|
814
|
+
// a fresh network read, which could see stale state due to NATS latency.
|
|
815
|
+
try {
|
|
816
|
+
if (['completed', 'converged'].includes(lastKnownSessionStatus)) {
|
|
817
|
+
const mergeResult = commitAndMergeWorktree(worktreePath, `${task.task_id}-${NODE_ID}`, `collab contribution from ${NODE_ID}`);
|
|
818
|
+
cleanupWorktree(worktreePath, mergeResult && !mergeResult?.merged);
|
|
819
|
+
} else {
|
|
820
|
+
log(`COLLAB: Session ${sessionId} ended as ${lastKnownSessionStatus || 'unknown'} — discarding worktree`);
|
|
821
|
+
cleanupWorktree(worktreePath, false);
|
|
822
|
+
}
|
|
823
|
+
} catch (err) {
|
|
824
|
+
log(`COLLAB WORKTREE CLEANUP FAILED: ${err.message}`);
|
|
825
|
+
}
|
|
768
826
|
|
|
769
827
|
writeAgentState('idle', null);
|
|
770
828
|
log(`COLLAB DONE: ${task.task_id} (node: ${NODE_ID})`);
|
|
@@ -986,8 +1044,82 @@ async function main() {
|
|
|
986
1044
|
})();
|
|
987
1045
|
log(` Listening: mesh.agent.${NODE_ID}.alive`);
|
|
988
1046
|
|
|
1047
|
+
// Subscribe to collab recruit broadcasts — allows this node to join
|
|
1048
|
+
// collab sessions without being the claiming node
|
|
1049
|
+
const recruitSub = nc.subscribe('mesh.collab.*.recruit');
|
|
1050
|
+
(async () => {
|
|
1051
|
+
for await (const msg of recruitSub) {
|
|
1052
|
+
try {
|
|
1053
|
+
const recruit = JSON.parse(sc.decode(msg.data));
|
|
1054
|
+
if (currentTaskId) continue; // busy
|
|
1055
|
+
|
|
1056
|
+
// Fetch task to check preferences and get collab spec
|
|
1057
|
+
const task = await natsRequest('mesh.tasks.get', { task_id: recruit.task_id }, 5000);
|
|
1058
|
+
if (!task || !task.collaboration) continue;
|
|
1059
|
+
if (task.owner === NODE_ID) continue; // we claimed it, already handling
|
|
1060
|
+
|
|
1061
|
+
// Check preferred_nodes
|
|
1062
|
+
if (task.preferred_nodes && task.preferred_nodes.length > 0) {
|
|
1063
|
+
if (!task.preferred_nodes.includes(NODE_ID)) continue;
|
|
1064
|
+
}
|
|
1065
|
+
// Check exclude_nodes
|
|
1066
|
+
if (task.exclude_nodes && task.exclude_nodes.length > 0) {
|
|
1067
|
+
if (task.exclude_nodes.includes(NODE_ID)) continue;
|
|
1068
|
+
}
|
|
1069
|
+
|
|
1070
|
+
log(`RECRUIT: Joining collab session ${recruit.session_id} for task ${recruit.task_id}`);
|
|
1071
|
+
currentTaskId = task.task_id;
|
|
1072
|
+
await executeCollabTask(task);
|
|
1073
|
+
currentTaskId = null;
|
|
1074
|
+
} catch (err) {
|
|
1075
|
+
log(`RECRUIT ERROR: ${err.message}`);
|
|
1076
|
+
currentTaskId = null;
|
|
1077
|
+
}
|
|
1078
|
+
}
|
|
1079
|
+
})();
|
|
1080
|
+
log(` Listening: mesh.collab.*.recruit (collab recruiting)`);
|
|
1081
|
+
|
|
1082
|
+
// Also poll for recruiting sessions on idle — catches recruits we missed
|
|
1083
|
+
async function checkRecruitingSessions() {
|
|
1084
|
+
if (currentTaskId) return; // busy
|
|
1085
|
+
try {
|
|
1086
|
+
const sessions = await natsRequest('mesh.collab.recruiting', {}, 5000);
|
|
1087
|
+
if (!sessions || !Array.isArray(sessions) || sessions.length === 0) return;
|
|
1088
|
+
|
|
1089
|
+
for (const s of sessions) {
|
|
1090
|
+
if (currentTaskId) break; // became busy
|
|
1091
|
+
// Skip if we already joined this session
|
|
1092
|
+
if (s.node_ids && s.node_ids.includes(NODE_ID)) continue;
|
|
1093
|
+
// Skip if session is full
|
|
1094
|
+
if (s.max_nodes && s.current_nodes >= s.max_nodes) continue;
|
|
1095
|
+
|
|
1096
|
+
// Fetch task to check preferences
|
|
1097
|
+
const task = await natsRequest('mesh.tasks.get', { task_id: s.task_id }, 5000);
|
|
1098
|
+
if (!task || !task.collaboration) continue;
|
|
1099
|
+
if (task.preferred_nodes && task.preferred_nodes.length > 0) {
|
|
1100
|
+
if (!task.preferred_nodes.includes(NODE_ID)) continue;
|
|
1101
|
+
}
|
|
1102
|
+
if (task.exclude_nodes && task.exclude_nodes.length > 0) {
|
|
1103
|
+
if (task.exclude_nodes.includes(NODE_ID)) continue;
|
|
1104
|
+
}
|
|
1105
|
+
|
|
1106
|
+
log(`RECRUIT POLL: Joining collab session ${s.session_id} for task ${s.task_id}`);
|
|
1107
|
+
currentTaskId = task.task_id;
|
|
1108
|
+
await executeCollabTask(task);
|
|
1109
|
+
currentTaskId = null;
|
|
1110
|
+
}
|
|
1111
|
+
} catch { /* silent — recruiting poll is best-effort */ }
|
|
1112
|
+
}
|
|
1113
|
+
|
|
989
1114
|
while (running) {
|
|
990
1115
|
try {
|
|
1116
|
+
// Check for recruiting collab sessions before trying to claim
|
|
1117
|
+
await checkRecruitingSessions();
|
|
1118
|
+
if (currentTaskId) {
|
|
1119
|
+
await new Promise(r => setTimeout(r, POLL_INTERVAL));
|
|
1120
|
+
continue;
|
|
1121
|
+
}
|
|
1122
|
+
|
|
991
1123
|
// Claim next available task (longer timeout — KV operations on remote NATS can be slow)
|
|
992
1124
|
const task = await natsRequest('mesh.tasks.claim', { node_id: NODE_ID }, 60000);
|
|
993
1125
|
|
|
@@ -1035,3 +1167,4 @@ main().catch(err => {
|
|
|
1035
1167
|
console.error(`[mesh-agent] Fatal: ${err.message}`);
|
|
1036
1168
|
process.exit(1);
|
|
1037
1169
|
});
|
|
1170
|
+
// deploy-v7f0130b
|