openclaw-node-harness 2.0.2 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/fleet-deploy.js +1 -1
- package/bin/mesh-agent.js +217 -84
- package/bin/mesh-bridge.js +274 -10
- package/bin/mesh-deploy-listener.js +120 -98
- package/bin/mesh-deploy.js +11 -3
- package/bin/mesh-health-publisher.js +1 -1
- package/bin/mesh-task-daemon.js +190 -15
- package/bin/mesh.js +170 -22
- package/bin/openclaw-node-init.js +147 -3
- package/install.sh +7 -0
- package/lib/kanban-io.js +50 -10
- package/lib/mesh-collab.js +53 -3
- package/lib/mesh-registry.js +11 -2
- package/lib/mesh-tasks.js +6 -7
- package/package.json +1 -1
|
@@ -460,14 +460,60 @@ function installLaunchdService(meshDir, nodeBin, nodeId, provider, natsUrl) {
|
|
|
460
460
|
return;
|
|
461
461
|
}
|
|
462
462
|
|
|
463
|
+
// Deploy listener plist
|
|
464
|
+
const deployPlistPath = path.join(plistDir, 'ai.openclaw.deploy-listener.plist');
|
|
465
|
+
const deployPlist = `<?xml version="1.0" encoding="UTF-8"?>
|
|
466
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
467
|
+
<plist version="1.0">
|
|
468
|
+
<dict>
|
|
469
|
+
<key>Label</key>
|
|
470
|
+
<string>ai.openclaw.deploy-listener</string>
|
|
471
|
+
<key>ProgramArguments</key>
|
|
472
|
+
<array>
|
|
473
|
+
<string>${nodeBin}</string>
|
|
474
|
+
<string>${meshDir}/bin/mesh-deploy-listener.js</string>
|
|
475
|
+
</array>
|
|
476
|
+
<key>KeepAlive</key>
|
|
477
|
+
<true/>
|
|
478
|
+
<key>RunAtLoad</key>
|
|
479
|
+
<true/>
|
|
480
|
+
<key>StandardOutPath</key>
|
|
481
|
+
<string>${os.homedir()}/.openclaw/workspace/.tmp/mesh-deploy-listener.log</string>
|
|
482
|
+
<key>StandardErrorPath</key>
|
|
483
|
+
<string>${os.homedir()}/.openclaw/workspace/.tmp/mesh-deploy-listener.err</string>
|
|
484
|
+
<key>EnvironmentVariables</key>
|
|
485
|
+
<dict>
|
|
486
|
+
<key>OPENCLAW_NATS</key>
|
|
487
|
+
<string>${natsUrl}</string>
|
|
488
|
+
<key>OPENCLAW_NODE_ID</key>
|
|
489
|
+
<string>${nodeId}</string>
|
|
490
|
+
<key>OPENCLAW_NODE_ROLE</key>
|
|
491
|
+
<string>worker</string>
|
|
492
|
+
<key>OPENCLAW_REPO_DIR</key>
|
|
493
|
+
<string>${meshDir}</string>
|
|
494
|
+
<key>PATH</key>
|
|
495
|
+
<string>/usr/local/bin:/usr/bin:/bin:/opt/homebrew/bin:${os.homedir()}/.npm-global/bin</string>
|
|
496
|
+
<key>NODE_PATH</key>
|
|
497
|
+
<string>${meshDir}/node_modules:${meshDir}/lib</string>
|
|
498
|
+
</dict>
|
|
499
|
+
<key>ThrottleInterval</key>
|
|
500
|
+
<integer>30</integer>
|
|
501
|
+
</dict>
|
|
502
|
+
</plist>`;
|
|
503
|
+
|
|
463
504
|
fs.mkdirSync(plistDir, { recursive: true });
|
|
464
505
|
fs.writeFileSync(plistPath, plist);
|
|
465
|
-
ok(`
|
|
506
|
+
ok(`Mesh agent service written: ${plistPath}`);
|
|
507
|
+
fs.writeFileSync(deployPlistPath, deployPlist);
|
|
508
|
+
ok(`Deploy listener service written: ${deployPlistPath}`);
|
|
466
509
|
|
|
467
510
|
try {
|
|
468
511
|
execSync(`launchctl unload "${plistPath}" 2>/dev/null || true`, { stdio: 'pipe' });
|
|
469
512
|
execSync(`launchctl load "${plistPath}"`, { stdio: 'pipe' });
|
|
470
|
-
ok('
|
|
513
|
+
ok('Mesh agent loaded and started');
|
|
514
|
+
execSync(`launchctl unload "${deployPlistPath}" 2>/dev/null || true`, { stdio: 'pipe' });
|
|
515
|
+
execSync(`launchctl load "${deployPlistPath}"`, { stdio: 'pipe' });
|
|
516
|
+
ok('Deploy listener loaded and started');
|
|
471
517
|
} catch (e) {
|
|
472
518
|
warn(`Service load warning: ${e.message}`);
|
|
473
519
|
}
|
|
@@ -508,11 +554,40 @@ WantedBy=default.target
|
|
|
508
554
|
fs.writeFileSync(servicePath, service);
|
|
509
555
|
ok(`Systemd service written: ${servicePath}`);
|
|
510
556
|
|
|
557
|
+
// Deploy listener service
|
|
558
|
+
const deployServicePath = path.join(serviceDir, 'openclaw-deploy-listener.service');
|
|
559
|
+
const deployService = `[Unit]
|
|
560
|
+
Description=OpenClaw Deploy Listener
|
|
561
|
+
After=network-online.target
|
|
562
|
+
Wants=network-online.target
|
|
563
|
+
|
|
564
|
+
[Service]
|
|
565
|
+
Type=simple
|
|
566
|
+
ExecStart=${nodeBin} ${meshDir}/bin/mesh-deploy-listener.js
|
|
567
|
+
Restart=always
|
|
568
|
+
RestartSec=30
|
|
569
|
+
Environment=OPENCLAW_NATS=${natsUrl}
|
|
570
|
+
Environment=OPENCLAW_NODE_ID=${nodeId}
|
|
571
|
+
Environment=OPENCLAW_NODE_ROLE=worker
|
|
572
|
+
Environment=OPENCLAW_REPO_DIR=${meshDir}
|
|
573
|
+
Environment=NODE_PATH=${meshDir}/node_modules:${meshDir}/lib
|
|
574
|
+
Environment=PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:${os.homedir()}/.local/bin:${os.homedir()}/.npm-global/bin
|
|
575
|
+
WorkingDirectory=${meshDir}
|
|
576
|
+
|
|
577
|
+
[Install]
|
|
578
|
+
WantedBy=default.target
|
|
579
|
+
`;
|
|
580
|
+
fs.writeFileSync(deployServicePath, deployService);
|
|
581
|
+
ok(`Deploy listener service written: ${deployServicePath}`);
|
|
582
|
+
|
|
511
583
|
try {
|
|
512
584
|
execSync('systemctl --user daemon-reload', { stdio: 'pipe' });
|
|
513
585
|
execSync('systemctl --user enable openclaw-mesh-agent', { stdio: 'pipe' });
|
|
514
586
|
execSync('systemctl --user start openclaw-mesh-agent', { stdio: 'pipe' });
|
|
515
|
-
ok('
|
|
587
|
+
ok('Mesh agent enabled and started');
|
|
588
|
+
execSync('systemctl --user enable openclaw-deploy-listener', { stdio: 'pipe' });
|
|
589
|
+
execSync('systemctl --user start openclaw-deploy-listener', { stdio: 'pipe' });
|
|
590
|
+
ok('Deploy listener enabled and started');
|
|
516
591
|
} catch (e) {
|
|
517
592
|
warn(`Service start warning: ${e.message}`);
|
|
518
593
|
warn('Try manually: systemctl --user start openclaw-mesh-agent');
|
|
@@ -630,6 +705,71 @@ async function verifyNatsHealth(natsUrl, nodeId) {
|
|
|
630
705
|
}
|
|
631
706
|
}
|
|
632
707
|
|
|
708
|
+
// ── Mesh Topology Discovery ──────────────────────────
|
|
709
|
+
|
|
710
|
+
/**
 * Query the MESH_NODE_HEALTH KV bucket for known nodes and write a map of
 * short aliases → full node IDs to ~/.openclaw/mesh-aliases.json.
 *
 * Best-effort: every failure is reported through warn() and never thrown,
 * so callers can treat this step as non-fatal.
 *
 * @param {string} natsUrl      NATS server URL passed to nats.connect().
 * @param {string} localNodeId  This node's ID; always aliased as its short
 *                              form and as "self".
 * @returns {Promise<void>}
 */
async function discoverTopology(natsUrl, localNodeId) {
  log('Discovering mesh topology...');

  if (DRY_RUN) {
    warn('[DRY RUN] Would query MESH_NODE_HEALTH and write mesh-aliases.json');
    return;
  }

  // Short alias = node ID with common hostname suffixes stripped.
  const toShortAlias = (id) => id
    .replace(/-virtual-machine.*$/i, '')
    .replace(/-vmware.*$/i, '')
    .replace(/-local$/, '');

  try {
    const nats = require('nats');
    const nc = await nats.connect({ servers: natsUrl, timeout: 10000 });

    const aliases = {};
    // Counted separately from the alias map: the map always contains the two
    // self entries, so its size cannot tell us whether any peer was found.
    let peerCount = 0;

    try {
      const sc = nats.StringCodec();
      const js = nc.jetstream();

      // Query MESH_NODE_HEALTH for all known nodes
      try {
        const kv = await js.views.kv('MESH_NODE_HEALTH');
        const keys = await kv.keys();
        for await (const key of keys) {
          const entry = await kv.get(key);
          if (!entry || !entry.value) continue;

          // One unreadable entry must not abort the scan of the others.
          let health;
          try {
            health = JSON.parse(sc.decode(entry.value));
          } catch (err) {
            warn(`Skipping unreadable MESH_NODE_HEALTH entry ${key}: ${err.message}`);
            continue;
          }
          if (health === null || typeof health !== 'object') {
            warn(`Skipping malformed MESH_NODE_HEALTH entry ${key}`);
            continue;
          }

          const nodeId = (typeof health.nodeId === 'string' && health.nodeId) ? health.nodeId : key;
          aliases[toShortAlias(nodeId)] = nodeId;
          if (health.role === 'lead') aliases['lead'] = nodeId;
          if (nodeId !== localNodeId) peerCount += 1;
          ok(`Peer: ${nodeId} (${health.role || 'worker'}, ${health.tailscaleIp || 'unknown'})`);
        }
      } catch (err) {
        warn(`MESH_NODE_HEALTH bucket not available — skipping topology (${err.message})`);
      }
    } finally {
      // Always release the connection, even if the query path threw.
      await nc.drain();
    }

    // Also add self (after peers, so these entries win on collision)
    aliases[toShortAlias(localNodeId)] = localNodeId;
    aliases['self'] = localNodeId;

    const aliasPath = path.join(os.homedir(), '.openclaw', 'mesh-aliases.json');
    if (peerCount === 0) {
      warn('No peers found in MESH_NODE_HEALTH — mesh-aliases.json will only have self');
    }
    fs.mkdirSync(path.dirname(aliasPath), { recursive: true });
    fs.writeFileSync(aliasPath, JSON.stringify(aliases, null, 2) + '\n', { mode: 0o644 });
    if (peerCount > 0) {
      ok(`Mesh aliases written: ${aliasPath} (${Object.keys(aliases).length} entries)`);
    }
  } catch (e) {
    warn(`Topology discovery failed: ${e.message} (non-fatal)`);
  }
}
|
|
772
|
+
|
|
633
773
|
// ── Main ──────────────────────────────────────────────
|
|
634
774
|
|
|
635
775
|
async function main() {
|
|
@@ -699,6 +839,10 @@ async function main() {
|
|
|
699
839
|
const natsHealthy = await verifyNatsHealth(config.nats, nodeId);
|
|
700
840
|
const healthy = serviceAlive && natsHealthy;
|
|
701
841
|
|
|
842
|
+
// ── Step 9: Discover mesh topology ──
|
|
843
|
+
step(9, 'Discovering mesh topology...');
|
|
844
|
+
await discoverTopology(config.nats, nodeId);
|
|
845
|
+
|
|
702
846
|
// ── Done ──
|
|
703
847
|
console.log(`\n${BOLD}${GREEN}═══════════════════════════════════════${RESET}`);
|
|
704
848
|
if (healthy) {
|
package/install.sh
CHANGED
|
@@ -230,6 +230,10 @@ if [ -z "$NODE_ROLE" ]; then
|
|
|
230
230
|
NODE_ROLE="worker"
|
|
231
231
|
fi
|
|
232
232
|
fi
|
|
233
|
+
if [ "$NODE_ROLE" != "lead" ] && [ "$NODE_ROLE" != "worker" ]; then
|
|
234
|
+
error "Invalid role: $NODE_ROLE (must be 'lead' or 'worker')"
|
|
235
|
+
exit 1
|
|
236
|
+
fi
|
|
233
237
|
export OPENCLAW_NODE_ROLE="$NODE_ROLE"
|
|
234
238
|
info "Node role: $NODE_ROLE"
|
|
235
239
|
|
|
@@ -692,6 +696,9 @@ else
|
|
|
692
696
|
if command -v envsubst >/dev/null 2>&1; then
|
|
693
697
|
envsubst < "$TEMPLATE" > "$DEST"
|
|
694
698
|
else
|
|
699
|
+
# NOTE: sed delimiter is |. If OPENCLAW_NATS_TOKEN ever contains |
|
|
700
|
+
# (unlikely — tokens are hex/base64), this substitution will break.
|
|
701
|
+
# Prefer envsubst (above) when available; it has no delimiter issue.
|
|
695
702
|
sed \
|
|
696
703
|
-e "s|\${HOME}|$HOME|g" \
|
|
697
704
|
-e "s|\${NODE_BIN}|$NODE_BIN|g" \
|
package/lib/kanban-io.js
CHANGED
|
@@ -53,8 +53,14 @@ function withMkdirLock(filePath, fn) {
|
|
|
53
53
|
if (Date.now() - start > maxWait) {
|
|
54
54
|
throw new Error(`kanban-io: lock timeout after ${maxWait}ms on ${filePath}`);
|
|
55
55
|
}
|
|
56
|
-
//
|
|
57
|
-
|
|
56
|
+
// Sleep ~10ms — Atomics.wait is precise but throws on main thread
|
|
57
|
+
// in some Node.js builds; fall back to busy-spin (rare contention path)
|
|
58
|
+
try {
|
|
59
|
+
Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 10);
|
|
60
|
+
} catch {
|
|
61
|
+
const end = Date.now() + 10;
|
|
62
|
+
while (Date.now() < end) { /* busy-wait fallback */ }
|
|
63
|
+
}
|
|
58
64
|
}
|
|
59
65
|
}
|
|
60
66
|
|
|
@@ -81,9 +87,7 @@ function parseTasks(content) {
|
|
|
81
87
|
const liveIdx = content.indexOf('## Live Tasks');
|
|
82
88
|
if (liveIdx === -1) return tasks;
|
|
83
89
|
|
|
84
|
-
const
|
|
85
|
-
const nextSectionIdx = afterLive.indexOf('\n## ', 1); // skip the current ## Live Tasks
|
|
86
|
-
const liveSection = nextSectionIdx >= 0 ? afterLive.slice(0, nextSectionIdx) : afterLive;
|
|
90
|
+
const liveSection = content.slice(liveIdx);
|
|
87
91
|
const lines = liveSection.split('\n');
|
|
88
92
|
|
|
89
93
|
let current = null;
|
|
@@ -110,6 +114,13 @@ function parseTasks(content) {
|
|
|
110
114
|
budget_minutes: current.budget_minutes || 30,
|
|
111
115
|
scope: current.scope || [],
|
|
112
116
|
updated_at: current.updated_at || '',
|
|
117
|
+
// Mesh routing
|
|
118
|
+
llm_provider: current.llm_provider || null,
|
|
119
|
+
llm_model: current.llm_model || null,
|
|
120
|
+
preferred_nodes: current.preferred_nodes || [],
|
|
121
|
+
exclude_nodes: current.exclude_nodes || [],
|
|
122
|
+
collaboration: current.collaboration || null,
|
|
123
|
+
collab_result: current.collab_result || null,
|
|
113
124
|
});
|
|
114
125
|
}
|
|
115
126
|
}
|
|
@@ -119,7 +130,7 @@ function parseTasks(content) {
|
|
|
119
130
|
const taskIdMatch = line.match(/^- task_id:\s*(.+)$/);
|
|
120
131
|
if (taskIdMatch) {
|
|
121
132
|
flush();
|
|
122
|
-
current = { task_id: taskIdMatch[1].trim(), success_criteria: [], artifacts: [], scope: [] };
|
|
133
|
+
current = { task_id: taskIdMatch[1].trim(), success_criteria: [], artifacts: [], scope: [], preferred_nodes: [], exclude_nodes: [] };
|
|
123
134
|
currentArrayKey = null;
|
|
124
135
|
continue;
|
|
125
136
|
}
|
|
@@ -176,6 +187,31 @@ function parseTasks(content) {
|
|
|
176
187
|
current.scope = [];
|
|
177
188
|
currentArrayKey = 'scope';
|
|
178
189
|
break;
|
|
190
|
+
// Mesh routing fields
|
|
191
|
+
case 'llm_provider':
|
|
192
|
+
case 'provider':
|
|
193
|
+
current.llm_provider = value || null; currentArrayKey = null; break;
|
|
194
|
+
case 'llm_model':
|
|
195
|
+
case 'model':
|
|
196
|
+
current.llm_model = value || null; currentArrayKey = null; break;
|
|
197
|
+
case 'preferred_nodes':
|
|
198
|
+
current.preferred_nodes = [];
|
|
199
|
+
currentArrayKey = 'preferred_nodes';
|
|
200
|
+
break;
|
|
201
|
+
case 'exclude_nodes':
|
|
202
|
+
current.exclude_nodes = [];
|
|
203
|
+
currentArrayKey = 'exclude_nodes';
|
|
204
|
+
break;
|
|
205
|
+
case 'collaboration':
|
|
206
|
+
try { current.collaboration = value ? JSON.parse(value) : null; }
|
|
207
|
+
catch { current.collaboration = null; }
|
|
208
|
+
currentArrayKey = null;
|
|
209
|
+
break;
|
|
210
|
+
case 'collab_result':
|
|
211
|
+
try { current.collab_result = value ? JSON.parse(value) : null; }
|
|
212
|
+
catch { current.collab_result = null; }
|
|
213
|
+
currentArrayKey = null;
|
|
214
|
+
break;
|
|
179
215
|
default:
|
|
180
216
|
currentArrayKey = null;
|
|
181
217
|
break;
|
|
@@ -236,7 +272,11 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
|
|
|
236
272
|
const blockLines = lines.slice(blockStart, blockEnd);
|
|
237
273
|
|
|
238
274
|
// Update scalar fields
|
|
239
|
-
for (const [key,
|
|
275
|
+
for (const [key, rawValue] of Object.entries(fieldUpdates)) {
|
|
276
|
+
// Serialize objects/arrays as JSON so the parser can read them back
|
|
277
|
+
const value = (rawValue !== null && typeof rawValue === 'object')
|
|
278
|
+
? JSON.stringify(rawValue)
|
|
279
|
+
: rawValue;
|
|
240
280
|
const fieldRegex = new RegExp(`^ ${key}:\\s*.*$`);
|
|
241
281
|
let found = false;
|
|
242
282
|
for (let i = 1; i < blockLines.length; i++) {
|
|
@@ -249,7 +289,7 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
|
|
|
249
289
|
if (!found) {
|
|
250
290
|
// Insert before updated_at if it exists, otherwise at end of block
|
|
251
291
|
const updatedAtIdx = blockLines.findIndex(l => l.match(/^ updated_at:/));
|
|
252
|
-
const insertIdx = updatedAtIdx
|
|
292
|
+
const insertIdx = updatedAtIdx > 0 ? updatedAtIdx : blockLines.length;
|
|
253
293
|
blockLines.splice(insertIdx, 0, ` ${key}: ${value}`);
|
|
254
294
|
}
|
|
255
295
|
}
|
|
@@ -262,7 +302,7 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
|
|
|
262
302
|
if (headerIdx === -1) {
|
|
263
303
|
// Insert the array before updated_at
|
|
264
304
|
const updatedAtIdx = blockLines.findIndex(l => l.match(/^ updated_at:/));
|
|
265
|
-
const insertIdx = updatedAtIdx
|
|
305
|
+
const insertIdx = updatedAtIdx > 0 ? updatedAtIdx : blockLines.length;
|
|
266
306
|
const newLines = [` ${key}:`];
|
|
267
307
|
for (const item of items) {
|
|
268
308
|
newLines.push(` - ${item}`);
|
|
@@ -290,7 +330,7 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
|
|
|
290
330
|
if (headerIdx === -1) {
|
|
291
331
|
// Insert the array before updated_at
|
|
292
332
|
const updatedAtIdx = blockLines.findIndex(l => l.match(/^ updated_at:/));
|
|
293
|
-
const insertIdx = updatedAtIdx
|
|
333
|
+
const insertIdx = updatedAtIdx > 0 ? updatedAtIdx : blockLines.length;
|
|
294
334
|
const newLines = [` ${key}:`];
|
|
295
335
|
for (const item of items) {
|
|
296
336
|
newLines.push(` - ${item}`);
|
package/lib/mesh-collab.js
CHANGED
|
@@ -105,6 +105,10 @@ function createSession(taskId, collabSpec) {
|
|
|
105
105
|
|
|
106
106
|
// ── CollabStore (KV-backed) ─────────────────────────
|
|
107
107
|
|
|
108
|
+
// Rate-limit audit error logs: max 3 per session, then go silent
|
|
109
|
+
const _auditErrorCounts = new Map();
|
|
110
|
+
const AUDIT_ERROR_LOG_LIMIT = 3;
|
|
111
|
+
|
|
108
112
|
class CollabStore {
|
|
109
113
|
constructor(kv) {
|
|
110
114
|
this.kv = kv;
|
|
@@ -139,7 +143,14 @@ class CollabStore {
|
|
|
139
143
|
...detail,
|
|
140
144
|
});
|
|
141
145
|
await this.put(session);
|
|
142
|
-
} catch {
|
|
146
|
+
} catch (err) {
|
|
147
|
+
// Best-effort — never block on audit, but log first N failures per session
|
|
148
|
+
const count = (_auditErrorCounts.get(sessionId) || 0) + 1;
|
|
149
|
+
_auditErrorCounts.set(sessionId, count);
|
|
150
|
+
if (count <= AUDIT_ERROR_LOG_LIMIT) {
|
|
151
|
+
console.error(`[collab] audit append failed for ${sessionId}/${event}: ${err.message}${count === AUDIT_ERROR_LOG_LIMIT ? ' (suppressing further audit errors for this session)' : ''}`);
|
|
152
|
+
}
|
|
153
|
+
}
|
|
143
154
|
}
|
|
144
155
|
|
|
145
156
|
/**
|
|
@@ -176,6 +187,30 @@ class CollabStore {
|
|
|
176
187
|
return sessions[0] || null;
|
|
177
188
|
}
|
|
178
189
|
|
|
190
|
+
/**
|
|
191
|
+
* Find active sessions that contain a given node.
|
|
192
|
+
* O(sessions) single pass — avoids the O(sessions × nodes) scan
|
|
193
|
+
* that detectStalls() previously used with list() + inner find().
|
|
194
|
+
*/
|
|
195
|
+
async findActiveSessionsByNode(nodeId) {
|
|
196
|
+
const results = [];
|
|
197
|
+
const allKeys = [];
|
|
198
|
+
const keys = await this.kv.keys();
|
|
199
|
+
for await (const key of keys) {
|
|
200
|
+
allKeys.push(key);
|
|
201
|
+
}
|
|
202
|
+
for (const key of allKeys) {
|
|
203
|
+
const entry = await this.kv.get(key);
|
|
204
|
+
if (!entry || !entry.value) continue;
|
|
205
|
+
const session = JSON.parse(sc.decode(entry.value));
|
|
206
|
+
if (session.status !== COLLAB_STATUS.ACTIVE) continue;
|
|
207
|
+
if (session.nodes.some(n => n.node_id === nodeId)) {
|
|
208
|
+
results.push(session);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
return results;
|
|
212
|
+
}
|
|
213
|
+
|
|
179
214
|
// ── Node Management ────────────────────────────────
|
|
180
215
|
|
|
181
216
|
/**
|
|
@@ -190,7 +225,8 @@ class CollabStore {
|
|
|
190
225
|
// Check max_nodes
|
|
191
226
|
if (session.max_nodes && session.nodes.length >= session.max_nodes) return null;
|
|
192
227
|
|
|
193
|
-
// Check duplicate
|
|
228
|
+
// Check duplicate — single-threaded event loop prevents concurrent joins
|
|
229
|
+
// from interleaving between find() and push(). No mutex needed.
|
|
194
230
|
if (session.nodes.find(n => n.node_id === nodeId)) return null;
|
|
195
231
|
|
|
196
232
|
session.nodes.push({
|
|
@@ -320,6 +356,9 @@ class CollabStore {
|
|
|
320
356
|
const session = await this.get(sessionId);
|
|
321
357
|
if (!session) return null;
|
|
322
358
|
|
|
359
|
+
// Only accept reflections on active sessions
|
|
360
|
+
if (session.status !== COLLAB_STATUS.ACTIVE) return null;
|
|
361
|
+
|
|
323
362
|
const currentRound = session.rounds[session.rounds.length - 1];
|
|
324
363
|
if (!currentRound) return null;
|
|
325
364
|
|
|
@@ -506,11 +545,14 @@ class CollabStore {
|
|
|
506
545
|
}
|
|
507
546
|
|
|
508
547
|
/**
|
|
509
|
-
* Mark session as aborted.
|
|
548
|
+
* Mark session as aborted. Returns null (no-op) if already completed or aborted.
|
|
549
|
+
* Callers can use truthiness to detect whether the abort actually happened.
|
|
510
550
|
*/
|
|
511
551
|
async markAborted(sessionId, reason) {
|
|
512
552
|
const session = await this.get(sessionId);
|
|
513
553
|
if (!session) return null;
|
|
554
|
+
// Guard: don't corrupt completed/aborted sessions
|
|
555
|
+
if (['completed', 'aborted'].includes(session.status)) return null;
|
|
514
556
|
session.status = COLLAB_STATUS.ABORTED;
|
|
515
557
|
session.completed_at = new Date().toISOString();
|
|
516
558
|
session.result = { success: false, summary: reason, aborted: true };
|
|
@@ -518,6 +560,14 @@ class CollabStore {
|
|
|
518
560
|
return session;
|
|
519
561
|
}
|
|
520
562
|
|
|
563
|
+
/**
|
|
564
|
+
* Clear the audit error rate-limit counter for a session.
|
|
565
|
+
* Call when a session is finalized (completed/aborted) to prevent Map leak.
|
|
566
|
+
*/
|
|
567
|
+
clearAuditErrorCount(sessionId) {
|
|
568
|
+
_auditErrorCounts.delete(sessionId);
|
|
569
|
+
}
|
|
570
|
+
|
|
521
571
|
/**
|
|
522
572
|
* Get a summary of the session for reporting.
|
|
523
573
|
*/
|
package/lib/mesh-registry.js
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* mesh-registry.js — NATS KV tool registry for OpenClaw mesh.
|
|
3
3
|
*
|
|
4
|
+
* STATUS: UNUSED — fully implemented but no callers exist yet. Kept for
|
|
5
|
+
* future tool-mesh integration. Review before adopting; remove if still
|
|
6
|
+
* uncalled by next major release.
|
|
7
|
+
*
|
|
4
8
|
* Shared library for:
|
|
5
9
|
* - Registering tools in MESH_TOOLS KV bucket
|
|
6
10
|
* - Heartbeat refresh (keeps tools alive via TTL)
|
|
@@ -36,7 +40,9 @@ class MeshRegistry {
|
|
|
36
40
|
|
|
37
41
|
async init() {
|
|
38
42
|
const js = this.nc.jetstream();
|
|
39
|
-
|
|
43
|
+
// TTL: entries auto-expire after 120s if not refreshed by heartbeat (60s interval).
|
|
44
|
+
// Prevents stale entries from crashed services that never called shutdown().
|
|
45
|
+
this.kv = await js.views.kv(KV_BUCKET, { ttl: 120_000 });
|
|
40
46
|
return this;
|
|
41
47
|
}
|
|
42
48
|
|
|
@@ -111,7 +117,10 @@ class MeshRegistry {
|
|
|
111
117
|
for (const [toolName, manifest] of this.manifests) {
|
|
112
118
|
const kvKey = `${this.nodeId}.${toolName}`;
|
|
113
119
|
try {
|
|
114
|
-
await this.kv.put(kvKey, sc.encode(JSON.stringify(
|
|
120
|
+
await this.kv.put(kvKey, sc.encode(JSON.stringify({
|
|
121
|
+
...manifest,
|
|
122
|
+
last_heartbeat: new Date().toISOString(),
|
|
123
|
+
})));
|
|
115
124
|
} catch (err) {
|
|
116
125
|
console.error(`[mesh-registry] heartbeat failed for ${kvKey}: ${err.message}`);
|
|
117
126
|
}
|
package/lib/mesh-tasks.js
CHANGED
|
@@ -140,15 +140,15 @@ class TaskStore {
|
|
|
140
140
|
// Apply filters
|
|
141
141
|
if (filter.status && task.status !== filter.status) continue;
|
|
142
142
|
if (filter.owner && task.owner !== filter.owner) continue;
|
|
143
|
-
if (filter.tag && !task.tags.includes(filter.tag)) continue;
|
|
143
|
+
if (filter.tag && (!task.tags || !task.tags.includes(filter.tag))) continue;
|
|
144
144
|
|
|
145
145
|
tasks.push(task);
|
|
146
146
|
}
|
|
147
147
|
|
|
148
148
|
// Sort by priority (higher first), then created_at (older first)
|
|
149
149
|
tasks.sort((a, b) => {
|
|
150
|
-
if (b.priority !== a.priority) return b.priority - a.priority;
|
|
151
|
-
return new Date(a.created_at) - new Date(b.created_at);
|
|
150
|
+
if ((b.priority || 0) !== (a.priority || 0)) return (b.priority || 0) - (a.priority || 0);
|
|
151
|
+
return (new Date(a.created_at || 0)) - (new Date(b.created_at || 0));
|
|
152
152
|
});
|
|
153
153
|
|
|
154
154
|
return tasks;
|
|
@@ -169,7 +169,7 @@ class TaskStore {
|
|
|
169
169
|
if (task.exclude_nodes && task.exclude_nodes.includes(nodeId)) continue;
|
|
170
170
|
|
|
171
171
|
// Respect dependencies
|
|
172
|
-
if (task.depends_on.length > 0) {
|
|
172
|
+
if (task.depends_on && task.depends_on.length > 0) {
|
|
173
173
|
const depsReady = await this._checkDeps(task.depends_on);
|
|
174
174
|
if (!depsReady) continue;
|
|
175
175
|
}
|
|
@@ -192,9 +192,8 @@ class TaskStore {
|
|
|
192
192
|
task.status = TASK_STATUS.CLAIMED;
|
|
193
193
|
task.owner = nodeId;
|
|
194
194
|
task.claimed_at = new Date().toISOString();
|
|
195
|
-
task.
|
|
196
|
-
|
|
197
|
-
).toISOString();
|
|
195
|
+
const budgetMs = (task.budget_minutes || 30) * 60 * 1000;
|
|
196
|
+
task.budget_deadline = new Date(Date.now() + budgetMs).toISOString();
|
|
198
197
|
|
|
199
198
|
await this.put(task);
|
|
200
199
|
return task;
|
package/package.json
CHANGED