openclaw-node-harness 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/bin/lane-watchdog.js +54 -23
  2. package/bin/mesh-agent.js +49 -18
  3. package/bin/mesh-bridge.js +3 -2
  4. package/bin/mesh-deploy.js +4 -0
  5. package/bin/mesh-health-publisher.js +41 -1
  6. package/bin/mesh-task-daemon.js +14 -4
  7. package/bin/mesh.js +17 -43
  8. package/install.sh +3 -2
  9. package/lib/agent-activity.js +2 -2
  10. package/lib/exec-safety.js +163 -0
  11. package/lib/kanban-io.js +20 -33
  12. package/lib/llm-providers.js +27 -0
  13. package/lib/mcp-knowledge/core.mjs +7 -5
  14. package/lib/mcp-knowledge/server.mjs +8 -1
  15. package/lib/mesh-collab.js +274 -250
  16. package/lib/mesh-harness.js +6 -0
  17. package/lib/mesh-plans.js +84 -45
  18. package/lib/mesh-tasks.js +113 -81
  19. package/lib/nats-resolve.js +4 -4
  20. package/lib/pre-compression-flush.mjs +2 -0
  21. package/lib/session-store.mjs +6 -3
  22. package/mission-control/package-lock.json +4188 -3698
  23. package/mission-control/package.json +2 -2
  24. package/mission-control/src/app/api/diagnostics/route.ts +8 -0
  25. package/mission-control/src/app/api/diagnostics/test-runner/route.ts +8 -0
  26. package/mission-control/src/app/api/memory/graph/route.ts +34 -18
  27. package/mission-control/src/app/api/memory/search/route.ts +9 -5
  28. package/mission-control/src/app/api/mesh/identity/route.ts +13 -5
  29. package/mission-control/src/app/api/mesh/nodes/route.ts +8 -0
  30. package/mission-control/src/app/api/settings/gateway/route.ts +62 -0
  31. package/mission-control/src/app/api/souls/[id]/evolution/route.ts +49 -12
  32. package/mission-control/src/app/api/souls/[id]/prompt/route.ts +7 -1
  33. package/mission-control/src/app/api/souls/[id]/propagate/route.ts +24 -5
  34. package/mission-control/src/app/api/souls/route.ts +6 -4
  35. package/mission-control/src/app/api/tasks/[id]/handoff/route.ts +7 -1
  36. package/mission-control/src/app/api/tasks/[id]/route.ts +20 -4
  37. package/mission-control/src/app/api/tasks/route.ts +68 -9
  38. package/mission-control/src/app/api/workspace/read/route.ts +11 -0
  39. package/mission-control/src/lib/config.ts +11 -2
  40. package/mission-control/src/lib/db/index.ts +16 -1
  41. package/mission-control/src/lib/memory/extract.ts +2 -1
  42. package/mission-control/src/lib/memory/retrieval.ts +3 -2
  43. package/mission-control/src/lib/sync/tasks.ts +4 -1
  44. package/mission-control/src/middleware.ts +82 -0
  45. package/package.json +1 -1
  46. package/services/launchd/ai.openclaw.lane-watchdog.plist +1 -1
  47. package/services/launchd/ai.openclaw.log-rotate.plist +11 -0
  48. package/services/launchd/ai.openclaw.mesh-agent.plist +4 -0
  49. package/services/launchd/ai.openclaw.mesh-deploy-listener.plist +4 -0
  50. package/services/launchd/ai.openclaw.mesh-health-publisher.plist +4 -0
  51. package/services/launchd/ai.openclaw.mission-control.plist +5 -4
  52. package/uninstall.sh +37 -9
package/bin/lane-watchdog.js CHANGED
@@ -37,6 +37,11 @@ let lastInterventionAt = 0;
37
37
  let logWatcher = null;
38
38
  let errWatcher = null;
39
39
 
40
+ // Incident log dedup: suppress identical messages within 60s
41
+ let lastIncidentMsg = '';
42
+ let lastIncidentAt = 0;
43
+ let suppressedCount = 0;
44
+
40
45
  // Track detected events
41
46
  const events = {
42
47
  agentTimeout: null, // timestamp of last "embedded run timeout"
@@ -45,6 +50,22 @@ const events = {
45
50
 
46
51
  // --- Helpers ---
47
52
  function log(msg) {
53
+ const now = Date.now();
54
+ // Dedup: suppress identical messages within 60s
55
+ if (msg === lastIncidentMsg && (now - lastIncidentAt) < 60_000) {
56
+ suppressedCount++;
57
+ return;
58
+ }
59
+ // If we suppressed duplicates, emit a summary before the new message
60
+ if (suppressedCount > 0) {
61
+ const summaryLine = `${new Date().toISOString()} [lane-watchdog] (suppressed ${suppressedCount} duplicate message(s))`;
62
+ console.log(summaryLine);
63
+ try { fs.appendFileSync(INCIDENT_LOG, summaryLine + '\n'); } catch { /* best effort */ }
64
+ }
65
+ lastIncidentMsg = msg;
66
+ lastIncidentAt = now;
67
+ suppressedCount = 0;
68
+
48
69
  const ts = new Date().toISOString();
49
70
  const line = `${ts} [lane-watchdog] ${msg}`;
50
71
  console.log(line);
@@ -172,29 +193,39 @@ function tailLog(filePath, label) {
172
193
 
173
194
  const watcher = fs.watch(filePath, { persistent: true }, () => {
174
195
  try {
175
- const stat = fs.statSync(filePath);
176
- if (stat.size < fileSize) {
177
- // Log was rotated
196
+ // Read from current fileSize to EOF — avoid TOCTOU race by not
197
+ // pre-checking stat.size. createReadStream with just `start` reads
198
+ // to the end of the file atomically, then we update fileSize from
199
+ // the bytes actually read.
200
+ const stream = fs.createReadStream(filePath, {
201
+ start: fileSize,
202
+ encoding: 'utf8'
203
+ });
204
+ let buffer = '';
205
+ let bytesRead = 0;
206
+ stream.on('data', chunk => { buffer += chunk; bytesRead += Buffer.byteLength(chunk, 'utf8'); });
207
+ stream.on('end', () => {
208
+ if (bytesRead === 0) return; // no new data
209
+ const lines = buffer.split('\n').filter(Boolean);
210
+ for (const line of lines) {
211
+ parseLine(line);
212
+ }
213
+ fileSize += bytesRead;
214
+ });
215
+ stream.on('error', (err) => {
216
+ if (err.code === 'ENOENT') {
217
+ // File was deleted/rotated — reset position
218
+ fileSize = 0;
219
+ } else {
220
+ log(`ERROR: reading ${label}: ${err.message}`);
221
+ }
222
+ });
223
+ } catch (err) {
224
+ if (err.code === 'ENOENT') {
178
225
  fileSize = 0;
226
+ } else {
227
+ log(`ERROR: reading ${label}: ${err.message}`);
179
228
  }
180
- if (stat.size > fileSize) {
181
- const stream = fs.createReadStream(filePath, {
182
- start: fileSize,
183
- end: stat.size,
184
- encoding: 'utf8'
185
- });
186
- let buffer = '';
187
- stream.on('data', chunk => { buffer += chunk; });
188
- stream.on('end', () => {
189
- const lines = buffer.split('\n').filter(Boolean);
190
- for (const line of lines) {
191
- parseLine(line);
192
- }
193
- fileSize = stat.size;
194
- });
195
- }
196
- } catch (err) {
197
- log(`ERROR: reading ${label}: ${err.message}`);
198
229
  }
199
230
  });
200
231
 
@@ -220,8 +251,8 @@ function main() {
220
251
  for (const sig of ['SIGTERM', 'SIGINT']) {
221
252
  process.on(sig, () => {
222
253
  log(`Received ${sig}, shutting down`);
223
- if (logWatcher) fs.unwatchFile(GATEWAY_LOG);
224
- if (errWatcher) fs.unwatchFile(GATEWAY_ERR_LOG);
254
+ if (logWatcher) logWatcher.close();
255
+ if (errWatcher) errWatcher.close();
225
256
  process.exit(0);
226
257
  });
227
258
  }
package/bin/mesh-agent.js CHANGED
@@ -36,7 +36,7 @@
36
36
  */
37
37
 
38
38
  const { connect, StringCodec } = require('nats');
39
- const { spawn, execSync } = require('child_process');
39
+ const { spawn, execSync, execFileSync } = require('child_process');
40
40
  const os = require('os');
41
41
  const path = require('path');
42
42
  const fs = require('fs');
@@ -46,7 +46,7 @@ const { loadHarnessRules, runMeshHarness, runPostCommitValidation, formatHarness
46
46
  const { findRole, formatRoleForPrompt } = require('../lib/role-loader');
47
47
 
48
48
  const sc = StringCodec();
49
- const { NATS_URL } = require('../lib/nats-resolve');
49
+ const { NATS_URL, natsConnectOpts } = require('../lib/nats-resolve');
50
50
  const { resolveProvider, resolveModel } = require('../lib/llm-providers');
51
51
  const NODE_ID = process.env.MESH_NODE_ID || os.hostname().toLowerCase().replace(/[^a-z0-9-]/g, '-');
52
52
  const POLL_INTERVAL = parseInt(process.env.MESH_POLL_INTERVAL || '15000'); // 15s between polls
@@ -198,8 +198,9 @@ function buildInitialPrompt(task) {
198
198
  }
199
199
 
200
200
  if (task.metric) {
201
+ const safeMetric = isAllowedMetric(task.metric) ? task.metric : '[metric command filtered for security]';
201
202
  parts.push(`## Verification`);
202
- parts.push(`Run this command to check your work: \`${task.metric}\``);
203
+ parts.push(`Run this command to check your work: \`${safeMetric}\``);
203
204
  parts.push(`Your changes are only accepted if this command exits with code 0.`);
204
205
  parts.push('');
205
206
  }
@@ -224,7 +225,8 @@ function buildInitialPrompt(task) {
224
225
  parts.push('- Make minimal, focused changes. Do not add scope beyond what is asked.');
225
226
  parts.push('- If you hit a blocker you cannot resolve, explain what is blocking you clearly.');
226
227
  if (task.metric) {
227
- parts.push(`- After making changes, run \`${task.metric}\` to verify.`);
228
+ const safeMetric = isAllowedMetric(task.metric) ? task.metric : '[metric command filtered for security]';
229
+ parts.push(`- After making changes, run \`${safeMetric}\` to verify.`);
228
230
  parts.push('- If verification fails, analyze the failure and iterate on your approach.');
229
231
  }
230
232
 
@@ -264,8 +266,9 @@ function buildRetryPrompt(task, previousAttempts, attemptNumber) {
264
266
  }
265
267
 
266
268
  if (task.metric) {
269
+ const safeMetric = isAllowedMetric(task.metric) ? task.metric : '[metric command filtered for security]';
267
270
  parts.push(`## Verification`);
268
- parts.push(`Run: \`${task.metric}\``);
271
+ parts.push(`Run: \`${safeMetric}\``);
269
272
  parts.push(`Must exit code 0.`);
270
273
  parts.push('');
271
274
  }
@@ -289,7 +292,8 @@ function buildRetryPrompt(task, previousAttempts, attemptNumber) {
289
292
  parts.push('- Read the relevant files before making changes.');
290
293
  parts.push('- Make minimal, focused changes.');
291
294
  if (task.metric) {
292
- parts.push(`- Run \`${task.metric}\` to verify before finishing.`);
295
+ const safeMetric = isAllowedMetric(task.metric) ? task.metric : '[metric command filtered for security]';
296
+ parts.push(`- Run \`${safeMetric}\` to verify before finishing.`);
293
297
  }
294
298
 
295
299
  return parts.join('\n');
@@ -305,6 +309,9 @@ const WORKTREE_BASE = process.env.MESH_WORKTREE_BASE || path.join(process.env.HO
305
309
  * On failure, returns null (falls back to shared workspace).
306
310
  */
307
311
  function createWorktree(taskId) {
312
+ if (!/^[\w][\w.-]{0,127}$/.test(taskId)) {
313
+ throw new Error(`Invalid taskId: contains unsafe characters`);
314
+ }
308
315
  const worktreePath = path.join(WORKTREE_BASE, taskId);
309
316
  const branch = `mesh/${taskId}`;
310
317
 
@@ -315,19 +322,19 @@ function createWorktree(taskId) {
315
322
  if (fs.existsSync(worktreePath)) {
316
323
  log(`Cleaning stale worktree: ${worktreePath}`);
317
324
  try {
318
- execSync(`git worktree remove --force "${worktreePath}"`, { cwd: WORKSPACE, timeout: 10000 });
325
+ execFileSync('git', ['worktree', 'remove', '--force', worktreePath], { cwd: WORKSPACE, timeout: 10000 });
319
326
  } catch {
320
327
  // If git worktree remove fails, manually clean up
321
328
  fs.rmSync(worktreePath, { recursive: true, force: true });
322
329
  }
323
330
  // Also clean up the branch if it exists
324
331
  try {
325
- execSync(`git branch -D "${branch}"`, { cwd: WORKSPACE, timeout: 5000, stdio: 'ignore' });
332
+ execFileSync('git', ['branch', '-D', branch], { cwd: WORKSPACE, timeout: 5000, stdio: 'ignore' });
326
333
  } catch { /* branch may not exist */ }
327
334
  }
328
335
 
329
336
  // Create new worktree branched off HEAD
330
- execSync(`git worktree add -b "${branch}" "${worktreePath}" HEAD`, {
337
+ execFileSync('git', ['worktree', 'add', '-b', branch, worktreePath, 'HEAD'], {
331
338
  cwd: WORKSPACE,
332
339
  timeout: 30000,
333
340
  stdio: 'pipe',
@@ -375,7 +382,7 @@ function commitAndMergeWorktree(worktreePath, taskId, summary) {
375
382
  log(`WARNING: commit message doesn't follow conventional format: "${commitMsg}"`);
376
383
  }
377
384
 
378
- execSync(`git commit -m "${commitMsg.replace(/"/g, '\\"')}"`, {
385
+ execFileSync('git', ['commit', '-m', commitMsg], {
379
386
  cwd: worktreePath, timeout: 10000, stdio: 'pipe',
380
387
  });
381
388
 
@@ -391,7 +398,7 @@ function commitAndMergeWorktree(worktreePath, taskId, summary) {
391
398
  const mergeMsg = `Merge ${branch}: ${taskId}`;
392
399
  for (let attempt = 0; attempt < 2; attempt++) {
393
400
  try {
394
- execSync(`git merge --no-ff "${branch}" -m "${mergeMsg.replace(/"/g, '\\"')}"`, {
401
+ execFileSync('git', ['merge', '--no-ff', branch, '-m', mergeMsg], {
395
402
  cwd: WORKSPACE, timeout: 30000, stdio: 'pipe',
396
403
  });
397
404
  log(`Merged ${branch} into main${attempt > 0 ? ' (retry succeeded)' : ''}`);
@@ -429,13 +436,13 @@ function cleanupWorktree(worktreePath, keep = false) {
429
436
  const branch = `mesh/${taskId}`;
430
437
 
431
438
  try {
432
- execSync(`git worktree remove --force "${worktreePath}"`, {
439
+ execFileSync('git', ['worktree', 'remove', '--force', worktreePath], {
433
440
  cwd: WORKSPACE,
434
441
  timeout: 10000,
435
442
  stdio: 'pipe',
436
443
  });
437
444
  if (!keep) {
438
- execSync(`git branch -D "${branch}"`, {
445
+ execFileSync('git', ['branch', '-D', branch], {
439
446
  cwd: WORKSPACE,
440
447
  timeout: 5000,
441
448
  stdio: 'ignore',
@@ -507,9 +514,10 @@ function runLLM(prompt, task, worktreePath) {
507
514
 
508
515
  let stdout = '';
509
516
  let stderr = '';
517
+ const MAX_OUTPUT = 1024 * 1024; // 1MB cap
510
518
 
511
- child.stdout.on('data', (d) => { stdout += d.toString(); });
512
- child.stderr.on('data', (d) => { stderr += d.toString(); });
519
+ child.stdout.on('data', (d) => { if (stdout.length < MAX_OUTPUT) stdout += d.toString().slice(0, MAX_OUTPUT - stdout.length); });
520
+ child.stderr.on('data', (d) => { if (stderr.length < MAX_OUTPUT) stderr += d.toString().slice(0, MAX_OUTPUT - stderr.length); });
513
521
 
514
522
  child.on('close', (code) => {
515
523
  clearInterval(heartbeatTimer);
@@ -525,10 +533,25 @@ function runLLM(prompt, task, worktreePath) {
525
533
 
526
534
  // ── Metric Evaluation ─────────────────────────────────
527
535
 
536
+ const ALLOWED_METRIC_PREFIXES = [
537
+ 'npm test', 'npm run', 'node ', 'pytest', 'cargo test',
538
+ 'go test', 'make test', 'jest', 'vitest', 'mocha',
539
+ ];
540
+
541
+ function isAllowedMetric(cmd) {
542
+ if (/[\n\r\0;`]|\$\(|\|\||&&|<\(|>\(|<<|>>|>\s|\|/.test(cmd)) return false;
543
+ if (/\bnode\s+(-e\b|--eval\b|-p\b|--print\b|-r\b|--require\b|--import\b)/.test(cmd)) return false;
544
+ return ALLOWED_METRIC_PREFIXES.some(prefix => cmd.startsWith(prefix));
545
+ }
546
+
528
547
  /**
529
548
  * Run the task's metric command. Returns { passed, output }.
530
549
  */
531
550
  function evaluateMetric(metric, cwd) {
551
+ if (!isAllowedMetric(metric)) {
552
+ log(`WARNING: Metric command blocked by security filter: ${metric}`);
553
+ return Promise.resolve({ passed: false, output: 'Metric command blocked by security filter' });
554
+ }
532
555
  return new Promise((resolve) => {
533
556
  const child = spawn('bash', ['-c', metric], {
534
557
  cwd: cwd || WORKSPACE,
@@ -1011,6 +1034,9 @@ async function executeCollabTask(task) {
1011
1034
  // Create worktree for isolation
1012
1035
  const worktreePath = createWorktree(`${task.task_id}-${NODE_ID}`);
1013
1036
  const taskDir = worktreePath || WORKSPACE;
1037
+ if (!worktreePath) {
1038
+ log(`WARNING: Collab task ${task.task_id} running in shared workspace — isolation not achieved`);
1039
+ }
1014
1040
 
1015
1041
  // Periodic session heartbeat — detects abort/completion while waiting for rounds
1016
1042
  const sessionHeartbeat = setInterval(async () => {
@@ -1169,9 +1195,13 @@ async function executeTask(task) {
1169
1195
  // Create isolated worktree for this task (falls back to shared workspace on failure)
1170
1196
  const worktreePath = createWorktree(task.task_id);
1171
1197
  const taskDir = worktreePath || WORKSPACE;
1198
+ const workspaceIsolated = !!worktreePath;
1199
+ if (!workspaceIsolated) {
1200
+ log(`WARNING: Task ${task.task_id} running in shared workspace — isolation not achieved`);
1201
+ }
1172
1202
 
1173
- // Signal start
1174
- await natsRequest('mesh.tasks.start', { task_id: task.task_id });
1203
+ // Signal start (include isolation status so daemon knows)
1204
+ await natsRequest('mesh.tasks.start', { task_id: task.task_id, workspace_isolated: workspaceIsolated });
1175
1205
  writeAgentState('working', task.task_id);
1176
1206
  log(`Started: ${task.task_id} (dir: ${worktreePath ? 'worktree' : 'workspace'})`);
1177
1207
 
@@ -1394,8 +1424,9 @@ async function main() {
1394
1424
  log(` Poll interval: ${POLL_INTERVAL / 1000}s`);
1395
1425
  log(` Mode: ${ONCE ? 'single task' : 'continuous'} ${DRY_RUN ? '(dry run)' : ''}`);
1396
1426
 
1427
+ const natsOpts = natsConnectOpts();
1397
1428
  nc = await connect({
1398
- servers: NATS_URL,
1429
+ ...natsOpts,
1399
1430
  timeout: 5000,
1400
1431
  reconnect: true,
1401
1432
  maxReconnectAttempts: 10,
package/bin/mesh-bridge.js CHANGED
@@ -21,7 +21,7 @@ const path = require('path');
21
21
  const { readTasks, updateTaskInPlace, isoTimestamp, ACTIVE_TASKS_PATH } = require('../lib/kanban-io');
22
22
 
23
23
  const sc = StringCodec();
24
- const { NATS_URL } = require('../lib/nats-resolve');
24
+ const { NATS_URL, natsConnectOpts } = require('../lib/nats-resolve');
25
25
  const DISPATCH_INTERVAL = parseInt(process.env.BRIDGE_DISPATCH_INTERVAL || '10000'); // 10s
26
26
  const LOG_DIR = path.join(process.env.HOME, '.openclaw', 'workspace', 'memory', 'mesh-logs');
27
27
  const WORKSPACE = path.join(process.env.HOME, '.openclaw', 'workspace');
@@ -726,8 +726,9 @@ async function main() {
726
726
  log(` Dispatch interval: ${DISPATCH_INTERVAL / 1000}s`);
727
727
  log(` Mode: ${DRY_RUN ? 'dry run' : 'live'}`);
728
728
 
729
+ const natsOpts = natsConnectOpts();
729
730
  nc = await connect({
730
- servers: NATS_URL,
731
+ ...natsOpts,
731
732
  timeout: 5000,
732
733
  reconnect: true,
733
734
  maxReconnectAttempts: 10,
package/bin/mesh-deploy.js CHANGED
@@ -47,6 +47,10 @@ const crypto = require('crypto');
47
47
  const IS_MAC = os.platform() === 'darwin';
48
48
  const HOME = os.homedir();
49
49
  const DEPLOY_BRANCH = process.env.OPENCLAW_DEPLOY_BRANCH || 'main';
50
+ if (!/^[a-zA-Z0-9._\/-]+$/.test(DEPLOY_BRANCH)) {
51
+ console.error(`Invalid DEPLOY_BRANCH: ${DEPLOY_BRANCH}`);
52
+ process.exit(1);
53
+ }
50
54
  const REPO_DIR = process.env.OPENCLAW_REPO_DIR || path.join(HOME, 'openclaw');
51
55
 
52
56
  // KNOWN ISSUE: Two-directory problem
package/bin/mesh-health-publisher.js CHANGED
@@ -36,6 +36,12 @@ const IS_MAC = os.platform() === "darwin";
36
36
 
37
37
  const { ROLE_COMPONENTS } = require('../lib/mesh-roles');
38
38
 
39
+ // ── Circuit Breaker State ───────────────────────────────────────────────
40
+ let consecutiveFailures = 0;
41
+ let skipTicksRemaining = 0;
42
+ let lastErrorMsg = '';
43
+ let lastErrorRepeatCount = 0;
44
+
39
45
  // ── Health Gathering ─────────────────────────────────────────────────────
40
46
  // All the expensive execSync calls happen here, on our own schedule.
41
47
  // No request timeout to race against.
@@ -226,11 +232,45 @@ async function main() {
226
232
 
227
233
  // Publish immediately, then every interval
228
234
  async function publish() {
235
+ // Circuit breaker: skip ticks during backoff
236
+ if (skipTicksRemaining > 0) {
237
+ skipTicksRemaining--;
238
+ return;
239
+ }
240
+
229
241
  try {
230
242
  const health = gatherHealth();
231
243
  await kv.put(NODE_ID, sc.encode(JSON.stringify(health)));
244
+ // Reset on success
245
+ if (consecutiveFailures > 0) {
246
+ console.log(`[health-publisher] recovered after ${consecutiveFailures} consecutive failures`);
247
+ }
248
+ consecutiveFailures = 0;
249
+ lastErrorMsg = '';
250
+ lastErrorRepeatCount = 0;
232
251
  } catch (err) {
233
- console.error("[health-publisher] publish failed:", err.message);
252
+ consecutiveFailures++;
253
+ const msg = err.message;
254
+
255
+ // Log dedup: after 3 identical consecutive errors, log every 10th
256
+ if (msg === lastErrorMsg) {
257
+ lastErrorRepeatCount++;
258
+ if (lastErrorRepeatCount === 3) {
259
+ console.error(`[health-publisher] suppressing repeated errors (${lastErrorRepeatCount} occurrences): ${msg}`);
260
+ } else if (lastErrorRepeatCount > 3 && lastErrorRepeatCount % 10 === 0) {
261
+ console.error(`[health-publisher] suppressing repeated errors (${lastErrorRepeatCount} occurrences): ${msg}`);
262
+ }
263
+ // Silently skip logs between dedup thresholds
264
+ } else {
265
+ lastErrorMsg = msg;
266
+ lastErrorRepeatCount = 1;
267
+ console.error("[health-publisher] publish failed:", msg);
268
+ }
269
+
270
+ // Exponential backoff: skip 2^min(N,6) ticks (max ~64 ticks / ~16 min at 15s)
271
+ const backoffTicks = Math.pow(2, Math.min(consecutiveFailures, 6));
272
+ skipTicksRemaining = backoffTicks;
273
+ console.error(`[health-publisher] backoff: skipping next ${backoffTicks} ticks (failures=${consecutiveFailures})`);
234
274
  }
235
275
  }
236
276
 
package/bin/mesh-task-daemon.js CHANGED
@@ -46,7 +46,7 @@ const ROLE_DIRS = [
46
46
  ];
47
47
 
48
48
  const sc = StringCodec();
49
- const { NATS_URL } = require('../lib/nats-resolve');
49
+ const { NATS_URL, natsConnectOpts } = require('../lib/nats-resolve');
50
50
  const BUDGET_CHECK_INTERVAL = 30000; // 30s
51
51
  const STALL_MINUTES = parseInt(process.env.MESH_STALL_MINUTES || '5'); // no heartbeat for this long → stalled
52
52
  const CIRCLING_STEP_TIMEOUT_MS = parseInt(process.env.MESH_CIRCLING_STEP_TIMEOUT_MS || String(10 * 60 * 1000)); // 10 min default
@@ -2013,7 +2013,8 @@ function cascadeFailure(plan, failedSubtaskId) {
2013
2013
  async function main() {
2014
2014
  log('Starting mesh task daemon...');
2015
2015
 
2016
- nc = await connect({ servers: NATS_URL, timeout: 5000 });
2016
+ const natsOpts = natsConnectOpts();
2017
+ nc = await connect({ ...natsOpts, timeout: 5000 });
2017
2018
  log(`Connected to NATS at ${NATS_URL}`);
2018
2019
 
2019
2020
  // Initialize task store
@@ -2085,8 +2086,12 @@ async function main() {
2085
2086
  }
2086
2087
 
2087
2088
  // Start enforcement loops
2088
- const proposalTimer = setInterval(processProposals, BUDGET_CHECK_INTERVAL);
2089
- const budgetTimer = setInterval(enforceBudgets, BUDGET_CHECK_INTERVAL);
2089
+ const proposalTimer = setInterval(async () => {
2090
+ try { await processProposals(); } catch (err) { log(`processProposals error: ${err.message}`); }
2091
+ }, BUDGET_CHECK_INTERVAL);
2092
+ const budgetTimer = setInterval(async () => {
2093
+ try { await enforceBudgets(); } catch (err) { log(`enforceBudgets error: ${err.message}`); }
2094
+ }, BUDGET_CHECK_INTERVAL);
2090
2095
  const stallTimer = setInterval(detectStalls, BUDGET_CHECK_INTERVAL);
2091
2096
  const recruitTimer = setInterval(checkRecruitingDeadlines, 5000); // check every 5s
2092
2097
  const circlingStepSweepTimer = setInterval(sweepCirclingStepTimeouts, 60000); // every 60s
@@ -2106,6 +2111,11 @@ async function main() {
2106
2111
  clearInterval(budgetTimer);
2107
2112
  clearInterval(stallTimer);
2108
2113
  clearInterval(recruitTimer);
2114
+ if (circlingStepSweepTimer) clearInterval(circlingStepSweepTimer);
2115
+ if (circlingStepTimers) {
2116
+ for (const timer of circlingStepTimers.values()) clearTimeout(timer);
2117
+ circlingStepTimers.clear();
2118
+ }
2109
2119
  for (const sub of subs) sub.unsubscribe();
2110
2120
  await nc.drain();
2111
2121
  process.exit(0);
package/bin/mesh.js CHANGED
@@ -27,23 +27,7 @@ const { connect, StringCodec, createInbox } = require('nats');
27
27
  const fs = require('fs');
28
28
  const path = require('path');
29
29
  const os = require('os');
30
-
31
- // ─── Config ──────────────────────────────────────────
32
- // ── NATS URL resolution: env var → ~/.openclaw/openclaw.env → fallback IP ──
33
- const NATS_FALLBACK = 'nats://100.91.131.61:4222';
34
- function resolveNatsUrl() {
35
- if (process.env.OPENCLAW_NATS) return process.env.OPENCLAW_NATS;
36
- try {
37
- const envFile = path.join(os.homedir(), '.openclaw', 'openclaw.env');
38
- if (fs.existsSync(envFile)) {
39
- const content = fs.readFileSync(envFile, 'utf8');
40
- const match = content.match(/^\s*OPENCLAW_NATS\s*=\s*(.+)/m);
41
- if (match && match[1].trim()) return match[1].trim();
42
- }
43
- } catch {}
44
- return NATS_FALLBACK;
45
- }
46
- const NATS_URL = resolveNatsUrl();
30
+ const { natsConnectOpts } = require('../lib/nats-resolve');
47
31
  const SHARED_DIR = path.join(os.homedir(), 'openclaw', 'shared');
48
32
  const LOCAL_NODE = os.hostname().toLowerCase().replace(/[^a-z0-9-]/g, '-');
49
33
  const sc = StringCodec();
@@ -88,27 +72,16 @@ function remoteNode() {
88
72
 
89
73
  // ─── Exec safety ─────────────────────────────────────
90
74
 
91
- const DESTRUCTIVE_PATTERNS = [
92
- /\brm\s+(-[a-zA-Z]*)?r[a-zA-Z]*f/, // rm -rf, rm -fr, rm --recursive --force
93
- /\brm\s+(-[a-zA-Z]*)?f[a-zA-Z]*r/, // rm -fr variants
94
- /\bmkfs\b/, // format filesystem
95
- /\bdd\s+.*of=/, // raw disk write
96
- /\b>\s*\/dev\/[sh]d/, // write to raw device
97
- /\bcurl\b.*\|\s*(ba)?sh/, // curl pipe to shell
98
- /\bwget\b.*\|\s*(ba)?sh/, // wget pipe to shell
99
- /\bchmod\s+(-[a-zA-Z]*\s+)?777\s+\//, // chmod 777 on root paths
100
- /\b:(){ :\|:& };:/, // fork bomb
101
- ];
75
+ const { checkDestructivePatterns } = require('../lib/exec-safety');
102
76
 
103
77
  function checkExecSafety(command) {
104
- for (const pattern of DESTRUCTIVE_PATTERNS) {
105
- if (pattern.test(command)) {
106
- console.error(`BLOCKED: Command matches destructive pattern.`);
107
- console.error(` Command: ${command}`);
108
- console.error(` Pattern: ${pattern}`);
109
- console.error(`\nIf this is intentional, SSH into the node and run it directly.`);
110
- process.exit(1);
111
- }
78
+ const result = checkDestructivePatterns(command);
79
+ if (result.blocked) {
80
+ console.error(`BLOCKED: Command matches destructive pattern.`);
81
+ console.error(` Command: ${command}`);
82
+ console.error(` Pattern: ${result.pattern}`);
83
+ console.error(`\nIf this is intentional, SSH into the node and run it directly.`);
84
+ process.exit(1);
112
85
  }
113
86
  }
114
87
 
@@ -118,10 +91,11 @@ function checkExecSafety(command) {
118
91
  * Connect to NATS with a short timeout (this is a CLI tool, not a daemon).
119
92
  */
120
93
  async function natsConnect() {
94
+ const opts = natsConnectOpts();
121
95
  try {
122
- return await connect({ servers: NATS_URL, timeout: 5000 });
96
+ return await connect({ ...opts, timeout: 5000 });
123
97
  } catch (err) {
124
- console.error(`Error: Cannot connect to NATS at ${NATS_URL}`);
98
+ console.error(`Error: Cannot connect to NATS at ${opts.servers}`);
125
99
  console.error(`Is the NATS server running? Is Tailscale connected?`);
126
100
  process.exit(1);
127
101
  }
@@ -891,7 +865,7 @@ async function cmdPlan(args) {
891
865
  }
892
866
 
893
867
  // Submit to mesh via NATS
894
- const nc = await connect({ servers: NATS_URL, timeout: 5000 });
868
+ const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
895
869
  try {
896
870
  const reply = await nc.request(
897
871
  'mesh.plans.create',
@@ -919,7 +893,7 @@ async function cmdPlan(args) {
919
893
  if (args[i] === '--status' && args[i + 1]) { statusFilter = args[++i]; }
920
894
  }
921
895
 
922
- const nc = await connect({ servers: NATS_URL, timeout: 5000 });
896
+ const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
923
897
  try {
924
898
  const payload = statusFilter ? { status: statusFilter } : {};
925
899
  const reply = await nc.request(
@@ -952,7 +926,7 @@ async function cmdPlan(args) {
952
926
  process.exit(1);
953
927
  }
954
928
 
955
- const nc = await connect({ servers: NATS_URL, timeout: 5000 });
929
+ const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
956
930
  try {
957
931
  const reply = await nc.request(
958
932
  'mesh.plans.get',
@@ -1050,7 +1024,7 @@ async function cmdPlan(args) {
1050
1024
  process.exit(1);
1051
1025
  }
1052
1026
 
1053
- const nc = await connect({ servers: NATS_URL, timeout: 5000 });
1027
+ const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
1054
1028
  try {
1055
1029
  const reply = await nc.request(
1056
1030
  'mesh.plans.approve',
@@ -1079,7 +1053,7 @@ async function cmdPlan(args) {
1079
1053
  process.exit(1);
1080
1054
  }
1081
1055
 
1082
- const nc = await connect({ servers: NATS_URL, timeout: 5000 });
1056
+ const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
1083
1057
  try {
1084
1058
  const reply = await nc.request(
1085
1059
  'mesh.plans.abort',
package/install.sh CHANGED
@@ -770,8 +770,9 @@ else
770
770
  fi
771
771
 
772
772
  if [ "$OS" = "macos" ]; then
773
- TEMPLATE="$LAUNCHD_TEMPLATES/ai.openclaw.${SVC_NAME}.plist"
774
- DEST="$LAUNCHD_DEST/ai.openclaw.${SVC_NAME}.plist"
773
+ LAUNCHD_SVC_NAME="${SVC_NAME#openclaw-}"
774
+ TEMPLATE="$LAUNCHD_TEMPLATES/ai.openclaw.${LAUNCHD_SVC_NAME}.plist"
775
+ DEST="$LAUNCHD_DEST/ai.openclaw.${LAUNCHD_SVC_NAME}.plist"
775
776
 
776
777
  if [ ! -f "$TEMPLATE" ]; then
777
778
  warn " Template not found: $TEMPLATE"
package/lib/agent-activity.js CHANGED
@@ -147,7 +147,7 @@ async function readLastEntry(filePath) {
147
147
  } else {
148
148
  const fh = await open(filePath, 'r');
149
149
  try {
150
- const buffer = Buffer.allocUnsafe(chunkSize);
150
+ const buffer = Buffer.alloc(chunkSize);
151
151
  await fh.read(buffer, 0, chunkSize, offset);
152
152
  content = buffer.toString('utf-8');
153
153
  } finally {
@@ -195,7 +195,7 @@ async function parseJsonlTail(filePath, maxBytes = 131072) {
195
195
  const fh = await open(filePath, 'r');
196
196
  try {
197
197
  const length = size - offset;
198
- const buffer = Buffer.allocUnsafe(length);
198
+ const buffer = Buffer.alloc(length);
199
199
  await fh.read(buffer, 0, length, offset);
200
200
  content = buffer.toString('utf-8');
201
201
  } finally {