openclaw-node-harness 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/lane-watchdog.js +54 -23
- package/bin/mesh-agent.js +49 -18
- package/bin/mesh-bridge.js +3 -2
- package/bin/mesh-deploy.js +4 -0
- package/bin/mesh-health-publisher.js +41 -1
- package/bin/mesh-task-daemon.js +14 -4
- package/bin/mesh.js +17 -43
- package/install.sh +3 -2
- package/lib/agent-activity.js +2 -2
- package/lib/exec-safety.js +163 -0
- package/lib/kanban-io.js +20 -33
- package/lib/llm-providers.js +27 -0
- package/lib/mcp-knowledge/core.mjs +7 -5
- package/lib/mcp-knowledge/server.mjs +8 -1
- package/lib/mesh-collab.js +274 -250
- package/lib/mesh-harness.js +6 -0
- package/lib/mesh-plans.js +84 -45
- package/lib/mesh-tasks.js +113 -81
- package/lib/nats-resolve.js +4 -4
- package/lib/pre-compression-flush.mjs +2 -0
- package/lib/session-store.mjs +6 -3
- package/mission-control/package-lock.json +4188 -3698
- package/mission-control/package.json +2 -2
- package/mission-control/src/app/api/diagnostics/route.ts +8 -0
- package/mission-control/src/app/api/diagnostics/test-runner/route.ts +8 -0
- package/mission-control/src/app/api/memory/graph/route.ts +34 -18
- package/mission-control/src/app/api/memory/search/route.ts +9 -5
- package/mission-control/src/app/api/mesh/identity/route.ts +13 -5
- package/mission-control/src/app/api/mesh/nodes/route.ts +8 -0
- package/mission-control/src/app/api/settings/gateway/route.ts +62 -0
- package/mission-control/src/app/api/souls/[id]/evolution/route.ts +49 -12
- package/mission-control/src/app/api/souls/[id]/prompt/route.ts +7 -1
- package/mission-control/src/app/api/souls/[id]/propagate/route.ts +24 -5
- package/mission-control/src/app/api/souls/route.ts +6 -4
- package/mission-control/src/app/api/tasks/[id]/handoff/route.ts +7 -1
- package/mission-control/src/app/api/tasks/[id]/route.ts +20 -4
- package/mission-control/src/app/api/tasks/route.ts +68 -9
- package/mission-control/src/app/api/workspace/read/route.ts +11 -0
- package/mission-control/src/lib/config.ts +11 -2
- package/mission-control/src/lib/db/index.ts +16 -1
- package/mission-control/src/lib/memory/extract.ts +2 -1
- package/mission-control/src/lib/memory/retrieval.ts +3 -2
- package/mission-control/src/lib/sync/tasks.ts +4 -1
- package/mission-control/src/middleware.ts +82 -0
- package/package.json +1 -1
- package/services/launchd/ai.openclaw.lane-watchdog.plist +1 -1
- package/services/launchd/ai.openclaw.log-rotate.plist +11 -0
- package/services/launchd/ai.openclaw.mesh-agent.plist +4 -0
- package/services/launchd/ai.openclaw.mesh-deploy-listener.plist +4 -0
- package/services/launchd/ai.openclaw.mesh-health-publisher.plist +4 -0
- package/services/launchd/ai.openclaw.mission-control.plist +5 -4
- package/uninstall.sh +37 -9
package/bin/lane-watchdog.js
CHANGED
|
@@ -37,6 +37,11 @@ let lastInterventionAt = 0;
|
|
|
37
37
|
let logWatcher = null;
|
|
38
38
|
let errWatcher = null;
|
|
39
39
|
|
|
40
|
+
// Incident log dedup: suppress identical messages within 60s
|
|
41
|
+
let lastIncidentMsg = '';
|
|
42
|
+
let lastIncidentAt = 0;
|
|
43
|
+
let suppressedCount = 0;
|
|
44
|
+
|
|
40
45
|
// Track detected events
|
|
41
46
|
const events = {
|
|
42
47
|
agentTimeout: null, // timestamp of last "embedded run timeout"
|
|
@@ -45,6 +50,22 @@ const events = {
|
|
|
45
50
|
|
|
46
51
|
// --- Helpers ---
|
|
47
52
|
function log(msg) {
|
|
53
|
+
const now = Date.now();
|
|
54
|
+
// Dedup: suppress identical messages within 60s
|
|
55
|
+
if (msg === lastIncidentMsg && (now - lastIncidentAt) < 60_000) {
|
|
56
|
+
suppressedCount++;
|
|
57
|
+
return;
|
|
58
|
+
}
|
|
59
|
+
// If we suppressed duplicates, emit a summary before the new message
|
|
60
|
+
if (suppressedCount > 0) {
|
|
61
|
+
const summaryLine = `${new Date().toISOString()} [lane-watchdog] (suppressed ${suppressedCount} duplicate message(s))`;
|
|
62
|
+
console.log(summaryLine);
|
|
63
|
+
try { fs.appendFileSync(INCIDENT_LOG, summaryLine + '\n'); } catch { /* best effort */ }
|
|
64
|
+
}
|
|
65
|
+
lastIncidentMsg = msg;
|
|
66
|
+
lastIncidentAt = now;
|
|
67
|
+
suppressedCount = 0;
|
|
68
|
+
|
|
48
69
|
const ts = new Date().toISOString();
|
|
49
70
|
const line = `${ts} [lane-watchdog] ${msg}`;
|
|
50
71
|
console.log(line);
|
|
@@ -172,29 +193,39 @@ function tailLog(filePath, label) {
|
|
|
172
193
|
|
|
173
194
|
const watcher = fs.watch(filePath, { persistent: true }, () => {
|
|
174
195
|
try {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
196
|
+
// Read from current fileSize to EOF — avoid TOCTOU race by not
|
|
197
|
+
// pre-checking stat.size. createReadStream with just `start` reads
|
|
198
|
+
// to whatever EOF is when the read finishes, then we update fileSize from
|
|
199
|
+
// the bytes actually read.
|
|
200
|
+
const stream = fs.createReadStream(filePath, {
|
|
201
|
+
start: fileSize,
|
|
202
|
+
encoding: 'utf8'
|
|
203
|
+
});
|
|
204
|
+
let buffer = '';
|
|
205
|
+
let bytesRead = 0;
|
|
206
|
+
stream.on('data', chunk => { buffer += chunk; bytesRead += Buffer.byteLength(chunk, 'utf8'); });
|
|
207
|
+
stream.on('end', () => {
|
|
208
|
+
if (bytesRead === 0) return; // no new data
|
|
209
|
+
const lines = buffer.split('\n').filter(Boolean);
|
|
210
|
+
for (const line of lines) {
|
|
211
|
+
parseLine(line);
|
|
212
|
+
}
|
|
213
|
+
fileSize += bytesRead;
|
|
214
|
+
});
|
|
215
|
+
stream.on('error', (err) => {
|
|
216
|
+
if (err.code === 'ENOENT') {
|
|
217
|
+
// File was deleted/rotated — reset position
|
|
218
|
+
fileSize = 0;
|
|
219
|
+
} else {
|
|
220
|
+
log(`ERROR: reading ${label}: ${err.message}`);
|
|
221
|
+
}
|
|
222
|
+
});
|
|
223
|
+
} catch (err) {
|
|
224
|
+
if (err.code === 'ENOENT') {
|
|
178
225
|
fileSize = 0;
|
|
226
|
+
} else {
|
|
227
|
+
log(`ERROR: reading ${label}: ${err.message}`);
|
|
179
228
|
}
|
|
180
|
-
if (stat.size > fileSize) {
|
|
181
|
-
const stream = fs.createReadStream(filePath, {
|
|
182
|
-
start: fileSize,
|
|
183
|
-
end: stat.size,
|
|
184
|
-
encoding: 'utf8'
|
|
185
|
-
});
|
|
186
|
-
let buffer = '';
|
|
187
|
-
stream.on('data', chunk => { buffer += chunk; });
|
|
188
|
-
stream.on('end', () => {
|
|
189
|
-
const lines = buffer.split('\n').filter(Boolean);
|
|
190
|
-
for (const line of lines) {
|
|
191
|
-
parseLine(line);
|
|
192
|
-
}
|
|
193
|
-
fileSize = stat.size;
|
|
194
|
-
});
|
|
195
|
-
}
|
|
196
|
-
} catch (err) {
|
|
197
|
-
log(`ERROR: reading ${label}: ${err.message}`);
|
|
198
229
|
}
|
|
199
230
|
});
|
|
200
231
|
|
|
@@ -220,8 +251,8 @@ function main() {
|
|
|
220
251
|
for (const sig of ['SIGTERM', 'SIGINT']) {
|
|
221
252
|
process.on(sig, () => {
|
|
222
253
|
log(`Received ${sig}, shutting down`);
|
|
223
|
-
if (logWatcher)
|
|
224
|
-
if (errWatcher)
|
|
254
|
+
if (logWatcher) logWatcher.close();
|
|
255
|
+
if (errWatcher) errWatcher.close();
|
|
225
256
|
process.exit(0);
|
|
226
257
|
});
|
|
227
258
|
}
|
package/bin/mesh-agent.js
CHANGED
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
*/
|
|
37
37
|
|
|
38
38
|
const { connect, StringCodec } = require('nats');
|
|
39
|
-
const { spawn, execSync } = require('child_process');
|
|
39
|
+
const { spawn, execSync, execFileSync } = require('child_process');
|
|
40
40
|
const os = require('os');
|
|
41
41
|
const path = require('path');
|
|
42
42
|
const fs = require('fs');
|
|
@@ -46,7 +46,7 @@ const { loadHarnessRules, runMeshHarness, runPostCommitValidation, formatHarness
|
|
|
46
46
|
const { findRole, formatRoleForPrompt } = require('../lib/role-loader');
|
|
47
47
|
|
|
48
48
|
const sc = StringCodec();
|
|
49
|
-
const { NATS_URL } = require('../lib/nats-resolve');
|
|
49
|
+
const { NATS_URL, natsConnectOpts } = require('../lib/nats-resolve');
|
|
50
50
|
const { resolveProvider, resolveModel } = require('../lib/llm-providers');
|
|
51
51
|
const NODE_ID = process.env.MESH_NODE_ID || os.hostname().toLowerCase().replace(/[^a-z0-9-]/g, '-');
|
|
52
52
|
const POLL_INTERVAL = parseInt(process.env.MESH_POLL_INTERVAL || '15000'); // 15s between polls
|
|
@@ -198,8 +198,9 @@ function buildInitialPrompt(task) {
|
|
|
198
198
|
}
|
|
199
199
|
|
|
200
200
|
if (task.metric) {
|
|
201
|
+
const safeMetric = isAllowedMetric(task.metric) ? task.metric : '[metric command filtered for security]';
|
|
201
202
|
parts.push(`## Verification`);
|
|
202
|
-
parts.push(`Run this command to check your work: \`${
|
|
203
|
+
parts.push(`Run this command to check your work: \`${safeMetric}\``);
|
|
203
204
|
parts.push(`Your changes are only accepted if this command exits with code 0.`);
|
|
204
205
|
parts.push('');
|
|
205
206
|
}
|
|
@@ -224,7 +225,8 @@ function buildInitialPrompt(task) {
|
|
|
224
225
|
parts.push('- Make minimal, focused changes. Do not add scope beyond what is asked.');
|
|
225
226
|
parts.push('- If you hit a blocker you cannot resolve, explain what is blocking you clearly.');
|
|
226
227
|
if (task.metric) {
|
|
227
|
-
|
|
228
|
+
const safeMetric = isAllowedMetric(task.metric) ? task.metric : '[metric command filtered for security]';
|
|
229
|
+
parts.push(`- After making changes, run \`${safeMetric}\` to verify.`);
|
|
228
230
|
parts.push('- If verification fails, analyze the failure and iterate on your approach.');
|
|
229
231
|
}
|
|
230
232
|
|
|
@@ -264,8 +266,9 @@ function buildRetryPrompt(task, previousAttempts, attemptNumber) {
|
|
|
264
266
|
}
|
|
265
267
|
|
|
266
268
|
if (task.metric) {
|
|
269
|
+
const safeMetric = isAllowedMetric(task.metric) ? task.metric : '[metric command filtered for security]';
|
|
267
270
|
parts.push(`## Verification`);
|
|
268
|
-
parts.push(`Run: \`${
|
|
271
|
+
parts.push(`Run: \`${safeMetric}\``);
|
|
269
272
|
parts.push(`Must exit code 0.`);
|
|
270
273
|
parts.push('');
|
|
271
274
|
}
|
|
@@ -289,7 +292,8 @@ function buildRetryPrompt(task, previousAttempts, attemptNumber) {
|
|
|
289
292
|
parts.push('- Read the relevant files before making changes.');
|
|
290
293
|
parts.push('- Make minimal, focused changes.');
|
|
291
294
|
if (task.metric) {
|
|
292
|
-
|
|
295
|
+
const safeMetric = isAllowedMetric(task.metric) ? task.metric : '[metric command filtered for security]';
|
|
296
|
+
parts.push(`- Run \`${safeMetric}\` to verify before finishing.`);
|
|
293
297
|
}
|
|
294
298
|
|
|
295
299
|
return parts.join('\n');
|
|
@@ -305,6 +309,9 @@ const WORKTREE_BASE = process.env.MESH_WORKTREE_BASE || path.join(process.env.HO
|
|
|
305
309
|
* On failure, returns null (falls back to shared workspace).
|
|
306
310
|
*/
|
|
307
311
|
function createWorktree(taskId) {
|
|
312
|
+
if (!/^[\w][\w.-]{0,127}$/.test(taskId)) {
|
|
313
|
+
throw new Error(`Invalid taskId: contains unsafe characters`);
|
|
314
|
+
}
|
|
308
315
|
const worktreePath = path.join(WORKTREE_BASE, taskId);
|
|
309
316
|
const branch = `mesh/${taskId}`;
|
|
310
317
|
|
|
@@ -315,19 +322,19 @@ function createWorktree(taskId) {
|
|
|
315
322
|
if (fs.existsSync(worktreePath)) {
|
|
316
323
|
log(`Cleaning stale worktree: ${worktreePath}`);
|
|
317
324
|
try {
|
|
318
|
-
|
|
325
|
+
execFileSync('git', ['worktree', 'remove', '--force', worktreePath], { cwd: WORKSPACE, timeout: 10000 });
|
|
319
326
|
} catch {
|
|
320
327
|
// If git worktree remove fails, manually clean up
|
|
321
328
|
fs.rmSync(worktreePath, { recursive: true, force: true });
|
|
322
329
|
}
|
|
323
330
|
// Also clean up the branch if it exists
|
|
324
331
|
try {
|
|
325
|
-
|
|
332
|
+
execFileSync('git', ['branch', '-D', branch], { cwd: WORKSPACE, timeout: 5000, stdio: 'ignore' });
|
|
326
333
|
} catch { /* branch may not exist */ }
|
|
327
334
|
}
|
|
328
335
|
|
|
329
336
|
// Create new worktree branched off HEAD
|
|
330
|
-
|
|
337
|
+
execFileSync('git', ['worktree', 'add', '-b', branch, worktreePath, 'HEAD'], {
|
|
331
338
|
cwd: WORKSPACE,
|
|
332
339
|
timeout: 30000,
|
|
333
340
|
stdio: 'pipe',
|
|
@@ -375,7 +382,7 @@ function commitAndMergeWorktree(worktreePath, taskId, summary) {
|
|
|
375
382
|
log(`WARNING: commit message doesn't follow conventional format: "${commitMsg}"`);
|
|
376
383
|
}
|
|
377
384
|
|
|
378
|
-
|
|
385
|
+
execFileSync('git', ['commit', '-m', commitMsg], {
|
|
379
386
|
cwd: worktreePath, timeout: 10000, stdio: 'pipe',
|
|
380
387
|
});
|
|
381
388
|
|
|
@@ -391,7 +398,7 @@ function commitAndMergeWorktree(worktreePath, taskId, summary) {
|
|
|
391
398
|
const mergeMsg = `Merge ${branch}: ${taskId}`;
|
|
392
399
|
for (let attempt = 0; attempt < 2; attempt++) {
|
|
393
400
|
try {
|
|
394
|
-
|
|
401
|
+
execFileSync('git', ['merge', '--no-ff', branch, '-m', mergeMsg], {
|
|
395
402
|
cwd: WORKSPACE, timeout: 30000, stdio: 'pipe',
|
|
396
403
|
});
|
|
397
404
|
log(`Merged ${branch} into main${attempt > 0 ? ' (retry succeeded)' : ''}`);
|
|
@@ -429,13 +436,13 @@ function cleanupWorktree(worktreePath, keep = false) {
|
|
|
429
436
|
const branch = `mesh/${taskId}`;
|
|
430
437
|
|
|
431
438
|
try {
|
|
432
|
-
|
|
439
|
+
execFileSync('git', ['worktree', 'remove', '--force', worktreePath], {
|
|
433
440
|
cwd: WORKSPACE,
|
|
434
441
|
timeout: 10000,
|
|
435
442
|
stdio: 'pipe',
|
|
436
443
|
});
|
|
437
444
|
if (!keep) {
|
|
438
|
-
|
|
445
|
+
execFileSync('git', ['branch', '-D', branch], {
|
|
439
446
|
cwd: WORKSPACE,
|
|
440
447
|
timeout: 5000,
|
|
441
448
|
stdio: 'ignore',
|
|
@@ -507,9 +514,10 @@ function runLLM(prompt, task, worktreePath) {
|
|
|
507
514
|
|
|
508
515
|
let stdout = '';
|
|
509
516
|
let stderr = '';
|
|
517
|
+
const MAX_OUTPUT = 1024 * 1024; // 1MB cap
|
|
510
518
|
|
|
511
|
-
child.stdout.on('data', (d) => { stdout += d.toString(); });
|
|
512
|
-
child.stderr.on('data', (d) => { stderr += d.toString(); });
|
|
519
|
+
child.stdout.on('data', (d) => { if (stdout.length < MAX_OUTPUT) stdout += d.toString().slice(0, MAX_OUTPUT - stdout.length); });
|
|
520
|
+
child.stderr.on('data', (d) => { if (stderr.length < MAX_OUTPUT) stderr += d.toString().slice(0, MAX_OUTPUT - stderr.length); });
|
|
513
521
|
|
|
514
522
|
child.on('close', (code) => {
|
|
515
523
|
clearInterval(heartbeatTimer);
|
|
@@ -525,10 +533,25 @@ function runLLM(prompt, task, worktreePath) {
|
|
|
525
533
|
|
|
526
534
|
// ── Metric Evaluation ─────────────────────────────────
|
|
527
535
|
|
|
536
|
+
// Command prefixes a task metric may start with. The match is a plain
// string-prefix test, so e.g. 'make test' also admits 'make test-unit'.
const ALLOWED_METRIC_PREFIXES = [
  'npm test', 'npm run', 'node ', 'pytest', 'cargo test',
  'go test', 'make test', 'jest', 'vitest', 'mocha',
];

// Shell metacharacters that allow chaining, substitution or redirection.
const METRIC_SHELL_META = /[\n\r\0;`]|\$\(|\|\||&&|<\(|>\(|<<|>>|>\s|\|/;
// `node` immediately followed by an inline-code / preload flag.
const METRIC_NODE_INLINE = /\bnode\s+(-e\b|--eval\b|-p\b|--print\b|-r\b|--require\b|--import\b)/;
// Plain file-descriptor duplication such as `2>&1`; harmless, so it is
// removed before the stricter `&` / `>` scan below.
const METRIC_FD_DUP = /\d*>&\d+(?=\s|$)/g;
// Any node flag that evaluates or preloads code: short clusters containing
// e/p/r (`-e`, `-pe`, `-r`) and the long forms, with or without `=value`.
const METRIC_NODE_CODE_FLAG = /^(?:-[a-zA-Z]*[epr]|--(?:eval|print|require|import)(?:=|$))/;

/**
 * Decide whether a task-supplied metric command may be executed.
 *
 * The metric is run through a shell, so this is a fail-closed filter: a
 * command is accepted only if it contains no shell control operators, no
 * output redirection (other than fd duplication like `2>&1`), no node
 * inline-code/preload flags anywhere after a `node` token, and begins with
 * one of ALLOWED_METRIC_PREFIXES. This is defense in depth, not a sandbox.
 *
 * @param {string} cmd - Metric command taken from the task definition.
 * @returns {boolean} true when the command passes every check.
 */
function isAllowedMetric(cmd) {
  // Non-string or empty input can never be a valid command.
  if (typeof cmd !== 'string' || cmd.trim() === '') return false;

  if (METRIC_SHELL_META.test(cmd)) return false;
  if (METRIC_NODE_INLINE.test(cmd)) return false;

  // A lone `&` backgrounds the metric and starts a second command, and
  // `>file` / `&>file` overwrite files; neither is caught by the checks above.
  const withoutFdDup = cmd.replace(METRIC_FD_DUP, '');
  if (/[&>]/.test(withoutFdDup)) return false;

  // Flags may be quoted (`node "-e" ...`), clustered (`-pe`) or preceded by
  // other flags (`node --no-warnings -e ...`), so inspect every token that
  // follows `node` after stripping quote and backslash characters.
  const tokens = cmd.split(/\s+/).map((token) => token.replace(/['"\\]/g, ''));
  const nodeAt = tokens.indexOf('node');
  if (nodeAt !== -1 && tokens.slice(nodeAt + 1).some((token) => METRIC_NODE_CODE_FLAG.test(token))) {
    return false;
  }

  return ALLOWED_METRIC_PREFIXES.some((prefix) => cmd.startsWith(prefix));
}
|
|
546
|
+
|
|
528
547
|
/**
|
|
529
548
|
* Run the task's metric command. Returns { passed, output }.
|
|
530
549
|
*/
|
|
531
550
|
function evaluateMetric(metric, cwd) {
|
|
551
|
+
if (!isAllowedMetric(metric)) {
|
|
552
|
+
log(`WARNING: Metric command blocked by security filter: ${metric}`);
|
|
553
|
+
return Promise.resolve({ passed: false, output: 'Metric command blocked by security filter' });
|
|
554
|
+
}
|
|
532
555
|
return new Promise((resolve) => {
|
|
533
556
|
const child = spawn('bash', ['-c', metric], {
|
|
534
557
|
cwd: cwd || WORKSPACE,
|
|
@@ -1011,6 +1034,9 @@ async function executeCollabTask(task) {
|
|
|
1011
1034
|
// Create worktree for isolation
|
|
1012
1035
|
const worktreePath = createWorktree(`${task.task_id}-${NODE_ID}`);
|
|
1013
1036
|
const taskDir = worktreePath || WORKSPACE;
|
|
1037
|
+
if (!worktreePath) {
|
|
1038
|
+
log(`WARNING: Collab task ${task.task_id} running in shared workspace — isolation not achieved`);
|
|
1039
|
+
}
|
|
1014
1040
|
|
|
1015
1041
|
// Periodic session heartbeat — detects abort/completion while waiting for rounds
|
|
1016
1042
|
const sessionHeartbeat = setInterval(async () => {
|
|
@@ -1169,9 +1195,13 @@ async function executeTask(task) {
|
|
|
1169
1195
|
// Create isolated worktree for this task (falls back to shared workspace on failure)
|
|
1170
1196
|
const worktreePath = createWorktree(task.task_id);
|
|
1171
1197
|
const taskDir = worktreePath || WORKSPACE;
|
|
1198
|
+
const workspaceIsolated = !!worktreePath;
|
|
1199
|
+
if (!workspaceIsolated) {
|
|
1200
|
+
log(`WARNING: Task ${task.task_id} running in shared workspace — isolation not achieved`);
|
|
1201
|
+
}
|
|
1172
1202
|
|
|
1173
|
-
// Signal start
|
|
1174
|
-
await natsRequest('mesh.tasks.start', { task_id: task.task_id });
|
|
1203
|
+
// Signal start (include isolation status so daemon knows)
|
|
1204
|
+
await natsRequest('mesh.tasks.start', { task_id: task.task_id, workspace_isolated: workspaceIsolated });
|
|
1175
1205
|
writeAgentState('working', task.task_id);
|
|
1176
1206
|
log(`Started: ${task.task_id} (dir: ${worktreePath ? 'worktree' : 'workspace'})`);
|
|
1177
1207
|
|
|
@@ -1394,8 +1424,9 @@ async function main() {
|
|
|
1394
1424
|
log(` Poll interval: ${POLL_INTERVAL / 1000}s`);
|
|
1395
1425
|
log(` Mode: ${ONCE ? 'single task' : 'continuous'} ${DRY_RUN ? '(dry run)' : ''}`);
|
|
1396
1426
|
|
|
1427
|
+
const natsOpts = natsConnectOpts();
|
|
1397
1428
|
nc = await connect({
|
|
1398
|
-
|
|
1429
|
+
...natsOpts,
|
|
1399
1430
|
timeout: 5000,
|
|
1400
1431
|
reconnect: true,
|
|
1401
1432
|
maxReconnectAttempts: 10,
|
package/bin/mesh-bridge.js
CHANGED
|
@@ -21,7 +21,7 @@ const path = require('path');
|
|
|
21
21
|
const { readTasks, updateTaskInPlace, isoTimestamp, ACTIVE_TASKS_PATH } = require('../lib/kanban-io');
|
|
22
22
|
|
|
23
23
|
const sc = StringCodec();
|
|
24
|
-
const { NATS_URL } = require('../lib/nats-resolve');
|
|
24
|
+
const { NATS_URL, natsConnectOpts } = require('../lib/nats-resolve');
|
|
25
25
|
const DISPATCH_INTERVAL = parseInt(process.env.BRIDGE_DISPATCH_INTERVAL || '10000'); // 10s
|
|
26
26
|
const LOG_DIR = path.join(process.env.HOME, '.openclaw', 'workspace', 'memory', 'mesh-logs');
|
|
27
27
|
const WORKSPACE = path.join(process.env.HOME, '.openclaw', 'workspace');
|
|
@@ -726,8 +726,9 @@ async function main() {
|
|
|
726
726
|
log(` Dispatch interval: ${DISPATCH_INTERVAL / 1000}s`);
|
|
727
727
|
log(` Mode: ${DRY_RUN ? 'dry run' : 'live'}`);
|
|
728
728
|
|
|
729
|
+
const natsOpts = natsConnectOpts();
|
|
729
730
|
nc = await connect({
|
|
730
|
-
|
|
731
|
+
...natsOpts,
|
|
731
732
|
timeout: 5000,
|
|
732
733
|
reconnect: true,
|
|
733
734
|
maxReconnectAttempts: 10,
|
package/bin/mesh-deploy.js
CHANGED
|
@@ -47,6 +47,10 @@ const crypto = require('crypto');
|
|
|
47
47
|
const IS_MAC = os.platform() === 'darwin';
|
|
48
48
|
const HOME = os.homedir();
|
|
49
49
|
const DEPLOY_BRANCH = process.env.OPENCLAW_DEPLOY_BRANCH || 'main';
|
|
50
|
+
if (!/^[a-zA-Z0-9._\/-]+$/.test(DEPLOY_BRANCH)) {
|
|
51
|
+
console.error(`Invalid DEPLOY_BRANCH: ${DEPLOY_BRANCH}`);
|
|
52
|
+
process.exit(1);
|
|
53
|
+
}
|
|
50
54
|
const REPO_DIR = process.env.OPENCLAW_REPO_DIR || path.join(HOME, 'openclaw');
|
|
51
55
|
|
|
52
56
|
// KNOWN ISSUE: Two-directory problem
|
|
@@ -36,6 +36,12 @@ const IS_MAC = os.platform() === "darwin";
|
|
|
36
36
|
|
|
37
37
|
const { ROLE_COMPONENTS } = require('../lib/mesh-roles');
|
|
38
38
|
|
|
39
|
+
// ── Circuit Breaker State ───────────────────────────────────────────────
|
|
40
|
+
let consecutiveFailures = 0;
|
|
41
|
+
let skipTicksRemaining = 0;
|
|
42
|
+
let lastErrorMsg = '';
|
|
43
|
+
let lastErrorRepeatCount = 0;
|
|
44
|
+
|
|
39
45
|
// ── Health Gathering ─────────────────────────────────────────────────────
|
|
40
46
|
// All the expensive execSync calls happen here, on our own schedule.
|
|
41
47
|
// No request timeout to race against.
|
|
@@ -226,11 +232,45 @@ async function main() {
|
|
|
226
232
|
|
|
227
233
|
// Publish immediately, then every interval
|
|
228
234
|
async function publish() {
|
|
235
|
+
// Circuit breaker: skip ticks during backoff
|
|
236
|
+
if (skipTicksRemaining > 0) {
|
|
237
|
+
skipTicksRemaining--;
|
|
238
|
+
return;
|
|
239
|
+
}
|
|
240
|
+
|
|
229
241
|
try {
|
|
230
242
|
const health = gatherHealth();
|
|
231
243
|
await kv.put(NODE_ID, sc.encode(JSON.stringify(health)));
|
|
244
|
+
// Reset on success
|
|
245
|
+
if (consecutiveFailures > 0) {
|
|
246
|
+
console.log(`[health-publisher] recovered after ${consecutiveFailures} consecutive failures`);
|
|
247
|
+
}
|
|
248
|
+
consecutiveFailures = 0;
|
|
249
|
+
lastErrorMsg = '';
|
|
250
|
+
lastErrorRepeatCount = 0;
|
|
232
251
|
} catch (err) {
|
|
233
|
-
|
|
252
|
+
consecutiveFailures++;
|
|
253
|
+
const msg = err.message;
|
|
254
|
+
|
|
255
|
+
// Log dedup: after 3 identical consecutive errors, log every 10th
|
|
256
|
+
if (msg === lastErrorMsg) {
|
|
257
|
+
lastErrorRepeatCount++;
|
|
258
|
+
if (lastErrorRepeatCount === 3) {
|
|
259
|
+
console.error(`[health-publisher] suppressing repeated errors (${lastErrorRepeatCount} occurrences): ${msg}`);
|
|
260
|
+
} else if (lastErrorRepeatCount > 3 && lastErrorRepeatCount % 10 === 0) {
|
|
261
|
+
console.error(`[health-publisher] suppressing repeated errors (${lastErrorRepeatCount} occurrences): ${msg}`);
|
|
262
|
+
}
|
|
263
|
+
// Silently skip logs between dedup thresholds
|
|
264
|
+
} else {
|
|
265
|
+
lastErrorMsg = msg;
|
|
266
|
+
lastErrorRepeatCount = 1;
|
|
267
|
+
console.error("[health-publisher] publish failed:", msg);
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
// Exponential backoff: skip 2^min(N,6) ticks (max ~64 ticks / ~16 min at 15s)
|
|
271
|
+
const backoffTicks = Math.pow(2, Math.min(consecutiveFailures, 6));
|
|
272
|
+
skipTicksRemaining = backoffTicks;
|
|
273
|
+
console.error(`[health-publisher] backoff: skipping next ${backoffTicks} ticks (failures=${consecutiveFailures})`);
|
|
234
274
|
}
|
|
235
275
|
}
|
|
236
276
|
|
package/bin/mesh-task-daemon.js
CHANGED
|
@@ -46,7 +46,7 @@ const ROLE_DIRS = [
|
|
|
46
46
|
];
|
|
47
47
|
|
|
48
48
|
const sc = StringCodec();
|
|
49
|
-
const { NATS_URL } = require('../lib/nats-resolve');
|
|
49
|
+
const { NATS_URL, natsConnectOpts } = require('../lib/nats-resolve');
|
|
50
50
|
const BUDGET_CHECK_INTERVAL = 30000; // 30s
|
|
51
51
|
const STALL_MINUTES = parseInt(process.env.MESH_STALL_MINUTES || '5'); // no heartbeat for this long → stalled
|
|
52
52
|
const CIRCLING_STEP_TIMEOUT_MS = parseInt(process.env.MESH_CIRCLING_STEP_TIMEOUT_MS || String(10 * 60 * 1000)); // 10 min default
|
|
@@ -2013,7 +2013,8 @@ function cascadeFailure(plan, failedSubtaskId) {
|
|
|
2013
2013
|
async function main() {
|
|
2014
2014
|
log('Starting mesh task daemon...');
|
|
2015
2015
|
|
|
2016
|
-
|
|
2016
|
+
const natsOpts = natsConnectOpts();
|
|
2017
|
+
nc = await connect({ ...natsOpts, timeout: 5000 });
|
|
2017
2018
|
log(`Connected to NATS at ${NATS_URL}`);
|
|
2018
2019
|
|
|
2019
2020
|
// Initialize task store
|
|
@@ -2085,8 +2086,12 @@ async function main() {
|
|
|
2085
2086
|
}
|
|
2086
2087
|
|
|
2087
2088
|
// Start enforcement loops
|
|
2088
|
-
const proposalTimer = setInterval(
|
|
2089
|
-
|
|
2089
|
+
const proposalTimer = setInterval(async () => {
|
|
2090
|
+
try { await processProposals(); } catch (err) { log(`processProposals error: ${err.message}`); }
|
|
2091
|
+
}, BUDGET_CHECK_INTERVAL);
|
|
2092
|
+
const budgetTimer = setInterval(async () => {
|
|
2093
|
+
try { await enforceBudgets(); } catch (err) { log(`enforceBudgets error: ${err.message}`); }
|
|
2094
|
+
}, BUDGET_CHECK_INTERVAL);
|
|
2090
2095
|
const stallTimer = setInterval(detectStalls, BUDGET_CHECK_INTERVAL);
|
|
2091
2096
|
const recruitTimer = setInterval(checkRecruitingDeadlines, 5000); // check every 5s
|
|
2092
2097
|
const circlingStepSweepTimer = setInterval(sweepCirclingStepTimeouts, 60000); // every 60s
|
|
@@ -2106,6 +2111,11 @@ async function main() {
|
|
|
2106
2111
|
clearInterval(budgetTimer);
|
|
2107
2112
|
clearInterval(stallTimer);
|
|
2108
2113
|
clearInterval(recruitTimer);
|
|
2114
|
+
if (circlingStepSweepTimer) clearInterval(circlingStepSweepTimer);
|
|
2115
|
+
if (circlingStepTimers) {
|
|
2116
|
+
for (const timer of circlingStepTimers.values()) clearTimeout(timer);
|
|
2117
|
+
circlingStepTimers.clear();
|
|
2118
|
+
}
|
|
2109
2119
|
for (const sub of subs) sub.unsubscribe();
|
|
2110
2120
|
await nc.drain();
|
|
2111
2121
|
process.exit(0);
|
package/bin/mesh.js
CHANGED
|
@@ -27,23 +27,7 @@ const { connect, StringCodec, createInbox } = require('nats');
|
|
|
27
27
|
const fs = require('fs');
|
|
28
28
|
const path = require('path');
|
|
29
29
|
const os = require('os');
|
|
30
|
-
|
|
31
|
-
// ─── Config ──────────────────────────────────────────
|
|
32
|
-
// ── NATS URL resolution: env var → ~/.openclaw/openclaw.env → fallback IP ──
|
|
33
|
-
const NATS_FALLBACK = 'nats://100.91.131.61:4222';
|
|
34
|
-
function resolveNatsUrl() {
|
|
35
|
-
if (process.env.OPENCLAW_NATS) return process.env.OPENCLAW_NATS;
|
|
36
|
-
try {
|
|
37
|
-
const envFile = path.join(os.homedir(), '.openclaw', 'openclaw.env');
|
|
38
|
-
if (fs.existsSync(envFile)) {
|
|
39
|
-
const content = fs.readFileSync(envFile, 'utf8');
|
|
40
|
-
const match = content.match(/^\s*OPENCLAW_NATS\s*=\s*(.+)/m);
|
|
41
|
-
if (match && match[1].trim()) return match[1].trim();
|
|
42
|
-
}
|
|
43
|
-
} catch {}
|
|
44
|
-
return NATS_FALLBACK;
|
|
45
|
-
}
|
|
46
|
-
const NATS_URL = resolveNatsUrl();
|
|
30
|
+
const { natsConnectOpts } = require('../lib/nats-resolve');
|
|
47
31
|
const SHARED_DIR = path.join(os.homedir(), 'openclaw', 'shared');
|
|
48
32
|
const LOCAL_NODE = os.hostname().toLowerCase().replace(/[^a-z0-9-]/g, '-');
|
|
49
33
|
const sc = StringCodec();
|
|
@@ -88,27 +72,16 @@ function remoteNode() {
|
|
|
88
72
|
|
|
89
73
|
// ─── Exec safety ─────────────────────────────────────
|
|
90
74
|
|
|
91
|
-
const
|
|
92
|
-
/\brm\s+(-[a-zA-Z]*)?r[a-zA-Z]*f/, // rm -rf, rm -fr, rm --recursive --force
|
|
93
|
-
/\brm\s+(-[a-zA-Z]*)?f[a-zA-Z]*r/, // rm -fr variants
|
|
94
|
-
/\bmkfs\b/, // format filesystem
|
|
95
|
-
/\bdd\s+.*of=/, // raw disk write
|
|
96
|
-
/\b>\s*\/dev\/[sh]d/, // write to raw device
|
|
97
|
-
/\bcurl\b.*\|\s*(ba)?sh/, // curl pipe to shell
|
|
98
|
-
/\bwget\b.*\|\s*(ba)?sh/, // wget pipe to shell
|
|
99
|
-
/\bchmod\s+(-[a-zA-Z]*\s+)?777\s+\//, // chmod 777 on root paths
|
|
100
|
-
/\b:(){ :\|:& };:/, // fork bomb
|
|
101
|
-
];
|
|
75
|
+
const { checkDestructivePatterns } = require('../lib/exec-safety');
|
|
102
76
|
|
|
103
77
|
function checkExecSafety(command) {
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
}
|
|
78
|
+
const result = checkDestructivePatterns(command);
|
|
79
|
+
if (result.blocked) {
|
|
80
|
+
console.error(`BLOCKED: Command matches destructive pattern.`);
|
|
81
|
+
console.error(` Command: ${command}`);
|
|
82
|
+
console.error(` Pattern: ${result.pattern}`);
|
|
83
|
+
console.error(`\nIf this is intentional, SSH into the node and run it directly.`);
|
|
84
|
+
process.exit(1);
|
|
112
85
|
}
|
|
113
86
|
}
|
|
114
87
|
|
|
@@ -118,10 +91,11 @@ function checkExecSafety(command) {
|
|
|
118
91
|
* Connect to NATS with a short timeout (this is a CLI tool, not a daemon).
|
|
119
92
|
*/
|
|
120
93
|
async function natsConnect() {
|
|
94
|
+
const opts = natsConnectOpts();
|
|
121
95
|
try {
|
|
122
|
-
return await connect({
|
|
96
|
+
return await connect({ ...opts, timeout: 5000 });
|
|
123
97
|
} catch (err) {
|
|
124
|
-
console.error(`Error: Cannot connect to NATS at ${
|
|
98
|
+
console.error(`Error: Cannot connect to NATS at ${opts.servers}`);
|
|
125
99
|
console.error(`Is the NATS server running? Is Tailscale connected?`);
|
|
126
100
|
process.exit(1);
|
|
127
101
|
}
|
|
@@ -891,7 +865,7 @@ async function cmdPlan(args) {
|
|
|
891
865
|
}
|
|
892
866
|
|
|
893
867
|
// Submit to mesh via NATS
|
|
894
|
-
const nc = await connect({
|
|
868
|
+
const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
|
|
895
869
|
try {
|
|
896
870
|
const reply = await nc.request(
|
|
897
871
|
'mesh.plans.create',
|
|
@@ -919,7 +893,7 @@ async function cmdPlan(args) {
|
|
|
919
893
|
if (args[i] === '--status' && args[i + 1]) { statusFilter = args[++i]; }
|
|
920
894
|
}
|
|
921
895
|
|
|
922
|
-
const nc = await connect({
|
|
896
|
+
const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
|
|
923
897
|
try {
|
|
924
898
|
const payload = statusFilter ? { status: statusFilter } : {};
|
|
925
899
|
const reply = await nc.request(
|
|
@@ -952,7 +926,7 @@ async function cmdPlan(args) {
|
|
|
952
926
|
process.exit(1);
|
|
953
927
|
}
|
|
954
928
|
|
|
955
|
-
const nc = await connect({
|
|
929
|
+
const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
|
|
956
930
|
try {
|
|
957
931
|
const reply = await nc.request(
|
|
958
932
|
'mesh.plans.get',
|
|
@@ -1050,7 +1024,7 @@ async function cmdPlan(args) {
|
|
|
1050
1024
|
process.exit(1);
|
|
1051
1025
|
}
|
|
1052
1026
|
|
|
1053
|
-
const nc = await connect({
|
|
1027
|
+
const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
|
|
1054
1028
|
try {
|
|
1055
1029
|
const reply = await nc.request(
|
|
1056
1030
|
'mesh.plans.approve',
|
|
@@ -1079,7 +1053,7 @@ async function cmdPlan(args) {
|
|
|
1079
1053
|
process.exit(1);
|
|
1080
1054
|
}
|
|
1081
1055
|
|
|
1082
|
-
const nc = await connect({
|
|
1056
|
+
const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
|
|
1083
1057
|
try {
|
|
1084
1058
|
const reply = await nc.request(
|
|
1085
1059
|
'mesh.plans.abort',
|
package/install.sh
CHANGED
|
@@ -770,8 +770,9 @@ else
|
|
|
770
770
|
fi
|
|
771
771
|
|
|
772
772
|
if [ "$OS" = "macos" ]; then
|
|
773
|
-
|
|
774
|
-
|
|
773
|
+
LAUNCHD_SVC_NAME="${SVC_NAME#openclaw-}"
|
|
774
|
+
TEMPLATE="$LAUNCHD_TEMPLATES/ai.openclaw.${LAUNCHD_SVC_NAME}.plist"
|
|
775
|
+
DEST="$LAUNCHD_DEST/ai.openclaw.${LAUNCHD_SVC_NAME}.plist"
|
|
775
776
|
|
|
776
777
|
if [ ! -f "$TEMPLATE" ]; then
|
|
777
778
|
warn " Template not found: $TEMPLATE"
|
package/lib/agent-activity.js
CHANGED
|
@@ -147,7 +147,7 @@ async function readLastEntry(filePath) {
|
|
|
147
147
|
} else {
|
|
148
148
|
const fh = await open(filePath, 'r');
|
|
149
149
|
try {
|
|
150
|
-
const buffer = Buffer.
|
|
150
|
+
const buffer = Buffer.alloc(chunkSize);
|
|
151
151
|
await fh.read(buffer, 0, chunkSize, offset);
|
|
152
152
|
content = buffer.toString('utf-8');
|
|
153
153
|
} finally {
|
|
@@ -195,7 +195,7 @@ async function parseJsonlTail(filePath, maxBytes = 131072) {
|
|
|
195
195
|
const fh = await open(filePath, 'r');
|
|
196
196
|
try {
|
|
197
197
|
const length = size - offset;
|
|
198
|
-
const buffer = Buffer.
|
|
198
|
+
const buffer = Buffer.alloc(length);
|
|
199
199
|
await fh.read(buffer, 0, length, offset);
|
|
200
200
|
content = buffer.toString('utf-8');
|
|
201
201
|
} finally {
|