openclaw-node-harness 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/bin/lane-watchdog.js +31 -21
  2. package/bin/mesh-agent.js +11 -2
  3. package/bin/mesh-deploy.js +4 -0
  4. package/bin/mesh-task-daemon.js +9 -4
  5. package/bin/mesh.js +9 -24
  6. package/lib/exec-safety.js +60 -2
  7. package/lib/kanban-io.js +8 -5
  8. package/lib/llm-providers.js +12 -1
  9. package/lib/mesh-collab.js +8 -2
  10. package/lib/mesh-harness.js +6 -0
  11. package/lib/mesh-plans.js +20 -2
  12. package/lib/mesh-tasks.js +26 -10
  13. package/mission-control/package-lock.json +4188 -3698
  14. package/mission-control/package.json +2 -2
  15. package/mission-control/src/app/api/diagnostics/route.ts +8 -0
  16. package/mission-control/src/app/api/diagnostics/test-runner/route.ts +8 -0
  17. package/mission-control/src/app/api/memory/graph/route.ts +34 -18
  18. package/mission-control/src/app/api/memory/search/route.ts +3 -2
  19. package/mission-control/src/app/api/mesh/identity/route.ts +13 -5
  20. package/mission-control/src/app/api/mesh/nodes/route.ts +8 -0
  21. package/mission-control/src/app/api/settings/gateway/route.ts +62 -0
  22. package/mission-control/src/app/api/souls/[id]/evolution/route.ts +28 -7
  23. package/mission-control/src/app/api/souls/[id]/propagate/route.ts +10 -3
  24. package/mission-control/src/app/api/souls/route.ts +6 -4
  25. package/mission-control/src/app/api/tasks/[id]/route.ts +20 -4
  26. package/mission-control/src/app/api/tasks/route.ts +68 -9
  27. package/mission-control/src/lib/config.ts +2 -2
  28. package/mission-control/src/lib/sync/tasks.ts +4 -1
  29. package/package.json +1 -1
  30. package/services/launchd/ai.openclaw.lane-watchdog.plist +1 -1
  31. package/services/launchd/ai.openclaw.mesh-agent.plist +4 -0
  32. package/services/launchd/ai.openclaw.mission-control.plist +4 -3
package/bin/lane-watchdog.js CHANGED
@@ -193,29 +193,39 @@ function tailLog(filePath, label) {
193
193
 
194
194
  const watcher = fs.watch(filePath, { persistent: true }, () => {
195
195
  try {
196
- const stat = fs.statSync(filePath);
197
- if (stat.size < fileSize) {
198
- // Log was rotated
196
+ // Read from current fileSize to EOF — avoid TOCTOU race by not
197
+ // pre-checking stat.size. createReadStream with just `start` reads
198
+ // to the end of the file atomically, then we update fileSize from
199
+ // the bytes actually read.
200
+ const stream = fs.createReadStream(filePath, {
201
+ start: fileSize,
202
+ encoding: 'utf8'
203
+ });
204
+ let buffer = '';
205
+ let bytesRead = 0;
206
+ stream.on('data', chunk => { buffer += chunk; bytesRead += Buffer.byteLength(chunk, 'utf8'); });
207
+ stream.on('end', () => {
208
+ if (bytesRead === 0) return; // no new data
209
+ const lines = buffer.split('\n').filter(Boolean);
210
+ for (const line of lines) {
211
+ parseLine(line);
212
+ }
213
+ fileSize += bytesRead;
214
+ });
215
+ stream.on('error', (err) => {
216
+ if (err.code === 'ENOENT') {
217
+ // File was deleted/rotated — reset position
218
+ fileSize = 0;
219
+ } else {
220
+ log(`ERROR: reading ${label}: ${err.message}`);
221
+ }
222
+ });
223
+ } catch (err) {
224
+ if (err.code === 'ENOENT') {
199
225
  fileSize = 0;
226
+ } else {
227
+ log(`ERROR: reading ${label}: ${err.message}`);
200
228
  }
201
- if (stat.size > fileSize) {
202
- const stream = fs.createReadStream(filePath, {
203
- start: fileSize,
204
- end: stat.size,
205
- encoding: 'utf8'
206
- });
207
- let buffer = '';
208
- stream.on('data', chunk => { buffer += chunk; });
209
- stream.on('end', () => {
210
- const lines = buffer.split('\n').filter(Boolean);
211
- for (const line of lines) {
212
- parseLine(line);
213
- }
214
- fileSize = stat.size;
215
- });
216
- }
217
- } catch (err) {
218
- log(`ERROR: reading ${label}: ${err.message}`);
219
229
  }
220
230
  });
221
231
 
package/bin/mesh-agent.js CHANGED
@@ -539,6 +539,8 @@ const ALLOWED_METRIC_PREFIXES = [
539
539
  ];
540
540
 
541
541
  function isAllowedMetric(cmd) {
542
+ if (/[\n\r\0;`]|\$\(|\|\||&&|<\(|>\(|<<|>>|>\s|\|/.test(cmd)) return false;
543
+ if (/\bnode\s+(-e\b|--eval\b|-p\b|--print\b|-r\b|--require\b|--import\b)/.test(cmd)) return false;
542
544
  return ALLOWED_METRIC_PREFIXES.some(prefix => cmd.startsWith(prefix));
543
545
  }
544
546
 
@@ -1032,6 +1034,9 @@ async function executeCollabTask(task) {
1032
1034
  // Create worktree for isolation
1033
1035
  const worktreePath = createWorktree(`${task.task_id}-${NODE_ID}`);
1034
1036
  const taskDir = worktreePath || WORKSPACE;
1037
+ if (!worktreePath) {
1038
+ log(`WARNING: Collab task ${task.task_id} running in shared workspace — isolation not achieved`);
1039
+ }
1035
1040
 
1036
1041
  // Periodic session heartbeat — detects abort/completion while waiting for rounds
1037
1042
  const sessionHeartbeat = setInterval(async () => {
@@ -1190,9 +1195,13 @@ async function executeTask(task) {
1190
1195
  // Create isolated worktree for this task (falls back to shared workspace on failure)
1191
1196
  const worktreePath = createWorktree(task.task_id);
1192
1197
  const taskDir = worktreePath || WORKSPACE;
1198
+ const workspaceIsolated = !!worktreePath;
1199
+ if (!workspaceIsolated) {
1200
+ log(`WARNING: Task ${task.task_id} running in shared workspace — isolation not achieved`);
1201
+ }
1193
1202
 
1194
- // Signal start
1195
- await natsRequest('mesh.tasks.start', { task_id: task.task_id });
1203
+ // Signal start (include isolation status so daemon knows)
1204
+ await natsRequest('mesh.tasks.start', { task_id: task.task_id, workspace_isolated: workspaceIsolated });
1196
1205
  writeAgentState('working', task.task_id);
1197
1206
  log(`Started: ${task.task_id} (dir: ${worktreePath ? 'worktree' : 'workspace'})`);
1198
1207
 
package/bin/mesh-deploy.js CHANGED
@@ -47,6 +47,10 @@ const crypto = require('crypto');
47
47
  const IS_MAC = os.platform() === 'darwin';
48
48
  const HOME = os.homedir();
49
49
  const DEPLOY_BRANCH = process.env.OPENCLAW_DEPLOY_BRANCH || 'main';
50
+ if (!/^[a-zA-Z0-9._\/-]+$/.test(DEPLOY_BRANCH)) {
51
+ console.error(`Invalid DEPLOY_BRANCH: ${DEPLOY_BRANCH}`);
52
+ process.exit(1);
53
+ }
50
54
  const REPO_DIR = process.env.OPENCLAW_REPO_DIR || path.join(HOME, 'openclaw');
51
55
 
52
56
  // KNOWN ISSUE: Two-directory problem
package/bin/mesh-task-daemon.js CHANGED
@@ -46,7 +46,7 @@ const ROLE_DIRS = [
46
46
  ];
47
47
 
48
48
  const sc = StringCodec();
49
- const { NATS_URL } = require('../lib/nats-resolve');
49
+ const { NATS_URL, natsConnectOpts } = require('../lib/nats-resolve');
50
50
  const BUDGET_CHECK_INTERVAL = 30000; // 30s
51
51
  const STALL_MINUTES = parseInt(process.env.MESH_STALL_MINUTES || '5'); // no heartbeat for this long → stalled
52
52
  const CIRCLING_STEP_TIMEOUT_MS = parseInt(process.env.MESH_CIRCLING_STEP_TIMEOUT_MS || String(10 * 60 * 1000)); // 10 min default
@@ -2013,7 +2013,8 @@ function cascadeFailure(plan, failedSubtaskId) {
2013
2013
  async function main() {
2014
2014
  log('Starting mesh task daemon...');
2015
2015
 
2016
- nc = await connect({ servers: NATS_URL, timeout: 5000 });
2016
+ const natsOpts = natsConnectOpts();
2017
+ nc = await connect({ ...natsOpts, timeout: 5000 });
2017
2018
  log(`Connected to NATS at ${NATS_URL}`);
2018
2019
 
2019
2020
  // Initialize task store
@@ -2085,8 +2086,12 @@ async function main() {
2085
2086
  }
2086
2087
 
2087
2088
  // Start enforcement loops
2088
- const proposalTimer = setInterval(processProposals, BUDGET_CHECK_INTERVAL);
2089
- const budgetTimer = setInterval(enforceBudgets, BUDGET_CHECK_INTERVAL);
2089
+ const proposalTimer = setInterval(async () => {
2090
+ try { await processProposals(); } catch (err) { log(`processProposals error: ${err.message}`); }
2091
+ }, BUDGET_CHECK_INTERVAL);
2092
+ const budgetTimer = setInterval(async () => {
2093
+ try { await enforceBudgets(); } catch (err) { log(`enforceBudgets error: ${err.message}`); }
2094
+ }, BUDGET_CHECK_INTERVAL);
2090
2095
  const stallTimer = setInterval(detectStalls, BUDGET_CHECK_INTERVAL);
2091
2096
  const recruitTimer = setInterval(checkRecruitingDeadlines, 5000); // check every 5s
2092
2097
  const circlingStepSweepTimer = setInterval(sweepCirclingStepTimeouts, 60000); // every 60s
package/bin/mesh.js CHANGED
@@ -27,23 +27,7 @@ const { connect, StringCodec, createInbox } = require('nats');
27
27
  const fs = require('fs');
28
28
  const path = require('path');
29
29
  const os = require('os');
30
-
31
- // ─── Config ──────────────────────────────────────────
32
- // ── NATS URL resolution: env var → ~/.openclaw/openclaw.env → fallback IP ──
33
- const NATS_FALLBACK = 'nats://100.91.131.61:4222';
34
- function resolveNatsUrl() {
35
- if (process.env.OPENCLAW_NATS) return process.env.OPENCLAW_NATS;
36
- try {
37
- const envFile = path.join(os.homedir(), '.openclaw', 'openclaw.env');
38
- if (fs.existsSync(envFile)) {
39
- const content = fs.readFileSync(envFile, 'utf8');
40
- const match = content.match(/^\s*OPENCLAW_NATS\s*=\s*(.+)/m);
41
- if (match && match[1].trim()) return match[1].trim();
42
- }
43
- } catch {}
44
- return NATS_FALLBACK;
45
- }
46
- const NATS_URL = resolveNatsUrl();
30
+ const { natsConnectOpts } = require('../lib/nats-resolve');
47
31
  const SHARED_DIR = path.join(os.homedir(), 'openclaw', 'shared');
48
32
  const LOCAL_NODE = os.hostname().toLowerCase().replace(/[^a-z0-9-]/g, '-');
49
33
  const sc = StringCodec();
@@ -107,10 +91,11 @@ function checkExecSafety(command) {
107
91
  * Connect to NATS with a short timeout (this is a CLI tool, not a daemon).
108
92
  */
109
93
  async function natsConnect() {
94
+ const opts = natsConnectOpts();
110
95
  try {
111
- return await connect({ servers: NATS_URL, timeout: 5000 });
96
+ return await connect({ ...opts, timeout: 5000 });
112
97
  } catch (err) {
113
- console.error(`Error: Cannot connect to NATS at ${NATS_URL}`);
98
+ console.error(`Error: Cannot connect to NATS at ${opts.servers}`);
114
99
  console.error(`Is the NATS server running? Is Tailscale connected?`);
115
100
  process.exit(1);
116
101
  }
@@ -880,7 +865,7 @@ async function cmdPlan(args) {
880
865
  }
881
866
 
882
867
  // Submit to mesh via NATS
883
- const nc = await connect({ servers: NATS_URL, timeout: 5000 });
868
+ const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
884
869
  try {
885
870
  const reply = await nc.request(
886
871
  'mesh.plans.create',
@@ -908,7 +893,7 @@ async function cmdPlan(args) {
908
893
  if (args[i] === '--status' && args[i + 1]) { statusFilter = args[++i]; }
909
894
  }
910
895
 
911
- const nc = await connect({ servers: NATS_URL, timeout: 5000 });
896
+ const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
912
897
  try {
913
898
  const payload = statusFilter ? { status: statusFilter } : {};
914
899
  const reply = await nc.request(
@@ -941,7 +926,7 @@ async function cmdPlan(args) {
941
926
  process.exit(1);
942
927
  }
943
928
 
944
- const nc = await connect({ servers: NATS_URL, timeout: 5000 });
929
+ const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
945
930
  try {
946
931
  const reply = await nc.request(
947
932
  'mesh.plans.get',
@@ -1039,7 +1024,7 @@ async function cmdPlan(args) {
1039
1024
  process.exit(1);
1040
1025
  }
1041
1026
 
1042
- const nc = await connect({ servers: NATS_URL, timeout: 5000 });
1027
+ const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
1043
1028
  try {
1044
1029
  const reply = await nc.request(
1045
1030
  'mesh.plans.approve',
@@ -1068,7 +1053,7 @@ async function cmdPlan(args) {
1068
1053
  process.exit(1);
1069
1054
  }
1070
1055
 
1071
- const nc = await connect({ servers: NATS_URL, timeout: 5000 });
1056
+ const nc = await connect({ ...natsConnectOpts(), timeout: 5000 });
1072
1057
  try {
1073
1058
  const reply = await nc.request(
1074
1059
  'mesh.plans.abort',
package/lib/exec-safety.js CHANGED
@@ -11,6 +11,30 @@
11
11
 
12
12
  'use strict';
13
13
 
14
+ // Shell metacharacter detection — blocks command chaining/injection.
15
+ // Safe pipes to common read-only utilities are allowed.
16
+ const SHELL_CHAIN_PATTERNS = /[\n\r\0;`]|\$\(|\|\||&&|<\(|>\(|<<|>>|>\s|\|(?!\s*grep\b|\s*head\b|\s*tail\b|\s*wc\b|\s*sort\b)/;
17
+
18
+ function containsShellChaining(cmd) {
19
+ // Allow safe pipes to common read-only utilities
20
+ return SHELL_CHAIN_PATTERNS.test(cmd);
21
+ }
22
+
23
+ // Dangerous flags that allow arbitrary code execution via node
24
+ const DANGEROUS_NODE_FLAGS = /\bnode\s+(-e\b|--eval\b|-p\b|--print\b|-r\b|--require\b|--import\b|--loader\b|--experimental-loader\b)/;
25
+
26
+ // Dangerous git flags that allow arbitrary config / code execution
27
+ const DANGEROUS_GIT_FLAGS = /\bgit\s+(-c\s|--config\s)/;
28
+
29
+ // Dangerous find flags that allow arbitrary command execution
30
+ const DANGEROUS_FIND_FLAGS = /\bfind\b.*\s(-exec\b|-execdir\b|-delete\b|-ok\b|-okdir\b)/;
31
+
32
+ // Dangerous make variable overrides (SHELL=, CC=, etc.)
33
+ const DANGEROUS_MAKE_FLAGS = /\bmake\b.*\b(SHELL|CC|CXX|LD|AR)=/;
34
+
35
+ // Dangerous python flags that allow arbitrary code execution
36
+ const DANGEROUS_PYTHON_FLAGS = /\bpython3?\s+(-c\b|-m\s+http)/;
37
+
14
38
  const DESTRUCTIVE_PATTERNS = [
15
39
  /\brm\s+(-[a-zA-Z]*)?r[a-zA-Z]*f/, // rm -rf, rm -fr, rm --recursive --force
16
40
  /\brm\s+(-[a-zA-Z]*)?f[a-zA-Z]*r/, // rm -fr variants
@@ -39,11 +63,15 @@ const DESTRUCTIVE_PATTERNS = [
39
63
  * CLI-side uses blocklist only; server-side uses both blocklist + allowlist.
40
64
  */
41
65
  const ALLOWED_EXEC_PREFIXES = [
42
- 'git ', 'npm ', 'node ', 'npx ', 'python ', 'python3 ',
66
+ 'git ', 'node ', 'python ', 'python3 ',
67
+ 'npm test', 'npm run test', 'npm run lint', 'npm run build', 'npm run dev',
68
+ 'npm run start', 'npm install', 'npm ci', 'npm ls', 'npm outdated',
69
+ 'npm audit', 'npm version', 'npm pack', 'npm run check',
70
+ 'npx vitest', 'npx jest', 'npx eslint', 'npx prettier', 'npx tsc',
43
71
  'cat ', 'ls ', 'head ', 'tail ', 'grep ', 'find ', 'wc ',
44
72
  'echo ', 'date ', 'uptime ', 'df ', 'free ', 'ps ',
45
73
  'bash openclaw/', 'bash ~/openclaw/', 'bash ./bin/',
46
- 'cd ', 'pwd', 'which ', 'env ', 'printenv ',
74
+ 'pwd', 'which ',
47
75
  'cargo ', 'go ', 'make ', 'pytest ', 'jest ', 'vitest ',
48
76
  ];
49
77
 
@@ -84,6 +112,30 @@ function validateExecCommand(command) {
84
112
  return { allowed: false, reason: 'Empty command' };
85
113
  }
86
114
 
115
+ if (containsShellChaining(trimmed)) {
116
+ return { allowed: false, reason: `Command contains shell chaining operators: ${trimmed.slice(0, 80)}` };
117
+ }
118
+
119
+ if (DANGEROUS_NODE_FLAGS.test(trimmed)) {
120
+ return { allowed: false, reason: `Dangerous node flag detected: ${trimmed.slice(0, 80)}` };
121
+ }
122
+
123
+ if (DANGEROUS_GIT_FLAGS.test(trimmed)) {
124
+ return { allowed: false, reason: `Dangerous git flag detected: ${trimmed.slice(0, 80)}` };
125
+ }
126
+
127
+ if (DANGEROUS_FIND_FLAGS.test(trimmed)) {
128
+ return { allowed: false, reason: `Dangerous find flag detected: ${trimmed.slice(0, 80)}` };
129
+ }
130
+
131
+ if (DANGEROUS_MAKE_FLAGS.test(trimmed)) {
132
+ return { allowed: false, reason: `Dangerous make variable override detected: ${trimmed.slice(0, 80)}` };
133
+ }
134
+
135
+ if (DANGEROUS_PYTHON_FLAGS.test(trimmed)) {
136
+ return { allowed: false, reason: `Dangerous python flag detected: ${trimmed.slice(0, 80)}` };
137
+ }
138
+
87
139
  const destructive = checkDestructivePatterns(trimmed);
88
140
  if (destructive.blocked) {
89
141
  return { allowed: false, reason: `Blocked by destructive pattern: ${destructive.pattern}` };
@@ -99,7 +151,13 @@ function validateExecCommand(command) {
99
151
  module.exports = {
100
152
  DESTRUCTIVE_PATTERNS,
101
153
  ALLOWED_EXEC_PREFIXES,
154
+ DANGEROUS_NODE_FLAGS,
155
+ DANGEROUS_GIT_FLAGS,
156
+ DANGEROUS_FIND_FLAGS,
157
+ DANGEROUS_MAKE_FLAGS,
158
+ DANGEROUS_PYTHON_FLAGS,
102
159
  checkDestructivePatterns,
103
160
  isAllowedExecCommand,
104
161
  validateExecCommand,
162
+ containsShellChaining,
105
163
  };
package/lib/kanban-io.js CHANGED
@@ -50,9 +50,8 @@ async function withMkdirLock(filePath, fn) {
50
50
  throw err;
51
51
  }
52
52
  }
53
- // Timeout — force acquire (stale lock)
54
- try { fs.rmdirSync(lockDir); } catch {}
55
- return fn();
53
+ // Timeout — refuse to proceed without lock to prevent data corruption
54
+ throw new Error(`kanban-io: lock acquisition timeout after ${maxWait}ms — file may be corrupted`);
56
55
  }
57
56
 
58
57
  // ── Parser ──────────────────────────────────────────
@@ -351,9 +350,13 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
351
350
  ...lines.slice(blockEnd),
352
351
  ];
353
352
 
354
- // Atomic write
353
+ // Atomic write — fsync before rename to ensure data hits disk
355
354
  const tmpPath = filePath + '.tmp.' + process.pid;
356
- fs.writeFileSync(tmpPath, newLines.join('\n'));
355
+ const output = newLines.join('\n');
356
+ const fd = fs.openSync(tmpPath, 'w');
357
+ fs.writeSync(fd, output);
358
+ fs.fsyncSync(fd);
359
+ fs.closeSync(fd);
357
360
  fs.renameSync(tmpPath, filePath);
358
361
  }
359
362
 
package/lib/llm-providers.js CHANGED
@@ -22,15 +22,26 @@ const fs = require('fs');
22
22
  const os = require('os');
23
23
 
24
24
  // ── Shell Command Security ─────────────────────────
25
+ const SHELL_CHAIN_PATTERNS = /[\n\r\0;`]|\$\(|\|\||&&|<\(|>\(|<<|>>|>\s|\|(?!\s*grep\b|\s*head\b|\s*tail\b|\s*wc\b|\s*sort\b)/;
26
+
27
+ const DANGEROUS_FIND_FLAGS = /\bfind\b.*\s(-exec\b|-execdir\b|-delete\b|-ok\b|-okdir\b)/;
28
+
29
+ const DANGEROUS_NODE_FLAGS = /\bnode\s+(-e\b|--eval\b|-p\b|--print\b|-r\b|--require\b|--import\b|--loader\b|--experimental-loader\b)/;
30
+
25
31
  const SHELL_PROVIDER_ALLOWED_PREFIXES = [
26
32
  'npm test', 'npm run', 'node ', 'python ', 'pytest', 'cargo test',
27
- 'go test', 'make', 'jest', 'vitest', 'mocha', 'bash ', 'sh ',
33
+ 'go test', 'make', 'jest', 'vitest', 'mocha',
34
+ 'bash openclaw/', 'bash ~/openclaw/', 'bash ./bin/',
35
+ 'sh openclaw/', 'sh ~/openclaw/', 'sh ./bin/',
28
36
  'cat ', 'echo ', 'ls ', 'grep ', 'find ', 'git '
29
37
  ];
30
38
 
31
39
  function validateShellCommand(cmd) {
32
40
  const trimmed = (cmd || '').trim();
33
41
  if (!trimmed) return false;
42
+ if (SHELL_CHAIN_PATTERNS.test(trimmed)) return false;
43
+ if (DANGEROUS_NODE_FLAGS.test(trimmed)) return false;
44
+ if (DANGEROUS_FIND_FLAGS.test(trimmed)) return false;
34
45
  return SHELL_PROVIDER_ALLOWED_PREFIXES.some(p => trimmed.startsWith(p));
35
46
  }
36
47
 
package/lib/mesh-collab.js CHANGED
@@ -166,8 +166,9 @@ class CollabStore {
166
166
  await this.kv.put(key, sc.encode(JSON.stringify(updated)), { previousSeq: entry.revision });
167
167
  return updated;
168
168
  } catch (err) {
169
- if (attempt === maxRetries - 1) throw err;
170
- // conflict retry
169
+ const isCasConflict = err.code === '10071' || (err.message && err.message.includes('wrong last sequence'));
170
+ if (!isCasConflict || attempt === maxRetries - 1) throw err;
171
+ // CAS conflict — retry
171
172
  }
172
173
  }
173
174
  }
@@ -442,6 +443,7 @@ class CollabStore {
442
443
  let nextTurn = null;
443
444
  await this._updateWithCAS(sessionId, (session) => {
444
445
  if (session.mode !== COLLAB_MODE.SEQUENTIAL) return null;
446
+ if (session.status !== 'active') return null;
445
447
 
446
448
  const currentIdx = session.turn_order.indexOf(session.current_turn);
447
449
  const nextIdx = currentIdx + 1;
@@ -550,6 +552,9 @@ class CollabStore {
550
552
  console.error(`[collab] storeArtifact FAILED for ${sessionId}/${key}: ${err.message}. Removing artifact and persisting without it.`);
551
553
  delete session.circling.artifacts[key];
552
554
  try {
555
+ // Recovery write without CAS — acceptable because we're removing the artifact
556
+ // that caused the failure. Worst case: another concurrent write overwrites this,
557
+ // but that write also wouldn't have the problematic artifact.
553
558
  await this.kv.put(sessionId, sc.encode(JSON.stringify(session)));
554
559
  } catch (_) { /* best effort */ }
555
560
  return null;
@@ -851,6 +856,7 @@ class CollabStore {
851
856
  */
852
857
  async markCompleted(sessionId, result) {
853
858
  return this._updateWithCAS(sessionId, (session) => {
859
+ if (['completed', 'aborted'].includes(session.status)) return null;
854
860
  session.status = COLLAB_STATUS.COMPLETED;
855
861
  session.completed_at = new Date().toISOString();
856
862
  session.result = {
package/lib/mesh-harness.js CHANGED
@@ -22,6 +22,7 @@ const fs = require('fs');
22
22
  const path = require('path');
23
23
  const { execSync } = require('child_process');
24
24
  const { globMatch } = require('./rule-loader');
25
+ const { validateExecCommand } = require('./exec-safety');
25
26
 
26
27
  // ── Rule Loading ─────────────────────────────────────
27
28
 
@@ -241,6 +242,11 @@ function preCommitSecretScan(worktreePath) {
241
242
  function postCommitValidate(worktreePath, command) {
242
243
  if (!worktreePath || !command) return { passed: true, output: '' };
243
244
 
245
+ const validation = validateExecCommand(command);
246
+ if (!validation.allowed) {
247
+ return { passed: false, output: `Validation command blocked: ${validation.reason}` };
248
+ }
249
+
244
250
  try {
245
251
  const output = execSync(command, {
246
252
  cwd: worktreePath, timeout: 10000, encoding: 'utf-8', stdio: 'pipe',
package/lib/mesh-plans.js CHANGED
@@ -38,6 +38,13 @@ const SUBTASK_STATUS = {
38
38
 
39
39
  // ── Delegation Modes ───────────────────────────────
40
40
 
41
+ const PLAN_TRANSITIONS = {
42
+ approve: new Set(['review', 'draft']),
43
+ startExecuting: new Set(['approved']),
44
+ markCompleted: new Set(['executing']),
45
+ markAborted: new Set(['draft', 'review', 'approved', 'executing']),
46
+ };
47
+
41
48
  const DELEGATION_MODE = {
42
49
  SOLO_MESH: 'solo_mesh',
43
50
  COLLAB_MESH: 'collab_mesh',
@@ -127,6 +134,11 @@ function createPlan({
127
134
  // Compute wave assignments
128
135
  assignWaves(enriched);
129
136
 
137
+ // Mark cycle-blocked subtasks (wave === -1) so they don't prevent plan completion
138
+ for (const st of enriched) {
139
+ if (st.wave === -1 && st.status === 'pending') st.status = 'blocked';
140
+ }
141
+
130
142
  const totalBudget = enriched.reduce((sum, st) => sum + st.budget_minutes, 0);
131
143
  const maxWave = enriched.reduce((max, st) => Math.max(max, st.wave), 0);
132
144
 
@@ -375,8 +387,9 @@ class PlanStore {
375
387
  await this.kv.put(key, sc.encode(JSON.stringify(updated)), { previousSeq: entry.revision });
376
388
  return updated;
377
389
  } catch (err) {
378
- if (attempt === maxRetries - 1) throw err;
379
- // conflict retry
390
+ const isCasConflict = err.code === '10071' || (err.message && err.message.includes('wrong last sequence'));
391
+ if (!isCasConflict || attempt === maxRetries - 1) throw err;
392
+ // CAS conflict — retry
380
393
  }
381
394
  }
382
395
  }
@@ -420,6 +433,7 @@ class PlanStore {
420
433
 
421
434
  async submitForReview(planId) {
422
435
  return this._updateWithCAS(planId, (plan) => {
436
+ if (plan.status !== 'draft') return null;
423
437
  plan.status = PLAN_STATUS.REVIEW;
424
438
  return plan;
425
439
  });
@@ -427,6 +441,7 @@ class PlanStore {
427
441
 
428
442
  async approve(planId, approvedBy = 'gui') {
429
443
  return this._updateWithCAS(planId, (plan) => {
444
+ if (!PLAN_TRANSITIONS.approve.has(plan.status)) return null;
430
445
  plan.status = PLAN_STATUS.APPROVED;
431
446
  plan.approved_by = approvedBy;
432
447
  plan.approved_at = new Date().toISOString();
@@ -436,6 +451,7 @@ class PlanStore {
436
451
 
437
452
  async startExecuting(planId) {
438
453
  return this._updateWithCAS(planId, (plan) => {
454
+ if (!PLAN_TRANSITIONS.startExecuting.has(plan.status)) return null;
439
455
  plan.status = PLAN_STATUS.EXECUTING;
440
456
  plan.started_at = new Date().toISOString();
441
457
  return plan;
@@ -444,6 +460,7 @@ class PlanStore {
444
460
 
445
461
  async markCompleted(planId) {
446
462
  return this._updateWithCAS(planId, (plan) => {
463
+ if (!PLAN_TRANSITIONS.markCompleted.has(plan.status)) return null;
447
464
  plan.status = PLAN_STATUS.COMPLETED;
448
465
  plan.completed_at = new Date().toISOString();
449
466
  return plan;
@@ -452,6 +469,7 @@ class PlanStore {
452
469
 
453
470
  async markAborted(planId, reason) {
454
471
  return this._updateWithCAS(planId, (plan) => {
472
+ if (!PLAN_TRANSITIONS.markAborted.has(plan.status)) return null;
455
473
  plan.status = PLAN_STATUS.ABORTED;
456
474
  plan.completed_at = new Date().toISOString();
457
475
  for (const st of plan.subtasks) {
package/lib/mesh-tasks.js CHANGED
@@ -46,6 +46,14 @@ const TASK_STATUS = {
46
46
  REJECTED: 'rejected',
47
47
  };
48
48
 
49
+ const TERMINAL_STATES = new Set([
50
+ TASK_STATUS.COMPLETED,
51
+ TASK_STATUS.FAILED,
52
+ TASK_STATUS.RELEASED,
53
+ TASK_STATUS.CANCELLED,
54
+ TASK_STATUS.REJECTED,
55
+ ]);
56
+
49
57
  /**
50
58
  * Create a new task with the enriched schema.
51
59
  * Karpathy-inspired fields: budget_minutes, metric, on_fail, scope.
@@ -158,8 +166,9 @@ class TaskStore {
158
166
  await this.kv.put(key, sc.encode(JSON.stringify(updated)), { previousSeq: entry.revision });
159
167
  return updated;
160
168
  } catch (err) {
161
- if (attempt === maxRetries - 1) throw err;
162
- // conflict retry
169
+ const isCasConflict = err.code === '10071' || (err.message && err.message.includes('wrong last sequence'));
170
+ if (!isCasConflict || attempt === maxRetries - 1) throw err;
171
+ // CAS conflict — retry
163
172
  }
164
173
  }
165
174
  }
@@ -253,6 +262,7 @@ class TaskStore {
253
262
  */
254
263
  async markRunning(taskId) {
255
264
  return this._updateWithCAS(taskId, (task) => {
265
+ if (TERMINAL_STATES.has(task.status)) return null;
256
266
  task.status = TASK_STATUS.RUNNING;
257
267
  task.started_at = new Date().toISOString();
258
268
  return task;
@@ -264,6 +274,7 @@ class TaskStore {
264
274
  */
265
275
  async markCompleted(taskId, result) {
266
276
  return this._updateWithCAS(taskId, (task) => {
277
+ if (TERMINAL_STATES.has(task.status)) return null;
267
278
  task.status = TASK_STATUS.COMPLETED;
268
279
  task.completed_at = new Date().toISOString();
269
280
  task.result = result;
@@ -277,6 +288,7 @@ class TaskStore {
277
288
  */
278
289
  async markPendingReview(taskId, result) {
279
290
  return this._updateWithCAS(taskId, (task) => {
291
+ if (TERMINAL_STATES.has(task.status)) return null;
280
292
  task.status = TASK_STATUS.PENDING_REVIEW;
281
293
  task.result = result;
282
294
  task.review_requested_at = new Date().toISOString();
@@ -316,6 +328,7 @@ class TaskStore {
316
328
  */
317
329
  async markFailed(taskId, reason, attempts = []) {
318
330
  return this._updateWithCAS(taskId, (task) => {
331
+ if (TERMINAL_STATES.has(task.status)) return null;
319
332
  task.status = TASK_STATUS.FAILED;
320
333
  task.completed_at = new Date().toISOString();
321
334
  task.result = { success: false, summary: reason };
@@ -329,6 +342,7 @@ class TaskStore {
329
342
  */
330
343
  async logAttempt(taskId, attempt) {
331
344
  return this._updateWithCAS(taskId, (task) => {
345
+ if (TERMINAL_STATES.has(task.status)) return null;
332
346
  task.attempts.push({
333
347
  ...attempt,
334
348
  timestamp: new Date().toISOString(),
@@ -343,6 +357,7 @@ class TaskStore {
343
357
  */
344
358
  async markReleased(taskId, reason, attempts = []) {
345
359
  return this._updateWithCAS(taskId, (task) => {
360
+ if (TERMINAL_STATES.has(task.status)) return null;
346
361
  task.status = TASK_STATUS.RELEASED;
347
362
  task.completed_at = new Date().toISOString();
348
363
  task.result = { success: false, summary: reason, released: true };
@@ -356,6 +371,7 @@ class TaskStore {
356
371
  */
357
372
  async touchActivity(taskId) {
358
373
  return this._updateWithCAS(taskId, (task) => {
374
+ if (TERMINAL_STATES.has(task.status)) return null;
359
375
  task.last_activity = new Date().toISOString();
360
376
  return task;
361
377
  });
@@ -379,25 +395,25 @@ class TaskStore {
379
395
  }
380
396
 
381
397
  /**
382
- * Find running tasks with no activity for `stallMinutes`.
398
+ * Find running or claimed tasks with no activity for `stallMinutes`.
383
399
  * Stall detection is separate from budget — a task can be within budget
384
400
  * but the agent process may have died silently.
401
+ * Claimed tasks that never transition to running (agent crashed after claim)
402
+ * are also detected and released back to queued.
385
403
  */
386
404
  async findStalled(stallMinutes = 5) {
387
405
  const running = await this.list({ status: TASK_STATUS.RUNNING });
406
+ const claimed = await this.list({ status: TASK_STATUS.CLAIMED });
388
407
  const cutoff = Date.now() - stallMinutes * 60 * 1000;
389
- return running.filter(t => {
390
- const lastSignal = t.last_activity || t.started_at;
408
+ return [...running, ...claimed].filter(t => {
409
+ const lastSignal = t.last_activity || t.started_at || t.claimed_at;
391
410
  return lastSignal && new Date(lastSignal) < cutoff;
392
411
  });
393
412
  }
394
413
 
395
414
  async _checkDeps(depIds) {
396
- for (const depId of depIds) {
397
- const dep = await this.get(depId);
398
- if (!dep || dep.status !== TASK_STATUS.COMPLETED) return false;
399
- }
400
- return true;
415
+ const deps = await Promise.all(depIds.map(id => this.get(id)));
416
+ return deps.every(dep => dep && dep.status === TASK_STATUS.COMPLETED);
401
417
  }
402
418
  }
403
419