npm - openclaw-node-harness - Versions diffs - 2.0.2 → 2.0.4 - Mend

openclaw-node-harness 2.0.2 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15):

package/bin/fleet-deploy.js +1 -1
package/bin/mesh-agent.js +217 -84
package/bin/mesh-bridge.js +274 -10
package/bin/mesh-deploy-listener.js +120 -98
package/bin/mesh-deploy.js +11 -3
package/bin/mesh-health-publisher.js +1 -1
package/bin/mesh-task-daemon.js +190 -15
package/bin/mesh.js +170 -22
package/bin/openclaw-node-init.js +147 -3
package/install.sh +7 -0
package/lib/kanban-io.js +50 -10
package/lib/mesh-collab.js +53 -3
package/lib/mesh-registry.js +11 -2
package/lib/mesh-tasks.js +6 -7
package/package.json +1 -1

package/bin/openclaw-node-init.js CHANGED Viewed

@@ -460,14 +460,60 @@ function installLaunchdService(meshDir, nodeBin, nodeId, provider, natsUrl) {
     return;
   }
+  // Deploy listener plist
+  const deployPlistPath = path.join(plistDir, 'ai.openclaw.deploy-listener.plist');
+  const deployPlist = `<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+  <key>Label</key>
+  <string>ai.openclaw.deploy-listener</string>
+  <key>ProgramArguments</key>
+  <array>
+    <string>${nodeBin}</string>
+    <string>${meshDir}/bin/mesh-deploy-listener.js</string>
+  </array>
+  <key>KeepAlive</key>
+  <true/>
+  <key>RunAtLoad</key>
+  <true/>
+  <key>StandardOutPath</key>
+  <string>${os.homedir()}/.openclaw/workspace/.tmp/mesh-deploy-listener.log</string>
+  <key>StandardErrorPath</key>
+  <string>${os.homedir()}/.openclaw/workspace/.tmp/mesh-deploy-listener.err</string>
+  <key>EnvironmentVariables</key>
+  <dict>
+    <key>OPENCLAW_NATS</key>
+    <string>${natsUrl}</string>
+    <key>OPENCLAW_NODE_ID</key>
+    <string>${nodeId}</string>
+    <key>OPENCLAW_NODE_ROLE</key>
+    <string>worker</string>
+    <key>OPENCLAW_REPO_DIR</key>
+    <string>${meshDir}</string>
+    <key>PATH</key>
+    <string>/usr/local/bin:/usr/bin:/bin:/opt/homebrew/bin:${os.homedir()}/.npm-global/bin</string>
+    <key>NODE_PATH</key>
+    <string>${meshDir}/node_modules:${meshDir}/lib</string>
+  </dict>
+  <key>ThrottleInterval</key>
+  <integer>30</integer>
+</dict>
+</plist>`;
   fs.mkdirSync(plistDir, { recursive: true });
   fs.writeFileSync(plistPath, plist);
-  ok(`Launchd service written: ${plistPath}`);
+  ok(`Mesh agent service written: ${plistPath}`);
+  fs.writeFileSync(deployPlistPath, deployPlist);
+  ok(`Deploy listener service written: ${deployPlistPath}`);
   try {
     execSync(`launchctl unload "${plistPath}" 2>/dev/null || true`, { stdio: 'pipe' });
     execSync(`launchctl load "${plistPath}"`, { stdio: 'pipe' });
-    ok('Service loaded and started');
+    ok('Mesh agent loaded and started');
+    execSync(`launchctl unload "${deployPlistPath}" 2>/dev/null || true`, { stdio: 'pipe' });
+    execSync(`launchctl load "${deployPlistPath}"`, { stdio: 'pipe' });
+    ok('Deploy listener loaded and started');
   } catch (e) {
     warn(`Service load warning: ${e.message}`);
   }
@@ -508,11 +554,40 @@ WantedBy=default.target
   fs.writeFileSync(servicePath, service);
   ok(`Systemd service written: ${servicePath}`);
+  // Deploy listener service
+  const deployServicePath = path.join(serviceDir, 'openclaw-deploy-listener.service');
+  const deployService = `[Unit]
+Description=OpenClaw Deploy Listener
+After=network-online.target
+Wants=network-online.target
+[Service]
+Type=simple
+ExecStart=${nodeBin} ${meshDir}/bin/mesh-deploy-listener.js
+Restart=always
+RestartSec=30
+Environment=OPENCLAW_NATS=${natsUrl}
+Environment=OPENCLAW_NODE_ID=${nodeId}
+Environment=OPENCLAW_NODE_ROLE=worker
+Environment=OPENCLAW_REPO_DIR=${meshDir}
+Environment=NODE_PATH=${meshDir}/node_modules:${meshDir}/lib
+Environment=PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:${os.homedir()}/.local/bin:${os.homedir()}/.npm-global/bin
+WorkingDirectory=${meshDir}
+[Install]
+WantedBy=default.target
+`;
+  fs.writeFileSync(deployServicePath, deployService);
+  ok(`Deploy listener service written: ${deployServicePath}`);
   try {
     execSync('systemctl --user daemon-reload', { stdio: 'pipe' });
     execSync('systemctl --user enable openclaw-mesh-agent', { stdio: 'pipe' });
     execSync('systemctl --user start openclaw-mesh-agent', { stdio: 'pipe' });
-    ok('Service enabled and started');
+    ok('Mesh agent enabled and started');
+    execSync('systemctl --user enable openclaw-deploy-listener', { stdio: 'pipe' });
+    execSync('systemctl --user start openclaw-deploy-listener', { stdio: 'pipe' });
+    ok('Deploy listener enabled and started');
   } catch (e) {
     warn(`Service start warning: ${e.message}`);
     warn('Try manually: systemctl --user start openclaw-mesh-agent');
@@ -630,6 +705,71 @@ async function verifyNatsHealth(natsUrl, nodeId) {
   }
 }
+/** ── Mesh Topology Discovery ──────────────────────────
+ * Build a short-alias → node-ID map from the MESH_NODE_HEALTH KV bucket and
+ * write it to ~/.openclaw/mesh-aliases.json.
+ *
+ * In DRY_RUN mode nothing is queried or written; only a warning is logged.
+ * Otherwise the function connects to NATS, reads every key of the bucket,
+ * JSON-parses each value, and records one alias per node (node ID with the
+ * -virtual-machine / -vmware / -local suffixes stripped), plus a `lead`
+ * alias for any entry whose role is 'lead'. The local node is then added
+ * under its own short alias and under `self`.
+ *
+ * Errors thrown inside the outer try are caught and reported with warn() as
+ * non-fatal, so the step does not reject for those failures.
+ *
+ * @param natsUrl      Passed unchanged as `servers` to nats.connect().
+ * @param localNodeId  This node's ID; .replace() is called on it, so it is
+ *                     assumed to be a string — TODO confirm at the call site.
+ * @returns {Promise<void>}
+ */
+async function discoverTopology(natsUrl, localNodeId) {
+  log('Discovering mesh topology...');
+  if (DRY_RUN) {
+    warn('[DRY RUN] Would query MESH_NODE_HEALTH and write mesh-aliases.json');
+    return;
+  }
+  try {
+    const nats = require('nats'); // loaded lazily, inside the try, so a failing require is reported as non-fatal
+    const nc = await nats.connect({ servers: natsUrl, timeout: 10000 }); // presumably milliseconds — confirm against the nats client docs
+    const sc = nats.StringCodec();
+    const js = nc.jetstream();
+    const aliases = {}; // alias -> full node ID; later assignments overwrite earlier ones
+    // Scan MESH_NODE_HEALTH: one KV entry per known node, value decoded as JSON
+    try {
+      const kv = await js.views.kv('MESH_NODE_HEALTH');
+      const keys = await kv.keys();
+      for await (const key of keys) {
+        const entry = await kv.get(key);
+        if (entry && entry.value) {
+          const health = JSON.parse(sc.decode(entry.value));
+          const nodeId = health.nodeId || key; // fall back to the KV key when the payload carries no nodeId
+          // Short alias = node ID minus the common hostname suffixes below
+          const short = nodeId
+            .replace(/-virtual-machine.*$/i, '')
+            .replace(/-vmware.*$/i, '')
+            .replace(/-local$/, '');
+          aliases[short] = nodeId;
+          if (health.role === 'lead') aliases['lead'] = nodeId; // if several entries claim 'lead', the last one iterated wins
+          ok(`Peer: ${nodeId} (${health.role || 'worker'}, ${health.tailscaleIp || 'unknown'})`);
+        }
+      }
+    } catch { // NOTE(review): catches every error in the scan (e.g. a JSON.parse failure on one entry), not only a missing bucket; the message below may mislead and remaining keys are skipped
+      warn('MESH_NODE_HEALTH bucket not available — skipping topology');
+    }
+    // Add the local node last, so it overwrites a peer entry that produced the same alias
+    const selfShort = localNodeId
+      .replace(/-virtual-machine.*$/i, '')
+      .replace(/-vmware.*$/i, '')
+      .replace(/-local$/, '');
+    aliases[selfShort] = localNodeId;
+    aliases['self'] = localNodeId;
+    await nc.drain(); // NOTE(review): skipped if anything between connect() and here throws into the outer catch — the connection is then left to the client/process to clean up
+    if (Object.keys(aliases).length > 1) { // NOTE(review): selfShort and 'self' were both just added, so this is true unless selfShort === 'self'; the "no peers" branch looks effectively unreachable
+      const aliasPath = path.join(os.homedir(), '.openclaw', 'mesh-aliases.json');
+      fs.writeFileSync(aliasPath, JSON.stringify(aliases, null, 2) + '\n', { mode: 0o644 }); // the directory is not created here — assumes ~/.openclaw already exists
+      ok(`Mesh aliases written: ${aliasPath} (${Object.keys(aliases).length} entries)`);
+    } else {
+      warn('No peers found in MESH_NODE_HEALTH — mesh-aliases.json will only have self');
+      const aliasPath = path.join(os.homedir(), '.openclaw', 'mesh-aliases.json');
+      fs.writeFileSync(aliasPath, JSON.stringify(aliases, null, 2) + '\n', { mode: 0o644 }); // same write as the branch above; only the log output differs
+    }
+  } catch (e) {
+    warn(`Topology discovery failed: ${e.message} (non-fatal)`);
+  }
+}
 // ── Main ──────────────────────────────────────────────
 async function main() {
@@ -699,6 +839,10 @@ async function main() {
   const natsHealthy = await verifyNatsHealth(config.nats, nodeId);
   const healthy = serviceAlive && natsHealthy;
+  // ── Step 9: Discover mesh topology ──
+  step(9, 'Discovering mesh topology...');
+  await discoverTopology(config.nats, nodeId);
   // ── Done ──
   console.log(`\n${BOLD}${GREEN}═══════════════════════════════════════${RESET}`);
   if (healthy) {

package/install.sh CHANGED Viewed

@@ -230,6 +230,10 @@ if [ -z "$NODE_ROLE" ]; then
     NODE_ROLE="worker"
   fi
 fi
+if [ "$NODE_ROLE" != "lead" ] && [ "$NODE_ROLE" != "worker" ]; then
+  error "Invalid role: $NODE_ROLE (must be 'lead' or 'worker')"
+  exit 1
+fi
 export OPENCLAW_NODE_ROLE="$NODE_ROLE"
 info "Node role: $NODE_ROLE"
@@ -692,6 +696,9 @@ else
       if command -v envsubst >/dev/null 2>&1; then
         envsubst < "$TEMPLATE" > "$DEST"
       else
+        # NOTE: sed delimiter is |. If OPENCLAW_NATS_TOKEN ever contains |
+        # (unlikely — tokens are hex/base64), this substitution will break.
+        # Prefer envsubst (above) when available; it has no delimiter issue.
         sed \
           -e "s|\${HOME}|$HOME|g" \
           -e "s|\${NODE_BIN}|$NODE_BIN|g" \

package/lib/kanban-io.js CHANGED Viewed

@@ -53,8 +53,14 @@ function withMkdirLock(filePath, fn) {
       if (Date.now() - start > maxWait) {
         throw new Error(`kanban-io: lock timeout after ${maxWait}ms on ${filePath}`);
       }
-      // Non-blocking 10ms pause (busy-wait would peg CPU in Node's single thread)
-      require('child_process').spawnSync('sleep', ['0.01']);
+      // Sleep ~10ms — Atomics.wait is precise but throws on main thread
+      // in some Node.js builds; fall back to busy-spin (rare contention path)
+      try {
+        Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 10);
+      } catch {
+        const end = Date.now() + 10;
+        while (Date.now() < end) { /* busy-wait fallback */ }
+      }
     }
   }
@@ -81,9 +87,7 @@ function parseTasks(content) {
   const liveIdx = content.indexOf('## Live Tasks');
   if (liveIdx === -1) return tasks;
-  const afterLive = content.slice(liveIdx);
-  const nextSectionIdx = afterLive.indexOf('\n## ', 1); // skip the current ## Live Tasks
-  const liveSection = nextSectionIdx >= 0 ? afterLive.slice(0, nextSectionIdx) : afterLive;
+  const liveSection = content.slice(liveIdx);
   const lines = liveSection.split('\n');
   let current = null;
@@ -110,6 +114,13 @@ function parseTasks(content) {
         budget_minutes: current.budget_minutes || 30,
         scope: current.scope || [],
         updated_at: current.updated_at || '',
+        // Mesh routing
+        llm_provider: current.llm_provider || null,
+        llm_model: current.llm_model || null,
+        preferred_nodes: current.preferred_nodes || [],
+        exclude_nodes: current.exclude_nodes || [],
+        collaboration: current.collaboration || null,
+        collab_result: current.collab_result || null,
       });
     }
   }
@@ -119,7 +130,7 @@ function parseTasks(content) {
     const taskIdMatch = line.match(/^- task_id:\s*(.+)$/);
     if (taskIdMatch) {
       flush();
-      current = { task_id: taskIdMatch[1].trim(), success_criteria: [], artifacts: [], scope: [] };
+      current = { task_id: taskIdMatch[1].trim(), success_criteria: [], artifacts: [], scope: [], preferred_nodes: [], exclude_nodes: [] };
       currentArrayKey = null;
       continue;
     }
@@ -176,6 +187,31 @@ function parseTasks(content) {
           current.scope = [];
           currentArrayKey = 'scope';
           break;
+        // Mesh routing fields
+        case 'llm_provider':
+        case 'provider':
+          current.llm_provider = value || null; currentArrayKey = null; break;
+        case 'llm_model':
+        case 'model':
+          current.llm_model = value || null; currentArrayKey = null; break;
+        case 'preferred_nodes':
+          current.preferred_nodes = [];
+          currentArrayKey = 'preferred_nodes';
+          break;
+        case 'exclude_nodes':
+          current.exclude_nodes = [];
+          currentArrayKey = 'exclude_nodes';
+          break;
+        case 'collaboration':
+          try { current.collaboration = value ? JSON.parse(value) : null; }
+          catch { current.collaboration = null; }
+          currentArrayKey = null;
+          break;
+        case 'collab_result':
+          try { current.collab_result = value ? JSON.parse(value) : null; }
+          catch { current.collab_result = null; }
+          currentArrayKey = null;
+          break;
         default:
           currentArrayKey = null;
           break;
@@ -236,7 +272,11 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
   const blockLines = lines.slice(blockStart, blockEnd);
   // Update scalar fields
-  for (const [key, value] of Object.entries(fieldUpdates)) {
+  for (const [key, rawValue] of Object.entries(fieldUpdates)) {
+    // Serialize objects/arrays as JSON so the parser can read them back
+    const value = (rawValue !== null && typeof rawValue === 'object')
+      ? JSON.stringify(rawValue)
+      : rawValue;
     const fieldRegex = new RegExp(`^  ${key}:\\s*.*$`);
     let found = false;
     for (let i = 1; i < blockLines.length; i++) {
@@ -249,7 +289,7 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
     if (!found) {
       // Insert before updated_at if it exists, otherwise at end of block
       const updatedAtIdx = blockLines.findIndex(l => l.match(/^  updated_at:/));
-      const insertIdx = updatedAtIdx >= 0 ? updatedAtIdx : blockLines.length;
+      const insertIdx = updatedAtIdx > 0 ? updatedAtIdx : blockLines.length;
       blockLines.splice(insertIdx, 0, `  ${key}: ${value}`);
     }
   }
@@ -262,7 +302,7 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
     if (headerIdx === -1) {
       // Insert the array before updated_at
       const updatedAtIdx = blockLines.findIndex(l => l.match(/^  updated_at:/));
-      const insertIdx = updatedAtIdx >= 0 ? updatedAtIdx : blockLines.length;
+      const insertIdx = updatedAtIdx > 0 ? updatedAtIdx : blockLines.length;
       const newLines = [`  ${key}:`];
       for (const item of items) {
         newLines.push(`    - ${item}`);
@@ -290,7 +330,7 @@ function _updateTaskInPlaceUnsafe(filePath, taskId, fieldUpdates = {}, arrayAppe
     if (headerIdx === -1) {
       // Insert the array before updated_at
       const updatedAtIdx = blockLines.findIndex(l => l.match(/^  updated_at:/));
-      const insertIdx = updatedAtIdx >= 0 ? updatedAtIdx : blockLines.length;
+      const insertIdx = updatedAtIdx > 0 ? updatedAtIdx : blockLines.length;
       const newLines = [`  ${key}:`];
       for (const item of items) {
         newLines.push(`    - ${item}`);

package/lib/mesh-collab.js CHANGED Viewed

@@ -105,6 +105,10 @@ function createSession(taskId, collabSpec) {
 // ── CollabStore (KV-backed) ─────────────────────────
+// Rate-limit audit error logs: max 3 per session, then go silent
+const _auditErrorCounts = new Map();
+const AUDIT_ERROR_LOG_LIMIT = 3;
 class CollabStore {
   constructor(kv) {
     this.kv = kv;
@@ -139,7 +143,14 @@ class CollabStore {
         ...detail,
       });
       await this.put(session);
-    } catch { /* best-effort — never block on audit */ }
+    } catch (err) {
+      // Best-effort — never block on audit, but log first N failures per session
+      const count = (_auditErrorCounts.get(sessionId) || 0) + 1;
+      _auditErrorCounts.set(sessionId, count);
+      if (count <= AUDIT_ERROR_LOG_LIMIT) {
+        console.error(`[collab] audit append failed for ${sessionId}/${event}: ${err.message}${count === AUDIT_ERROR_LOG_LIMIT ? ' (suppressing further audit errors for this session)' : ''}`);
+      }
+    }
   }
   /**
@@ -176,6 +187,30 @@ class CollabStore {
     return sessions[0] || null;
   }
+  /**
+   * Find active sessions that contain a given node.
+   *
+   * Makes one pass over every key in the KV store: each entry is fetched,
+   * decoded, JSON-parsed, filtered on status === COLLAB_STATUS.ACTIVE, and
+   * kept when any element of session.nodes has a matching node_id.
+   *
+   * Cost: one kv.get() per key. The membership test still walks each
+   * session's node list via .some(), so the work is proportional to
+   * sessions × nodes-per-session in the worst case, not strictly O(sessions).
+   * The original note here said this replaces a list() + inner find() scan
+   * in detectStalls(); that caller is not visible in this diff.
+   *
+   * NOTE(review): JSON.parse and session.nodes.some are unguarded, so one
+   * malformed entry (or a session without a nodes array) rejects the call.
+   *
+   * @param nodeId  Value compared with === against each node's node_id.
+   * @returns {Promise<Array>} Parsed session objects, in key-iteration order.
+   */
+  async findActiveSessionsByNode(nodeId) {
+    const results = [];
+    const allKeys = [];
+    const keys = await this.kv.keys();
+    for await (const key of keys) { // collect all keys first; get() calls happen only after the key iterator is exhausted
+      allKeys.push(key);
+    }
+    for (const key of allKeys) {
+      const entry = await this.kv.get(key);
+      if (!entry || !entry.value) continue; // skip entries that are missing or have no value
+      const session = JSON.parse(sc.decode(entry.value));
+      if (session.status !== COLLAB_STATUS.ACTIVE) continue;
+      if (session.nodes.some(n => n.node_id === nodeId)) {
+        results.push(session);
+      }
+    }
+    return results;
+  }
   // ── Node Management ────────────────────────────────
   /**
@@ -190,7 +225,8 @@ class CollabStore {
     // Check max_nodes
     if (session.max_nodes && session.nodes.length >= session.max_nodes) return null;
-    // Check duplicate
+    // Check duplicate — single-threaded event loop prevents concurrent joins
+    // from interleaving between find() and push(). No mutex needed.
     if (session.nodes.find(n => n.node_id === nodeId)) return null;
     session.nodes.push({
@@ -320,6 +356,9 @@ class CollabStore {
     const session = await this.get(sessionId);
     if (!session) return null;
+    // Only accept reflections on active sessions
+    if (session.status !== COLLAB_STATUS.ACTIVE) return null;
     const currentRound = session.rounds[session.rounds.length - 1];
     if (!currentRound) return null;
@@ -506,11 +545,14 @@ class CollabStore {
   }
   /**
-   * Mark session as aborted.
+   * Mark session as aborted. Returns null (no-op) if already completed or aborted.
+   * Callers can use truthiness to detect whether the abort actually happened.
    */
   async markAborted(sessionId, reason) {
     const session = await this.get(sessionId);
     if (!session) return null;
+    // Guard: don't corrupt completed/aborted sessions
+    if (['completed', 'aborted'].includes(session.status)) return null;
     session.status = COLLAB_STATUS.ABORTED;
     session.completed_at = new Date().toISOString();
     session.result = { success: false, summary: reason, aborted: true };
@@ -518,6 +560,14 @@ class CollabStore {
     return session;
   }
+  /**
+   * Clear the audit error rate-limit counter for a session.
+   * Call when a session is finalized (completed/aborted) to prevent Map leak.
+   *
+   * Deletes the sessionId key from _auditErrorCounts; deleting an absent key
+   * is a no-op. If audit appends for the same session fail again afterwards,
+   * the count restarts from zero and those failures are logged again.
+   *
+   * NOTE(review): no caller is visible in this diff, and the markAborted hunk
+   * shown alongside does not call it — confirm that finalization paths
+   * actually invoke this, otherwise the Map still grows.
+   *
+   * @param sessionId  Key whose counter entry is removed.
+   */
+  clearAuditErrorCount(sessionId) {
+    _auditErrorCounts.delete(sessionId); // _auditErrorCounts is not a member of `this` — presumably the module-level Map; confirm
+  }
   /**
    * Get a summary of the session for reporting.
    */

package/lib/mesh-registry.js CHANGED Viewed

@@ -1,6 +1,10 @@
 /**
  * mesh-registry.js — NATS KV tool registry for OpenClaw mesh.
  *
+ * STATUS: UNUSED — fully implemented but no callers exist yet. Kept for
+ * future tool-mesh integration. Review before adopting; remove if still
+ * uncalled by next major release.
+ *
  * Shared library for:
  *   - Registering tools in MESH_TOOLS KV bucket
  *   - Heartbeat refresh (keeps tools alive via TTL)
@@ -36,7 +40,9 @@ class MeshRegistry {
   async init() {
     const js = this.nc.jetstream();
-    this.kv = await js.views.kv(KV_BUCKET);
+    // TTL: entries auto-expire after 120s if not refreshed by heartbeat (60s interval).
+    // Prevents stale entries from crashed services that never called shutdown().
+    this.kv = await js.views.kv(KV_BUCKET, { ttl: 120_000 });
     return this;
   }
@@ -111,7 +117,10 @@ class MeshRegistry {
       for (const [toolName, manifest] of this.manifests) {
         const kvKey = `${this.nodeId}.${toolName}`;
         try {
-          await this.kv.put(kvKey, sc.encode(JSON.stringify(manifest)));
+          await this.kv.put(kvKey, sc.encode(JSON.stringify({
+            ...manifest,
+            last_heartbeat: new Date().toISOString(),
+          })));
         } catch (err) {
           console.error(`[mesh-registry] heartbeat failed for ${kvKey}: ${err.message}`);
         }

package/lib/mesh-tasks.js CHANGED Viewed

@@ -140,15 +140,15 @@ class TaskStore {
       // Apply filters
       if (filter.status && task.status !== filter.status) continue;
       if (filter.owner && task.owner !== filter.owner) continue;
-      if (filter.tag && !task.tags.includes(filter.tag)) continue;
+      if (filter.tag && (!task.tags || !task.tags.includes(filter.tag))) continue;
       tasks.push(task);
     }
     // Sort by priority (higher first), then created_at (older first)
     tasks.sort((a, b) => {
-      if (b.priority !== a.priority) return b.priority - a.priority;
-      return new Date(a.created_at) - new Date(b.created_at);
+      if ((b.priority || 0) !== (a.priority || 0)) return (b.priority || 0) - (a.priority || 0);
+      return (new Date(a.created_at || 0)) - (new Date(b.created_at || 0));
     });
     return tasks;
@@ -169,7 +169,7 @@ class TaskStore {
       if (task.exclude_nodes && task.exclude_nodes.includes(nodeId)) continue;
       // Respect dependencies
-      if (task.depends_on.length > 0) {
+      if (task.depends_on && task.depends_on.length > 0) {
         const depsReady = await this._checkDeps(task.depends_on);
         if (!depsReady) continue;
       }
@@ -192,9 +192,8 @@ class TaskStore {
     task.status = TASK_STATUS.CLAIMED;
     task.owner = nodeId;
     task.claimed_at = new Date().toISOString();
-    task.budget_deadline = new Date(
-      Date.now() + task.budget_minutes * 60 * 1000
-    ).toISOString();
+    const budgetMs = (task.budget_minutes || 30) * 60 * 1000;
+    task.budget_deadline = new Date(Date.now() + budgetMs).toISOString();
     await this.put(task);
     return task;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "openclaw-node-harness",
-  "version": "2.0.2",
+  "version": "2.0.4",
   "description": "One-command installer for the OpenClaw node layer — identity, skills, souls, daemon, and Mission Control.",
   "bin": {
     "openclaw-node": "./cli.js"