npm - @yemi33/minions - Versions diffs - 0.1.1930 → 0.1.1932 - Mend

@yemi33/minions 0.1.1930 → 0.1.1932

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dashboard.js +150 -70
package/engine/cc-worker-pool.js +56 -4
package/engine/copilot-models.json +1 -1
package/engine/kb-sweep-runner.js +81 -0
package/engine/kb-sweep.js +78 -15
package/package.json +1 -1

package/dashboard.js CHANGED Viewed

@@ -2480,8 +2480,10 @@ async function _preflightModelCheck({ runtime: cliOverride, model: modelOverride
  *     contract SSE consumers depend on).
  *   - `usage` is `{}` because ACP `session/update` notifications don't
  *     surface token counts; trackEngineUsage is a no-op on `{}`.
- *   - Tool calls are not surfaced (sub-task B/C don't plumb `tool_call`
- *     notifications into a callback). Matches CC's pool trade-off.
+ *   - Tool calls are surfaced via the optional `onToolUse(name, input)`
+ *     callback (ACP `tool_call` notification, mapped to Claude-style
+ *     {name, input}). `tool_call_update` (results) is ignored to avoid
+ *     double chips.
  *   - Honors `timeoutMs`. On timeout: cancels the prompt, closes the tab
  *     (so the next call rebuilds against a clean process), resolves with
  *     `{ code: 1, stderr: 'doc-chat-pool: timeout after Xms' }`. The
@@ -2496,7 +2498,7 @@ async function _preflightModelCheck({ runtime: cliOverride, model: modelOverride
  * document body. Always re-sending extraContext is correctness-safe; the
  * pool's warm-process saving is preserved regardless.
  */
-function _invokeDocChatViaPool({ prompt, model, effort, engineConfig, systemPrompt, sessionKey, freshSession, timeoutMs, onChunk }) {
+function _invokeDocChatViaPool({ prompt, model, effort, engineConfig, systemPrompt, sessionKey, freshSession, timeoutMs, onChunk, onToolUse }) {
   const oneShot = !!freshSession;
   const tabKey = oneShot
     ? 'doc-chat:fresh:' + shared.uid()
@@ -2574,6 +2576,11 @@ function _invokeDocChatViaPool({ prompt, model, effort, engineConfig, systemProm
           try { onChunk(accumulated); } catch { /* swallow */ }
         }
       },
+      onToolUse: (name, input) => {
+        if (onToolUse) {
+          try { onToolUse(name, input || {}); } catch { /* swallow */ }
+        }
+      },
       onDone: () => {
         finalize({ text: accumulated, sessionId: sessionHandle.sessionId, code: 0, usage: {}, raw: accumulated, stderr: '' });
       },
@@ -2786,7 +2793,7 @@ async function ccCallStreaming(message, { store = 'cc', sessionKey, extraContext
     const p = _invokeDocChatViaPool({
       prompt: poolPrompt, sessionKey, model, effort: ccEffort,
       engineConfig: CONFIG.engine, systemPrompt,
-      onChunk,
+      onChunk, onToolUse,
       freshSession, timeoutMs: timeout,
     });
     if (onAbortReady) onAbortReady(p.abort);
@@ -4631,14 +4638,17 @@ const server = http.createServer(async (req, res) => {
     if (swept) result.lastSwept = swept.timestamp;
     // Surface in-flight sweep state so the UI can render a 'now sweeping (Xm)'
     // badge alongside the previous-completion 'swept N days ago' indicator.
-    // Memory wins when present, disk fallback survives dashboard restarts.
-    const sweepState = safeJson(path.join(ENGINE_DIR, 'kb-sweep-state.json'));
-    const memInFlight = !!global._kbSweepInFlight;
-    const diskInFlight = !!(sweepState && sweepState.status === 'in-flight');
-    if (memInFlight || diskInFlight) {
-      result.sweepInFlight = true;
-      result.sweepStartedAt = global._kbSweepStartedAt || (sweepState && sweepState.startedAt) || null;
-    }
+    // Source of truth: kb-sweep-state.json + PID liveness — the in-process
+    // sweep moved to a detached runner so in-memory globals are no longer
+    // authoritative (they die with the dashboard).
+    try {
+      const { readSweepLiveness } = require('./engine/kb-sweep');
+      const liveness = readSweepLiveness({ entryCount: entries.length });
+      if (liveness.inFlight && liveness.alive) {
+        result.sweepInFlight = true;
+        result.sweepStartedAt = liveness.startedAt || null;
+      }
+    } catch { /* best-effort UI indicator */ }
     return jsonReply(res, 200, result);
   }
@@ -4657,73 +4667,135 @@ const server = http.createServer(async (req, res) => {
   }
   async function handleKnowledgeSweep(req, res) {
-    // Auto-release stale guard — dynamic floor based on KB size (30 min min, +1s per entry)
-    const { staleGuardMs } = require('./engine/kb-sweep');
+    // Source of truth = kb-sweep-state.json + PID liveness. The sweep now runs
+    // as a detached child (engine/kb-sweep-runner.js) so it survives
+    // `minions restart`; the in-memory `global._kbSweep*` flags from the old
+    // in-process implementation are gone.
+    const {
+      readSweepLiveness, staleGuardMs, KB_SWEEP_STATE_PATH, KB_SWEEP_LOG_PATH, KB_SWEEP_RUNNER_PATH,
+    } = require('./engine/kb-sweep');
     const entryCount = (queries.getKnowledgeBaseEntries() || []).length;
     const guardMs = staleGuardMs(entryCount);
-    if (global._kbSweepInFlight && global._kbSweepStartedAt && Date.now() - global._kbSweepStartedAt > guardMs) {
-      console.log(`[kb-sweep] Auto-releasing stale guard (>${Math.round(guardMs / 60000)}min for ${entryCount} entries)`);
-      global._kbSweepInFlight = false;
-    }
-    // Disk-state fallback: if a previous dashboard process died mid-sweep, the
-    // state file says 'in-flight' forever. Treat it as stale past the guard so
-    // a new sweep can start.
-    const sweepStateFile = path.join(ENGINE_DIR, 'kb-sweep-state.json');
-    const diskState = safeJson(sweepStateFile);
-    const diskInFlight = !!(diskState && diskState.status === 'in-flight');
-    const diskStartedAt = diskState && diskState.startedAt ? Number(diskState.startedAt) : 0;
-    const diskStale = diskInFlight && diskStartedAt && Date.now() - diskStartedAt > guardMs;
-    if (diskStale) {
-      console.log(`[kb-sweep] Auto-releasing stale disk-state guard (>${Math.round(guardMs / 60000)}min)`);
-      try { shared.safeUnlink(sweepStateFile); } catch { /* ignore */ }
-    }
-    if (global._kbSweepInFlight || (diskInFlight && !diskStale)) {
+    // Synchronous pre-claim BEFORE awaiting the body so a concurrent POST
+    // arriving in the same tick sees in-flight state and can't double-spawn.
+    const sweepToken = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+    const liveness = readSweepLiveness({ entryCount });
+    if (liveness.inFlight && liveness.stale) {
+      const reason = !liveness.alive
+        ? `runner pid=${liveness.pid} is no longer alive`
+        : `>${Math.round(guardMs / 60000)}min for ${entryCount} entries`;
+      console.log(`[kb-sweep] Auto-releasing stale guard (${reason})`);
+      try { shared.safeUnlink(KB_SWEEP_STATE_PATH); } catch { /* ignore */ }
+    } else if (liveness.inFlight) {
       return jsonReply(res, 200, {
         ok: true, alreadyRunning: true,
-        startedAt: global._kbSweepStartedAt || diskStartedAt || null,
+        startedAt: liveness.startedAt || null,
       });
     }
-    const sweepToken = Date.now() + Math.random();
-    global._kbSweepToken = sweepToken;
-    global._kbSweepInFlight = true;
-    global._kbSweepStartedAt = Date.now();
+    // Claim the slot synchronously by writing a "starting" state. The runner
+    // will overwrite this with status:'in-flight' + its real pid once it boots.
+    // readSweepLiveness grants a 15s boot-grace to "starting" records with no pid.
+    const startedAt = Date.now();
+    try {
+      safeWrite(KB_SWEEP_STATE_PATH, JSON.stringify({
+        status: 'starting', startedAt, startedAtIso: new Date().toISOString(),
+        sweepToken, pid: null,
+      }));
+    } catch (e) {
+      console.error(`[kb-sweep] failed to write starting state: ${e.message}`);
+    }
     const body = await readBody(req).catch(() => ({}));
-    _runKbSweepBackground(body, sweepToken);
-    return jsonReply(res, 202, { ok: true, started: true });
-  }
-  async function _runKbSweepBackground(body, sweepToken) {
+    // Persist body to a temp file so spawn doesn't have to serialize large
+    // pinnedKeys arrays via argv. Skipped unless the body carries a
+    // `pinnedKeys` array or a `dryRun` value that is not null/undefined.
+    let bodyFile = null;
+    if (body && (Array.isArray(body.pinnedKeys) || body.dryRun != null)) {
+      bodyFile = path.join(ENGINE_DIR, `tmp-kb-sweep-body-${sweepToken}.json`);
+      try { safeWrite(bodyFile, JSON.stringify(body)); }
+      catch (e) {
+        console.error(`[kb-sweep] failed to write body-file ${bodyFile}: ${e.message}`);
+        bodyFile = null;
+      }
+    }
+    const { spawn: cpSpawn } = require('child_process');
+    // Open log fd in append mode so spawn can pipe stdio there. Child inherits
+    // the fd; parent closes its copy after spawn returns successfully.
+    let logFdNum = null;
+    let stdio = ['ignore', 'ignore', 'ignore'];
     try {
-      const { runKbSweep } = require('./engine/kb-sweep');
-      const result = await runKbSweep({ pinnedKeys: body.pinnedKeys, engineConfig: CONFIG.engine });
-      global._kbSweepLastResult = result;
-      global._kbSweepLastCompletedAt = Date.now();
+      logFdNum = fs.openSync(KB_SWEEP_LOG_PATH, 'a');
+      stdio = ['ignore', logFdNum, logFdNum];
     } catch (e) {
-      console.error('[kb-sweep] background error:', e.message);
-      global._kbSweepLastResult = { ok: false, error: e.message };
-      global._kbSweepLastCompletedAt = Date.now();
-    } finally { if (global._kbSweepToken === sweepToken) global._kbSweepInFlight = false; }
+      console.error(`[kb-sweep] failed to open log ${KB_SWEEP_LOG_PATH}: ${e.message}`);
+    }
+    const spawnArgs = ['--sweep-token', sweepToken];
+    if (bodyFile) spawnArgs.push('--body-file', bodyFile);
+    let proc;
+    try {
+      proc = cpSpawn(process.execPath, [KB_SWEEP_RUNNER_PATH, ...spawnArgs], {
+        cwd: MINIONS_DIR, stdio, detached: true, windowsHide: true,
+        env: { ...process.env },
+      });
+    } catch (e) {
+      if (logFdNum != null) try { fs.closeSync(logFdNum); } catch { /* ignore */ }
+      if (bodyFile) try { fs.unlinkSync(bodyFile); } catch { /* ignore */ }
+      // Release the "starting" claim on synchronous spawn failure so the user
+      // can retry immediately.
+      try { shared.safeUnlink(KB_SWEEP_STATE_PATH); } catch { /* ignore */ }
+      return jsonReply(res, 500, { error: `spawn failed: ${e.message}` });
+    }
+    if (logFdNum != null) try { fs.closeSync(logFdNum); } catch { /* ignore */ }
+    // Conditional update (a read-then-write check, not an atomic CAS): only
+    // move the state file from "starting" → "in-flight" if our sweepToken
+    // still owns it. If the (fast) runner already wrote "completed"/"failed"
+    // or its own "in-flight", leave that newer state alone.
+    try {
+      const current = safeJson(KB_SWEEP_STATE_PATH);
+      if (current && current.status === 'starting' && current.sweepToken === sweepToken) {
+        safeWrite(KB_SWEEP_STATE_PATH, JSON.stringify({
+          status: 'in-flight', startedAt, startedAtIso: new Date().toISOString(),
+          sweepToken, pid: proc.pid,
+        }));
+      }
+    } catch { /* best-effort */ }
+    proc.unref();
+    return jsonReply(res, 202, { ok: true, started: true, sweepToken });
   }
   function handleKnowledgeSweepStatus(req, res) {
-    // Disk-state fallback: when the dashboard restarts mid-sweep the in-memory
-    // globals get reset, but engine/kb-sweep-state.json survives. Memory still
-    // wins when present (faster, no disk read on every poll).
+    // Source of truth = kb-sweep-state.json + PID liveness. Globals are gone —
+    // the runner is detached, so its lifecycle is independent of this process.
+    const { readSweepLiveness } = require('./engine/kb-sweep');
+    const entries = queries.getKnowledgeBaseEntries() || [];
+    const liveness = readSweepLiveness({ entryCount: entries.length });
     const diskState = safeJson(path.join(ENGINE_DIR, 'kb-sweep-state.json'));
-    const memInFlight = !!global._kbSweepInFlight;
-    const diskInFlight = !!(diskState && diskState.status === 'in-flight');
-    const inFlight = memInFlight || diskInFlight;
-    const startedAt = global._kbSweepStartedAt || (diskState && diskState.startedAt) || null;
-    let lastResult = global._kbSweepLastResult || null;
-    let lastCompletedAt = global._kbSweepLastCompletedAt || null;
-    if (!lastResult && diskState && (diskState.status === 'completed' || diskState.status === 'failed')) {
-      if (diskState.status === 'failed') {
-        lastResult = { ok: false, error: diskState.error || 'sweep failed' };
-      } else {
-        lastResult = diskState.lastResult || { ok: true, summary: diskState.summary };
-      }
-      if (!lastCompletedAt && diskState.completedAt) lastCompletedAt = diskState.completedAt;
+    let inFlight = false;
+    let startedAt = null;
+    let lastResult = null;
+    let lastCompletedAt = null;
+    if (liveness.inFlight && liveness.alive) {
+      inFlight = true;
+      startedAt = liveness.startedAt || null;
+    } else if (liveness.inFlight && !liveness.alive) {
+      // Runner crashed pre-completion (or "starting" claim expired without a
+      // runner ever booting). Surface a synthetic error so the UI doesn't
+      // silently lose the previous attempt.
+      lastResult = { ok: false, error: 'sweep process exited before reporting completion' };
+      lastCompletedAt = liveness.startedAt || null;
+    } else if (diskState && diskState.status === 'completed') {
+      lastResult = diskState.lastResult || { ok: true, summary: diskState.summary };
+      lastCompletedAt = diskState.completedAt || null;
+    } else if (diskState && diskState.status === 'failed') {
+      lastResult = { ok: false, error: diskState.error || 'sweep failed' };
+      lastCompletedAt = diskState.completedAt || null;
     }
     return jsonReply(res, 200, { inFlight, startedAt, lastResult, lastCompletedAt });
   }
@@ -6246,7 +6318,7 @@ What would you like to discuss or change? When you're happy, say "approve" and I
    */
   function _invokeCcStream({ prompt, sessionId, liveState, toolUses, model, effort, maxTurns, engineConfig, systemPrompt = CC_STATIC_SYSTEM_PROMPT, tabId }) {
     if (shared.resolveCcUseWorkerPool(engineConfig)) {
-      return _invokeCcStreamViaPool({ prompt, liveState, model, effort, engineConfig, systemPrompt, tabId });
+      return _invokeCcStreamViaPool({ prompt, liveState, toolUses, model, effort, engineConfig, systemPrompt, tabId });
     }
     const { callLLMStreaming } = require('./engine/llm');
     return callLLMStreaming(prompt, systemPrompt, {
@@ -6280,16 +6352,17 @@ What would you like to discuss or change? When you're happy, say "approve" and I
    *     callLLMStreaming's contract is "full accumulated text"; we accumulate
    *     here so `liveState.text` and downstream chunk events keep the same
    *     semantics consumers already depend on.
-   *   - Tool calls are not surfaced in sub-task B (the pool ignores
-   *     `tool_call` notifications). `toolUses` stays empty on this path; if
-   *     sub-task C/D adds tool_call surfacing in the pool we'll plumb a
-   *     callback here too.
+   *   - Tool calls are surfaced via the pool's `onToolUse` callback (ACP
+   *     `tool_call` notification, mapped to Claude-style {name, input} so the
+   *     dashboard's existing formatToolSummary chips render unchanged).
+   *     `tool_call_update` events (status: completed) are intentionally
+   *     ignored — surfacing results too would double the chip count.
    *   - `usage` is reported as an empty object — ACP doesn't expose token
    *     counts in the in-flight session/update notifications, and the pool's
    *     long-lived process makes per-turn usage attribution non-trivial.
    *     trackEngineUsage is a no-op on `{}`.
    */
-  function _invokeCcStreamViaPool({ prompt, liveState, model, effort, engineConfig, systemPrompt, tabId }) {
+  function _invokeCcStreamViaPool({ prompt, liveState, toolUses, model, effort, engineConfig, systemPrompt, tabId }) {
     const resolvedTabId = tabId || 'default';
     let cancelled = false;
     let accumulated = '';
@@ -6331,6 +6404,13 @@ What would you like to discuss or change? When you're happy, say "approve" and I
           liveState.text = accumulated;
           if (liveState.writer) liveState.writer({ type: 'chunk', text: accumulated });
         },
+        onToolUse: (name, input) => {
+          _touchCcLiveStream(liveState);
+          const safeInput = input || {};
+          if (Array.isArray(toolUses)) toolUses.push({ name, input: safeInput });
+          if (Array.isArray(liveState.tools)) liveState.tools.push({ name, input: safeInput });
+          if (liveState.writer) liveState.writer({ type: 'tool', name, input: _lightToolInput(safeInput) });
+        },
         onDone: () => {
           resolveResult({ text: accumulated, sessionId: sessionHandle.sessionId, code: 0, usage: {}, raw: accumulated, stderr: '' });
         },

package/engine/cc-worker-pool.js CHANGED Viewed

@@ -251,10 +251,21 @@ class Worker {
         if (text && this.inflight.onChunk) {
           try { this.inflight.onChunk(text); } catch { /* swallow */ }
         }
+      } else if (update.sessionUpdate === 'tool_call' && this.inflight.onToolUse) {
+        // ACP `tool_call` (status: pending, fired at invocation time) is the
+        // pool's equivalent of Claude's tool_use event. We map kinds to
+        // Claude-style tool names so the dashboard's existing
+        // formatToolSummary (Bash → "$ <cmd>", Read → "Reading <path>", etc.)
+        // works unchanged. Status updates (`tool_call_update`, status:
+        // completed) carry the result and are ignored here — surfacing
+        // results too would double the chip count without adding info the
+        // user can act on.
+        const mapped = _mapAcpToolCallToToolUse(update);
+        if (mapped) {
+          try { this.inflight.onToolUse(mapped.name, mapped.input); }
+          catch { /* swallow */ }
+        }
       }
-      // Other update kinds (available_commands_update, tool_call, ...) are
-      // ignored in sub-task B. Sub-task C/D will surface tool_call to the
-      // dashboard's onToolUse callback.
     }
   }
@@ -279,7 +290,7 @@ class Worker {
   // ── Stream a single turn ───────────────────────────────────────────────
   stream(promptText, opts = {}) {
-    const { onChunk, onDone, onError, signal, systemPromptText } = opts;
+    const { onChunk, onToolUse, onDone, onError, signal, systemPromptText } = opts;
     if (this.killed) {
       const err = new Error('cc-worker-pool: tab is closed');
       if (onError) try { onError(err); } catch { /* swallow */ }
@@ -307,6 +318,7 @@ class Worker {
       id,
       sessionId: this.sessionId,
       onChunk,
+      onToolUse,
       onDone,
       onError,
       signal,
@@ -425,6 +437,46 @@ function _extractChunkText(content) {
   return '';
 }
+// Map an ACP `tool_call` session/update notification to the {name, input} shape
+// the dashboard's formatToolSummary already understands. ACP's `kind` is a
+// coarse category (execute|read|edit|search|fetch|think|other); we translate to
+// the closest Claude tool name so the existing chip formatters keep working
+// (Bash → "$ <cmd>", Read → "Reading <path>", etc.). Unknown kinds fall back
+// to ACP's human-readable `title` with the raw input attached, which renders
+// through the default `<title>(<key>: <val>)` formatter.
+function _mapAcpToolCallToToolUse(update) {
+  if (!update || update.sessionUpdate !== 'tool_call') return null;
+  const rawInput = (update.rawInput && typeof update.rawInput === 'object') ? update.rawInput : {};
+  const kind = String(update.kind || '').toLowerCase();
+  const title = update.title || '';
+  // For kinds with a clear Claude-tool equivalent, use that name + raw input.
+  switch (kind) {
+    case 'execute':
+      return { name: 'Bash', input: rawInput };
+    case 'read':
+      return { name: 'Read', input: rawInput };
+    case 'edit':
+      return { name: 'Edit', input: rawInput };
+    case 'search': {
+      // Heuristic: Grep needs a pattern; Glob needs a glob pattern.
+      // ACP doesn't distinguish, so prefer Grep when rawInput carries a string
+      // `path` or `regex` hint (matches the dashboard's Grep formatter
+      // "Searching <pat> in <path>"); otherwise fall back to Glob.
+      const isGrep = typeof rawInput.path === 'string' || typeof rawInput.regex === 'string';
+      return { name: isGrep ? 'Grep' : 'Glob', input: rawInput };
+    }
+    case 'fetch':
+      return { name: 'WebFetch', input: rawInput };
+    case 'think':
+      // No equivalent Claude tool; show the title so the user sees Copilot's
+      // own description of what it's thinking about.
+      return { name: title || 'Think', input: rawInput };
+    default:
+      // Fallback: show ACP's title and pass rawInput through. The dashboard's
+      // default formatter renders this as `<title>(<key>: <val>)`.
+      return { name: title || kind || 'Tool', input: rawInput };
+  }
+}
 // ── Public API ────────────────────────────────────────────────────────────
 async function getSession({ tabId, model, effort, mcpServers, systemPromptHash, cwd } = {}) {

package/engine/copilot-models.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
   "runtime": "copilot",
   "models": null,
-  "cachedAt": "2026-05-14T02:53:42.873Z"
+  "cachedAt": "2026-05-14T03:48:55.090Z"
 }

package/engine/kb-sweep-runner.js ADDED Viewed

@@ -0,0 +1,81 @@
+#!/usr/bin/env node
+/**
+ * engine/kb-sweep-runner.js — Detached entrypoint for the KB sweep.
+ *
+ * Spawned by dashboard.js `handleKnowledgeSweep` with `{ detached: true,
+ * stdio: ['ignore', logFd, logFd] }` so the sweep survives dashboard /
+ * engine restarts. The sweep regularly runs 1h+ and was previously killed
+ * mid-stream every `minions restart`.
+ *
+ * Args:
+ *   --sweep-token <token>   Opaque token from the dashboard (always read as a
+ *                           string). Defaults to String(Date.now()) if omitted.
+ *   --body-file <path>      Optional path to a JSON file with request body
+ *                           fields (pinnedKeys, dryRun). Deleted on exit.
+ *   --dry-run               Equivalent to `body.dryRun = true`.
+ *
+ * State protocol: runKbSweep itself writes `engine/kb-sweep-state.json`
+ * (in-flight → completed/failed) and includes `pid: process.pid` (this
+ * runner's pid) so the dashboard can liveness-check via `process.kill(pid, 0)`.
+ * Exits 0 on success, 1 on error. stdout/stderr land in engine/kb-sweep.log.
+ */
+const fs = require('fs');
+function getArg(argv, name) {
+  const idx = argv.indexOf(name);
+  if (idx >= 0 && idx + 1 < argv.length) return argv[idx + 1];
+  return null;
+}
+function hasFlag(argv, name) {
+  return argv.indexOf(name) >= 0;
+}
+const argv = process.argv.slice(2);
+const sweepToken = getArg(argv, '--sweep-token') || String(Date.now());
+const bodyFile = getArg(argv, '--body-file');
+const cliDryRun = hasFlag(argv, '--dry-run');
+let body = {};
+if (bodyFile) {
+  try {
+    const raw = fs.readFileSync(bodyFile, 'utf8');
+    body = JSON.parse(raw || '{}');
+  } catch (e) {
+    console.error(`[kb-sweep-runner] failed to read body-file ${bodyFile}: ${e.message}`);
+  }
+}
+const dryRun = cliDryRun || body.dryRun === true;
+// Lazy-require AFTER args are parsed so a malformed body-file doesn't drag in
+// the whole sweep stack before we've reported the failure.
+const queries = require('./queries');
+const { runKbSweep } = require('./kb-sweep');
+function cleanupBodyFile() {
+  if (!bodyFile) return;
+  try { fs.unlinkSync(bodyFile); } catch { /* ignore */ }
+}
+(async () => {
+  const startedIso = new Date().toISOString();
+  console.log(`[kb-sweep-runner] ${startedIso} starting pid=${process.pid} token=${sweepToken} dryRun=${dryRun}`);
+  try {
+    const engineConfig = (queries.getConfig() || {}).engine || {};
+    const result = await runKbSweep({
+      pinnedKeys: body.pinnedKeys,
+      engineConfig,
+      sweepToken,
+      dryRun,
+    });
+    const summary = result && result.summary ? result.summary : 'ok';
+    console.log(`[kb-sweep-runner] ${new Date().toISOString()} done: ${summary}`);
+    cleanupBodyFile();
+    process.exit(0);
+  } catch (e) {
+    const msg = e && e.message ? e.message : String(e);
+    console.error(`[kb-sweep-runner] ${new Date().toISOString()} error: ${msg}`);
+    if (e && e.stack) console.error(e.stack);
+    cleanupBodyFile();
+    process.exit(1);
+  }
+})();

package/engine/kb-sweep.js CHANGED Viewed

@@ -14,12 +14,14 @@ const path = require('path');
 const crypto = require('crypto');
 const shared = require('./shared');
 const queries = require('./queries');
-const { safeRead, safeWrite, safeUnlink, log, ts } = shared;
+const { safeRead, safeWrite, safeJson, safeUnlink, log, ts } = shared;
 const { MINIONS_DIR, ENGINE_DIR } = queries;
 const KB_DIR = path.join(MINIONS_DIR, 'knowledge');
 const SWEPT_DIR = path.join(KB_DIR, '_swept');
 const KB_SWEEP_STATE_PATH = path.join(ENGINE_DIR, 'kb-sweep-state.json');
+const KB_SWEEP_LOG_PATH = path.join(ENGINE_DIR, 'kb-sweep.log');
+const KB_SWEEP_RUNNER_PATH = path.join(__dirname, 'kb-sweep-runner.js');
 const SWEPT_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
 const COMPRESS_THRESHOLD_BYTES = 5000;
 const LLM_BATCH_SIZE = 30;
@@ -279,7 +281,62 @@ function _applyLlmPlan(plan, manifest, opts = {}) {
 }
 function _writeSweepState(state) {
-  try { safeWrite(KB_SWEEP_STATE_PATH, JSON.stringify(state)); } catch { /* ignore */ }
+  // Default `pid` to the current process pid (a `pid` already present in
+  // `state` wins because it is spread last) so the dashboard's liveness check
+  // (process.kill(pid, 0)) and the stale-guard can distinguish "still running"
+  // from "runner crashed". The sweepToken is not added here; callers pass it
+  // inside `state`. When this module is imported by the detached runner,
+  // process.pid is the runner's pid — which is exactly what we want.
+  const augmented = { pid: process.pid, ...state };
+  try { safeWrite(KB_SWEEP_STATE_PATH, JSON.stringify(augmented)); } catch { /* ignore */ }
+}
+/**
+ * Read kb-sweep-state.json and classify whether a sweep is alive + stale.
+ *
+ * Used by the dashboard's start endpoint, status endpoint, and stale-guard so
+ * they share a single source of truth (disk state + PID liveness) instead of
+ * relying on in-memory globals that die with the dashboard process.
+ *
+ * @param {object} [opts]
+ * @param {number} [opts.entryCount=0] - KB entry count for staleGuardMs()
+ * @param {number} [opts.now=Date.now()] - injectable clock for tests
+ * @param {(pid:number)=>boolean} [opts.isPidAlive] - injectable for tests
+ * @returns {{ inFlight: boolean, alive?: boolean, stale?: boolean, pid?: number,
+ *            startedAt?: number, sweepToken?: string|number|null, guardMs?: number,
+ *            status?: string }}
+ */
+function readSweepLiveness(opts = {}) {
+  const now = Number(opts.now) || Date.now();
+  const entryCount = Number(opts.entryCount) || 0;
+  const isPidAlive = typeof opts.isPidAlive === 'function'
+    ? opts.isPidAlive
+    : (pid) => { try { process.kill(pid, 0); return true; } catch { return false; } };
+  const state = safeJson(KB_SWEEP_STATE_PATH);
+  if (!state) return { inFlight: false };
+  // "starting" is written by the dashboard pre-spawn (no PID yet) to close the
+  // race window between two concurrent POSTs. "in-flight" is written by the
+  // runner once it boots and has its own pid.
+  if (state.status !== 'starting' && state.status !== 'in-flight') {
+    return { inFlight: false, status: state.status };
+  }
+  const pid = Number(state.pid) || 0;
+  const startedAt = Number(state.startedAt) || 0;
+  const guardMs = staleGuardMs(entryCount);
+  const age = startedAt ? now - startedAt : 0;
+  let alive;
+  if (state.status === 'starting') {
+    // No PID yet — grant a short boot-grace so spawn can complete and the
+    // runner can overwrite with status:'in-flight' + its pid.
+    const STARTING_GRACE_MS = 15000;
+    alive = age <= STARTING_GRACE_MS;
+  } else {
+    alive = pid > 0 ? !!isPidAlive(pid) : false;
+  }
+  const stale = !alive || (startedAt > 0 && age > guardMs);
+  return {
+    inFlight: true, alive, stale, pid, startedAt, guardMs,
+    sweepToken: state.sweepToken || null, status: state.status,
+  };
 }
 /**
@@ -298,23 +355,26 @@ function _writeSweepState(state) {
 async function runKbSweep(opts = {}) {
   const dryRun = !!opts.dryRun;
   const startedAt = Date.now();
-  if (!dryRun) _writeSweepState({ status: 'in-flight', startedAt, startedAtIso: ts() });
+  const sweepToken = opts.sweepToken != null ? opts.sweepToken : null;
+  // Always write state — even for dryRun — so a runner spawned with dryRun
+  // still reports terminal status and the dashboard pre-write doesn't leak
+  // a stale "in-flight"/"starting" record. The inner _runKbSweepImpl still
+  // honors dryRun for actual file mutations.
+  _writeSweepState({ status: 'in-flight', startedAt, startedAtIso: ts(), sweepToken, dryRun });
   try {
     const result = await _runKbSweepImpl(opts);
-    if (!dryRun) {
-      _writeSweepState({
-        status: 'completed', startedAt, completedAt: Date.now(), completedAtIso: ts(),
-        durationMs: result.durationMs, summary: result.summary, lastResult: result,
-      });
-    }
+    _writeSweepState({
+      status: 'completed', startedAt, completedAt: Date.now(), completedAtIso: ts(),
+      durationMs: result.durationMs, summary: result.summary, lastResult: result,
+      sweepToken, dryRun,
+    });
     return result;
   } catch (e) {
-    if (!dryRun) {
-      _writeSweepState({
-        status: 'failed', startedAt, completedAt: Date.now(), completedAtIso: ts(),
-        error: e && e.message ? e.message : String(e),
-      });
-    }
+    _writeSweepState({
+      status: 'failed', startedAt, completedAt: Date.now(), completedAtIso: ts(),
+      error: e && e.message ? e.message : String(e),
+      sweepToken, dryRun,
+    });
     throw e;
   }
 }
@@ -419,7 +479,10 @@ function staleGuardMs(entryCount) {
 module.exports = {
   runKbSweep,
   staleGuardMs,
+  readSweepLiveness,
   KB_SWEEP_STATE_PATH,
+  KB_SWEEP_LOG_PATH,
+  KB_SWEEP_RUNNER_PATH,
   // Exported for tests
   _hashEntry,
   _parseFrontmatter,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yemi33/minions",
-  "version": "0.1.1930",
+  "version": "0.1.1932",
   "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
   "bin": {
     "minions": "bin/minions.js"