@yemi33/minions 0.1.1930 → 0.1.1932

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dashboard.js CHANGED
@@ -2480,8 +2480,10 @@ async function _preflightModelCheck({ runtime: cliOverride, model: modelOverride
2480
2480
  * contract SSE consumers depend on).
2481
2481
  * - `usage` is `{}` because ACP `session/update` notifications don't
2482
2482
  * surface token counts; trackEngineUsage is a no-op on `{}`.
2483
- * - Tool calls are not surfaced (sub-task B/C don't plumb `tool_call`
2484
- * notifications into a callback). Matches CC's pool trade-off.
2483
+ * - Tool calls are surfaced via the optional `onToolUse(name, input)`
2484
+ * callback (ACP `tool_call` notification, mapped to Claude-style
2485
+ * {name, input}). `tool_call_update` (results) is ignored to avoid
2486
+ * double chips.
2485
2487
  * - Honors `timeoutMs`. On timeout: cancels the prompt, closes the tab
2486
2488
  * (so the next call rebuilds against a clean process), resolves with
2487
2489
  * `{ code: 1, stderr: 'doc-chat-pool: timeout after Xms' }`. The
@@ -2496,7 +2498,7 @@ async function _preflightModelCheck({ runtime: cliOverride, model: modelOverride
2496
2498
  * document body. Always re-sending extraContext is correctness-safe; the
2497
2499
  * pool's warm-process saving is preserved regardless.
2498
2500
  */
2499
- function _invokeDocChatViaPool({ prompt, model, effort, engineConfig, systemPrompt, sessionKey, freshSession, timeoutMs, onChunk }) {
2501
+ function _invokeDocChatViaPool({ prompt, model, effort, engineConfig, systemPrompt, sessionKey, freshSession, timeoutMs, onChunk, onToolUse }) {
2500
2502
  const oneShot = !!freshSession;
2501
2503
  const tabKey = oneShot
2502
2504
  ? 'doc-chat:fresh:' + shared.uid()
@@ -2574,6 +2576,11 @@ function _invokeDocChatViaPool({ prompt, model, effort, engineConfig, systemProm
2574
2576
  try { onChunk(accumulated); } catch { /* swallow */ }
2575
2577
  }
2576
2578
  },
2579
+ onToolUse: (name, input) => {
2580
+ if (onToolUse) {
2581
+ try { onToolUse(name, input || {}); } catch { /* swallow */ }
2582
+ }
2583
+ },
2577
2584
  onDone: () => {
2578
2585
  finalize({ text: accumulated, sessionId: sessionHandle.sessionId, code: 0, usage: {}, raw: accumulated, stderr: '' });
2579
2586
  },
@@ -2786,7 +2793,7 @@ async function ccCallStreaming(message, { store = 'cc', sessionKey, extraContext
2786
2793
  const p = _invokeDocChatViaPool({
2787
2794
  prompt: poolPrompt, sessionKey, model, effort: ccEffort,
2788
2795
  engineConfig: CONFIG.engine, systemPrompt,
2789
- onChunk,
2796
+ onChunk, onToolUse,
2790
2797
  freshSession, timeoutMs: timeout,
2791
2798
  });
2792
2799
  if (onAbortReady) onAbortReady(p.abort);
@@ -4631,14 +4638,17 @@ const server = http.createServer(async (req, res) => {
4631
4638
  if (swept) result.lastSwept = swept.timestamp;
4632
4639
  // Surface in-flight sweep state so the UI can render a 'now sweeping (Xm)'
4633
4640
  // badge alongside the previous-completion 'swept N days ago' indicator.
4634
- // Memory wins when present, disk fallback survives dashboard restarts.
4635
- const sweepState = safeJson(path.join(ENGINE_DIR, 'kb-sweep-state.json'));
4636
- const memInFlight = !!global._kbSweepInFlight;
4637
- const diskInFlight = !!(sweepState && sweepState.status === 'in-flight');
4638
- if (memInFlight || diskInFlight) {
4639
- result.sweepInFlight = true;
4640
- result.sweepStartedAt = global._kbSweepStartedAt || (sweepState && sweepState.startedAt) || null;
4641
- }
4641
+ // Source of truth: kb-sweep-state.json + PID liveness — the in-process
4642
+ // sweep moved to a detached runner so in-memory globals are no longer
4643
+ // authoritative (they die with the dashboard).
4644
+ try {
4645
+ const { readSweepLiveness } = require('./engine/kb-sweep');
4646
+ const liveness = readSweepLiveness({ entryCount: entries.length });
4647
+ if (liveness.inFlight && liveness.alive) {
4648
+ result.sweepInFlight = true;
4649
+ result.sweepStartedAt = liveness.startedAt || null;
4650
+ }
4651
+ } catch { /* best-effort UI indicator */ }
4642
4652
  return jsonReply(res, 200, result);
4643
4653
  }
4644
4654
 
@@ -4657,73 +4667,135 @@ const server = http.createServer(async (req, res) => {
4657
4667
  }
4658
4668
 
4659
4669
  async function handleKnowledgeSweep(req, res) {
4660
- // Auto-release stale guard — dynamic floor based on KB size (30 min min, +1s per entry)
4661
- const { staleGuardMs } = require('./engine/kb-sweep');
4670
+ // Source of truth = kb-sweep-state.json + PID liveness. The sweep now runs
4671
+ // as a detached child (engine/kb-sweep-runner.js) so it survives
4672
+ // `minions restart`; the in-memory `global._kbSweep*` flags from the old
4673
+ // in-process implementation are gone.
4674
+ const {
4675
+ readSweepLiveness, staleGuardMs, KB_SWEEP_STATE_PATH, KB_SWEEP_LOG_PATH, KB_SWEEP_RUNNER_PATH,
4676
+ } = require('./engine/kb-sweep');
4662
4677
  const entryCount = (queries.getKnowledgeBaseEntries() || []).length;
4663
4678
  const guardMs = staleGuardMs(entryCount);
4664
- if (global._kbSweepInFlight && global._kbSweepStartedAt && Date.now() - global._kbSweepStartedAt > guardMs) {
4665
- console.log(`[kb-sweep] Auto-releasing stale guard (>${Math.round(guardMs / 60000)}min for ${entryCount} entries)`);
4666
- global._kbSweepInFlight = false;
4667
- }
4668
- // Disk-state fallback: if a previous dashboard process died mid-sweep, the
4669
- // state file says 'in-flight' forever. Treat it as stale past the guard so
4670
- // a new sweep can start.
4671
- const sweepStateFile = path.join(ENGINE_DIR, 'kb-sweep-state.json');
4672
- const diskState = safeJson(sweepStateFile);
4673
- const diskInFlight = !!(diskState && diskState.status === 'in-flight');
4674
- const diskStartedAt = diskState && diskState.startedAt ? Number(diskState.startedAt) : 0;
4675
- const diskStale = diskInFlight && diskStartedAt && Date.now() - diskStartedAt > guardMs;
4676
- if (diskStale) {
4677
- console.log(`[kb-sweep] Auto-releasing stale disk-state guard (>${Math.round(guardMs / 60000)}min)`);
4678
- try { shared.safeUnlink(sweepStateFile); } catch { /* ignore */ }
4679
- }
4680
- if (global._kbSweepInFlight || (diskInFlight && !diskStale)) {
4679
+
4680
+ // Synchronous pre-claim BEFORE awaiting the body so a concurrent POST
4681
+ // arriving in the same tick sees in-flight state and can't double-spawn.
4682
+ const sweepToken = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
4683
+ const liveness = readSweepLiveness({ entryCount });
4684
+ if (liveness.inFlight && liveness.stale) {
4685
+ const reason = !liveness.alive
4686
+ ? `runner pid=${liveness.pid} is no longer alive`
4687
+ : `>${Math.round(guardMs / 60000)}min for ${entryCount} entries`;
4688
+ console.log(`[kb-sweep] Auto-releasing stale guard (${reason})`);
4689
+ try { shared.safeUnlink(KB_SWEEP_STATE_PATH); } catch { /* ignore */ }
4690
+ } else if (liveness.inFlight) {
4681
4691
  return jsonReply(res, 200, {
4682
4692
  ok: true, alreadyRunning: true,
4683
- startedAt: global._kbSweepStartedAt || diskStartedAt || null,
4693
+ startedAt: liveness.startedAt || null,
4684
4694
  });
4685
4695
  }
4686
- const sweepToken = Date.now() + Math.random();
4687
- global._kbSweepToken = sweepToken;
4688
- global._kbSweepInFlight = true;
4689
- global._kbSweepStartedAt = Date.now();
4696
+
4697
+ // Claim the slot synchronously by writing a "starting" state. The runner
4698
+ // will overwrite this with status:'in-flight' + its real pid once it boots.
4699
+ // readSweepLiveness grants a 15s boot-grace to "starting" records with no pid.
4700
+ const startedAt = Date.now();
4701
+ try {
4702
+ safeWrite(KB_SWEEP_STATE_PATH, JSON.stringify({
4703
+ status: 'starting', startedAt, startedAtIso: new Date().toISOString(),
4704
+ sweepToken, pid: null,
4705
+ }));
4706
+ } catch (e) {
4707
+ console.error(`[kb-sweep] failed to write starting state: ${e.message}`);
4708
+ }
4709
+
4690
4710
  const body = await readBody(req).catch(() => ({}));
4691
- _runKbSweepBackground(body, sweepToken);
4692
- return jsonReply(res, 202, { ok: true, started: true });
4693
- }
4694
4711
 
4695
- async function _runKbSweepBackground(body, sweepToken) {
4712
+ // Persist body to a temp file so spawn doesn't have to serialize large
4713
+ // pinnedKeys arrays via argv. Skip when body is empty.
4714
+ let bodyFile = null;
4715
+ if (body && (Array.isArray(body.pinnedKeys) || body.dryRun != null)) {
4716
+ bodyFile = path.join(ENGINE_DIR, `tmp-kb-sweep-body-${sweepToken}.json`);
4717
+ try { safeWrite(bodyFile, JSON.stringify(body)); }
4718
+ catch (e) {
4719
+ console.error(`[kb-sweep] failed to write body-file ${bodyFile}: ${e.message}`);
4720
+ bodyFile = null;
4721
+ }
4722
+ }
4723
+
4724
+ const { spawn: cpSpawn } = require('child_process');
4725
+ // Open log fd in append mode so spawn can pipe stdio there. Child inherits
4726
+ // the fd; parent closes its copy after spawn returns successfully.
4727
+ let logFdNum = null;
4728
+ let stdio = ['ignore', 'ignore', 'ignore'];
4696
4729
  try {
4697
- const { runKbSweep } = require('./engine/kb-sweep');
4698
- const result = await runKbSweep({ pinnedKeys: body.pinnedKeys, engineConfig: CONFIG.engine });
4699
- global._kbSweepLastResult = result;
4700
- global._kbSweepLastCompletedAt = Date.now();
4730
+ logFdNum = fs.openSync(KB_SWEEP_LOG_PATH, 'a');
4731
+ stdio = ['ignore', logFdNum, logFdNum];
4701
4732
  } catch (e) {
4702
- console.error('[kb-sweep] background error:', e.message);
4703
- global._kbSweepLastResult = { ok: false, error: e.message };
4704
- global._kbSweepLastCompletedAt = Date.now();
4705
- } finally { if (global._kbSweepToken === sweepToken) global._kbSweepInFlight = false; }
4733
+ console.error(`[kb-sweep] failed to open log ${KB_SWEEP_LOG_PATH}: ${e.message}`);
4734
+ }
4735
+
4736
+ const spawnArgs = ['--sweep-token', sweepToken];
4737
+ if (bodyFile) spawnArgs.push('--body-file', bodyFile);
4738
+
4739
+ let proc;
4740
+ try {
4741
+ proc = cpSpawn(process.execPath, [KB_SWEEP_RUNNER_PATH, ...spawnArgs], {
4742
+ cwd: MINIONS_DIR, stdio, detached: true, windowsHide: true,
4743
+ env: { ...process.env },
4744
+ });
4745
+ } catch (e) {
4746
+ if (logFdNum != null) try { fs.closeSync(logFdNum); } catch { /* ignore */ }
4747
+ if (bodyFile) try { fs.unlinkSync(bodyFile); } catch { /* ignore */ }
4748
+ // Release the "starting" claim on synchronous spawn failure so the user
4749
+ // can retry immediately.
4750
+ try { shared.safeUnlink(KB_SWEEP_STATE_PATH); } catch { /* ignore */ }
4751
+ return jsonReply(res, 500, { error: `spawn failed: ${e.message}` });
4752
+ }
4753
+ if (logFdNum != null) try { fs.closeSync(logFdNum); } catch { /* ignore */ }
4754
+
4755
+ // Conditional CAS: only update the state file from "starting" → "in-flight"
4756
+ // if our sweepToken still owns it. If the (fast) runner already wrote
4757
+ // "completed"/"failed" or its own "in-flight", leave that newer state alone.
4758
+ try {
4759
+ const current = safeJson(KB_SWEEP_STATE_PATH);
4760
+ if (current && current.status === 'starting' && current.sweepToken === sweepToken) {
4761
+ safeWrite(KB_SWEEP_STATE_PATH, JSON.stringify({
4762
+ status: 'in-flight', startedAt, startedAtIso: new Date().toISOString(),
4763
+ sweepToken, pid: proc.pid,
4764
+ }));
4765
+ }
4766
+ } catch { /* best-effort */ }
4767
+
4768
+ proc.unref();
4769
+ return jsonReply(res, 202, { ok: true, started: true, sweepToken });
4706
4770
  }
4707
4771
 
4708
4772
 
4709
4773
  function handleKnowledgeSweepStatus(req, res) {
4710
- // Disk-state fallback: when the dashboard restarts mid-sweep the in-memory
4711
- // globals get reset, but engine/kb-sweep-state.json survives. Memory still
4712
- // wins when present (faster, no disk read on every poll).
4774
+ // Source of truth = kb-sweep-state.json + PID liveness. Globals are gone —
4775
+ // the runner is detached, so its lifecycle is independent of this process.
4776
+ const { readSweepLiveness } = require('./engine/kb-sweep');
4777
+ const entries = queries.getKnowledgeBaseEntries() || [];
4778
+ const liveness = readSweepLiveness({ entryCount: entries.length });
4713
4779
  const diskState = safeJson(path.join(ENGINE_DIR, 'kb-sweep-state.json'));
4714
- const memInFlight = !!global._kbSweepInFlight;
4715
- const diskInFlight = !!(diskState && diskState.status === 'in-flight');
4716
- const inFlight = memInFlight || diskInFlight;
4717
- const startedAt = global._kbSweepStartedAt || (diskState && diskState.startedAt) || null;
4718
- let lastResult = global._kbSweepLastResult || null;
4719
- let lastCompletedAt = global._kbSweepLastCompletedAt || null;
4720
- if (!lastResult && diskState && (diskState.status === 'completed' || diskState.status === 'failed')) {
4721
- if (diskState.status === 'failed') {
4722
- lastResult = { ok: false, error: diskState.error || 'sweep failed' };
4723
- } else {
4724
- lastResult = diskState.lastResult || { ok: true, summary: diskState.summary };
4725
- }
4726
- if (!lastCompletedAt && diskState.completedAt) lastCompletedAt = diskState.completedAt;
4780
+ let inFlight = false;
4781
+ let startedAt = null;
4782
+ let lastResult = null;
4783
+ let lastCompletedAt = null;
4784
+ if (liveness.inFlight && liveness.alive) {
4785
+ inFlight = true;
4786
+ startedAt = liveness.startedAt || null;
4787
+ } else if (liveness.inFlight && !liveness.alive) {
4788
+ // Runner crashed pre-completion (or "starting" claim expired without a
4789
+ // runner ever booting). Surface a synthetic error so the UI doesn't
4790
+ // silently lose the previous attempt.
4791
+ lastResult = { ok: false, error: 'sweep process exited before reporting completion' };
4792
+ lastCompletedAt = liveness.startedAt || null;
4793
+ } else if (diskState && diskState.status === 'completed') {
4794
+ lastResult = diskState.lastResult || { ok: true, summary: diskState.summary };
4795
+ lastCompletedAt = diskState.completedAt || null;
4796
+ } else if (diskState && diskState.status === 'failed') {
4797
+ lastResult = { ok: false, error: diskState.error || 'sweep failed' };
4798
+ lastCompletedAt = diskState.completedAt || null;
4727
4799
  }
4728
4800
  return jsonReply(res, 200, { inFlight, startedAt, lastResult, lastCompletedAt });
4729
4801
  }
@@ -6246,7 +6318,7 @@ What would you like to discuss or change? When you're happy, say "approve" and I
6246
6318
  */
6247
6319
  function _invokeCcStream({ prompt, sessionId, liveState, toolUses, model, effort, maxTurns, engineConfig, systemPrompt = CC_STATIC_SYSTEM_PROMPT, tabId }) {
6248
6320
  if (shared.resolveCcUseWorkerPool(engineConfig)) {
6249
- return _invokeCcStreamViaPool({ prompt, liveState, model, effort, engineConfig, systemPrompt, tabId });
6321
+ return _invokeCcStreamViaPool({ prompt, liveState, toolUses, model, effort, engineConfig, systemPrompt, tabId });
6250
6322
  }
6251
6323
  const { callLLMStreaming } = require('./engine/llm');
6252
6324
  return callLLMStreaming(prompt, systemPrompt, {
@@ -6280,16 +6352,17 @@ What would you like to discuss or change? When you're happy, say "approve" and I
6280
6352
  * callLLMStreaming's contract is "full accumulated text"; we accumulate
6281
6353
  * here so `liveState.text` and downstream chunk events keep the same
6282
6354
  * semantics consumers already depend on.
6283
- * - Tool calls are not surfaced in sub-task B (the pool ignores
6284
- * `tool_call` notifications). `toolUses` stays empty on this path; if
6285
- * sub-task C/D adds tool_call surfacing in the pool we'll plumb a
6286
- * callback here too.
6355
+ * - Tool calls are surfaced via the pool's `onToolUse` callback (ACP
6356
+ * `tool_call` notification, mapped to Claude-style {name, input} so the
6357
+ * dashboard's existing formatToolSummary chips render unchanged).
6358
+ * `tool_call_update` events (status: completed) are intentionally
6359
+ * ignored — surfacing results too would double the chip count.
6287
6360
  * - `usage` is reported as an empty object — ACP doesn't expose token
6288
6361
  * counts in the in-flight session/update notifications, and the pool's
6289
6362
  * long-lived process makes per-turn usage attribution non-trivial.
6290
6363
  * trackEngineUsage is a no-op on `{}`.
6291
6364
  */
6292
- function _invokeCcStreamViaPool({ prompt, liveState, model, effort, engineConfig, systemPrompt, tabId }) {
6365
+ function _invokeCcStreamViaPool({ prompt, liveState, toolUses, model, effort, engineConfig, systemPrompt, tabId }) {
6293
6366
  const resolvedTabId = tabId || 'default';
6294
6367
  let cancelled = false;
6295
6368
  let accumulated = '';
@@ -6331,6 +6404,13 @@ What would you like to discuss or change? When you're happy, say "approve" and I
6331
6404
  liveState.text = accumulated;
6332
6405
  if (liveState.writer) liveState.writer({ type: 'chunk', text: accumulated });
6333
6406
  },
6407
+ onToolUse: (name, input) => {
6408
+ _touchCcLiveStream(liveState);
6409
+ const safeInput = input || {};
6410
+ if (Array.isArray(toolUses)) toolUses.push({ name, input: safeInput });
6411
+ if (Array.isArray(liveState.tools)) liveState.tools.push({ name, input: safeInput });
6412
+ if (liveState.writer) liveState.writer({ type: 'tool', name, input: _lightToolInput(safeInput) });
6413
+ },
6334
6414
  onDone: () => {
6335
6415
  resolveResult({ text: accumulated, sessionId: sessionHandle.sessionId, code: 0, usage: {}, raw: accumulated, stderr: '' });
6336
6416
  },
@@ -251,10 +251,21 @@ class Worker {
251
251
  if (text && this.inflight.onChunk) {
252
252
  try { this.inflight.onChunk(text); } catch { /* swallow */ }
253
253
  }
254
+ } else if (update.sessionUpdate === 'tool_call' && this.inflight.onToolUse) {
255
+ // ACP `tool_call` (status: pending, fired at invocation time) is the
256
+ // pool's equivalent of Claude's tool_use event. We map kinds to
257
+ // Claude-style tool names so the dashboard's existing
258
+ // formatToolSummary (Bash → "$ <cmd>", Read → "Reading <path>", etc.)
259
+ // works unchanged. Status updates (`tool_call_update`, status:
260
+ // completed) carry the result and are ignored here — surfacing
261
+ // results too would double the chip count without adding info the
262
+ // user can act on.
263
+ const mapped = _mapAcpToolCallToToolUse(update);
264
+ if (mapped) {
265
+ try { this.inflight.onToolUse(mapped.name, mapped.input); }
266
+ catch { /* swallow */ }
267
+ }
254
268
  }
255
- // Other update kinds (available_commands_update, tool_call, ...) are
256
- // ignored in sub-task B. Sub-task C/D will surface tool_call to the
257
- // dashboard's onToolUse callback.
258
269
  }
259
270
  }
260
271
 
@@ -279,7 +290,7 @@ class Worker {
279
290
 
280
291
  // ── Stream a single turn ───────────────────────────────────────────────
281
292
  stream(promptText, opts = {}) {
282
- const { onChunk, onDone, onError, signal, systemPromptText } = opts;
293
+ const { onChunk, onToolUse, onDone, onError, signal, systemPromptText } = opts;
283
294
  if (this.killed) {
284
295
  const err = new Error('cc-worker-pool: tab is closed');
285
296
  if (onError) try { onError(err); } catch { /* swallow */ }
@@ -307,6 +318,7 @@ class Worker {
307
318
  id,
308
319
  sessionId: this.sessionId,
309
320
  onChunk,
321
+ onToolUse,
310
322
  onDone,
311
323
  onError,
312
324
  signal,
@@ -425,6 +437,46 @@ function _extractChunkText(content) {
425
437
  return '';
426
438
  }
427
439
 
440
+ // Map an ACP `tool_call` session/update notification to the {name, input} shape
441
+ // the dashboard's formatToolSummary already understands. ACP's `kind` is a
442
+ // coarse category (execute|read|edit|search|fetch|think|other); we translate to
443
+ // the closest Claude tool name so the existing chip formatters keep working
444
+ // (Bash → "$ <cmd>", Read → "Reading <path>", etc.). Unknown kinds fall back
445
+ // to ACP's human-readable `title` with the raw input attached, which renders
446
+ // through the default `<title>(<key>: <val>)` formatter.
447
+ function _mapAcpToolCallToToolUse(update) {
448
+ if (!update || update.sessionUpdate !== 'tool_call') return null;
449
+ const rawInput = (update.rawInput && typeof update.rawInput === 'object') ? update.rawInput : {};
450
+ const kind = String(update.kind || '').toLowerCase();
451
+ const title = update.title || '';
452
+ // For kinds with a clear Claude-tool equivalent, use that name + raw input.
453
+ switch (kind) {
454
+ case 'execute':
455
+ return { name: 'Bash', input: rawInput };
456
+ case 'read':
457
+ return { name: 'Read', input: rawInput };
458
+ case 'edit':
459
+ return { name: 'Edit', input: rawInput };
460
+ case 'search': {
461
+ // Heuristic: Grep needs a pattern; Glob needs a glob pattern.
462
+ // ACP doesn't distinguish, so prefer Grep when a `path` hint is present
463
+ // (matches the dashboard's Grep formatter "Searching <pat> in <path>").
464
+ const isGrep = typeof rawInput.path === 'string' || typeof rawInput.regex === 'string';
465
+ return { name: isGrep ? 'Grep' : 'Glob', input: rawInput };
466
+ }
467
+ case 'fetch':
468
+ return { name: 'WebFetch', input: rawInput };
469
+ case 'think':
470
+ // No equivalent Claude tool; show the title so the user sees Copilot's
471
+ // own description of what it's thinking about.
472
+ return { name: title || 'Think', input: rawInput };
473
+ default:
474
+ // Fallback: show ACP's title and pass rawInput through. The dashboard's
475
+ // default formatter renders this as `<title>(<key>: <val>)`.
476
+ return { name: title || kind || 'Tool', input: rawInput };
477
+ }
478
+ }
479
+
428
480
  // ── Public API ────────────────────────────────────────────────────────────
429
481
 
430
482
  async function getSession({ tabId, model, effort, mcpServers, systemPromptHash, cwd } = {}) {
@@ -1,5 +1,5 @@
1
1
  {
2
2
  "runtime": "copilot",
3
3
  "models": null,
4
- "cachedAt": "2026-05-14T02:53:42.873Z"
4
+ "cachedAt": "2026-05-14T03:48:55.090Z"
5
5
  }
@@ -0,0 +1,81 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * engine/kb-sweep-runner.js — Detached entrypoint for the KB sweep.
4
+ *
5
+ * Spawned by dashboard.js `handleKnowledgeSweep` with `{ detached: true,
6
+ * stdio: ['ignore', logFd, logFd] }` so the sweep survives dashboard /
7
+ * engine restarts. The sweep regularly runs 1h+ and was previously killed
8
+ * mid-stream every `minions restart`.
9
+ *
10
+ * Args:
11
+ * --sweep-token <token> Opaque token from the dashboard (string/number).
12
+ * --body-file <path> Optional path to a JSON file with request body
13
+ * fields (pinnedKeys, dryRun). Deleted on exit.
14
+ * --dry-run Equivalent to `body.dryRun = true`.
15
+ *
16
+ * State protocol: runKbSweep itself writes `engine/kb-sweep-state.json`
17
+ * (in-flight → completed/failed) and includes `pid: process.pid` (this
18
+ * runner's pid) so the dashboard can liveness-check via `process.kill(pid, 0)`.
19
+ * Exits 0 on success, 1 on error. stdout/stderr land in engine/kb-sweep.log.
20
+ */
21
+
22
+ const fs = require('fs');
23
+
24
+ function getArg(argv, name) {
25
+ const idx = argv.indexOf(name);
26
+ if (idx >= 0 && idx + 1 < argv.length) return argv[idx + 1];
27
+ return null;
28
+ }
29
+ function hasFlag(argv, name) {
30
+ return argv.indexOf(name) >= 0;
31
+ }
32
+
33
+ const argv = process.argv.slice(2);
34
+ const sweepToken = getArg(argv, '--sweep-token') || String(Date.now());
35
+ const bodyFile = getArg(argv, '--body-file');
36
+ const cliDryRun = hasFlag(argv, '--dry-run');
37
+
38
+ let body = {};
39
+ if (bodyFile) {
40
+ try {
41
+ const raw = fs.readFileSync(bodyFile, 'utf8');
42
+ body = JSON.parse(raw || '{}');
43
+ } catch (e) {
44
+ console.error(`[kb-sweep-runner] failed to read body-file ${bodyFile}: ${e.message}`);
45
+ }
46
+ }
47
+ const dryRun = cliDryRun || body.dryRun === true;
48
+
49
+ // Lazy-require AFTER args are parsed so a malformed body-file doesn't drag in
50
+ // the whole sweep stack before we've reported the failure.
51
+ const queries = require('./queries');
52
+ const { runKbSweep } = require('./kb-sweep');
53
+
54
+ function cleanupBodyFile() {
55
+ if (!bodyFile) return;
56
+ try { fs.unlinkSync(bodyFile); } catch { /* ignore */ }
57
+ }
58
+
59
+ (async () => {
60
+ const startedIso = new Date().toISOString();
61
+ console.log(`[kb-sweep-runner] ${startedIso} starting pid=${process.pid} token=${sweepToken} dryRun=${dryRun}`);
62
+ try {
63
+ const engineConfig = (queries.getConfig() || {}).engine || {};
64
+ const result = await runKbSweep({
65
+ pinnedKeys: body.pinnedKeys,
66
+ engineConfig,
67
+ sweepToken,
68
+ dryRun,
69
+ });
70
+ const summary = result && result.summary ? result.summary : 'ok';
71
+ console.log(`[kb-sweep-runner] ${new Date().toISOString()} done: ${summary}`);
72
+ cleanupBodyFile();
73
+ process.exit(0);
74
+ } catch (e) {
75
+ const msg = e && e.message ? e.message : String(e);
76
+ console.error(`[kb-sweep-runner] ${new Date().toISOString()} error: ${msg}`);
77
+ if (e && e.stack) console.error(e.stack);
78
+ cleanupBodyFile();
79
+ process.exit(1);
80
+ }
81
+ })();
@@ -14,12 +14,14 @@ const path = require('path');
14
14
  const crypto = require('crypto');
15
15
  const shared = require('./shared');
16
16
  const queries = require('./queries');
17
- const { safeRead, safeWrite, safeUnlink, log, ts } = shared;
17
+ const { safeRead, safeWrite, safeJson, safeUnlink, log, ts } = shared;
18
18
  const { MINIONS_DIR, ENGINE_DIR } = queries;
19
19
 
20
20
  const KB_DIR = path.join(MINIONS_DIR, 'knowledge');
21
21
  const SWEPT_DIR = path.join(KB_DIR, '_swept');
22
22
  const KB_SWEEP_STATE_PATH = path.join(ENGINE_DIR, 'kb-sweep-state.json');
23
+ const KB_SWEEP_LOG_PATH = path.join(ENGINE_DIR, 'kb-sweep.log');
24
+ const KB_SWEEP_RUNNER_PATH = path.join(__dirname, 'kb-sweep-runner.js');
23
25
  const SWEPT_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
24
26
  const COMPRESS_THRESHOLD_BYTES = 5000;
25
27
  const LLM_BATCH_SIZE = 30;
@@ -279,7 +281,62 @@ function _applyLlmPlan(plan, manifest, opts = {}) {
279
281
  }
280
282
 
281
283
  function _writeSweepState(state) {
282
- try { safeWrite(KB_SWEEP_STATE_PATH, JSON.stringify(state)); } catch { /* ignore */ }
284
+ // Always include the current process pid + the caller-supplied sweepToken so
285
+ // the dashboard's liveness check (process.kill(pid, 0)) and the stale-guard
286
+ // can distinguish "still running" from "runner crashed". When this module is
287
+ // imported by the detached runner, process.pid is the runner's pid — which
288
+ // is exactly what we want.
289
+ const augmented = { pid: process.pid, ...state };
290
+ try { safeWrite(KB_SWEEP_STATE_PATH, JSON.stringify(augmented)); } catch { /* ignore */ }
291
+ }
292
+
293
+ /**
294
+ * Read kb-sweep-state.json and classify whether a sweep is alive + stale.
295
+ *
296
+ * Used by the dashboard's start endpoint, status endpoint, and stale-guard so
297
+ * they share a single source of truth (disk state + PID liveness) instead of
298
+ * relying on in-memory globals that die with the dashboard process.
299
+ *
300
+ * @param {object} [opts]
301
+ * @param {number} [opts.entryCount=0] - KB entry count for staleGuardMs()
302
+ * @param {number} [opts.now=Date.now()] - injectable clock for tests
303
+ * @param {(pid:number)=>boolean} [opts.isPidAlive] - injectable for tests
304
+ * @returns {{ inFlight: boolean, alive?: boolean, stale?: boolean, pid?: number,
305
+ * startedAt?: number, sweepToken?: string|number|null, guardMs?: number,
306
+ * status?: string }}
307
+ */
308
+ function readSweepLiveness(opts = {}) {
309
+ const now = Number(opts.now) || Date.now();
310
+ const entryCount = Number(opts.entryCount) || 0;
311
+ const isPidAlive = typeof opts.isPidAlive === 'function'
312
+ ? opts.isPidAlive
313
+ : (pid) => { try { process.kill(pid, 0); return true; } catch { return false; } };
314
+ const state = safeJson(KB_SWEEP_STATE_PATH);
315
+ if (!state) return { inFlight: false };
316
+ // "starting" is written by the dashboard pre-spawn (no PID yet) to close the
317
+ // race window between two concurrent POSTs. "in-flight" is written by the
318
+ // runner once it boots and has its own pid.
319
+ if (state.status !== 'starting' && state.status !== 'in-flight') {
320
+ return { inFlight: false, status: state.status };
321
+ }
322
+ const pid = Number(state.pid) || 0;
323
+ const startedAt = Number(state.startedAt) || 0;
324
+ const guardMs = staleGuardMs(entryCount);
325
+ const age = startedAt ? now - startedAt : 0;
326
+ let alive;
327
+ if (state.status === 'starting') {
328
+ // No PID yet — grant a short boot-grace so spawn can complete and the
329
+ // runner can overwrite with status:'in-flight' + its pid.
330
+ const STARTING_GRACE_MS = 15000;
331
+ alive = age <= STARTING_GRACE_MS;
332
+ } else {
333
+ alive = pid > 0 ? !!isPidAlive(pid) : false;
334
+ }
335
+ const stale = !alive || (startedAt > 0 && age > guardMs);
336
+ return {
337
+ inFlight: true, alive, stale, pid, startedAt, guardMs,
338
+ sweepToken: state.sweepToken || null, status: state.status,
339
+ };
283
340
  }
284
341
 
285
342
  /**
@@ -298,23 +355,26 @@ function _writeSweepState(state) {
298
355
  async function runKbSweep(opts = {}) {
299
356
  const dryRun = !!opts.dryRun;
300
357
  const startedAt = Date.now();
301
- if (!dryRun) _writeSweepState({ status: 'in-flight', startedAt, startedAtIso: ts() });
358
+ const sweepToken = opts.sweepToken != null ? opts.sweepToken : null;
359
+ // Always write state — even for dryRun — so a runner spawned with dryRun
360
+ // still reports terminal status and the dashboard pre-write doesn't leak
361
+ // a stale "in-flight"/"starting" record. The inner _runKbSweepImpl still
362
+ // honors dryRun for actual file mutations.
363
+ _writeSweepState({ status: 'in-flight', startedAt, startedAtIso: ts(), sweepToken, dryRun });
302
364
  try {
303
365
  const result = await _runKbSweepImpl(opts);
304
- if (!dryRun) {
305
- _writeSweepState({
306
- status: 'completed', startedAt, completedAt: Date.now(), completedAtIso: ts(),
307
- durationMs: result.durationMs, summary: result.summary, lastResult: result,
308
- });
309
- }
366
+ _writeSweepState({
367
+ status: 'completed', startedAt, completedAt: Date.now(), completedAtIso: ts(),
368
+ durationMs: result.durationMs, summary: result.summary, lastResult: result,
369
+ sweepToken, dryRun,
370
+ });
310
371
  return result;
311
372
  } catch (e) {
312
- if (!dryRun) {
313
- _writeSweepState({
314
- status: 'failed', startedAt, completedAt: Date.now(), completedAtIso: ts(),
315
- error: e && e.message ? e.message : String(e),
316
- });
317
- }
373
+ _writeSweepState({
374
+ status: 'failed', startedAt, completedAt: Date.now(), completedAtIso: ts(),
375
+ error: e && e.message ? e.message : String(e),
376
+ sweepToken, dryRun,
377
+ });
318
378
  throw e;
319
379
  }
320
380
  }
@@ -419,7 +479,10 @@ function staleGuardMs(entryCount) {
419
479
  module.exports = {
420
480
  runKbSweep,
421
481
  staleGuardMs,
482
+ readSweepLiveness,
422
483
  KB_SWEEP_STATE_PATH,
484
+ KB_SWEEP_LOG_PATH,
485
+ KB_SWEEP_RUNNER_PATH,
423
486
  // Exported for tests
424
487
  _hashEntry,
425
488
  _parseFrontmatter,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.1930",
3
+ "version": "0.1.1932",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"