@yemi33/minions 0.1.1577 → 0.1.1579

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.1579 (2026-04-28)
4
+
5
+ ### Features
6
+ - hash-dedup, compress+normalize pass, dynamic stale-guard, rich result
7
+
8
+ ### Other
9
+ - Keep CC streams reconnectable
10
+
3
11
  ## 0.1.1577 (2026-04-27)
4
12
 
5
13
  ### Fixes
@@ -94,6 +94,13 @@ function _ccFindPinTarget(query) {
94
94
  function ccAbort() {
95
95
  var tab = _ccActiveTab();
96
96
  if (tab && tab._abortController) {
97
+ try {
98
+ fetch('/api/command-center/abort', {
99
+ method: 'POST',
100
+ headers: { 'Content-Type': 'application/json' },
101
+ body: JSON.stringify({ tabId: tab.id })
102
+ }).catch(function() {});
103
+ } catch {}
97
104
  tab._abortController.abort();
98
105
  tab._abortController = null;
99
106
  }
@@ -211,6 +218,13 @@ function ccCloseTab(id) {
211
218
  var closingTab = _ccTabs.find(function(t) { return t.id === id; });
212
219
  if (closingTab && closingTab._sending) {
213
220
  if (!confirm('This tab has an active request. Close anyway?')) return;
221
+ try {
222
+ fetch('/api/command-center/abort', {
223
+ method: 'POST',
224
+ headers: { 'Content-Type': 'application/json' },
225
+ body: JSON.stringify({ tabId: id })
226
+ }).catch(function() {});
227
+ } catch {}
214
228
  if (closingTab._abortController) { closingTab._abortController.abort(); closingTab._abortController = null; }
215
229
  closingTab._sending = false;
216
230
  closingTab._queue = [];
@@ -445,6 +459,8 @@ async function _ccDoSend(message, skipUserMsg, forceTabId) {
445
459
  if (existingQueueEl) existingQueueEl.remove();
446
460
 
447
461
  var ccStartTime = Date.now();
462
+ var reconnectAttempts = 0;
463
+ var streamStatusNote = '';
448
464
  var phases = [
449
465
  [0, 'Thinking...'],
450
466
  [3000, 'Reading minions context...'],
@@ -511,6 +527,9 @@ async function _ccDoSend(message, skipUserMsg, forceTabId) {
511
527
  if (streamedText) {
512
528
  html += renderMd(streamedText);
513
529
  }
530
+ if (streamStatusNote) {
531
+ html += '<div style="margin-top:6px;font-size:10px;color:var(--muted)">' + escHtml(streamStatusNote) + '</div>';
532
+ }
514
533
  html += '<div style="margin-top:' + (streamedText ? '6px' : '0') + '">' + _getThinkingHtml() + '</div>';
515
534
  streamDiv.innerHTML = html;
516
535
  // Re-append queue indicators so they stay below the streaming content
@@ -531,35 +550,70 @@ async function _ccDoSend(message, skipUserMsg, forceTabId) {
531
550
  var phaseTimer = setInterval(updateStreamDiv, 1000);
532
551
  updateStreamDiv(); // render proper layout immediately (not raw "Thinking..." text)
533
552
 
534
- try {
535
- // Stream response via SSE — shows text as it arrives
553
+ async function _ccConsumeStream(requestBody, isReconnect) {
536
554
  var res = await fetch('/api/command-center/stream', {
537
555
  method: 'POST', headers: { 'Content-Type': 'application/json' },
538
- body: JSON.stringify({ message: message, tabId: activeTabId, sessionId: activeTab.sessionId || null }),
556
+ body: JSON.stringify(requestBody),
539
557
  signal: activeTab._abortController ? activeTab._abortController.signal : AbortSignal.timeout(960000)
540
558
  });
541
559
 
542
560
  if (!res.ok) {
543
- // 429 = server still releasing previous request (abort race) retry silently up to 3 times
544
- if (res.status === 429 && (!activeTab._429retries || activeTab._429retries < 3)) {
561
+ if (!isReconnect && res.status === 429 && (!activeTab._429retries || activeTab._429retries < 3)) {
545
562
  activeTab._429retries = (activeTab._429retries || 0) + 1;
546
- _cleanupStreamDiv(); // remove current thinking div — prevents stacking on each retry
547
563
  await new Promise(function(r) { setTimeout(r, 1500); });
548
- return await _ccDoSend(message, true, forceTabId || activeTabId); // retry pass tabId so timer closures don't fight
564
+ return await _ccConsumeStream({ message: message, tabId: activeTabId, sessionId: activeTab.sessionId || null }, false);
549
565
  }
550
566
  activeTab._429retries = 0;
551
- _cleanupStreamDiv();
552
567
  var errText = await res.text();
553
- addMsg('assistant', '<span style="color:var(--red)">' + escHtml(errText || 'CC error') + '</span>' +
554
- (errText.includes('busy') ? ' <button onclick="ccNewTab()" style="margin-top:4px;padding:3px 10px;background:var(--surface2);border:1px solid var(--border);border-radius:4px;color:var(--blue);cursor:pointer;font-size:10px">Reset CC</button>' : ''));
555
- return;
568
+ if (isReconnect && res.status === 409) return { interrupted: true, reconnectable: false, reason: errText || 'No live stream' };
569
+ throw new Error(errText || 'CC error');
556
570
  }
557
571
 
572
+ activeTab._429retries = 0;
573
+ streamStatusNote = '';
574
+ updateStreamDiv();
575
+
558
576
  var reader = res.body.getReader();
559
577
  var decoder = new TextDecoder();
560
578
  var buf = '';
561
579
  var terminalEventSeen = false;
562
580
 
581
// Applies one parsed SSE event from the command-center stream to the UI.
//   chunk     - replaces the accumulated streamed text and re-renders the stream div
//   heartbeat - ignored
//   tool      - records the tool call, re-renders, and keeps the view pinned to the
//               bottom when the user is already within 150px of it
//   done      - terminal: removes the stream div, posts the final assistant message,
//               stores the returned session id on the originating tab, then runs
//               any returned actions one at a time
//   error     - terminal: removes the stream div and posts the error text in red
// Any other event type is ignored. Sets terminalEventSeen on done/error so the
// enclosing reader can tell a finished stream from an interrupted one.
async function _handleEvent(evt) {
  switch (evt.type) {
    case 'chunk': {
      streamedText = evt.text;
      if (activeTab) activeTab._streamedText = streamedText;
      updateStreamDiv();
      return;
    }
    case 'heartbeat':
      return;
    case 'tool': {
      toolsUsed.push({ name: evt.name, input: evt.input || {} });
      if (activeTab) activeTab._toolsUsed = toolsUsed.slice();
      updateStreamDiv();
      var distanceFromBottom = msgs.scrollHeight - msgs.scrollTop - msgs.clientHeight;
      if (distanceFromBottom < 150) msgs.scrollTop = msgs.scrollHeight;
      return;
    }
    case 'done': {
      terminalEventSeen = true;
      _cleanupStreamDiv();
      // Notice goes first so it appears above the response it refers to.
      if (evt.sessionReset) {
        addMsg('system', '<div style="text-align:center;padding:6px 12px;font-size:11px;color:var(--muted);background:var(--surface2);border-radius:6px;margin:4px 0">Minions was updated — started a fresh session with latest context.</div>', false, activeTabId);
      }
      addMsg('assistant', renderMd(evt.text || streamedText || '') + _ccElapsedFooter('{seconds}s'));
      if (evt.sessionId !== undefined) {
        // Look the tab up by id: the session belongs to the tab that sent the request.
        var senderTab = _ccTabs.find(function(t) { return t.id === activeTabId; });
        if (senderTab) senderTab.sessionId = evt.sessionId || null;
        ccSaveState();
        ccUpdateSessionIndicator();
      }
      if (evt.actions && evt.actions.length > 0) {
        _tagServerExecuted(evt.actions, evt.actionResults);
        // Sequential on purpose: each action is awaited before the next starts.
        for (var idx = 0; idx < evt.actions.length; idx++) {
          await ccExecuteAction(evt.actions[idx], activeTabId);
        }
      }
      return;
    }
    case 'error': {
      terminalEventSeen = true;
      _cleanupStreamDiv();
      addMsg('assistant', '<span style="color:var(--red)">' + escHtml(evt.error) + '</span>');
      return;
    }
    default:
      return;
  }
}
616
+
563
617
  while (true) {
564
618
  var readResult = await reader.read();
565
619
  if (readResult.done) break;
@@ -569,93 +623,55 @@ async function _ccDoSend(message, skipUserMsg, forceTabId) {
569
623
  for (var li = 0; li < lines.length; li++) {
570
624
  var line = lines[li];
571
625
  if (!line.startsWith('data: ')) continue;
572
- try {
573
- var evt = JSON.parse(line.slice(6));
574
- if (evt.type === 'chunk') {
575
- streamedText = evt.text;
576
- if (activeTab) activeTab._streamedText = streamedText;
577
- updateStreamDiv();
578
- } else if (evt.type === 'heartbeat') {
579
- continue;
580
- } else if (evt.type === 'tool') {
581
- toolsUsed.push({ name: evt.name, input: evt.input || {} });
582
- if (activeTab) activeTab._toolsUsed = toolsUsed.slice();
583
- updateStreamDiv();
584
- if (msgs.scrollHeight - msgs.scrollTop - msgs.clientHeight < 150) msgs.scrollTop = msgs.scrollHeight;
585
- } else if (evt.type === 'done') {
586
- terminalEventSeen = true;
587
- _cleanupStreamDiv();
588
- // If system prompt changed, show a notice before the response
589
- if (evt.sessionReset) {
590
- addMsg('system', '<div style="text-align:center;padding:6px 12px;font-size:11px;color:var(--muted);background:var(--surface2);border-radius:6px;margin:4px 0">Minions was updated — started a fresh session with latest context.</div>', false, activeTabId);
591
- }
592
- // placeholder was added with skipSave=true — nothing to pop
593
- var rendered = renderMd(evt.text || streamedText || '');
594
- addMsg('assistant', rendered + _ccElapsedFooter('{seconds}s'));
595
- if (evt.sessionId !== undefined) {
596
- // Save session to the originating tab, not whichever tab is active now
597
- var originTab = _ccTabs.find(function(t) { return t.id === activeTabId; });
598
- if (originTab) { originTab.sessionId = evt.sessionId || null; }
599
- ccSaveState(); ccUpdateSessionIndicator();
600
- }
601
- if (evt.actions && evt.actions.length > 0) {
602
- _tagServerExecuted(evt.actions, evt.actionResults);
603
- for (var ai = 0; ai < evt.actions.length; ai++) { await ccExecuteAction(evt.actions[ai], activeTabId); }
604
- }
605
- } else if (evt.type === 'error') {
606
- terminalEventSeen = true;
607
- _cleanupStreamDiv();
608
- // placeholder was skipSave — no pop needed
609
- addMsg('assistant', '<span style="color:var(--red)">' + escHtml(evt.error) + '</span>');
610
- }
611
- } catch { /* incomplete JSON */ }
626
+ try { await _handleEvent(JSON.parse(line.slice(6))); } catch {}
612
627
  }
613
628
  }
614
- // Process any remaining buffered data after stream ends
615
629
  if (buf.trim()) {
616
630
  var remainingLines = buf.split('\n');
617
631
  for (var ri = 0; ri < remainingLines.length; ri++) {
618
632
  var rline = remainingLines[ri];
619
633
  if (!rline.startsWith('data: ')) continue;
620
- try {
621
- var revt = JSON.parse(rline.slice(6));
622
- if (revt.type === 'done') {
623
- terminalEventSeen = true;
624
- _cleanupStreamDiv();
625
- // placeholder was skipSave — no pop needed
626
- var rendered2 = renderMd(revt.text || streamedText || '');
627
- addMsg('assistant', rendered2 + _ccElapsedFooter('{seconds}s'));
628
- if (revt.sessionId !== undefined) {
629
- var originTab2 = _ccTabs.find(function(t) { return t.id === activeTabId; });
630
- if (originTab2) { originTab2.sessionId = revt.sessionId || null; }
631
- ccSaveState(); ccUpdateSessionIndicator();
632
- }
633
- if (revt.actions && revt.actions.length > 0) {
634
- _tagServerExecuted(revt.actions, revt.actionResults);
635
- for (var ai2 = 0; ai2 < revt.actions.length; ai2++) { await ccExecuteAction(revt.actions[ai2], activeTabId); }
636
- }
637
- } else if (revt.type === 'heartbeat') {
638
- continue;
639
- } else if (revt.type === 'error') {
640
- terminalEventSeen = true;
641
- _cleanupStreamDiv();
642
- addMsg('assistant', '<span style="color:var(--red)">' + escHtml(revt.error) + '</span>');
643
- } else if (revt.type === 'chunk') {
644
- streamedText = revt.text;
645
- updateStreamDiv();
646
- }
647
- } catch {}
634
+ try { await _handleEvent(JSON.parse(rline.slice(6))); } catch {}
648
635
  }
649
636
  }
650
- // If stream ended without a 'done' event, finalize with whatever we have
651
- if (!terminalEventSeen && (streamDiv.parentNode || document.getElementById('cc-restore-thinking') || document.querySelector('[data-stream-tab="' + activeTabId + '"]'))) {
652
- _cleanupStreamDiv();
653
- var streamEndedHint = '<div style="font-size:10px;color:var(--muted);margin-top:4px">The response stream ended before completion. Retry to continue from the last user message.</div>';
654
- if (streamedText) {
655
- addMsg('assistant', renderMd(streamedText) + _ccElapsedFooter('Stream interrupted after {seconds}s') + _ccRetryControls(streamEndedHint, false));
656
- } else {
657
- addMsg('assistant', '<span style="color:var(--red)">The response stream ended before completion.</span>' + _ccRetryControls(streamEndedHint, false));
637
+ return { interrupted: !terminalEventSeen, reconnectable: true };
638
+ }
639
+
640
+ try {
641
+ while (true) {
642
+ var consume = await _ccConsumeStream(
643
+ reconnectAttempts === 0
644
+ ? { message: message, tabId: activeTabId, sessionId: activeTab.sessionId || null }
645
+ : { tabId: activeTabId, sessionId: activeTab.sessionId || null, reconnect: true },
646
+ reconnectAttempts > 0
647
+ );
648
+ if (!consume.interrupted) break;
649
+ if (!consume.reconnectable || reconnectAttempts >= 2) {
650
+ _cleanupStreamDiv();
651
+ var streamEndedHint = '<div style="font-size:10px;color:var(--muted);margin-top:4px">The response stream ended before completion. Retry to continue from the last user message.</div>';
652
+ if (streamedText) {
653
+ addMsg('assistant', renderMd(streamedText) + _ccElapsedFooter('Stream interrupted after {seconds}s') + _ccRetryControls(streamEndedHint, false));
654
+ } else {
655
+ addMsg('assistant', '<span style="color:var(--red)">The response stream ended before completion.</span>' + _ccRetryControls(streamEndedHint, false));
656
+ }
657
+ break;
658
+ }
659
+ var reconnectHealth = await _ccDashboardHealth();
660
+ if (!reconnectHealth.reachable || reconnectHealth.restarted) {
661
+ _cleanupStreamDiv();
662
+ var reconnectHint = reconnectHealth.restarted
663
+ ? '<div style="font-size:10px;color:var(--muted);margin-top:4px">Dashboard restarted while this response was streaming. Reload the page to reconnect to the new instance.</div>'
664
+ : '<div style="font-size:10px;color:var(--muted);margin-top:4px">The request stream was interrupted, but the dashboard is still reachable. Retry or start a new session.</div>';
665
+ addMsg('assistant', (streamedText ? renderMd(streamedText) + _ccElapsedFooter('Stream interrupted after {seconds}s') : '') +
666
+ _ccRetryControls(reconnectHint, reconnectHealth.restarted));
667
+ break;
658
668
  }
669
+ reconnectAttempts++;
670
+ toolsUsed = [];
671
+ if (activeTab) activeTab._toolsUsed = [];
672
+ streamStatusNote = 'Connection interrupted — reattaching to the live response...';
673
+ updateStreamDiv();
674
+ await new Promise(function(r) { setTimeout(r, 1000 * reconnectAttempts); });
659
675
  }
660
676
  } catch (e) {
661
677
  _cleanupStreamDiv();
@@ -1,5 +1,11 @@
1
1
  // render-kb.js — Knowledge base rendering functions extracted from dashboard.html
2
2
 
3
// Formats a byte count as a short human-readable string: "<n> B" below 1 KiB,
// whole "KB" below 1 MiB, otherwise "MB" with one decimal.
// The unit is chosen from the ROUNDED value, so a size just under 1 MiB is
// reported as "1.0 MB" rather than the out-of-range "1024 KB".
// Non-finite input (NaN, Infinity, non-numeric values) is reported as "0 B".
function _formatBytes(n) {
  const bytes = Number(n);
  if (!Number.isFinite(bytes)) return '0 B';
  if (bytes < 1024) return bytes + ' B';
  const wholeKb = (bytes / 1024).toFixed(0);
  // toFixed(0) can round 1023.5+ up to "1024"; promote that case to MB.
  if (Number(wholeKb) < 1024) return wholeKb + ' KB';
  return (bytes / 1024 / 1024).toFixed(1) + ' MB';
}
8
+
3
9
  const KB_CAT_LABELS = {
4
10
  architecture: 'Architecture', conventions: 'Conventions',
5
11
  'project-notes': 'Project Notes', 'build-reports': 'Build Reports',
@@ -175,7 +181,18 @@ async function kbSweep() {
175
181
  if (result && result.ok) {
176
182
  btn.textContent = 'done';
177
183
  btn.style.color = 'var(--green)';
178
- showToast('cmd-toast', 'KB sweep complete: ' + (result.summary || 'done'), true);
184
+ // Rich summary toast — show the key counts inline; full breakdown via console.log for now
185
+ var bytesSaved = (result.bytesBefore || 0) - (result.bytesAfter || 0);
186
+ var pieces = [];
187
+ if (result.entriesBefore != null) pieces.push((result.entriesBefore - (result.entriesAfter || 0)) + ' entries removed');
188
+ if (result.hashDuplicatesArchived) pieces.push(result.hashDuplicatesArchived + ' hash-dup');
189
+ if (result.llmDuplicatesArchived) pieces.push(result.llmDuplicatesArchived + ' llm-dup');
190
+ if (result.staleRemoved) pieces.push(result.staleRemoved + ' stale');
191
+ if (result.reclassified) pieces.push(result.reclassified + ' reclassified');
192
+ if (result.rewritten) pieces.push(result.rewritten + ' rewritten');
193
+ if (bytesSaved > 0) pieces.push(_formatBytes(bytesSaved) + ' saved');
194
+ var msg = pieces.length ? 'KB sweep: ' + pieces.join(' · ') : 'KB sweep: ' + (result.summary || 'done');
195
+ showToast('cmd-toast', msg, true);
179
196
  refreshKnowledgeBase();
180
197
  } else {
181
198
  btn.style.color = 'var(--red)';
package/dashboard.js CHANGED
@@ -544,10 +544,86 @@ const CC_SESSION_TTL_MS = shared.ENGINE_DEFAULTS.ccSessionTtlMs;
544
544
  let ccSession = { sessionId: null, createdAt: null, lastActiveAt: null, turnCount: 0 };
545
545
  const ccInFlightTabs = new Map(); // tabId → timestamp — per-tab in-flight tracking for parallel CC requests
546
546
  const ccInFlightAborts = new Map(); // tabId → abortFn — lets a new request kill the stale LLM
547
+ const ccLiveStreams = new Map(); // tabId → buffered live stream state for reconnect-after-disconnect
547
548
  const CC_INFLIGHT_TIMEOUT_MS = 2 * 60 * 1000; // 2 minutes — auto-release if request hangs
548
549
  const CC_LOCK_WAIT_MS = 200; // grace period for previous handler's finally to release lock
549
550
  const CC_STREAM_HEARTBEAT_MS = 15000; // keep streaming responses alive across proxies/restart races
551
+ const CC_STREAM_REATTACH_GRACE_MS = 60000; // keep CC job alive briefly after disconnect so the UI can reattach
552
+ const CC_STREAM_DONE_RETENTION_MS = 30000; // retain final payload briefly so reconnect can still receive it
550
553
  function _releaseCCTab(tabId) { ccInFlightTabs.delete(tabId); ccInFlightAborts.delete(tabId); }
554
// Returns the live-stream state registered for tabId in ccLiveStreams,
// or null when the tab has none.
function _getCcLiveStream(tabId) {
  const found = ccLiveStreams.get(tabId);
  return found || null;
}
557
// Cancels the pending abort and cleanup timers on a tab's live-stream state
// and resets both fields to null. No-op when the tab has no state.
function _clearCcLiveTimers(tabId) {
  const live = _getCcLiveStream(tabId);
  if (!live) return;
  for (const timerField of ['abortTimer', 'cleanupTimer']) {
    if (!live[timerField]) continue;
    clearTimeout(live[timerField]);
    live[timerField] = null;
  }
}
569
// Drops a tab's live-stream state entirely: cancels its timers first, then
// removes the entry from ccLiveStreams. No-op when the tab has no state.
function _clearCcLiveStream(tabId) {
  if (_getCcLiveStream(tabId) === null) return;
  _clearCcLiveTimers(tabId);
  ccLiveStreams.delete(tabId);
}
575
// Returns the tab's live-stream state, creating and registering an empty one
// on first use. A fresh state has no text or tools, no final payload, no
// attached writer/endResponse, no abort function, and no timers.
function _ensureCcLiveStream(tabId) {
  const existing = _getCcLiveStream(tabId);
  if (existing) return existing;

  const fresh = {
    tabId,
    text: '',
    tools: [],
    donePayload: null,
    writer: null,
    endResponse: null,
    abortFn: null,
    abortTimer: null,
    cleanupTimer: null,
  };
  ccLiveStreams.set(tabId, fresh);
  return fresh;
}
592
// Binds an HTTP response to a tab's live stream (creating the state if needed).
// Pending abort/cleanup timers are cancelled because a client is attached again.
// Returns the state object with writer and endResponse set.
function _attachCcLiveStream(tabId, writer, endResponse) {
  const live = _ensureCcLiveStream(tabId);
  _clearCcLiveTimers(tabId);
  Object.assign(live, { writer, endResponse });
  return live;
}
599
// Unbinds the response from a tab's live stream. When a writer is given, the
// detach only happens if it is still the attached one, so a connection that
// has since been replaced cannot detach its successor. With no writer given,
// the detach is unconditional.
function _detachCcLiveStream(tabId, writer) {
  const live = _getCcLiveStream(tabId);
  if (!live) return;
  const isCurrentWriter = !writer || live.writer === writer;
  if (!isCurrentWriter) return;
  live.writer = null;
  live.endResponse = null;
}
607
// Arms the reattach grace timer for a tab whose client went away mid-response.
// Does nothing when the tab has no state or the response already finished.
// When CC_STREAM_REATTACH_GRACE_MS elapses, the job's abortFn is invoked only
// if the stream still exists, is unfinished, and has no writer attached.
// Errors thrown by abortFn are deliberately ignored (best-effort abort).
function _scheduleCcLiveAbort(tabId) {
  const state = _getCcLiveStream(tabId);
  if (!state || state.donePayload) return;
  _clearCcLiveTimers(tabId);

  const abortIfStillDetached = () => {
    // Re-read the state: it may have been cleared or re-attached meanwhile.
    const live = _getCcLiveStream(tabId);
    if (!live || live.donePayload || live.writer) return;
    try { if (live.abortFn) live.abortFn(); } catch {}
  };
  state.abortTimer = setTimeout(abortIfStillDetached, CC_STREAM_REATTACH_GRACE_MS);
}
617
// Schedules removal of a tab's live-stream state after delayMs (defaults to
// CC_STREAM_DONE_RETENTION_MS), replacing any cleanup timer already pending.
// When the timer fires, the state is kept if a writer is attached at that moment.
function _scheduleCcLiveCleanup(tabId, delayMs = CC_STREAM_DONE_RETENTION_MS) {
  const state = _getCcLiveStream(tabId);
  if (!state) return;
  if (state.cleanupTimer) clearTimeout(state.cleanupTimer);

  const dropIfUnattached = () => {
    const live = _getCcLiveStream(tabId);
    if (!live || live.writer) return;
    _clearCcLiveStream(tabId);
  };
  state.cleanupTimer = setTimeout(dropIfUnattached, delayMs);
}
551
627
  function _ccTabIsInFlight(tabId) {
552
628
  if (!ccInFlightTabs.has(tabId)) return false;
553
629
  // Auto-release stale locks — if a request has been in-flight longer than CC_INFLIGHT_TIMEOUT_MS,
@@ -2577,183 +2653,31 @@ const server = http.createServer(async (req, res) => {
2577
2653
  }
2578
2654
 
2579
2655
  async function handleKnowledgeSweep(req, res) {
2580
- // Auto-release stale guard after 5 min (LLM may have hung)
2581
- if (global._kbSweepInFlight && global._kbSweepStartedAt && Date.now() - global._kbSweepStartedAt > 300000) {
2582
- console.log('[kb-sweep] Auto-releasing stale guard (>5min)');
2656
+ // Auto-release stale guard dynamic floor based on KB size (30 min min, +1s per entry)
2657
+ const { staleGuardMs } = require('./engine/kb-sweep');
2658
+ const entryCount = (queries.getKnowledgeBaseEntries() || []).length;
2659
+ const guardMs = staleGuardMs(entryCount);
2660
+ if (global._kbSweepInFlight && global._kbSweepStartedAt && Date.now() - global._kbSweepStartedAt > guardMs) {
2661
+ console.log(`[kb-sweep] Auto-releasing stale guard (>${Math.round(guardMs / 60000)}min for ${entryCount} entries)`);
2583
2662
  global._kbSweepInFlight = false;
2584
2663
  }
2585
2664
  if (global._kbSweepInFlight) {
2586
2665
  return jsonReply(res, 200, { ok: true, alreadyRunning: true, startedAt: global._kbSweepStartedAt });
2587
2666
  }
2588
- // Generation token prevents stale finally blocks from clearing the flag for a new sweep
2589
2667
  const sweepToken = Date.now() + Math.random();
2590
2668
  global._kbSweepToken = sweepToken;
2591
2669
  global._kbSweepInFlight = true;
2592
2670
  global._kbSweepStartedAt = Date.now();
2593
2671
  const body = await readBody(req).catch(() => ({}));
2594
- // Run sweep in background — return immediately so agents/UI don't time out
2595
2672
  _runKbSweepBackground(body, sweepToken);
2596
2673
  return jsonReply(res, 202, { ok: true, started: true });
2597
2674
  }
2598
2675
 
2599
2676
  async function _runKbSweepBackground(body, sweepToken) {
2600
2677
  try {
2601
- const entries = getKnowledgeBaseEntries();
2602
- if (entries.length < 2) {
2603
- global._kbSweepLastResult = { ok: true, summary: 'nothing to sweep (< 2 entries)' };
2604
- global._kbSweepLastCompletedAt = Date.now();
2605
- return;
2606
- }
2607
-
2608
- // Build a manifest of all KB entries with their content (skip pinned — user wants to keep them)
2609
- const requestPinnedKeys = Array.isArray(body.pinnedKeys)
2610
- ? body.pinnedKeys.filter(k => typeof k === 'string' && k.startsWith('knowledge/'))
2611
- : [];
2612
- const serverPinnedKeys = shared.getPinnedItems().filter(k => k.startsWith('knowledge/'));
2613
- const pinnedKeys = new Set([...serverPinnedKeys, ...requestPinnedKeys]);
2614
- const manifest = [];
2615
- for (const e of entries) {
2616
- if (pinnedKeys.has('knowledge/' + e.cat + '/' + e.file)) continue;
2617
- const content = safeRead(path.join(MINIONS_DIR, 'knowledge', e.cat, e.file));
2618
- if (!content) continue;
2619
- manifest.push({ category: e.cat, file: e.file, title: e.title, agent: e.agent, date: e.date, content: content.slice(0, 3000) });
2620
- }
2621
- if (manifest.length < 2) {
2622
- global._kbSweepLastResult = { ok: true, summary: 'nothing to sweep (< 2 unpinned entries)' };
2623
- global._kbSweepLastCompletedAt = Date.now();
2624
- return;
2625
- }
2626
-
2627
- const { callLLM, trackEngineUsage } = require('./engine/llm');
2628
- const BATCH_SIZE = 30; // ~30 entries per batch to stay within Haiku context
2629
- const batches = [];
2630
- for (let i = 0; i < manifest.length; i += BATCH_SIZE) {
2631
- batches.push(manifest.slice(i, i + BATCH_SIZE));
2632
- }
2633
-
2634
- const plan = { duplicates: [], reclassify: [], remove: [] };
2635
- for (let b = 0; b < batches.length; b++) {
2636
- const batch = batches[b];
2637
- const offset = b * BATCH_SIZE;
2638
- const prompt = `You are a knowledge base curator. Analyze these ${batch.length} entries (batch ${b + 1}/${batches.length}, indices ${offset}-${offset + batch.length - 1}) and produce a cleanup plan.
2639
-
2640
- ## Entries
2641
-
2642
- ${batch.map((m, i) => `[${offset + i}] ${m.category}/${m.file} | ${m.title} | ${m.date} | ${m.agent || '?'} | ${(m.content || '').slice(0, 200).replace(/\n/g, ' ')}`).join('\n')}
2643
-
2644
- ## Instructions
2645
-
2646
- 1. **Find duplicates**: entries with substantially the same content (same findings, different agents/runs). List pairs by index. Prefer keeping the more recent entry.
2647
- 2. **Find misclassified**: entries in the wrong category.
2648
- 3. **Find stale/empty**: entries with no actionable content (boilerplate, bail-out notes, "no changes needed").
2649
-
2650
- Respond with ONLY valid JSON: { "duplicates": [{ "keep": N, "remove": [N], "reason": "..." }], "reclassify": [{ "index": N, "from": "cat", "to": "cat", "reason": "..." }], "remove": [{ "index": N, "reason": "..." }] }
2651
- If nothing to do: { "duplicates": [], "reclassify": [], "remove": [] }`;
2652
-
2653
- const result = await callLLM(prompt, 'Output only JSON.', {
2654
- timeout: 120000, label: 'kb-sweep', model: 'haiku', maxTurns: 1, direct: true
2655
- });
2656
- trackEngineUsage('kb-sweep', result.usage);
2657
-
2658
- let batchPlan;
2659
- try {
2660
- let jsonStr = (result.text || '').trim();
2661
- const fenceMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
2662
- if (fenceMatch) jsonStr = fenceMatch[1].trim();
2663
- batchPlan = JSON.parse(jsonStr);
2664
- } catch {
2665
- console.log(`[kb-sweep] batch ${b + 1}/${batches.length} returned invalid JSON, skipping`);
2666
- continue;
2667
- }
2668
- if (batchPlan.duplicates) plan.duplicates.push(...batchPlan.duplicates);
2669
- if (batchPlan.reclassify) plan.reclassify.push(...batchPlan.reclassify);
2670
- if (batchPlan.remove) plan.remove.push(...batchPlan.remove);
2671
- }
2672
-
2673
- let removed = 0, reclassified = 0, merged = 0;
2674
- const kbDir = path.join(MINIONS_DIR, 'knowledge');
2675
-
2676
- // If nothing to do, store result and return
2677
- const totalActions = (plan.remove || []).length + (plan.duplicates || []).reduce((n, d) => n + (d.remove || []).length, 0) + (plan.reclassify || []).length;
2678
- if (totalActions === 0) {
2679
- global._kbSweepLastResult = { ok: true, summary: 'KB is clean — nothing to sweep', plan };
2680
- global._kbSweepLastCompletedAt = Date.now();
2681
- return;
2682
- }
2683
-
2684
- // Archive dir for swept files (never delete, always preserve)
2685
- const kbArchiveDir = path.join(kbDir, '_swept');
2686
- if (!fs.existsSync(kbArchiveDir)) fs.mkdirSync(kbArchiveDir, { recursive: true });
2687
-
2688
- function archiveKbFile(filePath, reason) {
2689
- if (!fs.existsSync(filePath)) return;
2690
- const basename = path.basename(filePath);
2691
- const destPath = shared.uniquePath(path.join(kbArchiveDir, basename));
2692
- try {
2693
- const content = safeRead(filePath);
2694
- if (content === null) return; // don't delete if we can't read
2695
- const meta = `<!-- swept: ${new Date().toISOString()} | reason: ${reason} -->\n`;
2696
- safeWrite(destPath, meta + content);
2697
- safeUnlink(filePath);
2698
- } catch (e) { console.error('kb archive:', e.message); }
2699
- }
2700
-
2701
- // Process removals (stale/empty) — archive, not delete
2702
- for (const r of (plan.remove || [])) {
2703
- const entry = manifest[r.index];
2704
- if (!entry) continue;
2705
- const fp = path.join(kbDir, entry.category, entry.file);
2706
- archiveKbFile(fp, 'stale: ' + (r.reason || ''));
2707
- removed++;
2708
- }
2709
-
2710
- // Process duplicates — archive the duplicates, keep the primary
2711
- for (const d of (plan.duplicates || [])) {
2712
- for (const idx of (d.remove || [])) {
2713
- const entry = manifest[idx];
2714
- if (!entry) continue;
2715
- const fp = path.join(kbDir, entry.category, entry.file);
2716
- archiveKbFile(fp, 'duplicate of index ' + d.keep + ': ' + (d.reason || ''));
2717
- merged++;
2718
- }
2719
- }
2720
-
2721
- // Process reclassifications (move between categories)
2722
- for (const r of (plan.reclassify || [])) {
2723
- const entry = manifest[r.index];
2724
- if (!entry || !shared.KB_CATEGORIES.includes(r.to)) continue;
2725
- const srcPath = path.join(kbDir, entry.category, entry.file);
2726
- const destDir = path.join(kbDir, r.to);
2727
- if (!fs.existsSync(srcPath)) continue;
2728
- if (!fs.existsSync(destDir)) fs.mkdirSync(destDir, { recursive: true });
2729
- try {
2730
- const srcStats = fs.statSync(srcPath);
2731
- const content = safeRead(srcPath);
2732
- const updated = content.replace(/^(category:\s*).+$/m, `$1${r.to}`);
2733
- const destPath = path.join(destDir, entry.file);
2734
- safeWrite(destPath, updated);
2735
- fs.utimesSync(destPath, srcStats.atime, srcStats.mtime);
2736
- safeUnlink(srcPath);
2737
- reclassified++;
2738
- } catch (e) { console.error('kb reclassify:', e.message); }
2739
- }
2740
-
2741
- // Prune swept files older than 30 days
2742
- let pruned = 0;
2743
- const SWEPT_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
2744
- try {
2745
- for (const f of fs.readdirSync(kbArchiveDir)) {
2746
- const fp = path.join(kbArchiveDir, f);
2747
- try {
2748
- if (Date.now() - fs.statSync(fp).mtimeMs > SWEPT_RETENTION_MS) { safeUnlink(fp); pruned++; }
2749
- } catch { /* cleanup */ }
2750
- }
2751
- } catch { /* optional */ }
2752
-
2753
- const summary = `${merged} duplicates merged, ${removed} stale removed, ${reclassified} reclassified${pruned ? ', ' + pruned + ' old swept files pruned' : ''}`;
2754
- safeWrite(path.join(ENGINE_DIR, 'kb-swept.json'), JSON.stringify({ timestamp: new Date().toISOString(), summary }));
2755
- queries.invalidateKnowledgeBaseCache();
2756
- global._kbSweepLastResult = { ok: true, summary, plan };
2678
+ const { runKbSweep } = require('./engine/kb-sweep');
2679
+ const result = await runKbSweep({ pinnedKeys: body.pinnedKeys });
2680
+ global._kbSweepLastResult = result;
2757
2681
  global._kbSweepLastCompletedAt = Date.now();
2758
2682
  } catch (e) {
2759
2683
  console.error('[kb-sweep] background error:', e.message);
@@ -2762,6 +2686,7 @@ If nothing to do: { "duplicates": [], "reclassify": [], "remove": [] }`;
2762
2686
  } finally { if (global._kbSweepToken === sweepToken) global._kbSweepInFlight = false; }
2763
2687
  }
2764
2688
 
2689
+
2765
2690
  function handleKnowledgeSweepStatus(req, res) {
2766
2691
  return jsonReply(res, 200, {
2767
2692
  inFlight: !!global._kbSweepInFlight,
@@ -4051,10 +3976,31 @@ What would you like to discuss or change? When you're happy, say "approve" and I
4051
3976
  async function handleCommandCenterNewSession(req, res) {
4052
3977
  ccSession = { sessionId: null, createdAt: null, lastActiveAt: null, turnCount: 0 };
4053
3978
  ccInFlightTabs.clear(); // Reset all in-flight guards
3979
+ for (const [tabId, live] of ccLiveStreams.entries()) {
3980
+ try { if (live.abortFn) live.abortFn(); } catch {}
3981
+ _clearCcLiveStream(tabId);
3982
+ }
4054
3983
  safeWrite(path.join(ENGINE_DIR, 'cc-session.json'), ccSession);
4055
3984
  return jsonReply(res, 200, { ok: true });
4056
3985
  }
4057
3986
 
3987
// HTTP handler that aborts the command-center request for one tab.
// Reads { tabId } from the body (falls back to 'default'), aborts via the live
// stream's abortFn when present, otherwise via the in-flight abort registered
// for the tab, then discards the live-stream state and releases the tab's
// in-flight lock. Replies 200 { ok: true }; any thrown error (e.g. an
// unreadable body) is reported as 400 { error }.
async function handleCommandCenterAbort(req, res) {
  try {
    const body = await readBody(req);
    const tabId = body.tabId || 'default';
    const live = _getCcLiveStream(tabId);
    // The in-flight abort is only a fallback when the live stream has none.
    const fallbackAbort = live?.abortFn ? null : ccInFlightAborts.get(tabId);
    try {
      if (live?.abortFn) live.abortFn();
      else if (fallbackAbort) fallbackAbort();
    } catch {}
    _clearCcLiveStream(tabId);
    _releaseCCTab(tabId);
    return jsonReply(res, 200, { ok: true });
  } catch (e) {
    return jsonReply(res, 400, { error: e.message });
  }
}
4003
+
4058
4004
  async function handleCCSessionsList(req, res) {
4059
4005
  const sessions = _readCcTabSessions();
4060
4006
  return jsonReply(res, 200, { sessions });
@@ -4188,8 +4134,51 @@ What would you like to discuss or change? When you're happy, say "approve" and I
4188
4134
  };
4189
4135
  try {
4190
4136
  const body = await readBody(req);
4191
- if (!body.message) { res.statusCode = 400; res.end('message required'); return; }
4137
+ if (!body.message && !body.reconnect) { res.statusCode = 400; res.end('message required'); return; }
4192
4138
  tabId = body.tabId || 'default';
4139
+ if (body.reconnect) {
4140
+ const live = _getCcLiveStream(tabId);
4141
+ if (!live) { res.statusCode = 409; res.end('No live command-center response to reconnect'); return; }
4142
+ res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive' });
4143
+ writeCcEvent({ type: 'heartbeat' });
4144
+ _ccHeartbeatTimer = setInterval(() => {
4145
+ if (_ccStreamEnded) {
4146
+ stopCcHeartbeat();
4147
+ return;
4148
+ }
4149
+ if (!writeCcEvent({ type: 'heartbeat' })) stopCcHeartbeat();
4150
+ }, CC_STREAM_HEARTBEAT_MS);
4151
+ let reconnectDone;
4152
+ const reconnectDonePromise = new Promise(resolve => { reconnectDone = resolve; });
4153
+ _attachCcLiveStream(tabId, writeCcEvent, () => {
4154
+ if (_ccStreamEnded) return;
4155
+ _ccStreamEnded = true;
4156
+ stopCcHeartbeat();
4157
+ try { res.end(); } catch {}
4158
+ reconnectDone();
4159
+ });
4160
+ req.on('close', () => {
4161
+ if (_ccStreamEnded) return;
4162
+ stopCcHeartbeat();
4163
+ _detachCcLiveStream(tabId, writeCcEvent);
4164
+ _scheduleCcLiveAbort(tabId);
4165
+ reconnectDone();
4166
+ });
4167
+ for (const tool of live.tools || []) {
4168
+ writeCcEvent({ type: 'tool', name: tool.name, input: _lightToolInput(tool.input) });
4169
+ }
4170
+ if (live.text) writeCcEvent({ type: 'chunk', text: live.text });
4171
+ if (live.donePayload) {
4172
+ writeCcEvent(live.donePayload);
4173
+ _ccStreamEnded = true;
4174
+ stopCcHeartbeat();
4175
+ try { res.end(); } catch {}
4176
+ _scheduleCcLiveCleanup(tabId);
4177
+ return;
4178
+ }
4179
+ await reconnectDonePromise;
4180
+ return;
4181
+ }
4193
4182
  if (_ccTabIsInFlight(tabId)) {
4194
4183
  // Previous request still in-flight — abort its LLM (handles keep-alive abort where close event didn't fire)
4195
4184
  const prevAbort = ccInFlightAborts.get(tabId);
@@ -4200,6 +4189,13 @@ What would you like to discuss or change? When you're happy, say "approve" and I
4200
4189
  }
4201
4190
  }
4202
4191
  ccInFlightTabs.set(tabId, Date.now());
4192
+ _clearCcLiveStream(tabId);
4193
+ const liveState = _attachCcLiveStream(tabId, writeCcEvent, () => {
4194
+ if (_ccStreamEnded) return;
4195
+ _ccStreamEnded = true;
4196
+ stopCcHeartbeat();
4197
+ try { res.end(); } catch {}
4198
+ });
4203
4199
 
4204
4200
  res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive' });
4205
4201
  writeCcEvent({ type: 'heartbeat' }); // flush headers quickly and keep intermediaries from idling out
@@ -4210,17 +4206,12 @@ What would you like to discuss or change? When you're happy, say "approve" and I
4210
4206
  }
4211
4207
  if (!writeCcEvent({ type: 'heartbeat' })) stopCcHeartbeat();
4212
4208
  }, CC_STREAM_HEARTBEAT_MS);
4213
- // Kill LLM process immediately if client disconnects mid-stream.
4214
- // Guard with !_ccStreamEnded: when the stream ends normally, finally already released the lock;
4215
- // without the guard, this close event (which fires after res.end) could wipe a new request's lock.
4209
+ // Keep the LLM alive briefly after disconnect so the UI can reattach to the same in-flight turn.
4216
4210
  req.on('close', () => {
4217
4211
  if (!_ccStreamEnded) {
4218
4212
  stopCcHeartbeat();
4219
- _releaseCCTab(tabId);
4220
- if (_ccStreamAbort) {
4221
- console.log(`[CC-stream] Client disconnected for tab ${tabId} — aborting LLM`);
4222
- _ccStreamAbort();
4223
- }
4213
+ _detachCcLiveStream(tabId, writeCcEvent);
4214
+ _scheduleCcLiveAbort(tabId);
4224
4215
  }
4225
4216
  });
4226
4217
 
@@ -4257,14 +4248,17 @@ What would you like to discuss or change? When you're happy, say "approve" and I
4257
4248
  onChunk: (text) => {
4258
4249
  const actIdx = findCCActionsDelimiter(text);
4259
4250
  const display = actIdx >= 0 ? text.slice(0, actIdx).trim() : text;
4260
- writeCcEvent({ type: 'chunk', text: display });
4251
+ liveState.text = display;
4252
+ if (liveState.writer) liveState.writer({ type: 'chunk', text: display });
4261
4253
  },
4262
4254
  onToolUse: (name, input) => {
4263
4255
  toolUses.push({ name, input: input || {} });
4264
- writeCcEvent({ type: 'tool', name, input: _lightToolInput(input) });
4256
+ liveState.tools.push({ name, input: input || {} });
4257
+ if (liveState.writer) liveState.writer({ type: 'tool', name, input: _lightToolInput(input) });
4265
4258
  }
4266
4259
  });
4267
4260
  _ccStreamAbort = llmPromise.abort;
4261
+ liveState.abortFn = _ccStreamAbort;
4268
4262
  ccInFlightAborts.set(tabId, _ccStreamAbort);
4269
4263
  const result = await llmPromise;
4270
4264
  trackUsage('command-center', result.usage);
@@ -4278,21 +4272,24 @@ What would you like to discuss or change? When you're happy, say "approve" and I
4278
4272
  toolUses = []; // discard stale metadata from the failed resume attempt
4279
4273
  const retryPromise = callLLMStreaming(freshPrompt, CC_STATIC_SYSTEM_PROMPT, {
4280
4274
  timeout: 900000, label: 'command-center', model: streamModel, maxTurns: ccMaxTurns,
4281
- allowedTools: 'Bash,Read,Write,Edit,Glob,Grep,WebFetch,WebSearch',
4282
- effort: streamEffort, direct: true,
4283
- onChunk: (text) => {
4284
- const actIdx = findCCActionsDelimiter(text);
4285
- const display = actIdx >= 0 ? text.slice(0, actIdx).trim() : text;
4286
- writeCcEvent({ type: 'chunk', text: display });
4287
- },
4288
- onToolUse: (name, input) => {
4289
- toolUses.push({ name, input: input || {} });
4290
- writeCcEvent({ type: 'tool', name, input: _lightToolInput(input) });
4291
- }
4292
- });
4293
- _ccStreamAbort = retryPromise.abort;
4294
- ccInFlightAborts.set(tabId, _ccStreamAbort);
4295
- const retryResult = await retryPromise;
4275
+ allowedTools: 'Bash,Read,Write,Edit,Glob,Grep,WebFetch,WebSearch',
4276
+ effort: streamEffort, direct: true,
4277
+ onChunk: (text) => {
4278
+ const actIdx = findCCActionsDelimiter(text);
4279
+ const display = actIdx >= 0 ? text.slice(0, actIdx).trim() : text;
4280
+ liveState.text = display;
4281
+ if (liveState.writer) liveState.writer({ type: 'chunk', text: display });
4282
+ },
4283
+ onToolUse: (name, input) => {
4284
+ toolUses.push({ name, input: input || {} });
4285
+ liveState.tools.push({ name, input: input || {} });
4286
+ if (liveState.writer) liveState.writer({ type: 'tool', name, input: _lightToolInput(input) });
4287
+ }
4288
+ });
4289
+ _ccStreamAbort = retryPromise.abort;
4290
+ liveState.abortFn = _ccStreamAbort;
4291
+ ccInFlightAborts.set(tabId, _ccStreamAbort);
4292
+ const retryResult = await retryPromise;
4296
4293
  trackUsage('command-center', retryResult.usage);
4297
4294
  if (retryResult.text) {
4298
4295
  // Fresh session succeeded — use retryResult from here
@@ -4305,8 +4302,10 @@ What would you like to discuss or change? When you're happy, say "approve" and I
4305
4302
  const stderrTail = (result.stderr || '').trim().split('\n').filter(Boolean).slice(-3).join(' | ');
4306
4303
  console.error(`[CC-stream] Failed: code=${result.code}, stderr=${(result.stderr || '').slice(0, 500)}, stdout_tail=${(result.raw || '').slice(-500)}`);
4307
4304
  const retryHint = 'Send your message again to retry.';
4308
- writeCcEvent({ type: 'done', text: `I had trouble processing that ${debugInfo}. ${stderrTail ? 'Detail: ' + stderrTail : ''}\n\n${retryHint}`, actions: [], sessionId: null });
4309
- _ccStreamEnded = true; res.end();
4305
+ liveState.donePayload = { type: 'done', text: `I had trouble processing that ${debugInfo}. ${stderrTail ? 'Detail: ' + stderrTail : ''}\n\n${retryHint}`, actions: [], sessionId: null };
4306
+ if (liveState.writer) liveState.writer(liveState.donePayload);
4307
+ if (liveState.endResponse) liveState.endResponse();
4308
+ _scheduleCcLiveCleanup(tabId);
4310
4309
  return;
4311
4310
  }
4312
4311
 
@@ -4348,7 +4347,8 @@ What would you like to discuss or change? When you're happy, say "approve" and I
4348
4347
  }
4349
4348
  const donePayload = { type: 'done', text: displayText, actions, actionResults, sessionId: responseSessionId, newSession: !wasResume };
4350
4349
  if (sessionReset) donePayload.sessionReset = true;
4351
- writeCcEvent(donePayload);
4350
+ liveState.donePayload = donePayload;
4351
+ if (liveState.writer) liveState.writer(donePayload);
4352
4352
 
4353
4353
  // Mirror CC response to Teams (non-blocking, skip Teams-originated)
4354
4354
  const _streamTabId = body.tabId || 'default';
@@ -4356,7 +4356,8 @@ What would you like to discuss or change? When you're happy, say "approve" and I
4356
4356
  teams.teamsPostCCResponse(body.message, result.text).catch(() => {});
4357
4357
  }
4358
4358
 
4359
- _ccStreamEnded = true; res.end();
4359
+ if (liveState.endResponse) liveState.endResponse();
4360
+ _scheduleCcLiveCleanup(tabId);
4360
4361
  } finally {
4361
4362
  stopCcHeartbeat();
4362
4363
  _releaseCCTab(tabId);
@@ -5250,6 +5251,7 @@ What would you like to discuss or change? When you're happy, say "approve" and I
5250
5251
 
5251
5252
  // Command Center
5252
5253
  { method: 'POST', path: '/api/command-center/new-session', desc: 'Clear active CC session', handler: handleCommandCenterNewSession },
5254
+ { method: 'POST', path: '/api/command-center/abort', desc: 'Abort an in-flight CC request for a tab', params: 'tabId?', handler: handleCommandCenterAbort },
5253
5255
  { method: 'POST', path: '/api/command-center', desc: 'Conversational command center with full minions context', params: 'message, sessionId?', handler: handleCommandCenter },
5254
5256
  { method: 'POST', path: '/api/command-center/stream', desc: 'Streaming CC — SSE with text chunks as they arrive', params: 'message, tabId?', handler: handleCommandCenterStream },
5255
5257
  { method: 'GET', path: '/api/cc-sessions', desc: 'List CC session metadata for all tabs', handler: handleCCSessionsList },
@@ -0,0 +1,383 @@
1
+ /**
2
+ * engine/kb-sweep.js — Knowledge base sweep: dedup, compress, normalize.
3
+ *
4
+ * Replaces the inline sweep that lived in dashboard.js. Three passes:
5
+ * 1. Hash-based dedup — cheap, catches cross-batch duplicates
6
+ * 2. LLM batch sweep — finds remaining dupes + reclassify + stale-remove
7
+ * 3. Compress & normalize — per-entry LLM rewrite, flagged via _swept frontmatter
8
+ *
9
+ * Returns a rich summary so the dashboard can show before/after byte counts.
10
+ */
11
+
12
+ const fs = require('fs');
13
+ const path = require('path');
14
+ const crypto = require('crypto');
15
+ const shared = require('./shared');
16
+ const queries = require('./queries');
17
+ const { safeRead, safeWrite, safeUnlink, log, ts } = shared;
18
+ const { MINIONS_DIR, ENGINE_DIR } = queries;
19
+
20
+ const KB_DIR = path.join(MINIONS_DIR, 'knowledge');
21
+ const SWEPT_DIR = path.join(KB_DIR, '_swept');
22
+ const SWEPT_RETENTION_MS = 30 * 24 * 60 * 60 * 1000;
23
+ const COMPRESS_THRESHOLD_BYTES = 5000;
24
+ const LLM_BATCH_SIZE = 30;
25
+ const NORMALIZE_CONCURRENCY = 5;
26
+ const SWEPT_FLAG_KEY = '_swept'; // frontmatter key — entries with this skip the rewrite pass
27
+
28
/**
 * Compute the content fingerprint used to group identical knowledge-base entries.
 *
 * Whitespace runs are collapsed and the text is trimmed before hashing; the raw
 * length is appended so inputs whose raw sizes differ never share a hash.
 * The whole normalized text is hashed rather than a fixed-size prefix:
 * two different entries that merely start with the same header and happen to
 * have the same length would otherwise be reported as duplicates.
 *
 * @param {*} content - entry text; falsy values are hashed as the empty string
 * @returns {string} hex-encoded SHA-256 digest
 */
function _hashEntry(content) {
  const normalized = String(content || '').replace(/\s+/g, ' ').trim();
  return crypto.createHash('sha256').update(normalized + ':' + (content?.length || 0)).digest('hex');
}
32
+
33
/**
 * Parse YAML-ish frontmatter at the top of a markdown file.
 *
 * Only flat `key: value` lines between the opening and closing `---` fences
 * are understood; lines that do not match are ignored and every value is kept
 * as a trimmed string. Both LF and CRLF line endings are accepted, so a file
 * saved with Windows line endings still exposes its keys.
 *
 * @param {string} content - raw file content (null/undefined is treated as empty)
 * @returns {{ fm: Object<string, string>, body: string }} parsed keys and the
 *   remaining body with leading blank lines removed; when no frontmatter is
 *   found, `fm` is empty and `body` is the input (or '' when it was falsy)
 */
function _parseFrontmatter(content) {
  const m = String(content || '').match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n)?([\s\S]*)$/);
  if (!m) return { fm: {}, body: content || '' };
  const fm = {};
  for (const line of m[1].split(/\r?\n/)) {
    const lm = line.match(/^([\w-]+):\s*(.*)$/);
    if (lm) fm[lm[1]] = lm[2].trim();
  }
  return { fm, body: m[2].replace(/^(?:\r?\n)+/, '') };
}
47
+
48
/**
 * Render a frontmatter map plus body back into markdown text.
 *
 * With no keys the body is returned untouched. Otherwise the keys are written
 * as `key: value` lines between `---` fences, followed by one blank line and
 * the body with its leading newlines removed.
 *
 * @param {Object} fm - frontmatter key/value pairs
 * @param {string} body - markdown body
 * @returns {string} serialized document
 */
function _serializeFrontmatter(fm, body) {
  const pairs = Object.entries(fm);
  if (pairs.length === 0) return body;

  const header = pairs.map(([key, value]) => `${key}: ${value}`).join('\n');
  const trimmedBody = body.replace(/^\n+/, '');
  return ['---', header, '---', '', trimmedBody].join('\n');
}
54
+
55
/**
 * Move a knowledge-base file into the swept archive directory.
 *
 * The archived copy is prefixed with an HTML comment recording the sweep time
 * and the reason. The source file is only deleted after the archived copy is
 * confirmed to exist on disk, so a write that failed without throwing cannot
 * cause the entry to be lost.
 *
 * @param {string} filePath - path of the entry to archive
 * @param {string} reason - short explanation stored in the archive header
 * @returns {boolean} true when the entry was archived and removed, false when
 *   it was missing, unreadable, or any step failed (failures are logged)
 */
function _archiveKbFile(filePath, reason) {
  if (!fs.existsSync(filePath)) return false;
  try {
    // Directory creation and destination selection run inside the try so a
    // filesystem error is reported as "not archived" instead of aborting the sweep.
    if (!fs.existsSync(SWEPT_DIR)) fs.mkdirSync(SWEPT_DIR, { recursive: true });
    const destPath = shared.uniquePath(path.join(SWEPT_DIR, path.basename(filePath)));
    const content = safeRead(filePath);
    if (content == null) return false;
    safeWrite(destPath, `<!-- swept: ${new Date().toISOString()} | reason: ${reason} -->\n${content}`);
    // NOTE(review): assumes safeWrite is synchronous — confirm against shared.js.
    if (!fs.existsSync(destPath)) {
      log('warn', `[kb-sweep] archive ${path.basename(filePath)}: archive copy was not written, keeping original`);
      return false;
    }
    safeUnlink(filePath);
    return true;
  } catch (e) { log('warn', `[kb-sweep] archive ${path.basename(filePath)}: ${e.message}`); return false; }
}
67
+
68
/**
 * Delete archived files whose mtime is older than SWEPT_RETENTION_MS.
 *
 * Best-effort: a missing archive directory, an unreadable directory, or an
 * entry that cannot be stat'ed or removed is silently skipped.
 *
 * @returns {number} how many archived files were removed
 */
function _pruneOldSwept() {
  if (!fs.existsSync(SWEPT_DIR)) return 0;

  let removedCount = 0;
  let names;
  try {
    names = fs.readdirSync(SWEPT_DIR);
  } catch {
    return removedCount; // directory vanished or is unreadable
  }

  for (const name of names) {
    const target = path.join(SWEPT_DIR, name);
    try {
      const ageMs = Date.now() - fs.statSync(target).mtimeMs;
      if (ageMs > SWEPT_RETENTION_MS) {
        safeUnlink(target);
        removedCount += 1;
      }
    } catch {
      /* ignore entries that cannot be inspected or removed */
    }
  }
  return removedCount;
}
81
+
82
/**
 * Hash-based duplicate removal (no LLM involved).
 *
 * Entries are bucketed by `_hashEntry(content)`. Within each bucket the entry
 * with the latest `date` (ties broken by the larger `mtimeMs`) survives and
 * the others are archived. With `opts.dryRun` nothing is touched on disk and
 * the would-be archives are only counted.
 *
 * @param {Array<object>} manifest - entries with category, file, content, date, mtimeMs
 * @param {{ dryRun?: boolean }} [opts]
 * @returns {{ survivors: Array<object>, archived: number }}
 */
function _hashDedup(manifest, opts = {}) {
  const byHash = new Map(); // hash → entries sharing that hash, in manifest order
  manifest.forEach((entry) => {
    const key = _hashEntry(entry.content);
    const bucket = byHash.get(key);
    if (bucket) bucket.push(entry);
    else byHash.set(key, [entry]);
  });

  // Most recent first: date frontmatter, then mtime
  const newestFirst = (a, b) => (b.date || '').localeCompare(a.date || '') || b.mtimeMs - a.mtimeMs;

  const survivors = [];
  let archived = 0;
  byHash.forEach((bucket) => {
    if (bucket.length > 1) bucket.sort(newestFirst);
    const [keeper, ...extras] = bucket;
    survivors.push(keeper);
    for (const extra of extras) {
      if (opts.dryRun) {
        archived += 1;
        continue;
      }
      const extraPath = path.join(KB_DIR, extra.category, extra.file);
      const moved = _archiveKbFile(extraPath, `hash-duplicate of ${keeper.category}/${keeper.file}`);
      if (moved) archived += 1;
    }
  });
  return { survivors, archived };
}
105
+
106
/**
 * Batched LLM sweep — finds within-batch dupes, reclassifies, removes stale.
 *
 * The manifest is split into chunks of LLM_BATCH_SIZE. Each chunk is described
 * to the model with manifest-wide indices, and the JSON plans that come back
 * are concatenated. A batch whose call fails, whose reply is not valid JSON,
 * or whose reply is not a JSON object is logged and skipped; plan fields that
 * are not arrays are ignored so one malformed reply cannot abort the sweep.
 *
 * @param {Array<object>} manifest - entries with category, file, title, date, agent, content
 * @param {Function} callLLM - async (prompt, system, options) => { text, usage }
 * @param {Function} trackEngineUsage - (label, usage) usage accounting hook
 * @returns {Promise<{ duplicates: Array, reclassify: Array, remove: Array }>} merged plan
 */
async function _llmBatchSweep(manifest, callLLM, trackEngineUsage) {
  const plan = { duplicates: [], reclassify: [], remove: [] };
  const batches = [];
  for (let i = 0; i < manifest.length; i += LLM_BATCH_SIZE) {
    batches.push(manifest.slice(i, i + LLM_BATCH_SIZE));
  }
  for (let b = 0; b < batches.length; b++) {
    const batch = batches[b];
    // Indices shown to the model are positions in the full manifest, not in the batch.
    const offset = b * LLM_BATCH_SIZE;
    const prompt = `You are a knowledge base curator. Analyze these ${batch.length} entries (batch ${b + 1}/${batches.length}, indices ${offset}-${offset + batch.length - 1}) and produce a cleanup plan.

## Entries

${batch.map((m, i) => `[${offset + i}] ${m.category}/${m.file} | ${m.title} | ${m.date} | ${m.agent || '?'} | ${(m.content || '').slice(0, 200).replace(/\n/g, ' ')}`).join('\n')}

## Instructions

1. **Find duplicates**: entries with substantially the same content (same findings, different agents/runs). List pairs by index. Prefer keeping the more recent entry.
2. **Find misclassified**: entries in the wrong category.
3. **Find stale/empty**: entries with no actionable content (boilerplate, bail-out notes, "no changes needed").

Respond with ONLY valid JSON: { "duplicates": [{ "keep": N, "remove": [N], "reason": "..." }], "reclassify": [{ "index": N, "from": "cat", "to": "cat", "reason": "..." }], "remove": [{ "index": N, "reason": "..." }] }
If nothing to do: { "duplicates": [], "reclassify": [], "remove": [] }`;

    let result;
    try {
      result = await callLLM(prompt, 'Output only JSON.', { timeout: 120000, label: 'kb-sweep', model: 'haiku', maxTurns: 1, direct: true });
      trackEngineUsage('kb-sweep', result.usage);
    } catch (e) { log('warn', `[kb-sweep] batch ${b + 1} LLM error: ${e.message}`); continue; }

    let batchPlan;
    try {
      let jsonStr = (result.text || '').trim();
      // Models often wrap JSON in a markdown fence despite the instructions.
      const fence = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
      if (fence) jsonStr = fence[1].trim();
      batchPlan = JSON.parse(jsonStr);
    } catch { log('warn', `[kb-sweep] batch ${b + 1} returned invalid JSON, skipping`); continue; }

    // Valid JSON is not necessarily a plan: `null`, a bare array or a scalar must be rejected.
    if (batchPlan === null || typeof batchPlan !== 'object' || Array.isArray(batchPlan)) {
      log('warn', `[kb-sweep] batch ${b + 1} returned a non-object plan, skipping`);
      continue;
    }
    for (const key of Object.keys(plan)) {
      if (Array.isArray(batchPlan[key])) plan[key].push(...batchPlan[key]);
    }
  }
  return plan;
}
150
+
151
/**
 * Per-entry rewrite pass: asks the LLM to reshape each entry into a fixed
 * Summary / Key Findings / Action Items / References template and stamps the
 * result with the `_swept` frontmatter flag.
 *
 * An entry is skipped when it already carries the flag and its mtime is no
 * later than the flag timestamp plus one second. Replies that are empty or
 * shorter than 50 characters are discarded. At most NORMALIZE_CONCURRENCY
 * rewrites are in flight at once. With `opts.dryRun` the LLM is still called
 * and counted, but no file is written.
 *
 * @param {Array<object>} survivors - entries (category, file, agent, date) to consider
 * @param {Function} callLLM - async (prompt, system, options) => { text, usage }
 * @param {Function} trackEngineUsage - (label, usage) usage accounting hook
 * @param {{ dryRun?: boolean }} [opts]
 * @returns {Promise<{ processed: number, bytesBefore: number, bytesAfter: number }>}
 */
async function _rewritePass(survivors, callLLM, trackEngineUsage, opts = {}) {
  const buildPrompt = (entry, body) => `You are restructuring a knowledge-base entry so future agents can scan it quickly.

Reshape the content into this exact template, preserving ALL actionable findings, file:line references, and code snippets. Compress to <=800 words by dropping boilerplate (dates, full file paths that aren't actionable, agent IDs in the body, narrative scaffolding).

Template:
## Summary
2-3 sentence overview.

## Key Findings
- Bullet 1 (specific, includes file:line where relevant)
- Bullet 2

## Action Items
- Bullet (omit section entirely if none)

## References
- file:line citations or doc links (omit section if none)

Output ONLY the template body — no frontmatter, no markdown code fence, no preamble.

Original entry (category: ${entry.category}, agent: ${entry.agent || '?'}, date: ${entry.date}):

${body}`;

  // True when the file was already swept and has not been modified since (1s tolerance).
  const untouchedSinceSweep = (filePath, stamp) => {
    if (!stamp) return false;
    try {
      const modifiedAt = fs.statSync(filePath).mtimeMs;
      const sweptAt = Date.parse(stamp);
      return Number.isFinite(sweptAt) && modifiedAt <= sweptAt + 1000;
    } catch {
      return false; // cannot stat — re-process
    }
  };

  const queue = [];
  for (const entry of survivors) {
    const fp = path.join(KB_DIR, entry.category, entry.file);
    const raw = safeRead(fp);
    if (raw == null) continue;
    const parsed = _parseFrontmatter(raw);
    if (untouchedSinceSweep(fp, parsed.fm[SWEPT_FLAG_KEY])) continue;
    queue.push({ entry, fp, fm: parsed.fm, body: parsed.body, originalSize: raw.length });
  }

  if (queue.length === 0) return { processed: 0, bytesBefore: 0, bytesAfter: 0 };

  const totals = { processed: 0, bytesBefore: 0, bytesAfter: 0 };

  // Rewrites one queued entry; returns without counting when the reply looks suspicious.
  async function rewriteOne(job) {
    const result = await callLLM(buildPrompt(job.entry, job.body), 'Output ONLY the template body.', {
      timeout: 120000, label: 'kb-rewrite', model: 'haiku', maxTurns: 1, direct: true,
    });
    trackEngineUsage('kb-sweep', result.usage);

    let rewritten = (result.text || '').trim();
    // Strip accidental code fence
    const fenced = rewritten.match(/^```(?:markdown|md)?\s*([\s\S]*?)```$/);
    if (fenced) rewritten = fenced[1].trim();
    if (!rewritten || rewritten.length < 50) return;

    const stampedFm = { ...job.fm, [SWEPT_FLAG_KEY]: new Date().toISOString() };
    const output = _serializeFrontmatter(stampedFm, rewritten);
    if (!opts.dryRun) safeWrite(job.fp, output);
    totals.bytesBefore += job.originalSize;
    totals.bytesAfter += output.length;
    totals.processed += 1;
  }

  // Shared cursor: each drainer claims the next job synchronously before awaiting.
  let next = 0;
  const drain = async () => {
    while (next < queue.length) {
      const job = queue[next];
      next += 1;
      try {
        await rewriteOne(job);
      } catch (e) {
        log('warn', `[kb-sweep] rewrite ${job.entry.category}/${job.entry.file}: ${e.message}`);
      }
    }
  };

  const pool = [];
  for (let i = 0; i < NORMALIZE_CONCURRENCY; i += 1) pool.push(drain());
  await Promise.all(pool);
  return totals;
}
229
+
230
/**
 * Apply the merged LLM cleanup plan to the knowledge base.
 *
 * - `plan.remove`: archive entries flagged as stale.
 * - `plan.duplicates`: archive every listed duplicate except the entry named
 *   as `keep`, even if the model also listed it under `remove`.
 * - `plan.reclassify`: move an entry to another known category, rewriting its
 *   `category:` line and preserving atime/mtime. A move to the entry's current
 *   category is ignored, because writing and then unlinking the same path would
 *   delete the entry. A move never overwrites an existing file at the
 *   destination and never happens when the source cannot be read.
 *
 * Malformed plan items (null items, unknown indices, non-array `remove` lists)
 * are skipped. With `opts.dryRun` the actions are only counted.
 *
 * @param {{ remove?: Array, duplicates?: Array, reclassify?: Array }} plan
 * @param {Array<object>} manifest - the list whose indices the plan refers to
 * @param {{ dryRun?: boolean }} [opts]
 * @returns {{ removed: number, merged: number, reclassified: number }}
 */
function _applyLlmPlan(plan, manifest, opts = {}) {
  let removed = 0;
  let merged = 0;
  let reclassified = 0;

  for (const r of (plan.remove || [])) {
    const entry = manifest[r?.index];
    if (!entry) continue;
    if (opts.dryRun) { removed++; continue; }
    if (_archiveKbFile(path.join(KB_DIR, entry.category, entry.file), `stale: ${r.reason || ''}`)) removed++;
  }

  for (const d of (plan.duplicates || [])) {
    const victims = Array.isArray(d?.remove) ? d.remove : [];
    for (const idx of victims) {
      const entry = manifest[idx];
      // Never archive the entry the model asked to keep.
      if (!entry || entry === manifest[d.keep]) continue;
      if (opts.dryRun) { merged++; continue; }
      if (_archiveKbFile(path.join(KB_DIR, entry.category, entry.file), `duplicate of index ${d.keep}: ${d.reason || ''}`)) merged++;
    }
  }

  for (const r of (plan.reclassify || [])) {
    const entry = manifest[r?.index];
    if (!entry || !shared.KB_CATEGORIES.includes(r.to)) continue;
    // Source and destination would be the same file: nothing to move.
    if (r.to === entry.category) continue;
    if (opts.dryRun) { reclassified++; continue; }
    const srcPath = path.join(KB_DIR, entry.category, entry.file);
    const destDir = path.join(KB_DIR, r.to);
    const destPath = path.join(destDir, entry.file);
    if (!fs.existsSync(srcPath)) continue;
    if (fs.existsSync(destPath)) {
      log('warn', `[kb-sweep] reclassify ${entry.file}: ${r.to}/${entry.file} already exists, skipping`);
      continue;
    }
    try {
      if (!fs.existsSync(destDir)) fs.mkdirSync(destDir, { recursive: true });
      const stats = fs.statSync(srcPath);
      const content = safeRead(srcPath);
      if (content == null) continue; // unreadable source: leave it where it is
      const updated = content.replace(/^(category:\s*).+$/m, `$1${r.to}`);
      safeWrite(destPath, updated);
      // Keep the original timestamps so the move does not make the entry look freshly edited.
      fs.utimesSync(destPath, stats.atime, stats.mtime);
      safeUnlink(srcPath);
      reclassified++;
    } catch (e) { log('warn', `[kb-sweep] reclassify ${entry.file}: ${e.message}`); }
  }
  return { removed, merged, reclassified };
}
267
+
268
/**
 * Run the full sweep. Returns a rich summary.
 *
 * Passes, in order: hash-based dedup, batched LLM plan (duplicates /
 * reclassify / stale), per-entry rewrite, and pruning of old archived files.
 * Pinned entries (from shared.getPinnedItems() plus `opts.pinnedKeys`) are
 * excluded from every pass and from the before/after tallies. Outside dry
 * runs the summary is persisted to kb-swept.json and the knowledge-base cache
 * is invalidated.
 *
 * @param {object} opts
 * @param {string[]} [opts.pinnedKeys] - extra pinned keys (e.g. from request body)
 * @param {boolean} [opts.dryRun] - count actions but don't mutate files
 * @returns {Promise<object>} summary
 */
async function runKbSweep(opts = {}) {
  // Loaded lazily inside the function, as in the original implementation.
  const { callLLM, trackEngineUsage } = require('./llm');
  const summary = {
    ok: true,
    entriesBefore: 0,
    entriesAfter: 0,
    bytesBefore: 0,
    bytesAfter: 0,
    hashDuplicatesArchived: 0,
    llmDuplicatesArchived: 0,
    staleRemoved: 0,
    reclassified: 0,
    rewritten: 0,
    rewriteBytesBefore: 0,
    rewriteBytesAfter: 0,
    sweptArchivePruned: 0,
    durationMs: 0,
  };
  const t0 = Date.now();

  const entries = queries.getKnowledgeBaseEntries();
  if (entries.length < 2) { summary.summary = 'nothing to sweep (< 2 entries)'; summary.durationMs = Date.now() - t0; return summary; }

  const requestPinned = Array.isArray(opts.pinnedKeys)
    ? opts.pinnedKeys.filter(k => typeof k === 'string' && k.startsWith('knowledge/'))
    : [];
  const pinned = new Set([
    ...shared.getPinnedItems().filter(k => k.startsWith('knowledge/')),
    ...requestPinned,
  ]);

  // Build manifest with content + mtime.
  // NOTE(review): content is truncated to 3000 characters here, so the
  // hash-dedup pass only ever compares that prefix of each entry.
  const manifest = [];
  for (const e of entries) {
    if (pinned.has(`knowledge/${e.cat}/${e.file}`)) continue;
    const fp = path.join(KB_DIR, e.cat, e.file);
    const content = safeRead(fp);
    if (!content) continue;
    let mtimeMs = 0;
    try { mtimeMs = fs.statSync(fp).mtimeMs; } catch { /* ignore */ }
    manifest.push({ category: e.cat, file: e.file, title: e.title, agent: e.agent, date: e.date, content: content.slice(0, 3000), mtimeMs });
    summary.entriesBefore++;
    summary.bytesBefore += content.length;
  }
  if (manifest.length < 2) { summary.summary = 'nothing to sweep (< 2 unpinned entries)'; summary.durationMs = Date.now() - t0; return summary; }

  // 1. Hash-based dedup (cheap, catches cross-batch duplicates)
  const { survivors: afterHash, archived: hashArchived } = _hashDedup(manifest, opts);
  summary.hashDuplicatesArchived = hashArchived;

  // 2. LLM batch sweep — within-batch dupes + reclassify + remove stale.
  //    The plan's indices refer to positions in the list sent to the LLM, so
  //    the same list must be handed to _applyLlmPlan.
  const llmManifest = afterHash;
  const plan = await _llmBatchSweep(llmManifest, callLLM, trackEngineUsage);
  const llmActions = _applyLlmPlan(plan, llmManifest, opts);
  summary.llmDuplicatesArchived = llmActions.merged;
  summary.staleRemoved = llmActions.removed;
  summary.reclassified = llmActions.reclassified;

  // 3. Per-entry rewrite (compress + normalize)
  //    Filter to entries that survived hash + LLM passes (still on disk)
  const stillOnDisk = afterHash.filter(e => fs.existsSync(path.join(KB_DIR, e.category, e.file)));
  const rewriteResult = await _rewritePass(stillOnDisk, callLLM, trackEngineUsage, opts);
  summary.rewritten = rewriteResult.processed;
  summary.rewriteBytesBefore = rewriteResult.bytesBefore;
  summary.rewriteBytesAfter = rewriteResult.bytesAfter;

  // 4. Prune old swept files (>30 days). Pruning deletes files, so it is
  //    skipped on dry runs, which must not mutate anything on disk.
  if (!opts.dryRun) summary.sweptArchivePruned = _pruneOldSwept();

  // Final tallies — re-walk surviving entries for accurate bytesAfter
  const finalEntries = queries.getKnowledgeBaseEntries();
  for (const e of finalEntries) {
    if (pinned.has(`knowledge/${e.cat}/${e.file}`)) continue;
    const fp = path.join(KB_DIR, e.cat, e.file);
    const content = safeRead(fp);
    if (!content) continue;
    summary.entriesAfter++;
    summary.bytesAfter += content.length;
  }

  summary.durationMs = Date.now() - t0;
  summary.summary = `${summary.hashDuplicatesArchived} hash-dup, ${summary.llmDuplicatesArchived} llm-dup, ${summary.staleRemoved} stale, ${summary.reclassified} reclassified, ${summary.rewritten} rewritten (${(summary.bytesBefore - summary.bytesAfter).toLocaleString()} bytes saved)`;

  if (!opts.dryRun) {
    // NOTE(review): safeWrite receives a pre-stringified payload here — confirm
    // it does not JSON-encode string input a second time.
    try { safeWrite(path.join(ENGINE_DIR, 'kb-swept.json'), JSON.stringify({ timestamp: ts(), summary: summary.summary, detail: summary })); } catch { /* ignore */ }
    try { queries.invalidateKnowledgeBaseCache(); } catch { /* ignore */ }
  }
  return summary;
}
366
+
367
/**
 * Compute a dynamic stale-guard timeout based on KB size.
 *
 * The result is the larger of 30 minutes and one second per entry, i.e. the
 * 30-minute floor only stops applying above 1800 entries. A missing,
 * non-numeric or negative count falls back to the floor instead of yielding
 * NaN, which would make every later comparison against the guard false.
 *
 * @param {number} entryCount - number of knowledge-base entries
 * @returns {number} timeout in milliseconds
 */
function staleGuardMs(entryCount) {
  const count = Number(entryCount);
  const perEntryMs = Number.isFinite(count) && count > 0 ? count * 1000 : 0;
  return Math.max(30 * 60 * 1000, perEntryMs);
}
372
+
373
+ module.exports = {
374
+ runKbSweep,
375
+ staleGuardMs,
376
+ // Exported for tests
377
+ _hashEntry,
378
+ _parseFrontmatter,
379
+ _serializeFrontmatter,
380
+ _hashDedup,
381
+ COMPRESS_THRESHOLD_BYTES,
382
+ SWEPT_FLAG_KEY,
383
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.1577",
3
+ "version": "0.1.1579",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"