@yemi33/minions 0.1.2003 → 0.1.2005

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dashboard.js CHANGED
@@ -38,7 +38,7 @@ const os = require('os');
38
38
  const { safeRead, safeReadOrNull, safeReadDir, safeWrite, safeJson, safeJsonObj, safeJsonArr, safeJsonNoRestore, safeUnlink, mutateJsonFileLocked, mutateTextFileLocked, mutateControl, mutateCooldowns, mutateWorkItems, getProjects: _getProjects, DONE_STATUSES, WI_STATUS, WORK_TYPE, WORKTREE_REQUIRING_TYPES, reopenWorkItem } = shared;
39
39
  const { getAgents, getAgentDetail, getPrdInfo, getWorkItems, getDispatchQueue,
40
40
  getSkills, getInbox, getNotesWithMeta, getPullRequests,
41
- getEngineLog, getMetrics, getKnowledgeBaseEntries, getProjectGitStatus, timeSince,
41
+ getEngineLog, getMetrics, getKnowledgeBaseEntries, getKnowledgeBaseEntriesSnapshot, getProjectGitStatus, timeSince,
42
42
  MINIONS_DIR, AGENTS_DIR, ENGINE_DIR, INBOX_DIR, DISPATCH_PATH, PRD_DIR } = queries;
43
43
 
44
44
  // Dev vs binary differentiation. When two dashboards run side-by-side (npm
@@ -912,6 +912,16 @@ function _steeringDeliveryState(agentId) {
912
912
 
913
913
  const PLANS_DIR = path.join(MINIONS_DIR, 'plans');
914
914
 
915
+ // W-mpetru8a000s123a — /api/plans cache. Dashboard auto-polls every 4s; the
916
+ // pre-cache cold path walked 4 directories (plans/, prd/, archive variants),
917
+ // sync-stat'd every file, and parsed/regex-scanned each .md → 2-3s blocking.
918
+ // 5s TTL means external .md edits surface within 5s. Mutation handlers must
919
+ // call invalidatePlansCache() for immediate visibility.
920
+ let _plansCache = null;
921
+ let _plansCacheTs = 0;
922
+ const PLANS_CACHE_TTL_MS = 5000;
923
+ function invalidatePlansCache() { _plansCache = null; _plansCacheTs = 0; }
924
+
915
925
  // Resolve a plan/PRD file path: .json files live in prd/, .md files in plans/
916
926
  // Validates that the file stays within the expected directory to prevent path traversal.
917
927
  function resolvePlanPath(file) {
@@ -1927,6 +1937,21 @@ const CC_LIVE_STREAM_MAX_AGE_MS = shared.ENGINE_DEFAULTS.ccLiveStreamMaxAgeMs;
1927
1937
  // edits aren't killed mid-stream and the backend timeout never beats the user's reading
1928
1938
  // time. The doc-chat handlers still abort on client disconnect.
1929
1939
  const DOC_CHAT_TIMEOUT_MS = 60 * 60 * 1000;
1940
+ // W-mpetru71000re5de — bound the SSE per-tab queue and force-close streams
1941
+ // whose consumer has been backpressured >30s. writeCcEvent used to log
1942
+ // [cc-sse-backpressure] and silently return true on res.write()===false;
1943
+ // Node's WritableState.buffered[] has no upper bound, so a backgrounded tab
1944
+ // whose socket is half-open (Windows default TCP keepalive 7200s) can
1945
+ // accumulate bytes forever — one tab can push the dashboard past V8's 4 GB
1946
+ // heap and silently OOM-kill. Smoking gun: a single CC stream sat open
1947
+ // 8.5 min with 45 tool events queued and 0 bytes flushed
1948
+ // ([cc-stream] reason=heartbeat-write-failed duration=511442ms chunks=0
1949
+ // tools=45 bytes=0). Shedding is safe because liveState.text/.tools are
1950
+ // populated BEFORE the writer({...}) call (dashboard.js:6753, 6760, 6854,
1951
+ // 6864), so dropped wire frames are fully recoverable via the
1952
+ // reconnect-replay protocol (dashboard.js:7048-7083).
1953
+ const SSE_MAX_QUEUE_BYTES = 4 * 1024 * 1024; // 4 MB per-tab — conservative, tunable
1954
+ const SSE_STUCK_KILL_MS = 30 * 1000; // 30s of continuous backpressure → res.destroy()
1930
1955
  function _releaseCCTab(tabId) { ccInFlightTabs.delete(tabId); ccInFlightAborts.delete(tabId); }
1931
1956
  function _getCcLiveStream(tabId) {
1932
1957
  return ccLiveStreams.get(tabId) || null;
@@ -4279,6 +4304,7 @@ const server = http.createServer(async (req, res) => {
4279
4304
  });
4280
4305
  if (existingVerify) {
4281
4306
  invalidateStatusCache();
4307
+ invalidatePlansCache();
4282
4308
  return jsonReply(res, 200, { ok: true, verifyId: existingVerify.id });
4283
4309
  }
4284
4310
  }
@@ -4298,6 +4324,7 @@ const server = http.createServer(async (req, res) => {
4298
4324
  const verify = items.find(w => w.sourcePlan === body.file && w.itemType === 'verify');
4299
4325
  if (verify) {
4300
4326
  invalidateStatusCache();
4327
+ invalidatePlansCache();
4301
4328
  return jsonReply(res, 200, { ok: true, verifyId: verify.id });
4302
4329
  }
4303
4330
  }
@@ -4894,6 +4921,7 @@ const server = http.createServer(async (req, res) => {
4894
4921
 
4895
4922
  const planFile = 'manual-' + shared.uid() + '.json';
4896
4923
  safeWrite(path.join(PRD_DIR, planFile), manualPrd.plan);
4924
+ invalidatePlansCache();
4897
4925
  return jsonReply(res, 200, { ok: true, id: manualPrd.id, file: planFile });
4898
4926
  } catch (e) { return jsonReply(res, 400, { error: e.message }); }
4899
4927
  }
@@ -4949,6 +4977,7 @@ const server = http.createServer(async (req, res) => {
4949
4977
  } catch (e) { console.error('work item sync:', e.message); }
4950
4978
  }
4951
4979
 
4980
+ invalidatePlansCache();
4952
4981
  return jsonReply(res, 200, { ok: true, item, workItemSynced });
4953
4982
  } catch (e) { return jsonReply(res, 400, { error: e.message }); }
4954
4983
  }
@@ -4995,6 +5024,7 @@ const server = http.createServer(async (req, res) => {
4995
5024
  d.meta?.item?.sourcePlan === body.source && d.meta?.item?.id === body.itemId
4996
5025
  );
4997
5026
 
5027
+ invalidatePlansCache();
4998
5028
  return jsonReply(res, 200, { ok: true, cancelled });
4999
5029
  } catch (e) { return jsonReply(res, 400, { error: e.message }); }
5000
5030
  }
@@ -5339,7 +5369,7 @@ const server = http.createServer(async (req, res) => {
5339
5369
  }
5340
5370
 
5341
5371
  async function handleKnowledgeList(req, res) {
5342
- const entries = getKnowledgeBaseEntries();
5372
+ const entries = await getKnowledgeBaseEntries();
5343
5373
  const result = {};
5344
5374
  for (const cat of shared.KB_CATEGORIES) result[cat] = [];
5345
5375
  for (const e of entries) {
@@ -5353,7 +5383,11 @@ const server = http.createServer(async (req, res) => {
5353
5383
  // Source of truth: kb-sweep-state.json + PID liveness — the in-process
5354
5384
  // sweep moved to a detached runner so in-memory globals are no longer
5355
5385
  // authoritative (they die with the dashboard).
5386
+ // W-mpetru8a000s123a: yield event loop before readSweepLiveness (sync
5387
+ // process.kill + safeJson) so a single /api/knowledge handler can't
5388
+ // chain three sync blocks back-to-back during a stall window.
5356
5389
  try {
5390
+ await new Promise(r => setImmediate(r));
5357
5391
  const { readSweepLiveness } = require('./engine/kb-sweep');
5358
5392
  const liveness = readSweepLiveness({ entryCount: entries.length });
5359
5393
  if (liveness.inFlight && liveness.alive) {
@@ -5386,7 +5420,7 @@ const server = http.createServer(async (req, res) => {
5386
5420
  const {
5387
5421
  readSweepLiveness, staleGuardMs, KB_SWEEP_STATE_PATH, KB_SWEEP_LOG_PATH, KB_SWEEP_RUNNER_PATH,
5388
5422
  } = require('./engine/kb-sweep');
5389
- const entryCount = (queries.getKnowledgeBaseEntries() || []).length;
5423
+ const entryCount = ((await queries.getKnowledgeBaseEntries()) || []).length;
5390
5424
  const guardMs = staleGuardMs(entryCount);
5391
5425
 
5392
5426
  // Synchronous pre-claim BEFORE awaiting the body so a concurrent POST
@@ -5482,11 +5516,11 @@ const server = http.createServer(async (req, res) => {
5482
5516
  }
5483
5517
 
5484
5518
 
5485
- function handleKnowledgeSweepStatus(req, res) {
5519
+ async function handleKnowledgeSweepStatus(req, res) {
5486
5520
  // Source of truth = kb-sweep-state.json + PID liveness. Globals are gone —
5487
5521
  // the runner is detached, so its lifecycle is independent of this process.
5488
5522
  const { readSweepLiveness } = require('./engine/kb-sweep');
5489
- const entries = queries.getKnowledgeBaseEntries() || [];
5523
+ const entries = (await queries.getKnowledgeBaseEntries()) || [];
5490
5524
  const liveness = readSweepLiveness({ entryCount: entries.length });
5491
5525
  const diskState = safeJson(path.join(ENGINE_DIR, 'kb-sweep-state.json'));
5492
5526
  let inFlight = false;
@@ -5513,13 +5547,19 @@ const server = http.createServer(async (req, res) => {
5513
5547
  }
5514
5548
 
5515
5549
  async function handlePlansList(req, res) {
5550
+ const now = Date.now();
5551
+ if (_plansCache && (now - _plansCacheTs) < PLANS_CACHE_TTL_MS) {
5552
+ return jsonReply(res, 200, _plansCache);
5553
+ }
5554
+ const fsp = fs.promises;
5516
5555
  const dirs = [
5517
5556
  { dir: PLANS_DIR, archived: false },
5518
5557
  { dir: path.join(PLANS_DIR, 'archive'), archived: true },
5519
5558
  { dir: PRD_DIR, archived: false },
5520
5559
  { dir: path.join(PRD_DIR, 'archive'), archived: true },
5521
5560
  ];
5522
- // Load work items to check for completed plan-to-prd conversions
5561
+ // Load work items to check for completed plan-to-prd conversions.
5562
+ // safeJsonArr is sync but reads a single small file — leave as is.
5523
5563
  const centralWi = safeJsonArr(path.join(MINIONS_DIR, 'work-items.json'));
5524
5564
  const completedPrdFiles = new Set(
5525
5565
  centralWi.filter(w => w.type === 'plan-to-prd' && DONE_STATUSES.has(w.status) && w.planFile)
@@ -5527,18 +5567,21 @@ const server = http.createServer(async (req, res) => {
5527
5567
  );
5528
5568
  const plans = [];
5529
5569
  for (const { dir, archived } of dirs) {
5530
- const allFiles = safeReadDir(dir).filter(f => f.endsWith('.json') || f.endsWith('.md'));
5531
- for (const f of allFiles) {
5570
+ const allFiles = (await fsp.readdir(dir).catch(() => []))
5571
+ .filter(f => f.endsWith('.json') || f.endsWith('.md'));
5572
+ const dirResults = await Promise.all(allFiles.map(async f => {
5532
5573
  const filePath = path.join(dir, f);
5533
- const content = safeRead(filePath) || '';
5534
- let updatedAt = '';
5535
- try { updatedAt = new Date(fs.statSync(filePath).mtimeMs).toISOString(); } catch { /* optional */ }
5574
+ const [content, stat] = await Promise.all([
5575
+ fsp.readFile(filePath, 'utf8').catch(() => ''),
5576
+ fsp.stat(filePath).catch(() => null),
5577
+ ]);
5578
+ const updatedAt = stat ? new Date(stat.mtimeMs).toISOString() : '';
5536
5579
  const isJson = f.endsWith('.json');
5537
5580
  if (isJson) {
5538
5581
  try {
5539
5582
  const plan = JSON.parse(content);
5540
5583
  const status = plan.status || 'active';
5541
- plans.push({
5584
+ return {
5542
5585
  file: f, format: 'prd', archived,
5543
5586
  project: plan.project || '',
5544
5587
  summary: plan.plan_summary || '',
@@ -5556,15 +5599,15 @@ const server = http.createServer(async (req, res) => {
5556
5599
  archiveReady: plan._archiveReady || false,
5557
5600
  archiveReadyAt: plan._archiveReadyAt || null,
5558
5601
  planStale: plan.planStale || false,
5559
- });
5560
- } catch { /* JSON parse fallback */ }
5602
+ };
5603
+ } catch { return null; /* JSON parse fallback */ }
5561
5604
  } else {
5562
5605
  const titleMatch = content.match(/^#\s+(?:Plan:\s*)?(.+)/m);
5563
5606
  const projectMatch = content.match(/\*\*Project:\*\*\s*(.+)/m);
5564
5607
  const authorMatch = content.match(/\*\*Author:\*\*\s*(.+)/m);
5565
5608
  const dateMatch = content.match(/\*\*Date:\*\*\s*(.+)/m);
5566
5609
  const versionMatch = f.match(/-v(\d+)/);
5567
- plans.push({
5610
+ return {
5568
5611
  file: f, format: 'draft', archived,
5569
5612
  project: projectMatch ? projectMatch[1].trim() : '',
5570
5613
  summary: titleMatch ? titleMatch[1].trim() : f.replace('.md', ''),
@@ -5578,11 +5621,14 @@ const server = http.createServer(async (req, res) => {
5578
5621
  requiresApproval: false,
5579
5622
  revisionFeedback: null,
5580
5623
  version: versionMatch ? parseInt(versionMatch[1]) : null,
5581
- });
5624
+ };
5582
5625
  }
5583
- }
5626
+ }));
5627
+ for (const r of dirResults) if (r) plans.push(r);
5584
5628
  }
5585
5629
  plans.sort((a, b) => (b.generatedAt || '').localeCompare(a.generatedAt || ''));
5630
+ _plansCache = plans;
5631
+ _plansCacheTs = Date.now();
5586
5632
  return jsonReply(res, 200, plans);
5587
5633
  }
5588
5634
 
@@ -5617,6 +5663,7 @@ const server = http.createServer(async (req, res) => {
5617
5663
  }
5618
5664
 
5619
5665
  invalidateStatusCache();
5666
+ invalidatePlansCache();
5620
5667
  return jsonReply(res, 200, { ok: true, unarchivedSource });
5621
5668
  } catch (e) { return jsonReply(res, 400, { error: e.message }); }
5622
5669
  }
@@ -5747,6 +5794,7 @@ const server = http.createServer(async (req, res) => {
5747
5794
  }
5748
5795
 
5749
5796
  invalidateStatusCache();
5797
+ invalidatePlansCache();
5750
5798
  return jsonReply(res, 200, { ok: true, status: 'approved', resumedWorkItems: resumed, diffAwareUpdate: diffAwareQueued });
5751
5799
  } catch (e) { return jsonReply(res, 400, { error: e.message }); }
5752
5800
  }
@@ -5862,6 +5910,7 @@ const server = http.createServer(async (req, res) => {
5862
5910
  }, { defaultValue: { pending: [], active: [], completed: [] } });
5863
5911
 
5864
5912
  invalidateStatusCache();
5913
+ invalidatePlansCache();
5865
5914
  return jsonReply(res, 200, { ok: true, status: 'paused', resetWorkItems: reset });
5866
5915
  } catch (e) { return jsonReply(res, 400, { error: e.message }); }
5867
5916
  }
@@ -5895,6 +5944,7 @@ const server = http.createServer(async (req, res) => {
5895
5944
  });
5896
5945
  if (!queueResult.queued) return jsonReply(res, 200, { ok: true, alreadyQueued: true, id: queueResult.id });
5897
5946
  invalidateStatusCache();
5947
+ invalidatePlansCache();
5898
5948
  return jsonReply(res, 200, { ok: true, id: queueResult.id });
5899
5949
  } catch (e) { return jsonReply(res, 400, { error: e.message }); }
5900
5950
  }
@@ -5913,6 +5963,7 @@ const server = http.createServer(async (req, res) => {
5913
5963
  return data;
5914
5964
  }, { defaultValue: {} });
5915
5965
 
5966
+ invalidatePlansCache();
5916
5967
  return jsonReply(res, 200, { ok: true, status: 'rejected' });
5917
5968
  } catch (e) { return jsonReply(res, 400, { error: e.message }); }
5918
5969
  }
@@ -6040,6 +6091,7 @@ const server = http.createServer(async (req, res) => {
6040
6091
  }
6041
6092
 
6042
6093
  invalidateStatusCache();
6094
+ invalidatePlansCache();
6043
6095
  return jsonReply(res, 200, { ok: true, cleanedWorkItems: cleaned, cleanedDispatches: dispatchCleaned });
6044
6096
  } catch (e) { return jsonReply(res, 400, { error: e.message }); }
6045
6097
  }
@@ -6115,6 +6167,7 @@ const server = http.createServer(async (req, res) => {
6115
6167
  } catch (e) { console.error('plan worktree cleanup:', e.message); }
6116
6168
 
6117
6169
  invalidateStatusCache();
6170
+ invalidatePlansCache();
6118
6171
  const payload = { ok: true, archived: body.file, archivedSource, cancelledItems };
6119
6172
  if (archiveWarnings.length > 0) payload.warnings = archiveWarnings;
6120
6173
  return jsonReply(res, 200, payload);
@@ -6149,6 +6202,7 @@ const server = http.createServer(async (req, res) => {
6149
6202
  }
6150
6203
 
6151
6204
  invalidateStatusCache();
6205
+ invalidatePlansCache();
6152
6206
  return jsonReply(res, 200, { ok: true, unarchivedSource });
6153
6207
  } catch (e) { return jsonReply(res, 400, { error: e.message }); }
6154
6208
  }
@@ -6180,6 +6234,7 @@ const server = http.createServer(async (req, res) => {
6180
6234
  planFile: body.file,
6181
6235
  });
6182
6236
  });
6237
+ invalidatePlansCache();
6183
6238
  return jsonReply(res, 200, { ok: true, status: 'revision-requested', workItemId: id });
6184
6239
  } catch (e) { return jsonReply(res, 400, { error: e.message }); }
6185
6240
  }
@@ -6332,6 +6387,13 @@ What would you like to discuss or change? When you're happy, say "approve" and I
6332
6387
  let _docStreamEnded = false;
6333
6388
  let _docHeartbeatTimer = null;
6334
6389
  const writeDocEvent = (payload) => {
6390
+ // TODO(W-mpetru71000re5de): doc-chat SSE has the same unbounded-queue
6391
+ // failure mode as CC's writeCcEvent — res.write() returning false from
6392
+ // backpressure silently queues bytes in Node's WritableState.buffered[].
6393
+ // Out of scope for this fix (task is scoped to CC only). When this is
6394
+ // addressed, mirror the SSE_MAX_QUEUE_BYTES shed + SSE_STUCK_KILL_MS
6395
+ // heartbeat force-close pattern from the writeCcEvent closure
6396
+ // (dashboard.js, search for SSE_MAX_QUEUE_BYTES).
6335
6397
  try {
6336
6398
  res.write('data: ' + JSON.stringify(payload) + '\n\n');
6337
6399
  return true;
@@ -7275,6 +7337,17 @@ What would you like to discuss or change? When you're happy, say "approve" and I
7275
7337
  let _ccStreamEnded = false;
7276
7338
  let _ccHeartbeatTimer = null;
7277
7339
  let _ccLastHeartbeatAt = Date.now();
7340
+ // W-mpetru71000re5de — per-stream backpressure clock + approximate queued
7341
+ // bytes. _bpStartedAt timestamps the first write that returned false (and
7342
+ // is reset on 'drain'); _queuedBytes accumulates write sizes pushed past
7343
+ // res.writableHighWaterMark (best-effort — it sums wire.length for each
7344
+ // backpressured write; NOTE(review): res.writableLength may expose the real
7345
+ // buffered size — confirm). Used by writeCcEvent (shed) + the heartbeat tick (force-close stuck streams).
7346
+ let _bpStartedAt = null;
7347
+ let _queuedBytes = 0;
7348
+ try {
7349
+ res.on('drain', () => { _bpStartedAt = null; _queuedBytes = 0; });
7350
+ } catch { /* listener registration is best-effort */ }
7278
7351
  // W-mpdavudb000v8446 — SSE delivery telemetry. Previously writeCcEvent
7279
7352
  // swallowed all write failures (res.destroyed / res.write returning false
7280
7353
  // for backpressure / sync throw), and the [cc-timing] log only proved
@@ -7311,6 +7384,30 @@ What would you like to discuss or change? When you're happy, say "approve" and I
7311
7384
  _logFail('json-serialize-failed', { error: String((err && err.message) || err).slice(0, 200) });
7312
7385
  return false;
7313
7386
  }
7387
+ // W-mpetru71000re5de — shed wire frames once the per-tab queue exceeds
7388
+ // the cap. Safe ONLY because liveState.text / liveState.tools are
7389
+ // populated BEFORE writer({...}) is called at every call site:
7390
+ // - dashboard.js:6753-6754 (legacy direct path: text → writer)
7391
+ // - dashboard.js:6760-6761 (legacy direct path: tools → writer)
7392
+ // - dashboard.js:6854-6855 (pool path: text → writer)
7393
+ // - dashboard.js:6864-6865 (pool path: tools → writer)
7394
+ // The reconnect path (dashboard.js:7048-7083) replays from liveState.*,
7395
+ // so dropped wire frames are fully recoverable on reattach. We still
7396
+ // bump _ccTelemetry counters so the [cc-stream] outcome log line stays
7397
+ // truthful about what the orchestrator produced — only the wire was
7398
+ // shed, the work happened.
7399
+ if (_queuedBytes > SSE_MAX_QUEUE_BYTES) {
7400
+ try {
7401
+ shared.log('warn', `[cc-sse-shed] tab=${tabId || _ccTelemetry.tabId || 'unknown'} type=${type} queuedBytes=${_queuedBytes} wireBytes=${wire.length}`);
7402
+ } catch { /* telemetry is best-effort */ }
7403
+ if (type === 'chunk') {
7404
+ _ccTelemetry.chunks++;
7405
+ _ccTelemetry.bytes += Buffer.byteLength(String((payload && payload.text) || ''), 'utf8');
7406
+ } else if (type === 'tool') {
7407
+ _ccTelemetry.tools++;
7408
+ }
7409
+ return true;
7410
+ }
7314
7411
  let writeOk;
7315
7412
  try { writeOk = res.write(wire); }
7316
7413
  catch (err) {
@@ -7322,8 +7419,16 @@ What would you like to discuss or change? When you're happy, say "approve" and I
7322
7419
  // The write IS still queued, so don't treat this as a failure, but
7323
7420
  // surface it so a slow consumer is visible in telemetry. Most CC
7324
7421
  // chunks are small enough that we never hit this in practice.
7422
+ // W-mpetru71000re5de — also start (or extend) the per-stream
7423
+ // backpressure clock and accumulate approximate queued bytes. The
7424
+ // heartbeat tick force-closes the stream once the clock exceeds
7425
+ // SSE_STUCK_KILL_MS; writeCcEvent above sheds further frames once
7426
+ // _queuedBytes exceeds SSE_MAX_QUEUE_BYTES. _bpStartedAt + _queuedBytes
7427
+ // are reset by the res.on('drain') listener registered above.
7428
+ if (_bpStartedAt == null) _bpStartedAt = Date.now();
7429
+ _queuedBytes += wire.length;
7325
7430
  try {
7326
- shared.log('warn', `[cc-sse-backpressure] tab=${tabId || _ccTelemetry.tabId || 'unknown'} type=${type} bytes=${wire.length}`);
7431
+ shared.log('warn', `[cc-sse-backpressure] tab=${tabId || _ccTelemetry.tabId || 'unknown'} type=${type} bytes=${wire.length} queuedBytes=${_queuedBytes} bpMs=${Date.now() - _bpStartedAt}`);
7327
7432
  } catch { /* telemetry is best-effort */ }
7328
7433
  }
7329
7434
  if (payload && payload.type === 'chunk') {
@@ -7365,6 +7470,22 @@ What would you like to discuss or change? When you're happy, say "approve" and I
7365
7470
  stopCcHeartbeat();
7366
7471
  return;
7367
7472
  }
7473
+ // W-mpetru71000re5de — force-close streams stuck on backpressure.
7474
+ // res.destroy() fires req.on('close'), which triggers the existing
7475
+ // teardown path (detach writer → schedule abort → cleanup), so the
7476
+ // queued bytes can be garbage-collected and the LLM is aborted. This
7477
+ // breaks a catch-22: _scheduleCcLiveAbort bails while state.writer is
7478
+ // non-null, and the writer stays non-null until req's 'close' fires —
7479
+ // which is exactly what res.destroy() triggers.
7480
+ if (_bpStartedAt && Date.now() - _bpStartedAt > SSE_STUCK_KILL_MS) {
7481
+ const stuckMs = Date.now() - _bpStartedAt;
7482
+ try {
7483
+ shared.log('warn', `[cc-sse-stuck-close] tab=${tabId || _ccTelemetry.tabId || 'unknown'} stuckMs=${stuckMs} queuedBytes=${_queuedBytes}`);
7484
+ } catch { /* telemetry is best-effort */ }
7485
+ stopCcHeartbeat();
7486
+ try { res.destroy(); } catch { /* swallow — req.on('close') will still fire */ }
7487
+ return;
7488
+ }
7368
7489
  _checkStall();
7369
7490
  if (!writeCcEvent({ type: 'heartbeat' })) {
7370
7491
  stopCcHeartbeat();
@@ -10071,6 +10192,12 @@ if (require.main === module) {
10071
10192
  console.log(` Projects: ${PROJECTS.map(p => `${p.name} (${p.localPath})`).join(', ')}`);
10072
10193
  console.log(`\n Auto-refreshes every 4s. Ctrl+C to stop.\n`);
10073
10194
 
10195
+ // W-mpetru8a000s123a — warm the async KB cache so synchronous callers
10196
+ // (getWorkItems) see real data on first /api/status instead of an empty
10197
+ // snapshot. Fire-and-forget; tolerant of warming failure.
10198
+ Promise.resolve(queries.getKnowledgeBaseEntries())
10199
+ .catch(err => console.warn(`[dashboard] KB cache warm failed: ${err && err.message}`));
10200
+
10074
10201
  // Auto-open the browser unless suppressed. `minions restart` and the
10075
10202
  // upgrade path set MINIONS_NO_AUTO_OPEN=1 because the CLI orchestrates the
10076
10203
  // open itself after observing whether an existing tab reconnected.
package/engine/cli.js CHANGED
@@ -895,6 +895,21 @@ const commands = {
895
895
  }
896
896
  })();
897
897
 
898
+ // W-mpetru8a000s123a — warm the async KB cache so synchronous callers
899
+ // (getWorkItems, getKnowledgeBaseIndex, playbook render) see real data
900
+ // on first read instead of the empty snapshot. Fire-and-forget; the cache
901
+ // updates as soon as the scan resolves and any inflight async caller
902
+ // shares the same promise.
903
+ (function warmKnowledgeBaseCache() {
904
+ try {
905
+ const queries = require('./queries');
906
+ Promise.resolve(queries.getKnowledgeBaseEntries())
907
+ .catch(err => e.log('warn', `KB cache warm failed: ${err && err.message}`));
908
+ } catch (err) {
909
+ e.log('warn', `KB cache warm setup failed: ${err.message}`);
910
+ }
911
+ })();
912
+
898
913
  // Initial tick
899
914
  e.tick();
900
915
 
@@ -484,7 +484,7 @@ async function _runKbSweepImpl(opts = {}) {
484
484
  };
485
485
  const t0 = Date.now();
486
486
 
487
- const entries = queries.getKnowledgeBaseEntries();
487
+ const entries = await queries.getKnowledgeBaseEntries();
488
488
  if (entries.length < 2) { summary.summary = 'nothing to sweep (< 2 entries)'; summary.durationMs = Date.now() - t0; return summary; }
489
489
 
490
490
  const requestPinned = Array.isArray(opts.pinnedKeys)
@@ -535,7 +535,7 @@ async function _runKbSweepImpl(opts = {}) {
535
535
  summary.sweptArchivePruned = _pruneOldSwept();
536
536
 
537
537
  // Final tallies — re-walk surviving entries for accurate bytesAfter
538
- const finalEntries = queries.getKnowledgeBaseEntries();
538
+ const finalEntries = await queries.getKnowledgeBaseEntries();
539
539
  for (const e of finalEntries) {
540
540
  if (pinned.has(`knowledge/${e.cat}/${e.file}`)) continue;
541
541
  const fp = path.join(KB_DIR, e.cat, e.file);
package/engine/queries.js CHANGED
@@ -5,6 +5,7 @@
5
5
  */
6
6
 
7
7
  const fs = require('fs');
8
+ const fsp = require('fs').promises;
8
9
  const path = require('path');
9
10
  const os = require('os');
10
11
  const shared = require('./shared');
@@ -1058,35 +1059,52 @@ function getCommandIndex(config) {
1058
1059
 
1059
1060
  // ── Knowledge Base ──────────────────────────────────────────────────────────
1060
1061
 
1061
- let _kbCache = null;
1062
+ // W-mpetru8a000s123a — async KB scan + stale-while-revalidate cache.
1063
+ // Async fs prevents event-loop stalls on /api/knowledge cold-cache hits
1064
+ // (previously ~11s of blocking readFileSync/statSync). Sync callers
1065
+ // (getWorkItems, getKnowledgeBaseIndex, playbook render) use the in-memory
1066
+ // snapshot via getKnowledgeBaseEntriesSnapshot() to avoid forcing async
1067
+ // propagation through ~17 callers of getWorkItems.
1068
+ let _kbCache = null; // last good snapshot — never nulled by invalidate
1062
1069
  let _kbCacheTs = 0;
1070
+ let _kbCacheStale = true; // invalidate marks stale; snapshot kept for sync readers
1071
+ let _kbRefreshPromise = null; // in-flight scan dedupe
1063
1072
  const KB_CACHE_TTL = 30000; // 30s — KB changes infrequently
1064
1073
 
1065
1074
  function invalidateKnowledgeBaseCache() {
1066
- _kbCache = null;
1075
+ _kbCacheStale = true;
1067
1076
  _kbCacheTs = 0;
1068
1077
  }
1069
1078
 
1070
- function getKnowledgeBaseEntries() {
1071
- const now = Date.now();
1072
- if (_kbCache && (now - _kbCacheTs) < KB_CACHE_TTL) return _kbCache;
1079
+ /**
1080
+ * Synchronous snapshot — returns last-known KB entries from memory, never
1081
+ * touches disk. Returns [] only until the first async getKnowledgeBaseEntries()
1082
+ * resolves. Used by sync callers (getWorkItems, getKnowledgeBaseIndex,
1083
+ * playbook render) that historically called the sync version.
1084
+ */
1085
+ function getKnowledgeBaseEntriesSnapshot() {
1086
+ return Array.isArray(_kbCache) ? _kbCache : [];
1087
+ }
1073
1088
 
1089
+ async function _scanKnowledgeBase() {
1074
1090
  const entries = [];
1075
1091
  for (const cat of KB_CATEGORIES) {
1076
1092
  const catDir = path.join(KNOWLEDGE_DIR, cat);
1077
- const files = safeReadDir(catDir).filter(f => f.endsWith('.md'));
1078
- for (const f of files) {
1093
+ const files = (await fsp.readdir(catDir).catch(() => [])).filter(f => f.endsWith('.md'));
1094
+ const fileResults = await Promise.all(files.map(async f => {
1079
1095
  const filePath = path.join(catDir, f);
1080
- const content = safeRead(filePath) || '';
1096
+ const [content, stat] = await Promise.all([
1097
+ fsp.readFile(filePath, 'utf8').catch(() => ''),
1098
+ fsp.stat(filePath).catch(() => null),
1099
+ ]);
1081
1100
  const titleMatch = content.match(/^#\s+(.+)/m);
1082
1101
  const title = titleMatch ? titleMatch[1].trim() : f.replace(/\.md$/, '');
1083
1102
  const agentMatch = f.match(/^\d{4}-\d{2}-\d{2}-(\w+)-/);
1084
1103
  const dateMatch = f.match(/^(\d{4}-\d{2}-\d{2})/) || content.match(/^date:\s*(\d{4}-\d{2}-\d{2})$/m);
1085
1104
  const sourceMatch = content.match(/^source:\s*(.+)/m);
1086
- let sortTs = 0;
1087
- try { sortTs = fs.statSync(filePath).mtimeMs || 0; } catch {}
1105
+ const sortTs = (stat && stat.mtimeMs) || 0;
1088
1106
  const displayDate = dateMatch ? dateMatch[1] : (sortTs ? new Date(sortTs).toISOString().slice(0, 10) : '');
1089
- entries.push({
1107
+ return {
1090
1108
  cat, file: f, title,
1091
1109
  agent: agentMatch ? agentMatch[1] : '',
1092
1110
  date: displayDate,
@@ -1094,22 +1112,36 @@ function getKnowledgeBaseEntries() {
1094
1112
  source: sourceMatch ? sourceMatch[1].trim() : '',
1095
1113
  preview: content.slice(0, 200),
1096
1114
  size: content.length,
1097
- });
1098
- }
1115
+ };
1116
+ }));
1117
+ entries.push(...fileResults);
1099
1118
  }
1100
1119
  entries.sort((a, b) =>
1101
1120
  (b.sortTs || 0) - (a.sortTs || 0) ||
1102
1121
  (b.date || '').localeCompare(a.date || '') ||
1103
1122
  a.title.localeCompare(b.title)
1104
1123
  );
1105
- _kbCache = entries;
1106
- _kbCacheTs = now;
1107
1124
  return entries;
1108
1125
  }
1109
1126
 
1127
+ async function getKnowledgeBaseEntries() {
1128
+ const now = Date.now();
1129
+ if (!_kbCacheStale && _kbCache && (now - _kbCacheTs) < KB_CACHE_TTL) return _kbCache;
1130
+ if (_kbRefreshPromise) return _kbRefreshPromise;
1131
+ _kbRefreshPromise = _scanKnowledgeBase()
1132
+ .then(entries => {
1133
+ _kbCache = entries;
1134
+ _kbCacheTs = Date.now();
1135
+ _kbCacheStale = false;
1136
+ return _kbCache;
1137
+ })
1138
+ .finally(() => { _kbRefreshPromise = null; });
1139
+ return _kbRefreshPromise;
1140
+ }
1141
+
1110
1142
  function getKnowledgeBaseIndex() {
1111
1143
  try {
1112
- const entries = getKnowledgeBaseEntries();
1144
+ const entries = getKnowledgeBaseEntriesSnapshot();
1113
1145
  if (entries.length === 0) return '';
1114
1146
  let index = '## Knowledge Base Reference\n\n';
1115
1147
  index += 'Deep-reference docs from past work. Read the file if you need detail.\n\n';
@@ -1227,8 +1259,9 @@ function getWorkItems(config) {
1227
1259
  const _agentDirCache = {};
1228
1260
  const _inboxFiles = safeReadDir(INBOX_DIR);
1229
1261
  const _archiveFiles = safeReadDir(ARCHIVE_DIR);
1230
- // Use cached KB entries (includes source frontmatter field)
1231
- const _kbEntries = getKnowledgeBaseEntries();
1262
+ // Read the in-memory KB snapshot synchronously; until the first async warm resolves it is [].
1263
+ // Best-effort enrichment for work item _artifacts.notes, not correctness-critical.
1264
+ const _kbEntries = getKnowledgeBaseEntriesSnapshot();
1232
1265
  for (const item of allItems) {
1233
1266
  const arts = {};
1234
1267
  const agentId = item.dispatched_to || item.agent;
@@ -1754,7 +1787,7 @@ module.exports = {
1754
1787
  collectCommandFiles, getCommandIndex,
1755
1788
 
1756
1789
  // Knowledge base
1757
- getKnowledgeBaseEntries, getKnowledgeBaseIndex,
1790
+ getKnowledgeBaseEntries, getKnowledgeBaseEntriesSnapshot, getKnowledgeBaseIndex,
1758
1791
 
1759
1792
  // Work items & PRD
1760
1793
  getWorkItems, getPrdInfo,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.2003",
3
+ "version": "0.1.2005",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"