@yemi33/minions 0.1.1949 → 0.1.1951

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/dashboard/js/command-center.js +9 -0
  2. package/dashboard/js/modal-qa.js +10 -0
  3. package/dashboard/js/refresh.js +4 -0
  4. package/dashboard/js/render-dispatch.js +25 -0
  5. package/dashboard/js/render-other.js +109 -2
  6. package/dashboard/js/settings.js +1 -1
  7. package/dashboard/layout.html +2 -2
  8. package/dashboard/pages/engine.html +6 -0
  9. package/dashboard/slim.html +1987 -0
  10. package/dashboard/styles.css +8 -0
  11. package/dashboard.js +450 -40
  12. package/docs/completion-reports.md +25 -0
  13. package/docs/design-state-storage.md +1 -1
  14. package/docs/slim-ux/architecture-suggestions.md +467 -0
  15. package/docs/slim-ux/concepts.md +824 -0
  16. package/engine/ado-mcp-wrapper.js +33 -7
  17. package/engine/ado.js +123 -15
  18. package/engine/cc-worker-pool.js +41 -0
  19. package/engine/cleanup.js +71 -34
  20. package/engine/cli.js +37 -0
  21. package/engine/dispatch.js +32 -9
  22. package/engine/features.js +6 -0
  23. package/engine/gh-token.js +137 -0
  24. package/engine/github.js +166 -29
  25. package/engine/issues.js +29 -0
  26. package/engine/keep-process-sweep.js +397 -0
  27. package/engine/lifecycle.js +150 -33
  28. package/engine/playbook.js +17 -0
  29. package/engine/queries.js +71 -0
  30. package/engine/recovery.js +6 -0
  31. package/engine/shared.js +481 -30
  32. package/engine/spawn-agent.js +44 -2
  33. package/engine/timeout.js +34 -11
  34. package/engine/worktree-pool.js +410 -0
  35. package/engine.js +643 -119
  36. package/package.json +6 -3
  37. package/playbooks/review.md +2 -0
  38. package/playbooks/shared-rules.md +3 -1
  39. package/prompts/cc-system.md +24 -0
  40. package/engine/copilot-models.json +0 -5
package/engine/ado-mcp-wrapper.js CHANGED
@@ -3,10 +3,32 @@
3
3
  * Wrapper for @azure-devops/mcp that fetches an ADO token via the shared
4
4
  * az-first provider chain and sets AZURE_DEVOPS_EXT_PAT before launching the
5
5
  * MCP server.
6
+ *
7
+ * P-b3f8e1a5: @azure-devops/mcp is pinned in package.json and resolved from
8
+ * local node_modules. We spawn process.execPath against the resolved bin file
9
+ * instead of going through npx/npx.cmd. This (a) eliminates the per-cold-start
10
+ * network fetch that ran with AZURE_DEVOPS_EXT_PAT in env, and (b) avoids the
11
+ * Windows .cmd shim chain that crashes the runtime under spawn.
6
12
  */
7
13
  const { spawn } = require('child_process');
14
+ const fs = require('fs');
15
+ const path = require('path');
8
16
  const { acquireAdoTokenSync } = require('./ado-token');
9
17
 
18
+ const PKG_NAME = '@azure-devops/mcp';
19
+ const BIN_NAME = 'mcp-server-azuredevops';
20
+
21
+ function resolveMcpBin() {
22
+ const pkgJsonPath = require.resolve(`${PKG_NAME}/package.json`);
23
+ const pkgJson = JSON.parse(fs.readFileSync(pkgJsonPath, 'utf8'));
24
+ const binField = pkgJson.bin;
25
+ const relBin = typeof binField === 'string' ? binField : binField && binField[BIN_NAME];
26
+ if (!relBin) {
27
+ throw new Error(`${PKG_NAME} package.json is missing bin entry "${BIN_NAME}"`);
28
+ }
29
+ return path.resolve(path.dirname(pkgJsonPath), relBin);
30
+ }
31
+
10
32
  let token;
11
33
  try {
12
34
  token = acquireAdoTokenSync().token;
@@ -16,14 +38,17 @@ try {
16
38
  process.exit(1);
17
39
  }
18
40
 
19
- // Launch the actual MCP server with the token in env
41
+ let binPath;
42
+ try {
43
+ binPath = resolveMcpBin();
44
+ } catch (e) {
45
+ process.stderr.write('ado-mcp-wrapper: failed to resolve ' + PKG_NAME + ': ' + e.message + '\n');
46
+ process.stderr.write('ado-mcp-wrapper: run "npm install" in the minions checkout to restore the pinned dependency\n');
47
+ process.exit(1);
48
+ }
49
+
20
50
  const args = process.argv.slice(2);
21
- const child = spawn(process.platform === 'win32' ? 'npx.cmd' : 'npx', [
22
- '-y',
23
- '--registry=https://registry.npmjs.org/',
24
- '@azure-devops/mcp@latest',
25
- ...args
26
- ], {
51
+ const child = spawn(process.execPath, [binPath, ...args], {
27
52
  stdio: 'inherit',
28
53
  env: { ...process.env, AZURE_DEVOPS_EXT_PAT: token, AZURE_DEVOPS_EXT_AZURE_RM_PAT: token },
29
54
  windowsHide: true,
@@ -35,3 +60,4 @@ child.on('error', (err) => {
35
60
  process.stderr.write('ado-mcp-wrapper: ' + err.message + '\n');
36
61
  process.exit(1);
37
62
  });
63
+
package/engine/ado.js CHANGED
@@ -347,10 +347,16 @@ function applyAdoPrMetadata(pr, prData) {
347
347
 
348
348
  const sourceBranch = stripRefsHeads(prData.sourceRefName);
349
349
  if (sourceBranch && (pr.branch !== sourceBranch || pr._branchResolutionError || pr._pendingReason === shared.PR_PENDING_REASON.MISSING_BRANCH)) {
350
- pr.branch = sourceBranch;
351
- if (pr._branchResolutionError) delete pr._branchResolutionError;
352
- if (pr._pendingReason === shared.PR_PENDING_REASON.MISSING_BRANCH) delete pr._pendingReason;
353
- updated = true;
350
+ // P-a7c4d2e8 (F3): validate ADO-derived branch ref before persistence.
351
+ // On invalid ref, log + skip so the poller keeps running (defensive).
352
+ try {
353
+ pr.branch = shared.validateGitRef(sourceBranch);
354
+ if (pr._branchResolutionError) delete pr._branchResolutionError;
355
+ if (pr._pendingReason === shared.PR_PENDING_REASON.MISSING_BRANCH) delete pr._pendingReason;
356
+ updated = true;
357
+ } catch (refErr) {
358
+ log('warn', `ADO: invalid sourceRefName "${sourceBranch.slice(0, 64)}" for PR ${pr.id || '?'}: ${refErr.message}`);
359
+ }
354
360
  }
355
361
 
356
362
  const title = String(prData.title || '').trim();
@@ -590,6 +596,13 @@ async function forEachActivePr(config, token, callback) {
590
596
 
591
597
  for (const project of projects) {
592
598
  if (isGitHubProject(project)) continue;
599
+ // W-mp625n27000m6e78 — Honor per-project workSources.pullRequests.enabled.
600
+ // Default-ON: only skip when explicitly disabled (=== false). Same gate as
601
+ // engine/github.js:forEachActiveGhPr — without this, disabling PR polling
602
+ // for an ADO project via the dashboard Settings UI was cosmetic.
603
+ const src = project?.workSources?.pullRequests || config?.workSources?.pullRequests;
604
+ if (src && src.enabled === false) continue;
605
+
593
606
  repairAdoProjectConfig(project, 'ADO PR polling');
594
607
  if (!project.adoOrg || !project.adoProject) continue;
595
608
 
@@ -1060,17 +1073,32 @@ async function pollPrHumanComments(config) {
1060
1073
  const cutoffStr = pr.humanFeedback?.lastProcessedCommentDate || pr.created || '1970-01-01';
1061
1074
  const cutoffMs = new Date(cutoffStr).getTime() || 0;
1062
1075
 
1063
- // Collect ALL human comments on the PR for full context
1076
+ // Collect ALL human comments on the PR for full context. `allCommentDates`
1077
+ // tracks the publishedDate of every NON-system comment we observed —
1078
+ // including bot/CI bodies and explicitly-ignored authors that get filtered
1079
+ // out before reaching `allHumanComments`. We need their dates so the
1080
+ // cutoff can advance past them on every successful poll, mirroring the
1081
+ // GitHub poller convention (engine/github.js: `allCommentDates`). Without
1082
+ // this, ignored/CI comments are re-fetched and re-filtered every tick.
1064
1083
  const allHumanComments = [];
1065
1084
  const newHumanComments = [];
1085
+ const allCommentDates = [];
1066
1086
  const ignoredAuthors = (config.engine?.ignoredCommentAuthors || []).map(a => a.toLowerCase());
1067
1087
 
1068
1088
  for (const thread of threads) {
1069
- // Skip resolved/closed threads only process active (1) and pending (6)
1070
- if (thread.status && thread.status !== 'active' && thread.status !== 1 && thread.status !== 6) continue;
1089
+ // P-8c6a4f2d: collect comments from ALL threads regardless of status.
1090
+ // Previously we skipped resolved/closed threads here, which dropped
1091
+ // fresh human replies (humans do reply on resolved threads after a
1092
+ // teammate marks them done). Classification below — ignoredAuthors,
1093
+ // CI/coverage report regex, agent (Minions) detection — runs on every
1094
+ // collected comment, mirroring engine/github.js pollPrHumanComments
1095
+ // which has never filtered by thread state.
1071
1096
  for (const comment of (thread.comments || [])) {
1072
1097
  if (!comment.content || comment.commentType === 'system') continue;
1073
1098
  const content = comment.content;
1099
+ // Track date for cutoff BEFORE author/body filters so bot/CI/ignored
1100
+ // comments still advance the cutoff.
1101
+ if (comment.publishedDate) allCommentDates.push(comment.publishedDate);
1074
1102
  // Skip explicitly ignored authors and CI-report bodies, but do not ignore bot authors by default.
1075
1103
  const authorName = (comment.author?.displayName || '').toLowerCase();
1076
1104
  if (ignoredAuthors.some(a => authorName.includes(a))) continue;
@@ -1096,18 +1124,21 @@ async function pollPrHumanComments(config) {
1096
1124
  }
1097
1125
  }
1098
1126
 
1099
- // Update cutoff even if only agent comments are new
1100
- const allNewDates = allHumanComments.filter(c => (new Date(c.date).getTime() || 0) > cutoffMs).map(c => c.date);
1127
+ // Persist cutoff unconditionally for any new comment we observed — even
1128
+ // if every new comment was bot/CI/ignored/agent. Mirrors GitHub poller.
1129
+ const allNewDates = allCommentDates.filter(d => (new Date(d).getTime() || 0) > cutoffMs);
1101
1130
  if (allNewDates.length > 0 && newHumanComments.length === 0) {
1102
1131
  pr.humanFeedback = { ...(pr.humanFeedback || {}), lastProcessedCommentDate: allNewDates.sort().pop() };
1103
1132
  return true;
1104
1133
  }
1105
1134
  if (newHumanComments.length === 0) return false;
1106
1135
 
1107
- // Sort all comments chronologically and build full context for the fix agent
1136
+ // Sort all comments chronologically and build full context for the fix agent.
1137
+ // Cutoff advances to the latest of ALL new comment dates (so newer agent/CI
1138
+ // comments interleaved with the human feedback don't re-scan next tick).
1108
1139
  allHumanComments.sort((a, b) => a.date.localeCompare(b.date));
1109
1140
  newHumanComments.sort((a, b) => a.date.localeCompare(b.date));
1110
- const latestDate = newHumanComments[newHumanComments.length - 1].date;
1141
+ const latestDate = allNewDates.sort().pop() || newHumanComments[newHumanComments.length - 1].date;
1111
1142
 
1112
1143
  // Provide ALL comments as context — the agent needs full thread context to fix properly
1113
1144
  const feedbackContent = allHumanComments
@@ -1213,10 +1244,15 @@ async function reconcilePrs(config) {
1213
1244
  existing.prNumber = adoPr.pullRequestId;
1214
1245
  }
1215
1246
  if (existing && !existing.branch && branch) {
1216
- existing.branch = branch;
1217
- if (existing._branchResolutionError) delete existing._branchResolutionError;
1218
- if (existing._pendingReason === shared.PR_PENDING_REASON.MISSING_BRANCH) delete existing._pendingReason;
1219
- metadataUpdated++;
1247
+ // P-a7c4d2e8 (F3): validate ADO-derived branch ref before persistence.
1248
+ try {
1249
+ existing.branch = shared.validateGitRef(branch);
1250
+ if (existing._branchResolutionError) delete existing._branchResolutionError;
1251
+ if (existing._pendingReason === shared.PR_PENDING_REASON.MISSING_BRANCH) delete existing._pendingReason;
1252
+ metadataUpdated++;
1253
+ } catch (refErr) {
1254
+ log('warn', `ADO: invalid sourceRefName "${branch.slice(0, 64)}" for PR ${prId}: ${refErr.message}`);
1255
+ }
1220
1256
  }
1221
1257
  // PR already tracked — write link to pr-links.json if we can extract an ID
1222
1258
  if (confirmedItemId) {
@@ -1331,6 +1367,77 @@ async function checkLiveReviewStatus(pr, project) {
1331
1367
  }
1332
1368
  }
1333
1369
 
1370
+ /**
1371
+ * W-mp7b1g8q000fea45 — Reviewer vote reconciliation on verdict flip.
1372
+ *
1373
+ * Resets the *authenticated reviewer's own* prior negative vote (-5 wait-for-author
1374
+ * or -10 rejected) to +10 (approved) when an agent flips their verdict from
1375
+ * request_changes → approved on a re-review. Mirrors the existing target-branch
1376
+ * re-approval pattern at engine/ado.js:837-852 (PUT reviewers/{myId} {vote:10}).
1377
+ *
1378
+ * IMPORTANT: only operates on the authenticated reviewer's own vote. Other
1379
+ * reviewers' negative votes (humans, other minions on a different account) are
1380
+ * left untouched — caller (lifecycle.updatePrAfterReview) re-checks live status
1381
+ * after the reset and refuses to mark the PR locally approved if any negative
1382
+ * vote remains.
1383
+ *
1384
+ * Returns null on transport failure (no token, no project config, network error).
1385
+ * Otherwise returns:
1386
+ * {
1387
+ * attempted: boolean, // true if we found our reviewer entry and tried to PUT
1388
+ * changed: boolean, // true if the reviewer's vote was actually flipped from <0 → 10
1389
+ * fromVote: number|null,// the reviewer's prior vote (null if not a reviewer on the PR)
1390
+ * toVote: number|null,// the vote after PUT (10 if changed, prior value otherwise)
1391
+ * }
1392
+ */
1393
+ async function resetReviewerNegativeVote(pr, project) {
1394
+ try {
1395
+ repairAdoProjectConfig(project, 'ADO reviewer vote reset', pr ? [pr] : null);
1396
+ if (!project?.adoOrg || !project?.adoProject) return null;
1397
+ const token = await getAdoToken();
1398
+ if (!token) return null;
1399
+ const orgBase = shared.getAdoOrgBase(project);
1400
+ const prNum = shared.getPrNumber(pr);
1401
+ if (!prNum) return null;
1402
+ const adoRepositoryLookupKey = getAdoRepositoryLookupKey(project);
1403
+ if (!adoRepositoryLookupKey) {
1404
+ logMissingAdoRepository(project, 'ADO reviewer vote reset');
1405
+ return null;
1406
+ }
1407
+ const encodedRepoId = encodeURIComponent(adoRepositoryLookupKey);
1408
+ const repoBase = `${orgBase}/${project.adoProject}/_apis/git/repositories/${encodedRepoId}`;
1409
+ const prUrl = `${repoBase}/pullrequests/${prNum}?api-version=7.1`;
1410
+ // 4s timeout — same budget as checkLiveReviewStatus.
1411
+ const prData = await adoFetch(prUrl, token, { timeout: 4000 });
1412
+ if (!prData) return null;
1413
+ // Identify our authenticated reviewer entry.
1414
+ const identityData = await adoFetch(`${orgBase}/_apis/connectionData?api-version=7.1`, token, { timeout: 4000 }).catch(() => null);
1415
+ const myId = identityData?.authenticatedUser?.id;
1416
+ if (!myId) return null;
1417
+ const myReviewer = (prData.reviewers || []).find(r => String(r?.id || '').toLowerCase() === String(myId).toLowerCase());
1418
+ const myVote = myReviewer && Number.isFinite(myReviewer.vote) ? myReviewer.vote : null;
1419
+ if (myVote == null) {
1420
+ // Not a reviewer on this PR — nothing to reset. (Reviewing-without-vote
1421
+ // is the normal GH path; on ADO we generally do vote, so this is rare.)
1422
+ return { attempted: false, changed: false, fromVote: null, toVote: null };
1423
+ }
1424
+ if (myVote >= 0) {
1425
+ // Already neutral or positive — nothing to reset.
1426
+ return { attempted: false, changed: false, fromVote: myVote, toVote: myVote };
1427
+ }
1428
+ await adoFetch(`${repoBase}/pullrequests/${prNum}/reviewers/${myId}?api-version=7.1`, token, {
1429
+ method: 'PUT',
1430
+ body: JSON.stringify({ vote: 10 }),
1431
+ timeout: 4000,
1432
+ });
1433
+ log('info', `PR ${pr.id}: reset reviewer vote ${myVote} → 10 (verdict flipped to approved)`);
1434
+ return { attempted: true, changed: true, fromVote: myVote, toVote: 10 };
1435
+ } catch (e) {
1436
+ log('warn', `Reviewer vote reset for ${pr?.id || 'unknown PR'}: ${e.message}`);
1437
+ return null;
1438
+ }
1439
+ }
1440
+
1334
1441
  /**
1335
1442
  * Cheap pre-dispatch freshness check for build status and merge-conflict state.
1336
1443
  * Mirrors checkLiveReviewStatus — fetches PR data once, classifies builds for the
@@ -1767,6 +1874,7 @@ module.exports = {
1767
1874
  reconcileAbandonedPrs, // W-mp60tw0u000j3931 — one-shot startup re-probe of abandoned PRs
1768
1875
  checkLiveReviewStatus,
1769
1876
  checkLiveBuildAndConflict,
1877
+ resetReviewerNegativeVote, // W-mp7b1g8q000fea45 — reset reviewer's prior negative vote on verdict flip
1770
1878
  needsAdoPollRetry,
1771
1879
  isAdoAuthError, // exported for testing
1772
1880
  isAdoThrottled,
package/engine/cc-worker-pool.js CHANGED
@@ -59,6 +59,11 @@ const IDLE_REAPER_MS = 10 * 60 * 1000;
59
59
  // Reaper sweep cadence. Not exposed as ENGINE_DEFAULTS to keep the pool
60
60
  // dependency-free; sub-task C/D can plumb a config knob if needed.
61
61
  const REAPER_INTERVAL_MS = 60 * 1000;
62
+ // Cap concurrent warm spawns triggered by tab/modal open. Without a cap, a
63
+ // user spamming new tabs would fan out N parallel cold spawns; with one, the
64
+ // excess waits in FIFO order while the first few warm. The actual user-driven
65
+ // getSession() path is NOT throttled — only the pre-warm path is.
66
+ const WARM_MAX_CONCURRENT = 3;
62
67
 
63
68
  // Test seam — every external side effect goes through `_internals` so
64
69
  // test/unit/cc-worker-pool.test.js can stub spawn/now/killImmediate.
@@ -556,6 +561,40 @@ function closeTab(tabId) {
556
561
  worker.close();
557
562
  }
558
563
 
564
+ let _warmInflight = 0;
565
+ const _warmQueue = [];
566
+
567
+ function _drainWarmQueue() {
568
+ while (_warmInflight < WARM_MAX_CONCURRENT && _warmQueue.length > 0) {
569
+ const next = _warmQueue.shift();
570
+ _warmInflight++;
571
+ next().finally(() => {
572
+ _warmInflight--;
573
+ _drainWarmQueue();
574
+ });
575
+ }
576
+ }
577
+
578
+ // Pre-warm a tab: spawn the process, run the initialize handshake, and create
579
+ // the ACP session (including MCP server init) without sending a prompt. The
580
+ // returned session is registered in `_tabs` exactly as if getSession had built
581
+ // it, so the next real call is a warm-reuse hit. Already-warm tabs are a
582
+ // no-op. Concurrent calls beyond WARM_MAX_CONCURRENT queue in FIFO order.
583
+ async function warmTab(args = {}) {
584
+ if (!args.tabId) throw new Error('cc-worker-pool.warmTab: tabId is required');
585
+ return new Promise((resolve, reject) => {
586
+ _warmQueue.push(async () => {
587
+ try {
588
+ const handle = await getSession(args);
589
+ resolve({ tabId: args.tabId, lifecycle: handle.lifecycle, sessionId: handle.sessionId });
590
+ } catch (err) {
591
+ reject(err);
592
+ }
593
+ });
594
+ _drainWarmQueue();
595
+ });
596
+ }
597
+
559
598
  // Cancel the currently in-flight prompt on this tab without killing the
560
599
  // worker. Sends ACP `session/cancel` so the remote daemon stops generating;
561
600
  // the warm process + initialized MCP servers + session state are preserved
@@ -598,6 +637,7 @@ function _reapIdleTabs() {
598
637
 
599
638
  module.exports = {
600
639
  getSession,
640
+ warmTab,
601
641
  closeTab,
602
642
  cancelInflight,
603
643
  shutdown,
@@ -608,4 +648,5 @@ module.exports = {
608
648
  _buildSessionNewParams,
609
649
  IDLE_REAPER_MS,
610
650
  REAPER_INTERVAL_MS,
651
+ WARM_MAX_CONCURRENT,
611
652
  };
package/engine/cleanup.js CHANGED
@@ -28,6 +28,16 @@ function engine() { if (!_engine) _engine = require('../engine'); return _engine
28
28
  let _dispatch = null;
29
29
  function dispatchModule() { if (!_dispatch) _dispatch = require('./dispatch'); return _dispatch; }
30
30
 
31
+ // Lazy require for worktree-pool — used to protect pool entries from age/cap
32
+ // sweeps (W-mp73ya3e000me6c5). Borrowed entries with live dispatches are
33
+ // always protected; idle entries survive the age sweep but stay eligible for
34
+ // the cap sweep so MAX_WORKTREES is still honored.
35
+ let _worktreePool = null;
36
+ function worktreePool() {
37
+ if (!_worktreePool) _worktreePool = require('./worktree-pool');
38
+ return _worktreePool;
39
+ }
40
+
31
41
  // ─── Helpers ────────────────────────────────────────────────────────────────
32
42
 
33
43
  /**
@@ -247,30 +257,6 @@ async function getWorktreeBranchAsync(wtPath) {
247
257
  }
248
258
  }
249
259
 
250
- let _orphanPidProcessNamesCache = null;
251
- function _orphanPidProcessNames() {
252
- if (_orphanPidProcessNamesCache) return _orphanPidProcessNamesCache;
253
- const names = new Set(['node']);
254
- try {
255
- for (const name of require('./runtimes').listRuntimes()) names.add(String(name).toLowerCase());
256
- // Copilot can run through the GitHub CLI fallback (`gh copilot`), so allow
257
- // gh only when the copilot runtime is registered.
258
- if (names.has('copilot')) names.add('gh');
259
- } catch {
260
- names.add('claude');
261
- }
262
- _orphanPidProcessNamesCache = names;
263
- return names;
264
- }
265
-
266
- function _processNameAllowedForOrphanKill(processText) {
267
- const firstLine = String(processText || '').trim().split(/\r?\n/).find(Boolean) || '';
268
- const imageName = path.basename(firstLine.trim().split(/\s+/)[0] || '').toLowerCase().replace(/\.exe$/, '');
269
- if (!imageName) return false;
270
- return _orphanPidProcessNames().has(imageName);
271
- }
272
-
273
-
274
260
  /**
275
261
  * Kill orphaned processes whose dispatch ID appears in the worktree dir name.
276
262
  * Only kills processes NOT in the active dispatch queue — never kills live agents.
@@ -300,19 +286,15 @@ function _killProcessInWorktree(dir, activeProcesses, activeIds) {
300
286
  if (isActive) continue; // still active — do not kill
301
287
  const pid = parseInt(fs.readFileSync(path.join(tmpDir, f), 'utf8').trim(), 10);
302
288
  if (pid > 0) {
303
- // Verify the PID still belongs to a Minions runtime process before killing
289
+ // Verify the PID still belongs to a Minions runtime process before killing.
290
+ // The shared helper inspects the PID's full command line for `claude` /
291
+ // `copilot` so a recycled PID running an unrelated process is skipped.
304
292
  try {
305
293
  if (process.platform === 'win32') {
306
- const taskInfo = exec(`tasklist /FI "PID eq ${pid}" /NH`, { encoding: 'utf8', timeout: 3000, windowsHide: true });
307
- const taskLower = taskInfo.toLowerCase();
308
- if (!_processNameAllowedForOrphanKill(taskLower)) continue;
294
+ if (!shared.isProcessCommandLineMatchingAgent(pid)) continue;
309
295
  exec(`taskkill /F /T /PID ${pid}`, { stdio: 'pipe', timeout: 5000, windowsHide: true });
310
296
  } else {
311
- // Verify the process name before killing (prevent recycled PID kill)
312
- try {
313
- const psOut = exec(`ps -p ${pid} -o comm=`, { encoding: 'utf8', timeout: 3000 }).trim();
314
- if (!_processNameAllowedForOrphanKill(psOut)) continue;
315
- } catch { continue; } // process dead or ps failed
297
+ if (!shared.isProcessCommandLineMatchingAgent(pid)) continue;
316
298
  try { process.kill(-pid, 'SIGKILL'); } catch { process.kill(pid, 'SIGKILL'); }
317
299
  }
318
300
  log('info', `Killed orphaned PID ${pid} (${f}) before worktree removal`);
@@ -477,6 +459,18 @@ async function runCleanup(config, verbose = false) {
477
459
  // isn't destroyed alongside the worktree.
478
460
  const phantomBranches = collectPhantomBranchesForProject(project);
479
461
 
462
+ // W-mp73ya3e000me6c5 — worktree pool protection sets. Borrowed entries
463
+ // with live dispatches are ALWAYS protected (no age, no cap). Idle
464
+ // entries survive the age sweep but stay eligible for the cap sweep so
465
+ // MAX_WORKTREES is still honored.
466
+ let _activeBorrowedPaths = new Set();
467
+ let _idlePaths = new Set();
468
+ try {
469
+ _activeBorrowedPaths = worktreePool().getActiveBorrowedPaths(activeDispatchIds);
470
+ _idlePaths = worktreePool().getIdlePaths();
471
+ } catch (e) { log('warn', `worktree-pool: cleanup lookup failed: ${e.message}`); }
472
+ const _normalizePoolPath = worktreePool()._normalizePath;
473
+
480
474
  // Probe `git branch --show-current` for every worktree in chunks of 5.
481
475
  // Sequential probing was the dominant cost in the cleanup phase
482
476
  // (5–15s tick stall every 10 ticks at 50+ worktrees), but unbounded
@@ -531,6 +525,19 @@ async function runCleanup(config, verbose = false) {
531
525
  }
532
526
  }
533
527
 
528
+ // W-mp73ya3e000me6c5 — pool-membership protection.
529
+ const _normWt = _normalizePoolPath ? _normalizePoolPath(wtPath) : wtPath;
530
+ const _isActiveBorrowed = _activeBorrowedPaths.has(_normWt);
531
+ const _isIdlePool = _idlePaths.has(_normWt);
532
+ if (_isActiveBorrowed) {
533
+ // Borrowed by a live dispatch — full protection (overrides any
534
+ // earlier shouldClean from merged-branch match too: the borrow
535
+ // owner is responsible, not stale PR data).
536
+ isProtected = true;
537
+ shouldClean = false;
538
+ if (verbose) console.log(` Skipping worktree ${dir}: pool-borrowed by active dispatch`);
539
+ }
540
+
534
541
  // Also clean worktrees older than 2 hours with no active dispatch referencing them
535
542
  let mtime = Date.now();
536
543
  if (!shouldClean) {
@@ -538,7 +545,7 @@ async function runCleanup(config, verbose = false) {
538
545
  const stat = fs.statSync(wtPath);
539
546
  mtime = stat.mtimeMs;
540
547
  const ageMs = Date.now() - mtime;
541
- if (ageMs > 7200000 && !isReferenced && !isProtected) { // 2 hours — P-e0b4f7a5: phantom-protected worktrees survive the age sweep too
548
+ if (ageMs > 7200000 && !isReferenced && !isProtected && !_isIdlePool) { // 2 hours — P-e0b4f7a5: phantom-protected worktrees survive the age sweep too. W-mp73ya3e000me6c5: idle pool entries also survive (TTL eviction handled by worktree-pool.pruneStale).
542
549
  shouldClean = true;
543
550
  }
544
551
  } catch { /* optional */ }
@@ -625,6 +632,9 @@ async function runCleanup(config, verbose = false) {
625
632
  _killProcessInWorktree(entry.dir, activeProcesses, activeDispatchIds);
626
633
  if (shared.removeWorktree(entry.wtPath, root, worktreeRoot)) {
627
634
  cleaned.worktrees++;
635
+ // W-mp73ya3e000me6c5 — keep the pool file in sync when an idle
636
+ // pool entry is reaped by the cap sweep (or any other reason).
637
+ try { worktreePool().evictEntry(entry.wtPath, 'cleanup-removed'); } catch (_e) { /* optional */ }
628
638
  const mergedPr = entry.matchedMergedBranch
629
639
  ? freshMergedPrByBranch.get(sanitizeBranch(normalizeLocalBranchName(entry.matchedMergedBranch)).toLowerCase())
630
640
  : null;
@@ -639,6 +649,26 @@ async function runCleanup(config, verbose = false) {
639
649
  } // end worktreeRoots loop
640
650
  }
641
651
 
652
+ // W-mp73ya3e000me6c5 — prune the worktree pool: drop entries whose path is
653
+ // gone, whose borrower is no longer in dispatch.active, or whose idle TTL
654
+ // has expired. Runs after the per-project worktree sweeps so we operate on
655
+ // the freshest dispatch state. The pool file is only mutated when entries
656
+ // change; default-off projects (no entries) are a no-op.
657
+ try {
658
+ const _dispatchForPrune = getDispatch();
659
+ const _activeIdsForPrune = new Set((_dispatchForPrune.active || []).map(d => d.id));
660
+ const _config = queries.getConfig();
661
+ const _idleTtlMs = _config?.engine?.worktreePoolIdleTtlMs ?? ENGINE_DEFAULTS.worktreePoolIdleTtlMs;
662
+ const _result = worktreePool().pruneStale({
663
+ activeDispatchIds: _activeIdsForPrune,
664
+ idleTtlMs: _idleTtlMs,
665
+ });
666
+ if (_result.evicted > 0) {
667
+ cleaned.worktreePoolEvicted = _result.evicted;
668
+ if (verbose) console.log(` worktree-pool: evicted ${_result.evicted} stale entr${_result.evicted === 1 ? 'y' : 'ies'}`);
669
+ }
670
+ } catch (e) { log('warn', 'worktree-pool prune: ' + e.message); }
671
+
642
672
  // 4. Kill zombie agent processes not tracked by the engine
643
673
  // List all node processes, check if any are running spawn-agent.js for our minions
644
674
  try {
@@ -683,6 +713,13 @@ async function runCleanup(config, verbose = false) {
683
713
  for (const p of allProcs) {
684
714
  if (p.cmd && /[\\/]dashboard\.js(?![\w.-])/i.test(p.cmd)) anchorPids.push(p.pid);
685
715
  }
716
+ // W-mp68q6ke0010de68 — opt-in keep_processes flag: PIDs declared in
717
+ // any agents/<id>/keep-pids.json file are anchors, so the reachability
718
+ // walk treats them (and their MCP grandchildren) as legitimate.
719
+ try {
720
+ const keepProcessSweep = require('./keep-process-sweep');
721
+ for (const pid of keepProcessSweep.getActiveAnchorPids()) anchorPids.push(pid);
722
+ } catch (e) { log('warn', `cleanup: keep-process anchor lookup failed: ${e.message}`); }
686
723
  const reach = shared.listProcessReachable(anchorPids, allProcs);
687
724
  // MCP server commandlines. Matches scoped (@modelcontextprotocol/*,
688
725
  // @<scope>/mcp[-*]) and flat (mcp-server-*, *-mcp-server,
package/engine/cli.js CHANGED
@@ -735,6 +735,43 @@ const commands = {
735
735
  }
736
736
  })();
737
737
 
738
+ // W-mp68q6ke0010de68 — Boot reconcile for keep_processes: clear out any
739
+ // stale agents/<id>/keep-pids.json files left over from a prior engine
740
+ // process (TTL expired while engine was down, or all declared PIDs are
741
+ // dead). Run once at boot before the first tick. Idempotent.
742
+ (function startupReconcileKeepProcesses() {
743
+ try {
744
+ const { sweepKeepProcesses } = require('./keep-process-sweep');
745
+ const stats = sweepKeepProcesses();
746
+ if (stats.scanned > 0 && (stats.expiredFiles || stats.deadFiles)) {
747
+ console.log(` Keep-processes boot reconcile: ${stats.expiredFiles} expired, ${stats.deadFiles} dead, ${stats.killedPids} killed`);
748
+ }
749
+ } catch (err) {
750
+ e.log('warn', `Keep-processes boot reconcile failed: ${err.message}`);
751
+ }
752
+ })();
753
+
754
+ // W-mp73ya3e000me6c5 — Boot reconcile for the worktree pool: drop entries
755
+ // whose path is gone (manual rm), whose borrower crashed before
756
+ // returning, or whose idle TTL expired while the engine was down. The
757
+ // tick-time prune in cleanup.js will catch anything that piles up later;
758
+ // boot prune just keeps re-attach sane.
759
+ (function startupReconcileWorktreePool() {
760
+ try {
761
+ const worktreePool = require('./worktree-pool');
762
+ const dispatchSnap = require('./queries').getDispatch();
763
+ const activeIds = new Set((dispatchSnap.active || []).map(d => d.id));
764
+ const cfg = require('./queries').getConfig();
765
+ const idleTtlMs = cfg?.engine?.worktreePoolIdleTtlMs ?? require('./shared').ENGINE_DEFAULTS.worktreePoolIdleTtlMs;
766
+ const result = worktreePool.pruneStale({ activeDispatchIds: activeIds, idleTtlMs });
767
+ if (result.evicted > 0) {
768
+ console.log(` Worktree-pool boot reconcile: evicted ${result.evicted} stale entr${result.evicted === 1 ? 'y' : 'ies'}`);
769
+ }
770
+ } catch (err) {
771
+ e.log('warn', `Worktree-pool boot reconcile failed: ${err.message}`);
772
+ }
773
+ })();
774
+
738
775
  // Initial tick
739
776
  e.tick();
740
777
 
package/engine/dispatch.js CHANGED
@@ -9,7 +9,7 @@ const shared = require('./shared');
9
9
  const queries = require('./queries');
10
10
  const { setCooldownFailure } = require('./cooldown');
11
11
 
12
- const { safeJson, safeWrite, safeReadDir, mutateJsonFileLocked, mutateWorkItems,
12
+ const { safeJson, mutateJsonFileLocked, mutateWorkItems,
13
13
  mutatePullRequests, getProjects, projectWorkItemsPath, projectPrPath, log, ts, dateStamp,
14
14
  sidecarDispatchPrompt, deleteDispatchPromptSidecar,
15
15
  WI_STATUS, WORK_TYPE, DISPATCH_RESULT, ENGINE_DEFAULTS, AGENT_STATUS, FAILURE_CLASS, PR_STATUS } = shared;
@@ -340,7 +340,12 @@ function pruneStalePrDispatches(config = queries.getConfig()) {
340
340
  function isRetryableFailureReason(reason = '', failureClass = '') {
341
341
  // FAILURE_CLASS-based classification takes precedence when available
342
342
  if (failureClass) {
343
- const neverRetry = new Set([FAILURE_CLASS.CONFIG_ERROR, FAILURE_CLASS.PERMISSION_BLOCKED]);
343
+ const neverRetry = new Set([
344
+ FAILURE_CLASS.CONFIG_ERROR,
345
+ FAILURE_CLASS.PERMISSION_BLOCKED,
346
+ FAILURE_CLASS.WORKTREE_PREFLIGHT, // pre-spawn worktree validation — recompute will produce the same failure
347
+ FAILURE_CLASS.INVALID_KEEP_PROCESSES_WORKDIR, // W-mp6k7ywi000fa33c — keep-pids cwd is not a real git worktree; re-running won't fix the structural issue
348
+ ]);
344
349
  if (neverRetry.has(failureClass)) return false;
345
350
  }
346
351
  const r = String(reason || '').toLowerCase();
@@ -593,6 +598,8 @@ function completeDispatch(id, result = DISPATCH_RESULT.SUCCESS, reason = '', res
593
598
  [FAILURE_CLASS.OUT_OF_CONTEXT]: 'context window exhausted',
594
599
  [FAILURE_CLASS.CONFIG_ERROR]: 'configuration error',
595
600
  [FAILURE_CLASS.PERMISSION_BLOCKED]: 'permission or auth failure',
601
+ [FAILURE_CLASS.WORKTREE_PREFLIGHT]: 'worktree preflight rejected (nested in project root or rootDir collapsed to drive root)',
602
+ [FAILURE_CLASS.INVALID_KEEP_PROCESSES_WORKDIR]: 'keep_processes cwd is not a real git worktree (rerun in a `git worktree add` directory)',
596
603
  [FAILURE_CLASS.UNKNOWN]: 'unknown error',
597
604
  };
598
605
  const classLabel = failureClass ? (CLASS_LABELS[failureClass] || failureClass) : '';
@@ -667,14 +674,30 @@ function completeDispatch(id, result = DISPATCH_RESULT.SUCCESS, reason = '', res
667
674
  // ─── Inbox Alert ─────────────────────────────────────────────────────────────
668
675
 
669
676
  function writeInboxAlert(slug, content) {
677
+ const safeSlug = shared.safeSlugComponent(slug, 100);
678
+ const alertKey = `engine-alert-${safeSlug}-${dateStamp()}`;
679
+ const file = path.join(INBOX_DIR, `${alertKey}.md`);
680
+ // Atomic dedupe: openSync(_, 'wx') creates exclusively, fails with EEXIST if
681
+ // the file is already there. Closes the TOCTOU window that the previous
682
+ // safeReadDir+safeWrite pattern left open between two racing writers.
683
+ let fd;
670
684
  try {
671
- const safeSlug = shared.safeSlugComponent(slug, 100);
672
- const file = path.join(INBOX_DIR, `engine-alert-${safeSlug}-${dateStamp()}.md`);
673
- // Dedupe: don't write the same alert twice in the same day
674
- const existing = safeReadDir(INBOX_DIR).find(f => f.startsWith(`engine-alert-${safeSlug}-${dateStamp()}`));
675
- if (existing) return;
676
- safeWrite(file, content);
677
- } catch (e) { log('warn', 'write inbox alert: ' + e.message); }
685
+ if (!fs.existsSync(INBOX_DIR)) fs.mkdirSync(INBOX_DIR, { recursive: true });
686
+ fd = fs.openSync(file, 'wx');
687
+ } catch (e) {
688
+ if (e && e.code === 'EEXIST') {
689
+ log('info', `Alert ${alertKey} already exists, skipping duplicate`);
690
+ return false;
691
+ }
692
+ log('warn', 'write inbox alert: ' + e.message);
693
+ throw e;
694
+ }
695
+ try {
696
+ fs.writeSync(fd, typeof content === 'string' ? content : JSON.stringify(content, null, 2));
697
+ } finally {
698
+ try { fs.closeSync(fd); } catch { /* best-effort close */ }
699
+ }
700
+ return true;
678
701
  }
679
702
 
680
703
  // ─── Agent Worker Status ────────────────────────────────────────────────────
package/engine/features.js CHANGED
@@ -26,6 +26,12 @@
26
26
  // (knowledge/architecture/2026-05-13-ripley-meeting-conclusion-daily-architecture-bug-review-2.md,
27
27
  // PR-C, Option B — keep framework, document the registry).
28
28
  const FEATURES = {
29
+ 'slim-ux': {
30
+ description: 'Experimental redesigned dashboard surface (placeholder gate; Slim UX)',
31
+ default: false,
32
+ addedIn: '0.1.1757',
33
+ expires: '2026-11-01',
34
+ },
29
35
  // ccUseWorkerPool — sub-tasks B/C/D of W-mp2w003600196c51 (CC perf).
30
36
  // Routes Command Center / doc-chat through engine/cc-worker-pool.js
31
37
  // (persistent `copilot --acp` per CC tab) instead of spawning a fresh CLI