@yemi33/minions 0.1.1949 → 0.1.1951
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/js/command-center.js +9 -0
- package/dashboard/js/modal-qa.js +10 -0
- package/dashboard/js/refresh.js +4 -0
- package/dashboard/js/render-dispatch.js +25 -0
- package/dashboard/js/render-other.js +109 -2
- package/dashboard/js/settings.js +1 -1
- package/dashboard/layout.html +2 -2
- package/dashboard/pages/engine.html +6 -0
- package/dashboard/slim.html +1987 -0
- package/dashboard/styles.css +8 -0
- package/dashboard.js +450 -40
- package/docs/completion-reports.md +25 -0
- package/docs/design-state-storage.md +1 -1
- package/docs/slim-ux/architecture-suggestions.md +467 -0
- package/docs/slim-ux/concepts.md +824 -0
- package/engine/ado-mcp-wrapper.js +33 -7
- package/engine/ado.js +123 -15
- package/engine/cc-worker-pool.js +41 -0
- package/engine/cleanup.js +71 -34
- package/engine/cli.js +37 -0
- package/engine/dispatch.js +32 -9
- package/engine/features.js +6 -0
- package/engine/gh-token.js +137 -0
- package/engine/github.js +166 -29
- package/engine/issues.js +29 -0
- package/engine/keep-process-sweep.js +397 -0
- package/engine/lifecycle.js +150 -33
- package/engine/playbook.js +17 -0
- package/engine/queries.js +71 -0
- package/engine/recovery.js +6 -0
- package/engine/shared.js +481 -30
- package/engine/spawn-agent.js +44 -2
- package/engine/timeout.js +34 -11
- package/engine/worktree-pool.js +410 -0
- package/engine.js +643 -119
- package/package.json +6 -3
- package/playbooks/review.md +2 -0
- package/playbooks/shared-rules.md +3 -1
- package/prompts/cc-system.md +24 -0
- package/engine/copilot-models.json +0 -5
package/engine.js
CHANGED
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
|
|
24
24
|
const fs = require('fs');
|
|
25
25
|
const path = require('path');
|
|
26
|
+
const crypto = require('crypto');
|
|
26
27
|
const shared = require('./engine/shared');
|
|
27
28
|
const { exec, execAsync, execSilent, runFile, ts, ENGINE_DEFAULTS,
|
|
28
29
|
WI_STATUS, DONE_STATUSES, WORK_TYPE, PLAN_STATUS, PRD_ITEM_STATUS, PRD_MATERIALIZABLE, PR_STATUS, DISPATCH_RESULT, AGENT_STATUS,
|
|
@@ -103,7 +104,9 @@ const mutatePullRequests = shared.mutatePullRequests;
|
|
|
103
104
|
const withFileLock = shared.withFileLock;
|
|
104
105
|
|
|
105
106
|
const CHECKPOINT_CAP_FAIL_REASON = 'Exceeded 3 checkpoint-resumes; manual intervention required';
|
|
106
|
-
|
|
107
|
+
// W-mp73x32w000l143d: shared.READ_ONLY_ROOT_TASK_TYPES is the canonical set;
|
|
108
|
+
// re-aliased here for the existing call sites in this file.
|
|
109
|
+
const READ_ONLY_ROOT_TASK_TYPES = shared.READ_ONLY_ROOT_TASK_TYPES;
|
|
107
110
|
|
|
108
111
|
function isPipelineBranchName(branchName) {
|
|
109
112
|
return typeof branchName === 'string' && branchName.startsWith('pipeline/');
|
|
@@ -123,6 +126,10 @@ const steering = require('./engine/steering');
|
|
|
123
126
|
|
|
124
127
|
const { runCleanup } = require('./engine/cleanup');
|
|
125
128
|
|
|
129
|
+
// ─── Worktree pool (W-mp73ya3e000me6c5 — opt-in cross-branch warm reuse) ────
|
|
130
|
+
|
|
131
|
+
const worktreePool = require('./engine/worktree-pool');
|
|
132
|
+
|
|
126
133
|
// ─── State Readers (delegated to engine/queries.js) ─────────────────────────
|
|
127
134
|
|
|
128
135
|
const { getConfig, getControl, getDispatch, getNotes,
|
|
@@ -201,7 +208,7 @@ async function pruneAncestorDeps(deps, gitOpts, cwd) {
|
|
|
201
208
|
for (let j = 0; j < deps.length; j++) {
|
|
202
209
|
if (i === j || ancestorIndices.has(j)) continue;
|
|
203
210
|
try {
|
|
204
|
-
await
|
|
211
|
+
await shared.shellSafeGit(['merge-base', '--is-ancestor', `origin/${deps[i].branch}`, `origin/${deps[j].branch}`], { ...gitOpts, cwd });
|
|
205
212
|
// deps[i] is an ancestor of deps[j] — prune deps[i]
|
|
206
213
|
ancestorIndices.add(i);
|
|
207
214
|
break;
|
|
@@ -221,14 +228,14 @@ async function preflightMergeSimulation(deps, mainRef, gitOpts, cwd) {
|
|
|
221
228
|
for (let i = 0; i < deps.length; i++) {
|
|
222
229
|
const depBranch = deps[i].branch;
|
|
223
230
|
try {
|
|
224
|
-
const result = await
|
|
231
|
+
const result = await shared.shellSafeGit(['merge-tree', '--write-tree', currentRef, `origin/${depBranch}`], { ...gitOpts, cwd });
|
|
225
232
|
const treeSha = (typeof result === 'string' ? result : (result.stdout?.toString?.() || '')).trim().split('\n')[0];
|
|
226
233
|
if (!treeSha) return { ok: true }; // can't parse tree SHA, skip pre-flight
|
|
227
234
|
// Create temp commit to chain for next dep (skip for last dep — no chaining needed)
|
|
228
235
|
if (i < deps.length - 1) {
|
|
229
236
|
try {
|
|
230
|
-
const commitResult = await
|
|
231
|
-
|
|
237
|
+
const commitResult = await shared.shellSafeGit(
|
|
238
|
+
['commit-tree', treeSha, '-p', currentRef, '-p', `origin/${depBranch}`, '-m', 'preflight-merge'],
|
|
232
239
|
{ ...gitOpts, cwd }
|
|
233
240
|
);
|
|
234
241
|
const commitSha = (typeof commitResult === 'string' ? commitResult : (commitResult.stdout?.toString?.() || '')).trim();
|
|
@@ -501,8 +508,8 @@ async function syncReusedWorktree(rootDir, worktreePath, branchName, gitOpts = {
|
|
|
501
508
|
// even on slow links.
|
|
502
509
|
let onOrigin = true;
|
|
503
510
|
try {
|
|
504
|
-
await
|
|
505
|
-
|
|
511
|
+
await shared.shellSafeGit(
|
|
512
|
+
['ls-remote', '--exit-code', '--heads', 'origin', branchName],
|
|
506
513
|
{ ...gitOpts, cwd: rootDir, timeout: 5000 },
|
|
507
514
|
);
|
|
508
515
|
} catch (e) {
|
|
@@ -515,15 +522,15 @@ async function syncReusedWorktree(rootDir, worktreePath, branchName, gitOpts = {
|
|
|
515
522
|
log('info', `Branch ${branchName} not on origin yet — first push pending; skipping fetch/pull`);
|
|
516
523
|
return { skipped: true, reason: 'no-upstream' };
|
|
517
524
|
}
|
|
518
|
-
try { await
|
|
519
|
-
try { await
|
|
525
|
+
try { await shared.shellSafeGit(['fetch', 'origin', branchName], { ...gitOpts, cwd: rootDir }); } catch (e) { log('warn', 'git: ' + e.message); }
|
|
526
|
+
try { await shared.shellSafeGit(['pull', 'origin', branchName], { ...gitOpts, cwd: worktreePath }); } catch (e) { log('warn', 'git: ' + e.message); }
|
|
520
527
|
return { skipped: false };
|
|
521
528
|
}
|
|
522
529
|
|
|
523
530
|
// Find an existing worktree already checked out on a given branch
|
|
524
531
|
async function findExistingWorktree(repoDir, branchName) {
|
|
525
532
|
try {
|
|
526
|
-
const out = await
|
|
533
|
+
const out = await shared.shellSafeGit(['worktree', 'list', '--porcelain'], { cwd: repoDir, timeout: 10000 });
|
|
527
534
|
const found = shared.parseWorktreePorcelain(out).find(w => w.branch === branchName);
|
|
528
535
|
if (found && fs.existsSync(found.path)) return found.path;
|
|
529
536
|
} catch (e) { log('warn', 'git: ' + e.message); }
|
|
@@ -553,17 +560,24 @@ function removeStaleIndexLock(rootDir) {
|
|
|
553
560
|
} catch (e) { log('warn', 'git: ' + e.message); }
|
|
554
561
|
}
|
|
555
562
|
|
|
556
|
-
async function runWorktreeAdd(rootDir, worktreePath,
|
|
563
|
+
async function runWorktreeAdd(rootDir, worktreePath, addArgs, gitOpts, worktreeCreateRetries) {
|
|
564
|
+
// P-a7c4d2e8 (F3): argv-form `git worktree add` — `addArgs` is an array of
|
|
565
|
+
// additional arguments (typically a branch name, optionally preceded by
|
|
566
|
+
// `-b <newBranch>`). The worktreePath is passed via `--` to disambiguate
|
|
567
|
+
// from refs and prevent option-style argument injection.
|
|
568
|
+
if (!Array.isArray(addArgs)) {
|
|
569
|
+
throw new TypeError('runWorktreeAdd: addArgs must be an array');
|
|
570
|
+
}
|
|
557
571
|
let lastErr = null;
|
|
558
572
|
const retries = Math.max(0, Number(worktreeCreateRetries) || 0);
|
|
559
573
|
for (let attempt = 0; attempt <= retries; attempt++) {
|
|
560
574
|
try {
|
|
561
575
|
if (attempt > 0) {
|
|
562
|
-
try { await
|
|
576
|
+
try { await shared.shellSafeGit(['worktree', 'prune'], { ...gitOpts, cwd: rootDir, timeout: 15000 }); } catch (e) { log('warn', 'git: ' + e.message); }
|
|
563
577
|
removeStaleIndexLock(rootDir);
|
|
564
578
|
log('warn', `Retrying git worktree add (attempt ${attempt + 1}/${retries + 1}) for ${path.basename(worktreePath)}`);
|
|
565
579
|
}
|
|
566
|
-
await
|
|
580
|
+
await shared.shellSafeGit(['worktree', 'add', worktreePath, ...addArgs], { ...gitOpts, cwd: rootDir });
|
|
567
581
|
return;
|
|
568
582
|
} catch (err) {
|
|
569
583
|
lastErr = err;
|
|
@@ -584,7 +598,7 @@ async function pruneStaleWorktreeForBranch(rootDir, branchName, gitOpts) {
|
|
|
584
598
|
if (!branchName) return 0;
|
|
585
599
|
let trees = [];
|
|
586
600
|
try {
|
|
587
|
-
const out = await
|
|
601
|
+
const out = await shared.shellSafeGit(['worktree', 'list', '--porcelain'], { ...gitOpts, cwd: rootDir, timeout: 10000 });
|
|
588
602
|
trees = shared.parseWorktreePorcelain(out);
|
|
589
603
|
} catch (e) {
|
|
590
604
|
log('warn', `pruneStaleWorktreeForBranch list: ${e.message?.split('\n')[0]}`);
|
|
@@ -595,14 +609,14 @@ async function pruneStaleWorktreeForBranch(rootDir, branchName, gitOpts) {
|
|
|
595
609
|
let removed = 0;
|
|
596
610
|
for (const w of stale) {
|
|
597
611
|
try {
|
|
598
|
-
await
|
|
612
|
+
await shared.shellSafeGit(['worktree', 'remove', '-f', '-f', w.path], { ...gitOpts, cwd: rootDir, timeout: 15000 });
|
|
599
613
|
removed++;
|
|
600
614
|
log('warn', `Removed stale worktree entry for ${branchName} at missing path ${w.path}${w.locked ? ' (was locked)' : ''}`);
|
|
601
615
|
} catch (e) {
|
|
602
616
|
log('warn', `git worktree remove -f -f failed for stale ${w.path}: ${e.message?.split('\n')[0]}`);
|
|
603
617
|
}
|
|
604
618
|
}
|
|
605
|
-
try { await
|
|
619
|
+
try { await shared.shellSafeGit(['worktree', 'prune'], { ...gitOpts, cwd: rootDir, timeout: 10000 }); } catch { /* best-effort */ }
|
|
606
620
|
return removed;
|
|
607
621
|
}
|
|
608
622
|
|
|
@@ -717,8 +731,8 @@ async function recoverPartialWorktree(rootDir, worktreePath, branchName, gitOpts
|
|
|
717
731
|
if (existingWt && fs.existsSync(existingWt)) return true;
|
|
718
732
|
if (!fs.existsSync(worktreePath)) return false;
|
|
719
733
|
try {
|
|
720
|
-
await
|
|
721
|
-
await
|
|
734
|
+
await shared.shellSafeGit(['-C', worktreePath, 'rev-parse', '--is-inside-work-tree'], { ...gitOpts, timeout: 10000 });
|
|
735
|
+
await shared.shellSafeGit(['-C', worktreePath, 'rev-parse', '--abbrev-ref', 'HEAD'], { ...gitOpts, timeout: 10000 });
|
|
722
736
|
log('warn', `Recovered partially-created worktree for ${branchName} at ${worktreePath}`);
|
|
723
737
|
return true;
|
|
724
738
|
} catch {
|
|
@@ -755,12 +769,73 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
755
769
|
}
|
|
756
770
|
const metaProjectFields = metaProject && typeof metaProject === 'object' ? metaProject : {};
|
|
757
771
|
const project = projectResolution.project ? { ...projectResolution.project, ...metaProjectFields } : {};
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
//
|
|
761
|
-
|
|
772
|
+
// W-mp73x32w000l143d: decouple agent cwd from worktree placement.
|
|
773
|
+
// resolveSpawnPaths returns:
|
|
774
|
+
// - read-only types: { cwd: <project dir or MINIONS_DIR>, worktreeRootDir: null }
|
|
775
|
+
// (no drive-root preflight — these tasks don't need a worktree)
|
|
776
|
+
// - code-mutating types: { cwd: null, worktreeRootDir: <project root> }
|
|
777
|
+
// (caller defaults cwd to worktreeRootDir; drive-root collapse throws
|
|
778
|
+
// WORKTREE_ROOTDIR_COLLAPSED_TO_DRIVE_ROOT — same fail-fast behavior as
|
|
779
|
+
// the legacy resolveProjectRootDir call this replaced).
|
|
780
|
+
// Pipeline branches force a worktree even for read-only types — handled
|
|
781
|
+
// immediately after the resolver call below.
|
|
782
|
+
const _preBranchName = meta?.branch ? sanitizeBranch(meta.branch) : null;
|
|
783
|
+
let cwd, worktreeRootDir;
|
|
784
|
+
try {
|
|
785
|
+
({ cwd, worktreeRootDir } = shared.resolveSpawnPaths(project, type, MINIONS_DIR));
|
|
786
|
+
} catch (rootErr) {
|
|
787
|
+
if (rootErr?.code === 'WORKTREE_ROOTDIR_COLLAPSED_TO_DRIVE_ROOT' || rootErr?.code === 'WORKTREE_ROOTDIR_MISSING_BASE') {
|
|
788
|
+
log('error', `spawnAgent: project rootDir resolution failed for ${id}: ${rootErr.message}`);
|
|
789
|
+
// Prompt files haven't been written yet at this point — no cleanup needed.
|
|
790
|
+
completeDispatch(
|
|
791
|
+
id,
|
|
792
|
+
DISPATCH_RESULT.ERROR,
|
|
793
|
+
rootErr.message.slice(0, 800),
|
|
794
|
+
'Pre-spawn worktree preflight rejected — see failure_class for the specific cause.',
|
|
795
|
+
{ failureClass: FAILURE_CLASS.WORKTREE_PREFLIGHT, agentRetryable: false },
|
|
796
|
+
);
|
|
797
|
+
cleanupTempAgent(agentId);
|
|
798
|
+
return null;
|
|
799
|
+
}
|
|
800
|
+
throw rootErr;
|
|
801
|
+
}
|
|
802
|
+
// Pipeline branches need a worktree even for read-only types (the worktree
|
|
803
|
+
// IS the pipeline's isolated workspace). When we detect a pipeline branch
|
|
804
|
+
// on a read-only type, recompute worktreeRootDir so the worktree creation
|
|
805
|
+
// block has a placement parent — and so the drive-root preflight still fires.
|
|
806
|
+
if (worktreeRootDir === null && isPipelineBranchName(_preBranchName)) {
|
|
807
|
+
try {
|
|
808
|
+
worktreeRootDir = shared.resolveProjectRootDir(project.localPath, MINIONS_DIR);
|
|
809
|
+
} catch (rootErr) {
|
|
810
|
+
if (rootErr?.code === 'WORKTREE_ROOTDIR_COLLAPSED_TO_DRIVE_ROOT' || rootErr?.code === 'WORKTREE_ROOTDIR_MISSING_BASE') {
|
|
811
|
+
log('error', `spawnAgent: pipeline-branch rootDir resolution failed for ${id}: ${rootErr.message}`);
|
|
812
|
+
completeDispatch(
|
|
813
|
+
id,
|
|
814
|
+
DISPATCH_RESULT.ERROR,
|
|
815
|
+
rootErr.message.slice(0, 800),
|
|
816
|
+
'Pre-spawn worktree preflight rejected — see failure_class for the specific cause.',
|
|
817
|
+
{ failureClass: FAILURE_CLASS.WORKTREE_PREFLIGHT, agentRetryable: false },
|
|
818
|
+
);
|
|
819
|
+
cleanupTempAgent(agentId);
|
|
820
|
+
return null;
|
|
821
|
+
}
|
|
822
|
+
throw rootErr;
|
|
823
|
+
}
|
|
824
|
+
}
|
|
825
|
+
// Legacy local alias: downstream git ops (worktree add, prune, fetch) and
|
|
826
|
+
// the `cwd === rootDir` safety warn at line ~1387 reference `rootDir`. For
|
|
827
|
+
// read-only rootless tasks (no worktree, no branch) this is null — the
|
|
828
|
+
// rootDir-referencing code paths only run inside `if (branchName)` /
|
|
829
|
+
// `if (worktreePath)` guards, so a null rootDir is safe there.
|
|
830
|
+
const rootDir = worktreeRootDir;
|
|
831
|
+
|
|
832
|
+
// Determine working directory. For code-mutating types the resolver
|
|
833
|
+
// returned cwd: null and we default to the worktree placement parent
|
|
834
|
+
// (matches legacy behavior — reassigned to the worktreePath after
|
|
835
|
+
// `git worktree add` succeeds at line ~1078).
|
|
836
|
+
if (cwd == null) cwd = rootDir;
|
|
762
837
|
let worktreePath = null;
|
|
763
|
-
let branchName =
|
|
838
|
+
let branchName = _preBranchName;
|
|
764
839
|
const worktreeCreateTimeout = Math.max(60000, Number(engineConfig.worktreeCreateTimeout) || ENGINE_DEFAULTS.worktreeCreateTimeout);
|
|
765
840
|
const worktreeCreateRetries = Math.max(0, Math.min(3, Number(engineConfig.worktreeCreateRetries) || ENGINE_DEFAULTS.worktreeCreateRetries));
|
|
766
841
|
const _gitOpts = { stdio: 'pipe', timeout: 30000, windowsHide: true, env: shared.gitEnv() };
|
|
@@ -778,14 +853,24 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
778
853
|
safeUnlink(completionReportPath);
|
|
779
854
|
} catch (e) { log('warn', `completion report setup: ${e.message}`); }
|
|
780
855
|
}
|
|
856
|
+
// P-d2a8f6c1 (agent trust boundary F8): per-spawn cryptographic nonce. The
|
|
857
|
+
// engine injects this into the agent env as MINIONS_COMPLETION_NONCE and
|
|
858
|
+
// requires the agent to echo it back in the completion JSON. On parse, the
|
|
859
|
+
// engine compares report.nonce against the in-memory value below; on
|
|
860
|
+
// mismatch the report is treated as forged (e.g. a prompt-injected agent
|
|
861
|
+
// writing into a sibling agent's completion path) and discarded. See
|
|
862
|
+
// engine/lifecycle.js:runPostCompletionHooks and docs/completion-reports.md.
|
|
863
|
+
const completionNonce = crypto.randomBytes(16).toString('hex');
|
|
781
864
|
const completionReportInstruction = completionReportPath ? [
|
|
782
865
|
'## Completion Report',
|
|
783
866
|
'',
|
|
784
867
|
`Before exiting, write a JSON completion report to: \`${completionReportPath}\``,
|
|
785
868
|
'',
|
|
786
|
-
'Use this shape: {"status":"success|partial|failed","summary":"...","verdict":"approved|changes-requested|null","pr":"PR URL or id if relevant","failure_class":"...","retryable":true|false,"needs_rerun":true|false,"artifacts":[{"type":"note|plan|prd|pr|file","path":"relative/path/or/url","title":"short label"}]}.',
|
|
869
|
+
'Use this shape: {"status":"success|partial|failed","summary":"...","verdict":"approved|changes-requested|null","pr":"PR URL or id if relevant","failure_class":"...","retryable":true|false,"needs_rerun":true|false,"nonce":"<value of MINIONS_COMPLETION_NONCE env var>","artifacts":[{"type":"note|plan|prd|pr|file","path":"relative/path/or/url","title":"short label"}]}.',
|
|
787
870
|
'This report is the primary completion signal; fenced completion blocks are only a fallback.',
|
|
788
871
|
'',
|
|
872
|
+
`**Trust nonce (REQUIRED):** copy the exact value of the \`MINIONS_COMPLETION_NONCE\` environment variable into the report's \`nonce\` field. The engine validates this on read; mismatched or missing nonces are treated as untrusted and the dispatch is failed with \`failure_class: 'completion-nonce-mismatch'\`. Do not invent, regenerate, or share this value across reports.`,
|
|
873
|
+
'',
|
|
789
874
|
].join('\n') : '';
|
|
790
875
|
const buildFullTaskPrompt = (promptBody) => {
|
|
791
876
|
const taskPromptWithSteering = pendingSteering.prompt
|
|
@@ -807,6 +892,22 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
807
892
|
const sysPromptPath = path.join(tmpDir, `sysprompt-${safeId}.md`);
|
|
808
893
|
safeWrite(sysPromptPath, systemPrompt);
|
|
809
894
|
const _cleanupPromptFiles = () => { safeUnlink(promptPath); safeUnlink(sysPromptPath); };
|
|
895
|
+
// Convert a WORKTREE_NESTED_IN_PROJECT throw into a fail-fast non-retryable
|
|
896
|
+
// dispatch failure (W-mp62taw2000ubcc3). The error's `.code` is set by
|
|
897
|
+
// shared.assertWorktreeOutsideProject so we don't have to parse the message.
|
|
898
|
+
// Returns truthy when the caller should `return null` from spawnAgent.
|
|
899
|
+
const _failWorktreePreflight = (assertErr) => {
|
|
900
|
+
log('error', `spawnAgent: worktree preflight rejected for ${id}: ${assertErr.message}`);
|
|
901
|
+
_cleanupPromptFiles();
|
|
902
|
+
completeDispatch(
|
|
903
|
+
id,
|
|
904
|
+
DISPATCH_RESULT.ERROR,
|
|
905
|
+
assertErr.message.slice(0, 800),
|
|
906
|
+
'Pre-spawn worktree preflight rejected — see failure_class for the specific cause. Recompute will produce the same rejection until the underlying configuration changes.',
|
|
907
|
+
{ failureClass: FAILURE_CLASS.WORKTREE_PREFLIGHT, agentRetryable: false },
|
|
908
|
+
);
|
|
909
|
+
cleanupTempAgent(agentId);
|
|
910
|
+
};
|
|
810
911
|
_phaseT.afterPrompt = Date.now();
|
|
811
912
|
|
|
812
913
|
if (branchName) {
|
|
@@ -819,8 +920,13 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
819
920
|
worktreePath = path.resolve(rootDir, engineConfig.worktreeRoot || '../worktrees', wtDirName);
|
|
820
921
|
// Refuse to spawn into a worktree path that's inside the project root —
|
|
821
922
|
// nested worktrees cause glob/grep to match both copies (mirror writes).
|
|
822
|
-
//
|
|
823
|
-
|
|
923
|
+
// WORKTREE_NESTED_IN_PROJECT is non-retryable: the recompute on the next
|
|
924
|
+
// tick will produce the same path. Fail fast (W-mp62taw2000ubcc3).
|
|
925
|
+
try { shared.assertWorktreeOutsideProject(worktreePath, rootDir); }
|
|
926
|
+
catch (assertErr) {
|
|
927
|
+
if (assertErr?.code === 'WORKTREE_NESTED_IN_PROJECT') { _failWorktreePreflight(assertErr); return null; }
|
|
928
|
+
throw assertErr;
|
|
929
|
+
}
|
|
824
930
|
|
|
825
931
|
// If branch is already checked out in an existing worktree, reuse it
|
|
826
932
|
_phaseT.findExistingStart = Date.now();
|
|
@@ -830,7 +936,11 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
830
936
|
// Same guard for reuse — a previously-created bad worktree must not
|
|
831
937
|
// be silently reused either; the cleanup sweep flags these so the
|
|
832
938
|
// operator can remove them.
|
|
833
|
-
shared.assertWorktreeOutsideProject(existingWt, rootDir);
|
|
939
|
+
try { shared.assertWorktreeOutsideProject(existingWt, rootDir); }
|
|
940
|
+
catch (assertErr) {
|
|
941
|
+
if (assertErr?.code === 'WORKTREE_NESTED_IN_PROJECT') { _failWorktreePreflight(assertErr); return null; }
|
|
942
|
+
throw assertErr;
|
|
943
|
+
}
|
|
834
944
|
worktreePath = existingWt;
|
|
835
945
|
log('info', `Reusing existing worktree for ${branchName}: ${existingWt}`);
|
|
836
946
|
// Probe origin first — locally-created branches that were never pushed
|
|
@@ -840,30 +950,84 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
840
950
|
await syncReusedWorktree(rootDir, existingWt, branchName, _gitOpts);
|
|
841
951
|
_phaseT.reuseSyncEnd = Date.now();
|
|
842
952
|
} else if (READ_ONLY_ROOT_TASK_TYPES.has(type) && !isPipelineBranchName(branchName)) {
|
|
843
|
-
// Read-only tasks — no worktree needed, run in
|
|
844
|
-
|
|
953
|
+
// Read-only tasks — no worktree needed, run in cwd from resolveSpawnPaths
|
|
954
|
+
// (project.localPath or MINIONS_DIR). W-mp73x32w000l143d.
|
|
955
|
+
log('info', `${type}: read-only task, no worktree needed — running in cwd ${cwd}`);
|
|
845
956
|
branchName = null;
|
|
846
957
|
worktreePath = null;
|
|
847
958
|
} else {
|
|
848
959
|
_phaseT.createWorktreeStart = Date.now();
|
|
960
|
+
|
|
961
|
+
// ── Pool borrow (W-mp73ya3e000me6c5) ────────────────────────────────
|
|
962
|
+
// Try to borrow a warm worktree from the per-project pool BEFORE the
|
|
963
|
+
// existing fresh-create path. Default-off (`worktreePoolSize: 0`); when
|
|
964
|
+
// enabled, this saves the cold install/build cost on heavy projects.
|
|
965
|
+
// Borrow only fires when the branch is brand-new (no upstream yet) so
|
|
966
|
+
// we don't disrupt fix-tasks targeting existing PR branches. Any git
|
|
967
|
+
// failure during the checkout evicts the entry and falls through to
|
|
968
|
+
// the unchanged fresh-create logic.
|
|
969
|
+
let borrowedFromPool = false;
|
|
970
|
+
const _isSharedForPool = meta?.branchStrategy === 'shared-branch' || meta?.useExistingBranch;
|
|
971
|
+
const _poolProject = project.name || 'default';
|
|
972
|
+
const _poolSize = worktreePool.getProjectPoolSize(_poolProject, config);
|
|
973
|
+
if (_poolSize > 0 && !_isSharedForPool && branchName) {
|
|
974
|
+
let _branchOnRemote = true;
|
|
975
|
+
try {
|
|
976
|
+
await shared.shellSafeGit(
|
|
977
|
+
['ls-remote', '--exit-code', '--heads', 'origin', branchName],
|
|
978
|
+
{ ..._gitOpts, cwd: rootDir, timeout: 5000 },
|
|
979
|
+
);
|
|
980
|
+
} catch (e) { if (e && e.code === 2) _branchOnRemote = false; }
|
|
981
|
+
if (!_branchOnRemote) {
|
|
982
|
+
const borrowed = worktreePool.tryBorrow(_poolProject, id);
|
|
983
|
+
if (borrowed && borrowed.path && fs.existsSync(borrowed.path)) {
|
|
984
|
+
try { shared.assertWorktreeOutsideProject(borrowed.path, rootDir); }
|
|
985
|
+
catch (assertErr) {
|
|
986
|
+
if (assertErr?.code === 'WORKTREE_NESTED_IN_PROJECT') {
|
|
987
|
+
worktreePool.evictEntry(borrowed.path, 'nested-in-project');
|
|
988
|
+
_failWorktreePreflight(assertErr); return null;
|
|
989
|
+
}
|
|
990
|
+
throw assertErr;
|
|
991
|
+
}
|
|
992
|
+
try {
|
|
993
|
+
const _mainRef = sanitizeBranch(shared.resolveMainBranch(rootDir, project.mainBranch));
|
|
994
|
+
await shared.shellSafeGit(['fetch', 'origin', _mainRef], { ..._gitOpts, cwd: rootDir, timeout: 30000 });
|
|
995
|
+
// -B force-creates/resets the branch so a stale local ref from a
|
|
996
|
+
// prior occupant does not block the checkout.
|
|
997
|
+
await shared.shellSafeGit(['checkout', '-B', branchName, `origin/${_mainRef}`], { ..._gitOpts, cwd: borrowed.path, timeout: 30000 });
|
|
998
|
+
worktreePath = borrowed.path;
|
|
999
|
+
borrowedFromPool = true;
|
|
1000
|
+
log('info', `worktree-pool: borrowed warm worktree for ${_poolProject}/${branchName}: ${borrowed.path}`);
|
|
1001
|
+
} catch (borrowErr) {
|
|
1002
|
+
log('warn', `worktree-pool: borrow checkout failed for ${branchName} at ${borrowed.path}: ${borrowErr.message} — evicting and falling through to fresh create`);
|
|
1003
|
+
worktreePool.evictEntry(borrowed.path, 'borrow-checkout-failed');
|
|
1004
|
+
}
|
|
1005
|
+
}
|
|
1006
|
+
}
|
|
1007
|
+
}
|
|
1008
|
+
|
|
849
1009
|
try {
|
|
850
1010
|
if (!fs.existsSync(worktreePath)) {
|
|
851
1011
|
const isSharedBranch = meta?.branchStrategy === 'shared-branch' || meta?.useExistingBranch;
|
|
852
1012
|
// Prune stale worktree entries before creating (handles leftover entries from crashed runs)
|
|
853
|
-
try { await
|
|
1013
|
+
try { await shared.shellSafeGit(['worktree', 'prune'], { ..._gitOpts, cwd: rootDir, timeout: 10000 }); } catch (e) { log('warn', 'git: ' + e.message); }
|
|
854
1014
|
// Remove stale index.lock before creating worktree (Windows crashes can leave this behind)
|
|
855
1015
|
removeStaleIndexLock(rootDir);
|
|
856
1016
|
|
|
857
1017
|
if (isSharedBranch) {
|
|
858
1018
|
log('info', `Creating worktree for shared branch: ${worktreePath} on ${branchName}`);
|
|
859
|
-
try { await
|
|
1019
|
+
try { await shared.shellSafeGit(['fetch', 'origin', branchName], { ..._gitOpts, cwd: rootDir }); } catch (e) { log('warn', 'git: ' + e.message); }
|
|
860
1020
|
try {
|
|
861
|
-
await runWorktreeAdd(rootDir, worktreePath,
|
|
1021
|
+
await runWorktreeAdd(rootDir, worktreePath, [branchName], _worktreeGitOpts, worktreeCreateRetries);
|
|
862
1022
|
} catch (eShared) {
|
|
863
1023
|
if (eShared.message?.includes('already used by worktree') || eShared.message?.includes('already checked out')) {
|
|
864
1024
|
const existingWtPath = await findExistingWorktree(rootDir, branchName);
|
|
865
1025
|
if (existingWtPath && fs.existsSync(existingWtPath)) {
|
|
866
|
-
shared.assertWorktreeOutsideProject(existingWtPath, rootDir);
|
|
1026
|
+
try { shared.assertWorktreeOutsideProject(existingWtPath, rootDir); }
|
|
1027
|
+
catch (assertErr) {
|
|
1028
|
+
if (assertErr?.code === 'WORKTREE_NESTED_IN_PROJECT') { _failWorktreePreflight(assertErr); return null; }
|
|
1029
|
+
throw assertErr;
|
|
1030
|
+
}
|
|
867
1031
|
log('info', `Shared branch ${branchName} already checked out at ${existingWtPath} — reusing`);
|
|
868
1032
|
worktreePath = existingWtPath;
|
|
869
1033
|
} else {
|
|
@@ -873,42 +1037,42 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
873
1037
|
const pruned = await pruneStaleWorktreeForBranch(rootDir, branchName, _gitOpts);
|
|
874
1038
|
if (pruned > 0) {
|
|
875
1039
|
log('info', `Pruned ${pruned} stale worktree entry(ies) for shared branch ${branchName}; retrying worktree add`);
|
|
876
|
-
await runWorktreeAdd(rootDir, worktreePath,
|
|
1040
|
+
await runWorktreeAdd(rootDir, worktreePath, [branchName], _worktreeGitOpts, 0);
|
|
877
1041
|
} else { throw eShared; }
|
|
878
1042
|
}
|
|
879
1043
|
} else if (eShared.message?.includes('invalid reference') || eShared.message?.includes('not a valid ref')) {
|
|
880
1044
|
// Branch doesn't exist yet (first item in plan) — create it from main
|
|
881
1045
|
const mainRef = sanitizeBranch(shared.resolveMainBranch(rootDir, project.mainBranch));
|
|
882
1046
|
log('info', `Shared branch ${branchName} not found — creating from ${mainRef}`);
|
|
883
|
-
await runWorktreeAdd(rootDir, worktreePath,
|
|
1047
|
+
await runWorktreeAdd(rootDir, worktreePath, ['-b', branchName, mainRef], _worktreeGitOpts, worktreeCreateRetries);
|
|
884
1048
|
} else { throw eShared; }
|
|
885
1049
|
}
|
|
886
1050
|
} else {
|
|
887
1051
|
log('info', `Creating worktree: ${worktreePath} on branch ${branchName}`);
|
|
888
1052
|
const mainRef = sanitizeBranch(shared.resolveMainBranch(rootDir, project.mainBranch));
|
|
889
1053
|
try {
|
|
890
|
-
await runWorktreeAdd(rootDir, worktreePath,
|
|
1054
|
+
await runWorktreeAdd(rootDir, worktreePath, ['-b', branchName, mainRef], _worktreeGitOpts, worktreeCreateRetries);
|
|
891
1055
|
} catch (e1) {
|
|
892
1056
|
const branchExists = e1.message?.includes('already exists');
|
|
893
1057
|
log('warn', `Worktree -b failed for ${branchName}: ${e1.message?.split('\n')[0]}`);
|
|
894
1058
|
if (!branchExists) {
|
|
895
1059
|
// Transient error (lock, timeout) — prune, clean, and retry -b once more
|
|
896
1060
|
log('info', `Retrying -b create after prune for ${branchName}`);
|
|
897
|
-
try { await
|
|
1061
|
+
try { await shared.shellSafeGit(['worktree', 'prune'], { ..._gitOpts, cwd: rootDir, timeout: 15000 }); } catch { /* optional */ }
|
|
898
1062
|
removeStaleIndexLock(rootDir);
|
|
899
1063
|
// Clean up partial worktree directory from failed attempt
|
|
900
1064
|
try { if (fs.existsSync(worktreePath)) fs.rmSync(worktreePath, { recursive: true, force: true }); } catch { /* optional */ }
|
|
901
1065
|
try {
|
|
902
|
-
await runWorktreeAdd(rootDir, worktreePath,
|
|
1066
|
+
await runWorktreeAdd(rootDir, worktreePath, ['-b', branchName, mainRef], _worktreeGitOpts, 0);
|
|
903
1067
|
} catch (e1b) {
|
|
904
1068
|
log('error', `Worktree -b retry also failed for ${branchName}: ${e1b.message?.split('\n')[0]}`);
|
|
905
1069
|
throw e1b;
|
|
906
1070
|
}
|
|
907
1071
|
} else {
|
|
908
1072
|
// Branch already exists — try checkout without -b
|
|
909
|
-
try { await
|
|
1073
|
+
try { await shared.shellSafeGit(['fetch', 'origin', branchName], { ..._gitOpts, cwd: rootDir }); } catch (e) { log('warn', 'git: ' + e.message); }
|
|
910
1074
|
try {
|
|
911
|
-
await runWorktreeAdd(rootDir, worktreePath,
|
|
1075
|
+
await runWorktreeAdd(rootDir, worktreePath, [branchName], _worktreeGitOpts, worktreeCreateRetries);
|
|
912
1076
|
log('info', `Reusing existing branch: ${branchName}`);
|
|
913
1077
|
} catch (e2) {
|
|
914
1078
|
// "already checked out" or "already used by worktree" — find and reuse or recover
|
|
@@ -930,17 +1094,21 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
930
1094
|
log('warn', `Branch ${branchName} actively used by another agent at ${existingWtPath} — cannot create worktree`);
|
|
931
1095
|
throw e2;
|
|
932
1096
|
}
|
|
933
|
-
shared.assertWorktreeOutsideProject(existingWtPath, rootDir);
|
|
1097
|
+
try { shared.assertWorktreeOutsideProject(existingWtPath, rootDir); }
|
|
1098
|
+
catch (assertErr) {
|
|
1099
|
+
if (assertErr?.code === 'WORKTREE_NESTED_IN_PROJECT') { _failWorktreePreflight(assertErr); return null; }
|
|
1100
|
+
throw assertErr;
|
|
1101
|
+
}
|
|
934
1102
|
log('info', `Branch ${branchName} already checked out at ${existingWtPath} — reusing`);
|
|
935
1103
|
worktreePath = existingWtPath;
|
|
936
1104
|
} else if (existingWtPath && !fs.existsSync(existingWtPath)) {
|
|
937
1105
|
log('warn', `Branch ${branchName} tracked in missing dir ${existingWtPath} — pruning and recreating`);
|
|
938
|
-
try { await
|
|
939
|
-
await runWorktreeAdd(rootDir, worktreePath,
|
|
1106
|
+
try { await shared.shellSafeGit(['worktree', 'prune'], { ..._gitOpts, cwd: rootDir, timeout: 10000 }); } catch (e) { log('warn', 'git: ' + e.message); }
|
|
1107
|
+
await runWorktreeAdd(rootDir, worktreePath, [branchName], _worktreeGitOpts, worktreeCreateRetries);
|
|
940
1108
|
log('info', `Recovered worktree for ${branchName} after stale entry prune`);
|
|
941
1109
|
} else {
|
|
942
|
-
try { await
|
|
943
|
-
await runWorktreeAdd(rootDir, worktreePath,
|
|
1110
|
+
try { await shared.shellSafeGit(['worktree', 'prune'], { ..._gitOpts, cwd: rootDir, timeout: 10000 }); } catch (e) { log('warn', 'git: ' + e.message); }
|
|
1111
|
+
await runWorktreeAdd(rootDir, worktreePath, [branchName], _worktreeGitOpts, worktreeCreateRetries);
|
|
944
1112
|
}
|
|
945
1113
|
} else {
|
|
946
1114
|
throw e2;
|
|
@@ -951,7 +1119,7 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
951
1119
|
}
|
|
952
1120
|
} else if (meta?.branchStrategy === 'shared-branch') {
|
|
953
1121
|
log('info', `Pulling latest on shared branch ${branchName}`);
|
|
954
|
-
try { await
|
|
1122
|
+
try { await shared.shellSafeGit(['pull', 'origin', branchName], { ..._gitOpts, cwd: worktreePath }); } catch (e) { log('warn', 'git: ' + e.message); }
|
|
955
1123
|
}
|
|
956
1124
|
} catch (err) {
|
|
957
1125
|
if (await recoverPartialWorktree(rootDir, worktreePath, branchName, _gitOpts)) {
|
|
@@ -1027,7 +1195,7 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1027
1195
|
}
|
|
1028
1196
|
const fetchResults = await Promise.allSettled(
|
|
1029
1197
|
fetchable.map(({ branch: depBranch }) =>
|
|
1030
|
-
|
|
1198
|
+
shared.shellSafeGit(['fetch', 'origin', depBranch], { ..._gitOpts, cwd: rootDir }).then(() => depBranch)
|
|
1031
1199
|
)
|
|
1032
1200
|
);
|
|
1033
1201
|
const hasFetchFailures = fetchResults.some(r => r.status === 'rejected');
|
|
@@ -1047,10 +1215,10 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1047
1215
|
// If remote ref missing, check if branch exists locally and push it (#782)
|
|
1048
1216
|
if (errMsg.includes('couldn\'t find remote ref') || errMsg.includes('not found in upstream')) {
|
|
1049
1217
|
try {
|
|
1050
|
-
await
|
|
1218
|
+
await shared.shellSafeGit(['rev-parse', '--verify', `refs/heads/${failedBranch}`], { ..._gitOpts, cwd: rootDir });
|
|
1051
1219
|
// Branch exists locally — push it to origin
|
|
1052
1220
|
log('info', `Dependency ${failedBranch} exists locally but not on remote — pushing to origin`);
|
|
1053
|
-
await
|
|
1221
|
+
await shared.shellSafeGit(['push', 'origin', failedBranch], { ..._gitOpts, cwd: rootDir, timeout: 60000 });
|
|
1054
1222
|
log('info', `Successfully pushed local-only dependency branch ${failedBranch} to origin`);
|
|
1055
1223
|
recoveredBranches.add(failedBranch);
|
|
1056
1224
|
continue;
|
|
@@ -1089,7 +1257,7 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1089
1257
|
const ancestorChecks = await Promise.all(
|
|
1090
1258
|
prunedDeps.map(async ({ branch: depBranch }) => {
|
|
1091
1259
|
try {
|
|
1092
|
-
await
|
|
1260
|
+
await shared.shellSafeGit(['merge-base', '--is-ancestor', `origin/${depBranch}`, 'HEAD'], { ..._gitOpts, cwd: worktreePath });
|
|
1093
1261
|
return true;
|
|
1094
1262
|
} catch (_) { return false; }
|
|
1095
1263
|
})
|
|
@@ -1122,9 +1290,9 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1122
1290
|
let stashed = false;
|
|
1123
1291
|
if (!depMergeFailed && !skipDepMerge && prunedDeps.length > 0) {
|
|
1124
1292
|
try {
|
|
1125
|
-
const statusOut = (await
|
|
1293
|
+
const statusOut = (await shared.shellSafeGit(['status', '--porcelain'], { ..._gitOpts, cwd: worktreePath })).stdout.toString().trim();
|
|
1126
1294
|
if (statusOut) {
|
|
1127
|
-
await
|
|
1295
|
+
await shared.shellSafeGit(['stash', 'push', '--include-untracked', '-m', 'engine: stash before dep re-merge'], { ..._gitOpts, cwd: worktreePath });
|
|
1128
1296
|
stashed = true;
|
|
1129
1297
|
log('info', `Stashed uncommitted changes in ${branchName} before dep merge`);
|
|
1130
1298
|
}
|
|
@@ -1135,27 +1303,27 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1135
1303
|
if (!depMergeFailed && !skipDepMerge) {
|
|
1136
1304
|
for (const { branch: depBranch, prId } of prunedDeps) {
|
|
1137
1305
|
try {
|
|
1138
|
-
await
|
|
1306
|
+
await shared.shellSafeGit(['merge', `origin/${depBranch}`, '--no-edit'], { ..._gitOpts, cwd: worktreePath });
|
|
1139
1307
|
log('info', `Merged dependency branch ${depBranch} (${prId}) into worktree ${branchName}`);
|
|
1140
1308
|
} catch (mergeErr) {
|
|
1141
1309
|
// Merge failed — possibly due to diverged history from a force-pushed (rebased) dep branch.
|
|
1142
1310
|
// Abort partial merge, reset worktree to clean main base, and re-merge all deps from scratch.
|
|
1143
1311
|
log('warn', `Merge of ${depBranch} into ${branchName} failed: ${mergeErr.message} — attempting reset and re-merge of all deps`);
|
|
1144
|
-
try { await
|
|
1312
|
+
try { await shared.shellSafeGit(['merge', '--abort'], { ..._gitOpts, cwd: worktreePath }); } catch (_) { /* no merge in progress */ }
|
|
1145
1313
|
const mainRef = sanitizeBranch(shared.resolveMainBranch(rootDir, project.mainBranch));
|
|
1146
1314
|
try {
|
|
1147
|
-
await
|
|
1315
|
+
await shared.shellSafeGit(['reset', '--hard', `origin/${mainRef}`], { ..._gitOpts, cwd: worktreePath });
|
|
1148
1316
|
log('info', `Reset worktree ${branchName} to origin/${mainRef} for clean dep re-merge`);
|
|
1149
1317
|
// Re-merge ALL pruned dep branches from scratch on clean base
|
|
1150
1318
|
for (const { branch: reBranch, prId: rePrId } of prunedDeps) {
|
|
1151
|
-
await
|
|
1319
|
+
await shared.shellSafeGit(['merge', `origin/${reBranch}`, '--no-edit'], { ..._gitOpts, cwd: worktreePath });
|
|
1152
1320
|
log('info', `Re-merged dependency branch ${reBranch} (${rePrId}) into worktree ${branchName}`);
|
|
1153
1321
|
}
|
|
1154
1322
|
log('info', `Successfully re-merged all ${prunedDeps.length} dep branches after reset for ${branchName}`);
|
|
1155
1323
|
} catch (resetErr) {
|
|
1156
1324
|
const errOutput = (resetErr.message || '') + '\n' + (resetErr.stdout?.toString?.() || '') + '\n' + (resetErr.stderr?.toString?.() || '');
|
|
1157
1325
|
log('warn', `Failed to reset and re-merge deps for ${branchName}: ${resetErr.message}`);
|
|
1158
|
-
try { await
|
|
1326
|
+
try { await shared.shellSafeGit(['merge', '--abort'], { ..._gitOpts, cwd: worktreePath }); } catch (_) { /* no merge in progress */ }
|
|
1159
1327
|
// Post-mortem: incremental simulation to identify which dep caused the conflict (#958)
|
|
1160
1328
|
// Uses same chained merge-tree approach as pre-flight to catch inter-dep conflicts
|
|
1161
1329
|
const pmMainRef = sanitizeBranch(shared.resolveMainBranch(rootDir, project.mainBranch));
|
|
@@ -1172,8 +1340,8 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1172
1340
|
for (const { branch: reBranch2 } of prunedDeps) {
|
|
1173
1341
|
try {
|
|
1174
1342
|
const mainRef2 = sanitizeBranch(shared.resolveMainBranch(rootDir, project.mainBranch));
|
|
1175
|
-
const mergeBase = (await
|
|
1176
|
-
const treeResult = await
|
|
1343
|
+
const mergeBase = (await shared.shellSafeGit(['merge-base', `origin/${mainRef2}`, `origin/${reBranch2}`], { ..._gitOpts, cwd: rootDir })).stdout.toString().trim();
|
|
1344
|
+
const treeResult = await shared.shellSafeGit(['merge-tree', mergeBase, `origin/${mainRef2}`, `origin/${reBranch2}`], { ..._gitOpts, cwd: rootDir });
|
|
1177
1345
|
const treeOutput = treeResult.stdout?.toString?.() || '';
|
|
1178
1346
|
if (treeOutput.includes('<<<<<<<') || treeOutput.includes('changed in both')) {
|
|
1179
1347
|
depConflictBranch = reBranch2;
|
|
@@ -1196,7 +1364,7 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1196
1364
|
// Restore stashed changes after dep merge (#973)
|
|
1197
1365
|
if (stashed) {
|
|
1198
1366
|
try {
|
|
1199
|
-
await
|
|
1367
|
+
await shared.shellSafeGit(['stash', 'pop'], { ..._gitOpts, cwd: worktreePath });
|
|
1200
1368
|
log('info', `Restored stashed changes in ${branchName} after dep merge`);
|
|
1201
1369
|
} catch (popErr) {
|
|
1202
1370
|
log('warn', `git stash pop failed in ${branchName}: ${popErr.message} — stash preserved for agent`);
|
|
@@ -1407,7 +1575,16 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1407
1575
|
// Spawn the claude process
|
|
1408
1576
|
const childEnv = shared.cleanChildEnv();
|
|
1409
1577
|
if (completionReportPath) childEnv.MINIONS_COMPLETION_REPORT = completionReportPath;
|
|
1578
|
+
if (completionNonce) childEnv.MINIONS_COMPLETION_NONCE = completionNonce;
|
|
1410
1579
|
childEnv.MINIONS_REPO_HOST = getRepoHost(project);
|
|
1580
|
+
// W-mp6k7ywi000fa33c — per-WI override that bypasses the requireGitWorkdir
|
|
1581
|
+
// check on agents/<id>/keep-pids.json. Plumbed via env so spawn-agent's
|
|
1582
|
+
// close handler (which runs in the subprocess and computes the reap plan)
|
|
1583
|
+
// can honor it without re-reading the WI meta.
|
|
1584
|
+
if (meta?.item?.meta?.keep_processes_skip_workdir_check
|
|
1585
|
+
|| dispatchItem.meta?.keep_processes_skip_workdir_check) {
|
|
1586
|
+
childEnv.MINIONS_KEEP_PROCESSES_SKIP_WORKDIR_CHECK = '1';
|
|
1587
|
+
}
|
|
1411
1588
|
|
|
1412
1589
|
if (getRepoHost(project) === 'ado') {
|
|
1413
1590
|
// Inject cached ADO token so ADO agents skip re-authentication (#998).
|
|
@@ -1445,18 +1622,30 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1445
1622
|
}
|
|
1446
1623
|
} catch { /* rotation is best-effort — overwrite still happens below */ }
|
|
1447
1624
|
|
|
1448
|
-
//
|
|
1449
|
-
//
|
|
1450
|
-
//
|
|
1625
|
+
// Setup-and-spawn block guarded by partial-setup cleanup (P-f4d2e8a1).
|
|
1626
|
+
// If anything between log-stamping and activeProcesses.set throws, the
|
|
1627
|
+
// catch below tears down every artifact we managed to create so the work
|
|
1628
|
+
// item isn't left "stuck active" with an orphan PID file, log fd, or
|
|
1629
|
+
// realActivityMap entry. The dispatch loop's existing null-return /
|
|
1630
|
+
// throw handler (engine.js ~5184) already re-queues the work item;
|
|
1631
|
+
// here we just guarantee in-memory + on-disk state is consistent.
|
|
1632
|
+
let pidFilePath, logFd, registeredInActivityMap, registeredInActiveProcesses, proc;
|
|
1633
|
+
// The PID file at this path is written asynchronously by the spawned
|
|
1634
|
+
// child (engine/spawn-agent.js:423) once it starts. We compute the path
|
|
1635
|
+
// upfront so the catch block can unlink it whether spawn fails before
|
|
1636
|
+
// the child wrote it (no-op) or after (real cleanup).
|
|
1637
|
+
pidFilePath = promptPath.replace(/prompt-/, 'pid-').replace(/\.md$/, '.pid');
|
|
1451
1638
|
try {
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1639
|
+
// Stamp the log synchronously before spawn. If the synchronous write throws,
|
|
1640
|
+
// we still attempt to spawn (log-file stamp failure must not block the agent),
|
|
1641
|
+
// but we note it so the diagnostic trail isn't silent either.
|
|
1642
|
+
try {
|
|
1643
|
+
safeWrite(liveOutputPath, `# Live output for ${agentId} — ${id}\n# Started: ${startedAt}\n# Task: ${dispatchItem.task}\n[${new Date().toISOString()}] spawn: agent=${agentId} item=${id}\n\n`);
|
|
1644
|
+
} catch (stubErr) {
|
|
1645
|
+
log('warn', `Failed to stamp live-output stub for ${agentId} (${id}): ${stubErr.message}`);
|
|
1646
|
+
}
|
|
1456
1647
|
|
|
1457
|
-
|
|
1458
|
-
_phaseT.spawnCallStart = Date.now();
|
|
1459
|
-
try {
|
|
1648
|
+
_phaseT.spawnCallStart = Date.now();
|
|
1460
1649
|
// `detached: true` puts the agent in its own process group (POSIX) / job
|
|
1461
1650
|
// object (Windows), so when the engine dies — gracefully via stop, abruptly
|
|
1462
1651
|
// via taskkill, or because of a crash — the agent keeps running and can be
|
|
@@ -1470,41 +1659,65 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1470
1659
|
detached: true,
|
|
1471
1660
|
});
|
|
1472
1661
|
_phaseT.spawnCallEnd = Date.now();
|
|
1662
|
+
|
|
1663
|
+
// Seed realActivityMap and stamp PID immediately — BEFORE any handlers (#W-mo25loq8kjer).
|
|
1664
|
+
// Why NOW, not later in the function:
|
|
1665
|
+
// 1. Error-handler race. The `proc.on('error', ...)` handler below calls realActivityMap.delete(id)
|
|
1666
|
+
// on synchronous spawn failures. Seeding before registering handlers ensures delete sees a value
|
|
1667
|
+
// to clear rather than leaving an absent-then-absent no-op that downstream code must guard.
|
|
1668
|
+
// 2. Orphan diagnostics. The PID line gives timeout.js a deterministic way to tell "spawn died
|
|
1669
|
+
// before first write" (stub-only log) from "process started and is hung" (stub + pid line).
|
|
1670
|
+
realActivityMap.set(id, Date.now());
|
|
1671
|
+
registeredInActivityMap = true;
|
|
1672
|
+
try {
|
|
1673
|
+
fs.appendFileSync(liveOutputPath, `[${new Date().toISOString()}] pid: ${proc.pid ?? 'unknown'}\n`);
|
|
1674
|
+
} catch { /* log stamp is best-effort — don't block spawn on fs failure */ }
|
|
1675
|
+
|
|
1676
|
+
const initialProcInfo = {
|
|
1677
|
+
proc,
|
|
1678
|
+
agentId,
|
|
1679
|
+
startedAt,
|
|
1680
|
+
runtimeName,
|
|
1681
|
+
sessionId: cachedSessionId,
|
|
1682
|
+
_completionNonce: completionNonce,
|
|
1683
|
+
...(cachedSessionId ? {
|
|
1684
|
+
_runtimeResumeAt: Date.now(),
|
|
1685
|
+
_runtimeResumeAwaitingFirstOutput: true,
|
|
1686
|
+
} : {}),
|
|
1687
|
+
_pendingSteeringFiles: pendingSteering.entries,
|
|
1688
|
+
};
|
|
1689
|
+
activeProcesses.set(id, initialProcInfo);
|
|
1690
|
+
registeredInActiveProcesses = true;
|
|
1473
1691
|
} catch (spawnErr) {
|
|
1474
|
-
//
|
|
1475
|
-
//
|
|
1476
|
-
//
|
|
1477
|
-
|
|
1692
|
+
// Partial-setup cleanup (P-f4d2e8a1): tear down every artifact in the
|
|
1693
|
+
// reverse order it was created. Each step is conditional + best-effort
|
|
1694
|
+
// so cleanup itself never throws on top of the original error.
|
|
1695
|
+
if (proc === undefined) {
|
|
1696
|
+
// Synchronous spawn failure — record it to the (already-stamped) log so the
|
|
1697
|
+
// orphan detector's "logSize > stub-only" check can tell this apart from a
|
|
1698
|
+
// hung process. Preserves the diagnostic the prior inline catch wrote.
|
|
1699
|
+
try { fs.appendFileSync(liveOutputPath, `[${new Date().toISOString()}] spawn-failed: ${spawnErr.message}\n[process-exit] spawn-failed\n`); } catch { /* cleanup-only best effort */ }
|
|
1700
|
+
} else if (proc && typeof proc.kill === 'function') {
|
|
1701
|
+
// spawn() returned a handle but a later registration step threw —
|
|
1702
|
+
// kill the orphan child so it doesn't run unmonitored.
|
|
1703
|
+
try { proc.kill('SIGKILL'); } catch { /* already exited */ }
|
|
1704
|
+
}
|
|
1705
|
+
if (registeredInActiveProcesses) {
|
|
1706
|
+
try { activeProcesses.delete(id); } catch { /* map.delete never throws but be defensive */ }
|
|
1707
|
+
}
|
|
1708
|
+
if (registeredInActivityMap) {
|
|
1709
|
+
try { realActivityMap.delete(id); } catch { /* defensive */ }
|
|
1710
|
+
}
|
|
1711
|
+
if (logFd !== undefined) {
|
|
1712
|
+
try { fs.closeSync(logFd); } catch { /* fd may already be closed */ }
|
|
1713
|
+
}
|
|
1714
|
+
if (pidFilePath) {
|
|
1715
|
+
try { safeUnlink(pidFilePath); } catch { /* may not exist yet */ }
|
|
1716
|
+
}
|
|
1478
1717
|
cleanupTempAgent(agentId);
|
|
1479
1718
|
throw spawnErr;
|
|
1480
1719
|
}
|
|
1481
1720
|
|
|
1482
|
-
// Seed realActivityMap and stamp PID immediately — BEFORE any handlers (#W-mo25loq8kjer).
|
|
1483
|
-
// Why NOW, not later in the function:
|
|
1484
|
-
// 1. Error-handler race. The `proc.on('error', ...)` handler below calls realActivityMap.delete(id)
|
|
1485
|
-
// on synchronous spawn failures. Seeding before registering handlers ensures delete sees a value
|
|
1486
|
-
// to clear rather than leaving an absent-then-absent no-op that downstream code must guard.
|
|
1487
|
-
// 2. Orphan diagnostics. The PID line gives timeout.js a deterministic way to tell "spawn died
|
|
1488
|
-
// before first write" (stub-only log) from "process started and is hung" (stub + pid line).
|
|
1489
|
-
realActivityMap.set(id, Date.now());
|
|
1490
|
-
try {
|
|
1491
|
-
fs.appendFileSync(liveOutputPath, `[${new Date().toISOString()}] pid: ${proc.pid ?? 'unknown'}\n`);
|
|
1492
|
-
} catch { /* log stamp is best-effort — don't block spawn on fs failure */ }
|
|
1493
|
-
|
|
1494
|
-
const initialProcInfo = {
|
|
1495
|
-
proc,
|
|
1496
|
-
agentId,
|
|
1497
|
-
startedAt,
|
|
1498
|
-
runtimeName,
|
|
1499
|
-
sessionId: cachedSessionId,
|
|
1500
|
-
...(cachedSessionId ? {
|
|
1501
|
-
_runtimeResumeAt: Date.now(),
|
|
1502
|
-
_runtimeResumeAwaitingFirstOutput: true,
|
|
1503
|
-
} : {}),
|
|
1504
|
-
_pendingSteeringFiles: pendingSteering.entries,
|
|
1505
|
-
};
|
|
1506
|
-
activeProcesses.set(id, initialProcInfo);
|
|
1507
|
-
|
|
1508
1721
|
// Emit per-phase timing for spawn-latency analysis. One structured line per
|
|
1509
1722
|
// dispatch; grep `[spawn-timing]` to aggregate. Null phases didn't run for
|
|
1510
1723
|
// this dispatch (e.g. stale_head only runs for fix tasks; dep_* only when
|
|
@@ -1677,6 +1890,10 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1677
1890
|
const spawnScript = path.join(ENGINE_DIR, 'spawn-agent.js');
|
|
1678
1891
|
const childEnv = shared.cleanChildEnv();
|
|
1679
1892
|
if (completionReportPath) childEnv.MINIONS_COMPLETION_REPORT = completionReportPath;
|
|
1893
|
+
// P-d2a8f6c1: preserve the per-dispatch nonce across steering resume so
|
|
1894
|
+
// the agent's completion JSON still validates after the resumed turn.
|
|
1895
|
+
// The dispatch id is the unit of trust, not the spawn instance.
|
|
1896
|
+
if (completionNonce) childEnv.MINIONS_COMPLETION_NONCE = completionNonce;
|
|
1680
1897
|
childEnv.MINIONS_LIVE_OUTPUT_PATH = liveOutputPath;
|
|
1681
1898
|
childEnv.MINIONS_REPO_HOST = getRepoHost(project);
|
|
1682
1899
|
if (getRepoHost(project) === 'ado') {
|
|
@@ -1686,6 +1903,11 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1686
1903
|
if (adoToken) childEnv.MINIONS_ADO_TOKEN = adoToken;
|
|
1687
1904
|
} catch { /* non-fatal */ }
|
|
1688
1905
|
}
|
|
1906
|
+
// W-mp6k7ywi000fa33c — propagate keep_processes workdir-check override across steering resume.
|
|
1907
|
+
if (dispatchItem.meta?.item?.meta?.keep_processes_skip_workdir_check
|
|
1908
|
+
|| dispatchItem.meta?.keep_processes_skip_workdir_check) {
|
|
1909
|
+
childEnv.MINIONS_KEEP_PROCESSES_SKIP_WORKDIR_CHECK = '1';
|
|
1910
|
+
}
|
|
1689
1911
|
let resumeProc;
|
|
1690
1912
|
try {
|
|
1691
1913
|
// detached so the resumed steering session also survives engine death (matches initial spawn)
|
|
@@ -1716,6 +1938,8 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1716
1938
|
startedAt: procInfo.startedAt,
|
|
1717
1939
|
runtimeName,
|
|
1718
1940
|
sessionId: steerSessionId,
|
|
1941
|
+
// P-d2a8f6c1: keep the per-dispatch nonce alive across the steering resume.
|
|
1942
|
+
_completionNonce: procInfo._completionNonce || completionNonce,
|
|
1719
1943
|
_runtimeResumeAt: Date.now(),
|
|
1720
1944
|
_runtimeResumeAwaitingFirstOutput: true,
|
|
1721
1945
|
_pendingSteeringFiles: mergePendingSteeringEntries(
|
|
@@ -1851,30 +2075,127 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1851
2075
|
}
|
|
1852
2076
|
|
|
1853
2077
|
// Parse output and run all post-completion hooks
|
|
1854
|
-
|
|
2078
|
+
// P-d2a8f6c1: hand the per-dispatch nonce to lifecycle so it can validate the
|
|
2079
|
+
// report's `nonce` field. Read it from activeProcesses BEFORE any later
|
|
2080
|
+
// delete clears the entry.
|
|
2081
|
+
const expectedNonce = activeProcesses.get(id)?._completionNonce || null;
|
|
2082
|
+
const completionNonceRequired = engineConfig.completionNonceRequired ?? ENGINE_DEFAULTS.completionNonceRequired;
|
|
2083
|
+
const { resultSummary, autoRecovered, completionContractFailure, structuredCompletion, agentReportedFailure, agentRetryable, nonceMismatch } = await runPostCompletionHooks(dispatchItem, agentId, code, stdout, config, { expectedNonce, completionNonceRequired });
|
|
1855
2084
|
const retryableDecision = typeof agentRetryable === 'boolean' ? agentRetryable : failureInfo.retryable;
|
|
1856
2085
|
|
|
2086
|
+
// W-mp6k7ywi000fa33c — keep_processes acceptance gate. When the work
|
|
2087
|
+
// item carried `meta.keep_processes: true` and produced a keep-pids.json
|
|
2088
|
+
// sidecar whose `cwd` does not look like a real git worktree (default
|
|
2089
|
+
// `requireGitWorkdir: true` in ENGINE_DEFAULTS.keepProcesses), reject
|
|
2090
|
+
// the file and force the dispatch to fail with a dedicated failure
|
|
2091
|
+
// class. spawn-agent's close handler has already reaped the kept PIDs
|
|
2092
|
+
// (the same validation runs there via computeReapPlan), so the engine's
|
|
2093
|
+
// job here is just (a) flip the dispatch outcome to ERROR, (b) emit an
|
|
2094
|
+
// inbox alert that the responsible agent will see on its next dispatch,
|
|
2095
|
+
// and (c) delete the now-rejected sidecar so it does not accumulate.
|
|
2096
|
+
//
|
|
2097
|
+
// Per-WI override: `meta.keep_processes_skip_workdir_check: true` skips
|
|
2098
|
+
// the gate entirely (legitimate non-git keep_processes use cases).
|
|
2099
|
+
let keepProcessesWorkdirFailure = null;
|
|
2100
|
+
{
|
|
2101
|
+
const _wiMeta = dispatchItem.meta?.item?.meta || {};
|
|
2102
|
+
const _kpEnabled = !!_wiMeta.keep_processes
|
|
2103
|
+
|| !!dispatchItem.meta?.keep_processes;
|
|
2104
|
+
const _kpSkipWorkdir = !!_wiMeta.keep_processes_skip_workdir_check
|
|
2105
|
+
|| !!dispatchItem.meta?.keep_processes_skip_workdir_check;
|
|
2106
|
+
if (_kpEnabled && !_kpSkipWorkdir && ENGINE_DEFAULTS.keepProcesses?.requireGitWorkdir !== false) {
|
|
2107
|
+
try {
|
|
2108
|
+
const keepProcessSweep = require('./engine/keep-process-sweep');
|
|
2109
|
+
const evalResult = keepProcessSweep.evaluateKeepPidsAcceptance(agentId, { requireGitWorkdir: true });
|
|
2110
|
+
if (evalResult.exists && evalResult.isWorkdirRejection) {
|
|
2111
|
+
keepProcessesWorkdirFailure = {
|
|
2112
|
+
reason: evalResult.reason,
|
|
2113
|
+
cwd: evalResult.recordedCwd || '',
|
|
2114
|
+
filePath: evalResult.filePath,
|
|
2115
|
+
};
|
|
2116
|
+
// Delete the sidecar so it does not anchor stale PIDs on later
|
|
2117
|
+
// sweeps and does not show up as "malformed" forever.
|
|
2118
|
+
try { fs.unlinkSync(evalResult.filePath); } catch (_e) { /* gone or busy */ }
|
|
2119
|
+
log('warn', `keep-processes acceptance: REJECTED ${agentId} (${id}) — ${evalResult.reason}; PIDs reaped by spawn-agent, sidecar deleted`);
|
|
2120
|
+
// Emit inbox alert so the agent sees this on its next turn.
|
|
2121
|
+
try {
|
|
2122
|
+
const wiId = dispatchItem.meta?.item?.id || '';
|
|
2123
|
+
const slug = `keep-processes-workdir-${agentId}`;
|
|
2124
|
+
const alertBody = [
|
|
2125
|
+
`# keep_processes setup REJECTED for ${agentId}`,
|
|
2126
|
+
'',
|
|
2127
|
+
`Your kept-PIDs setup at \`${evalResult.recordedCwd || '<unknown>'}\` failed validation: ${evalResult.reason}.`,
|
|
2128
|
+
'The directory is not a git worktree. PIDs were NOT protected and will be reaped.',
|
|
2129
|
+
'',
|
|
2130
|
+
wiId ? `Work item: ${wiId}` : '',
|
|
2131
|
+
`Agent: ${agentId}`,
|
|
2132
|
+
`Dispatch: ${id}`,
|
|
2133
|
+
'',
|
|
2134
|
+
'Why this matters: a keep_processes work item that runs in a non-git directory',
|
|
2135
|
+
'is almost always a partial copy of a repo (a selective `cp -r`). The Minions',
|
|
2136
|
+
'cleanup sweep cannot reason about such directories safely; later sweeps may',
|
|
2137
|
+
'rmSync subdirs treating them as separate worktrees. Re-run the work item',
|
|
2138
|
+
'inside a real `git worktree add` directory, or set',
|
|
2139
|
+
'`meta.keep_processes_skip_workdir_check: true` on the work item if you',
|
|
2140
|
+
'genuinely intend to keep PIDs alive in a non-git directory.',
|
|
2141
|
+
'',
|
|
2142
|
+
].join('\n');
|
|
2143
|
+
writeInboxAlert(slug, alertBody);
|
|
2144
|
+
} catch (alertErr) {
|
|
2145
|
+
log('warn', `keep-processes acceptance: failed to emit inbox alert for ${agentId}: ${alertErr.message}`);
|
|
2146
|
+
}
|
|
2147
|
+
} else if (evalResult.exists && !evalResult.accepted) {
|
|
2148
|
+
// Non-workdir validation failure (oversize pids, bad TTL, etc.) —
|
|
2149
|
+
// already handled by validateKeepPidsRecord; just log for audit.
|
|
2150
|
+
log('warn', `keep-processes acceptance: ${agentId} (${id}) sidecar rejected — ${evalResult.reason} (not a workdir failure)`);
|
|
2151
|
+
}
|
|
2152
|
+
} catch (e) {
|
|
2153
|
+
log('warn', `keep-processes acceptance check failed for ${agentId} (${id}): ${e.message}`);
|
|
2154
|
+
}
|
|
2155
|
+
}
|
|
2156
|
+
}
|
|
2157
|
+
|
|
1857
2158
|
// Move from active to completed in dispatch (single source of truth for agent status)
|
|
1858
2159
|
// autoRecovered: agent failed after creating PRs — treat as success
|
|
1859
2160
|
const hardContractFail = completionContractFailure?.severity === 'hard'
|
|
1860
2161
|
|| completionContractFailure?.nonTerminal === true;
|
|
1861
|
-
|
|
2162
|
+
// P-d2a8f6c1: nonce mismatch (or missing+required) is a security failure —
|
|
2163
|
+
// override effectiveResult to ERROR and surface the dedicated failure_class.
|
|
2164
|
+
// We mark the work item as failed (processWorkItemFailure NOT suppressed)
|
|
2165
|
+
// so the dispatch is not silently retried by the auto-recovery path.
|
|
2166
|
+
const nonceFail = nonceMismatch && nonceMismatch.severity === 'hard';
|
|
2167
|
+
// W-mp6k7ywi000fa33c — keep_processes workdir rejection is a hard
|
|
2168
|
+
// failure: the agent's claim that "everything was set up correctly" is
|
|
2169
|
+
// structurally false. Force ERROR so the dispatch is not silently treated
|
|
2170
|
+
// as success even when exit code is 0.
|
|
2171
|
+
const keepProcessesWorkdirFail = !!keepProcessesWorkdirFailure;
|
|
2172
|
+
const effectiveResult = (hardContractFail || nonceFail || keepProcessesWorkdirFail)
|
|
2173
|
+
? DISPATCH_RESULT.ERROR
|
|
2174
|
+
: (((code === 0 && !agentReportedFailure) || autoRecovered) ? DISPATCH_RESULT.SUCCESS : DISPATCH_RESULT.ERROR);
|
|
1862
2175
|
const finalCompletionReportPath = structuredCompletion?._path || dispatchItem.meta?.completionReportPath || shared.dispatchCompletionReportPath(id);
|
|
1863
2176
|
const completionOpts = {
|
|
1864
2177
|
...(finalCompletionReportPath ? { completionReportPath: finalCompletionReportPath } : {}),
|
|
1865
2178
|
...(structuredCompletion ? { structuredCompletion } : {}),
|
|
1866
2179
|
};
|
|
1867
|
-
const completeOpts =
|
|
1868
|
-
? { ...completionOpts,
|
|
1869
|
-
: (
|
|
1870
|
-
...completionOpts,
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
2180
|
+
const completeOpts = keepProcessesWorkdirFail
|
|
2181
|
+
? { ...completionOpts, failureClass: FAILURE_CLASS.INVALID_KEEP_PROCESSES_WORKDIR, agentRetryable: false }
|
|
2182
|
+
: (nonceFail
|
|
2183
|
+
? { ...completionOpts, failureClass: nonceMismatch.failureClass, agentRetryable: false }
|
|
2184
|
+
: (hardContractFail
|
|
2185
|
+
? { ...completionOpts, processWorkItemFailure: false }
|
|
2186
|
+
: (effectiveResult === DISPATCH_RESULT.ERROR ? {
|
|
2187
|
+
...completionOpts,
|
|
2188
|
+
...(failureClass ? { failureClass } : {}),
|
|
2189
|
+
...(typeof retryableDecision === 'boolean' ? { agentRetryable: retryableDecision } : {}),
|
|
2190
|
+
...(structuredCompletion?.failure_class ? { failureClass: structuredCompletion.failure_class } : {}),
|
|
2191
|
+
} : completionOpts)));
|
|
1875
2192
|
// Extract last 5 non-empty stderr lines as error context when exit code is non-zero
|
|
1876
2193
|
let errorReason = '';
|
|
1877
|
-
if (
|
|
2194
|
+
if (keepProcessesWorkdirFail) {
|
|
2195
|
+
errorReason = `invalid_keep_processes_workdir: ${keepProcessesWorkdirFailure.reason} (cwd=${keepProcessesWorkdirFailure.cwd || '<unknown>'})`.slice(0, 300);
|
|
2196
|
+
} else if (nonceFail) {
|
|
2197
|
+
errorReason = nonceMismatch.reason || 'completion nonce mismatch';
|
|
2198
|
+
} else if (hardContractFail) {
|
|
1878
2199
|
errorReason = completionContractFailure.reason || 'PR attachment contract failed';
|
|
1879
2200
|
} else if (agentReportedFailure) {
|
|
1880
2201
|
errorReason = structuredCompletion
|
|
@@ -1894,8 +2215,76 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1894
2215
|
const hint = diagnoseEmptyOutput(failureClass, code, elapsedMs);
|
|
1895
2216
|
if (hint) errorReason = errorReason ? `${hint} ${errorReason}` : hint;
|
|
1896
2217
|
}
|
|
2218
|
+
|
|
2219
|
+
// ── Pool return (W-mp73ya3e000me6c5) ────────────────────────────────────
|
|
2220
|
+
// Park a healthy worktree as IDLE in the per-project pool so the next
|
|
2221
|
+
// dispatch can reuse it without paying the cold install/build cost. Run
|
|
2222
|
+
// BEFORE completeDispatch so the dispatch is still in dispatch.active
|
|
2223
|
+
// during the git ops — otherwise pruneStale() racing on another tick
|
|
2224
|
+
// could see borrowedBy as orphaned and evict the entry mid-return.
|
|
2225
|
+
// Skipped when keep_processes PIDs are still alive: the worktree may be
|
|
2226
|
+
// the cwd of a left-running dev server or watcher.
|
|
2227
|
+
if (effectiveResult === DISPATCH_RESULT.SUCCESS && worktreePath && fs.existsSync(worktreePath)) {
|
|
2228
|
+
let _keepPidsAlive = false;
|
|
2229
|
+
try {
|
|
2230
|
+
const _ks = require('./engine/keep-process-sweep');
|
|
2231
|
+
const _anchorRes = _ks.getActiveAnchorPidsForAgent(agentId);
|
|
2232
|
+
if (_anchorRes && _anchorRes.pids && _anchorRes.pids.size > 0) _keepPidsAlive = true;
|
|
2233
|
+
} catch (_e) { /* keep-process-sweep import optional — fall through */ }
|
|
2234
|
+
|
|
2235
|
+
const _projForReturn = project?.name || 'default';
|
|
2236
|
+
const _poolSizeReturn = worktreePool.getProjectPoolSize(_projForReturn, config);
|
|
2237
|
+
if (!_keepPidsAlive && _poolSizeReturn > 0) {
|
|
2238
|
+
try {
|
|
2239
|
+
const _mainRefRet = sanitizeBranch(shared.resolveMainBranch(rootDir, project?.mainBranch));
|
|
2240
|
+
await shared.shellSafeGit(['reset', '--hard', 'HEAD'], { ..._gitOpts, cwd: worktreePath, timeout: 30000 });
|
|
2241
|
+
// -fd preserves gitignored files (node_modules, .vite, .next caches) — that's the whole point.
|
|
2242
|
+
await shared.shellSafeGit(['clean', '-fd'], { ..._gitOpts, cwd: worktreePath, timeout: 30000 });
|
|
2243
|
+
await shared.shellSafeGit(['fetch', 'origin', _mainRefRet], { ..._gitOpts, cwd: rootDir, timeout: 30000 });
|
|
2244
|
+
// Detach at origin/<main> — local main is typically checked out in
|
|
2245
|
+
// the project root and git refuses two checkouts of the same branch.
|
|
2246
|
+
await shared.shellSafeGit(['checkout', '--detach', `origin/${_mainRefRet}`], { ..._gitOpts, cwd: worktreePath, timeout: 30000 });
|
|
2247
|
+
const _outcome = worktreePool.returnToPool(_projForReturn, worktreePath, {
|
|
2248
|
+
poolSize: _poolSizeReturn,
|
|
2249
|
+
branch: branchName || '',
|
|
2250
|
+
});
|
|
2251
|
+
log('info', `worktree-pool: return outcome=${_outcome} for ${_projForReturn} at ${worktreePath}`);
|
|
2252
|
+
if (_outcome === 'rejected') {
|
|
2253
|
+
// Capacity rejected — drop any stale entry so cleanup can reap normally.
|
|
2254
|
+
worktreePool.evictEntry(worktreePath, 'capacity-rejected');
|
|
2255
|
+
}
|
|
2256
|
+
} catch (returnErr) {
|
|
2257
|
+
log('warn', `worktree-pool: return failed for ${worktreePath}: ${returnErr.message} — evicting from pool`);
|
|
2258
|
+
worktreePool.evictEntry(worktreePath, 'return-git-failed');
|
|
2259
|
+
}
|
|
2260
|
+
} else if (_keepPidsAlive) {
|
|
2261
|
+
// Skip the pool — the worktree is in use by left-running processes.
|
|
2262
|
+
// Make sure no stale entry lingers (defensive).
|
|
2263
|
+
worktreePool.evictEntry(worktreePath, 'keep-processes-alive');
|
|
2264
|
+
}
|
|
2265
|
+
}
|
|
2266
|
+
|
|
1897
2267
|
completeDispatch(id, effectiveResult, errorReason, resultSummary, completeOpts);
|
|
1898
2268
|
|
|
2269
|
+
// W-mp6k7ywi000fa33c — surface the workdir-rejection on the WI so the
|
|
2270
|
+
// dashboard pending-reason area shows the missing structure instead of
|
|
2271
|
+
// a bare failure_class label. _pendingReason on a failed item is treated
|
|
2272
|
+
// by the dashboard as "additional context" rather than a queue gate.
|
|
2273
|
+
if (keepProcessesWorkdirFailure && dispatchItem.meta?.item?.id) {
|
|
2274
|
+
try {
|
|
2275
|
+
const wiPath = resolveWorkItemPath(dispatchItem.meta);
|
|
2276
|
+
if (wiPath) {
|
|
2277
|
+
mutateJsonFileLocked(wiPath, data => {
|
|
2278
|
+
if (!Array.isArray(data)) return data;
|
|
2279
|
+
const wi = data.find(i => i.id === dispatchItem.meta.item.id);
|
|
2280
|
+
if (!wi) return data;
|
|
2281
|
+
wi._pendingReason = `invalid_keep_processes_workdir: ${keepProcessesWorkdirFailure.reason}`.slice(0, 500);
|
|
2282
|
+
return data;
|
|
2283
|
+
});
|
|
2284
|
+
}
|
|
2285
|
+
} catch (e) { log('warn', `keep-processes acceptance: failed to set _pendingReason: ${e.message}`); }
|
|
2286
|
+
}
|
|
2287
|
+
|
|
1899
2288
|
// Cleanup temp files (including PID file now that dispatch is complete)
|
|
1900
2289
|
try { fs.unlinkSync(sysPromptPath); } catch { /* cleanup */ }
|
|
1901
2290
|
try { fs.unlinkSync(promptPath); } catch { /* cleanup */ }
|
|
@@ -1974,6 +2363,7 @@ async function spawnAgent(dispatchItem, config) {
|
|
|
1974
2363
|
startedAt,
|
|
1975
2364
|
runtimeName,
|
|
1976
2365
|
sessionId: existingProcInfo.sessionId || cachedSessionId,
|
|
2366
|
+
_completionNonce: existingProcInfo._completionNonce || completionNonce,
|
|
1977
2367
|
_pendingSteeringFiles: mergePendingSteeringEntries(existingProcInfo._pendingSteeringFiles, pendingSteering.entries),
|
|
1978
2368
|
});
|
|
1979
2369
|
|
|
@@ -2288,6 +2678,51 @@ function safePrdFilenameForProject(projectName, suffix) {
|
|
|
2288
2678
|
return path.basename(resolved);
|
|
2289
2679
|
}
|
|
2290
2680
|
|
|
2681
|
+
/**
 * Atomically reserve a unique PRD filename in `prdDir` (P-9b7e5d3c).
 *
 * Replaces the racy "fs.existsSync(...) then write" pattern: two parallel
 * materializations on the same plan slug previously both observed a 'free'
 * filename and silently overwrote each other. This helper uses
 * `fs.openSync(path, 'wx')` so the OS rejects duplicate creates atomically.
 *
 * Attempt schedule (100 attempts total):
 *   attempt = 0  → baseFileName            (e.g. "slug.json")
 *   attempt = 1  → "${stem}-1${ext}"       (e.g. "slug-1.json")
 *   attempt = 2  → "${stem}-2${ext}"
 *   ...
 *   attempt = 99 → "${stem}-99${ext}"
 *
 * Behavior:
 *   - On EEXIST: increment attempt and retry.
 *   - Any other openSync error propagates immediately.
 *   - After 100 EEXIST failures: throw
 *     'Could not reserve unique PRD filename after 100 attempts'.
 *   - On success: write `content` to the reserved file, close fd, return basename.
 *   - On write failure: close fd, remove the just-created file (best-effort) so
 *     no empty/truncated PRD is left behind, then rethrow the write error.
 *
 * @param {string} prdDir - Directory in which the file is created.
 * @param {string} baseFileName - Preferred basename (extension optional).
 * @param {string} [content=''] - Data to write; a falsy value leaves the file empty.
 * @returns {string} Basename of the file that was actually reserved.
 * @throws {Error} Non-EEXIST open errors, write errors, or exhaustion after 100 attempts.
 */
function reservePrdFilename(prdDir, baseFileName, content = '') {
  const ext = path.extname(baseFileName);
  const stem = ext ? baseFileName.slice(0, -ext.length) : baseFileName;
  for (let attempt = 0; attempt < 100; attempt++) {
    const candidateName = attempt === 0 ? baseFileName : `${stem}-${attempt}${ext}`;
    const candidatePath = path.join(prdDir, candidateName);
    let fd;
    try {
      // 'wx' = create exclusively; fails with EEXIST if the path already exists.
      fd = fs.openSync(candidatePath, 'wx');
    } catch (err) {
      if (err && err.code === 'EEXIST') continue;
      throw err;
    }
    let written = false;
    try {
      if (content) fs.writeSync(fd, content);
      written = true;
    } finally {
      // Close before unlinking: some platforms refuse to remove an open file.
      fs.closeSync(fd);
      if (!written) {
        // We created this file moments ago and failed to fill it. Leaving it
        // would strand an empty/partial PRD and push the retry onto a "-N" name.
        try { fs.unlinkSync(candidatePath); } catch { /* best-effort cleanup; original error still propagates */ }
      }
    }
    return candidateName;
  }
  throw new Error('Could not reserve unique PRD filename after 100 attempts');
}
|
|
2725
|
+
|
|
2291
2726
|
function materializePlansAsWorkItems(config) {
|
|
2292
2727
|
if (!fs.existsSync(PRD_DIR)) { try { fs.mkdirSync(PRD_DIR, { recursive: true }); } catch (e) { log('warn', 'create PRD directory: ' + e.message); } }
|
|
2293
2728
|
const writePrdLocked = (fileName, data) => {
|
|
@@ -2409,9 +2844,11 @@ function materializePlansAsWorkItems(config) {
|
|
|
2409
2844
|
const parsed = JSON.parse(stripped);
|
|
2410
2845
|
if (parsed.missing_features) {
|
|
2411
2846
|
const jsonName = mf.replace(/\.md$/, '.json');
|
|
2412
|
-
|
|
2847
|
+
// Atomic open ('wx' + retry) so two parallel migrations on the
|
|
2848
|
+
// same slug don't both overwrite a single PRD (P-9b7e5d3c).
|
|
2849
|
+
const reserved = reservePrdFilename(checkDir, jsonName, JSON.stringify(parsed, null, 2));
|
|
2413
2850
|
try { fs.unlinkSync(path.join(checkDir, mf)); } catch { /* cleanup */ }
|
|
2414
|
-
log('info', `Plan enforcement: moved ${mf} → prd/${
|
|
2851
|
+
log('info', `Plan enforcement: moved ${mf} → prd/${reserved} (PRDs must be .json in prd/)`);
|
|
2415
2852
|
}
|
|
2416
2853
|
} catch {} // Not JSON — it's a proper plan .md, leave it
|
|
2417
2854
|
}
|
|
@@ -2424,9 +2861,11 @@ function materializePlansAsWorkItems(config) {
|
|
|
2424
2861
|
try {
|
|
2425
2862
|
const parsed = safeJson(path.join(PLANS_DIR, jf));
|
|
2426
2863
|
if (parsed?.missing_features) {
|
|
2427
|
-
|
|
2864
|
+
// Atomic open ('wx' + retry) so two parallel migrations on the
|
|
2865
|
+
// same slug don't both overwrite a single PRD (P-9b7e5d3c).
|
|
2866
|
+
const reserved = reservePrdFilename(PRD_DIR, jf, JSON.stringify(parsed, null, 2));
|
|
2428
2867
|
try { fs.unlinkSync(path.join(PLANS_DIR, jf)); } catch { /* cleanup */ }
|
|
2429
|
-
log('info', `Auto-migrated PRD ${jf} from plans/ to prd
|
|
2868
|
+
log('info', `Auto-migrated PRD ${jf} from plans/ to prd/${reserved}`);
|
|
2430
2869
|
}
|
|
2431
2870
|
} catch (e) { log('warn', 'migrate PRD from plans: ' + e.message); }
|
|
2432
2871
|
}
|
|
@@ -2669,12 +3108,20 @@ function materializePlansAsWorkItems(config) {
|
|
|
2669
3108
|
const reconciled = reconcileItemsWithPrs(existingItems, allPrsForReconcile, { onlyIds: newlyCreatedIds });
|
|
2670
3109
|
if (reconciled > 0) log('info', `Plan reconciliation: marked ${reconciled} item(s) as done → ${projName}`);
|
|
2671
3110
|
|
|
2672
|
-
// PRD removal sync: cancel pending work items whose PRD item was removed from the plan
|
|
3111
|
+
// PRD removal sync: cancel pending work items whose PRD item was removed from the plan.
|
|
3112
|
+
// IMPORTANT: decomposed sub-items (children spawned by the decompose agent for
|
|
3113
|
+
// implement:large parents) carry parent_id and have IDs of the shape
|
|
3114
|
+
// <parentId>-a/-b/-c. They are NEVER written back into the PRD's missing_features
|
|
3115
|
+
// array, so an id-only predicate cancels them every time the materializer creates a
|
|
3116
|
+
// new WI for the same plan. Accept WIs whose parent_id matches any current PRD id
|
|
3117
|
+
// (one-hop only — decompose produces a single level of children today).
|
|
2673
3118
|
const currentPrdIds = new Set(plan.missing_features.map(f => f.id));
|
|
2674
3119
|
let cancelled = 0;
|
|
2675
3120
|
for (const wi of existingItems) {
|
|
2676
3121
|
if (wi.status !== WI_STATUS.PENDING || wi.sourcePlan !== file) continue;
|
|
2677
|
-
|
|
3122
|
+
const ownIdInPrd = currentPrdIds.has(wi.id);
|
|
3123
|
+
const parentInPrd = wi.parent_id && currentPrdIds.has(wi.parent_id);
|
|
3124
|
+
if (!ownIdInPrd && !parentInPrd) {
|
|
2678
3125
|
wi.status = WI_STATUS.CANCELLED;
|
|
2679
3126
|
wi.cancelledAt = ts();
|
|
2680
3127
|
wi.cancelReason = `PRD item removed from ${file}`;
|
|
@@ -2718,8 +3165,9 @@ function materializePlansAsWorkItems(config) {
|
|
|
2718
3165
|
const mainBranch = shared.resolveMainBranch(root, firstProject.mainBranch);
|
|
2719
3166
|
const branch = sanitizeBranch(plan.feature_branch);
|
|
2720
3167
|
// Create branch from main (idempotent — ignores if exists)
|
|
2721
|
-
|
|
2722
|
-
|
|
3168
|
+
// P-a7c4d2e8 (F3): argv-form (shell:false) replaces shell-piped exec.
|
|
3169
|
+
try { shared.shellSafeGitSync(['branch', branch, mainBranch], { cwd: root }); } catch { /* idempotent — branch may already exist */ }
|
|
3170
|
+
try { shared.shellSafeGitSync(['push', '-u', 'origin', branch], { cwd: root }); } catch (e) { log('warn', `git push -u origin ${branch} (pre-create): ${e.message?.split('\n')[0]}`); }
|
|
2723
3171
|
log('info', `Shared branch pre-created: ${branch} for plan ${file}`);
|
|
2724
3172
|
} catch (err) {
|
|
2725
3173
|
log('warn', `Failed to pre-create shared branch for ${file}: ${err.message}`);
|
|
@@ -3437,6 +3885,13 @@ function renderProjectWorkItemPromptForAgent(item, workType, agentId, config, pr
|
|
|
3437
3885
|
pr_url: item.pr_url || item.prUrl || '',
|
|
3438
3886
|
reviewer: item.reviewer || 'Reviewer',
|
|
3439
3887
|
review_note: item.review_note || item.reviewNote || item.description || item.title || 'See PR thread comments',
|
|
3888
|
+
// W-mp68q6ke0010de68 — opt-in keep_processes hint plumbed via item.meta.
|
|
3889
|
+
// Truthy `keep_processes` triggers the playbook hint section; missing flag
|
|
3890
|
+
// means the agent never learns the feature exists (default-off).
|
|
3891
|
+
keep_processes: !!(item.meta && item.meta.keep_processes),
|
|
3892
|
+
keep_processes_ttl_minutes: item.meta && Number.isFinite(Number(item.meta.keep_processes_ttl_minutes))
|
|
3893
|
+
? Math.floor(Number(item.meta.keep_processes_ttl_minutes))
|
|
3894
|
+
: '',
|
|
3440
3895
|
};
|
|
3441
3896
|
const cpResult = buildWorkItemDispatchVars(item, vars, config, {
|
|
3442
3897
|
worktreePath: vars.worktree_path || root,
|
|
@@ -4661,6 +5116,23 @@ let tickRunning = false;
|
|
|
4661
5116
|
let _tickStartedAt = 0;
|
|
4662
5117
|
const TICK_TIMEOUT_MS = 300000; // 5 min — force-release tick lock if stuck
|
|
4663
5118
|
|
|
5119
|
+
// P-c2e5a1d9-a — Generation counter that is incremented on every tick start
|
|
5120
|
+
// AND every time the force-release branch reclaims a hung tick. The in-flight
|
|
5121
|
+
// (hung) tickInner captures its own `myGeneration` at entry; if a force-release
|
|
5122
|
+
// later bumps `tickGeneration`, the stale tick can detect the mismatch via
|
|
5123
|
+
// `_isTickStale()` and abort instead of mutating shared state alongside the
|
|
5124
|
+
// fresh tick that took over. Sub-task -a wires the scaffolding + a single
|
|
5125
|
+
// guard call after the heartbeat write; per-phase guards land in -b.
|
|
5126
|
+
let tickGeneration = 0;
|
|
5127
|
+
|
|
5128
|
+
/**
 * Report whether the tick that captured `gen` has been superseded.
 *
 * Compares the caller's captured generation against the module-level
 * `tickGeneration` counter and emits a warning when they differ.
 *
 * @param {number} gen - Generation value the caller captured at tick entry.
 * @returns {boolean} `true` when `gen` no longer matches `tickGeneration`.
 */
function _isTickStale(gen) {
  const superseded = gen !== tickGeneration;
  if (superseded) log('warn', 'Tick generation mismatch, aborting stale tick');
  return superseded;
}
|
|
5135
|
+
|
|
4664
5136
|
function _pollIntervalMsFromTicks(ticks, tickIntervalMs) {
|
|
4665
5137
|
const normalizedTicks = Math.max(1, Number(ticks) || 1);
|
|
4666
5138
|
const normalizedTickInterval = Math.max(1, Number(tickIntervalMs) || ENGINE_DEFAULTS.tickInterval);
|
|
@@ -4683,6 +5155,10 @@ async function tick() {
|
|
|
4683
5155
|
log('error', `Tick hung for ${Math.round((Date.now() - _tickStartedAt) / 1000)}s — force-releasing lock`);
|
|
4684
5156
|
tickRunning = false;
|
|
4685
5157
|
_tickStartedAt = 0;
|
|
5158
|
+
// P-c2e5a1d9-a — Bump generation so the in-flight (hung) tickInner sees
|
|
5159
|
+
// a mismatch via `_isTickStale()` and bails out before mutating shared
|
|
5160
|
+
// state alongside the fresh tick that's about to take over.
|
|
5161
|
+
tickGeneration++;
|
|
4686
5162
|
}
|
|
4687
5163
|
return;
|
|
4688
5164
|
}
|
|
@@ -4699,6 +5175,11 @@ async function tick() {
|
|
|
4699
5175
|
}
|
|
4700
5176
|
|
|
4701
5177
|
async function tickInner() {
|
|
5178
|
+
// P-c2e5a1d9-a — Capture this tick's generation as the very first statement
|
|
5179
|
+
// so any guard later in this function can detect a force-release that
|
|
5180
|
+
// reclaimed the lock while we were still running.
|
|
5181
|
+
const myGeneration = ++tickGeneration;
|
|
5182
|
+
|
|
4702
5183
|
const control = getControl();
|
|
4703
5184
|
if (control.state !== 'running' && control.state !== 'stopping') {
|
|
4704
5185
|
log('info', `Engine state is "${control.state}" — exiting process`);
|
|
@@ -4708,6 +5189,11 @@ async function tickInner() {
|
|
|
4708
5189
|
// Write heartbeat so dashboard can detect stale engine
|
|
4709
5190
|
try { mutateControl(c => ({ ...c, heartbeat: Date.now() })); } catch (e) { log('warn', 'write heartbeat: ' + e.message); }
|
|
4710
5191
|
|
|
5192
|
+
// P-c2e5a1d9-a — Initial wiring guard: bail immediately if a force-release
|
|
5193
|
+
// reclaimed our lock while the heartbeat write was in flight. Per-phase
|
|
5194
|
+
// guards inside the rest of tickInner are sub-task -b's scope.
|
|
5195
|
+
if (_isTickStale(myGeneration)) return;
|
|
5196
|
+
|
|
4711
5197
|
const config = getConfig();
|
|
4712
5198
|
tickCount++;
|
|
4713
5199
|
const now = Date.now();
|
|
@@ -4737,6 +5223,23 @@ async function tickInner() {
|
|
|
4737
5223
|
// 2.5. Periodic cleanup + MCP sync (every 10 ticks = ~5 minutes)
|
|
4738
5224
|
if (tickCount % 10 === 0) {
|
|
4739
5225
|
try { await runCleanup(config); } catch (e) { log('warn', `runCleanup: ${e.message}`); }
|
|
5226
|
+
if (_isTickStale(myGeneration)) return;
|
|
5227
|
+
}
|
|
5228
|
+
|
|
5229
|
+
// 2.52. keep_processes TTL/dead-PID sweep (W-mp68q6ke0010de68). Walks
|
|
5230
|
+
// agents/*/keep-pids.json, kills+unlinks expired entries, silently unlinks
|
|
5231
|
+
// entries whose PIDs are all gone, leaves malformed files alone. Cheap (one
|
|
5232
|
+
// readdir + N small file reads), bounded by ENGINE_DEFAULTS.keepProcesses.
|
|
5233
|
+
const keepSweepEvery = Math.max(1, ENGINE_DEFAULTS.keepProcesses?.sweepEvery || 30);
|
|
5234
|
+
if (ENGINE_DEFAULTS.keepProcesses?.enabled !== false && tickCount % keepSweepEvery === 0) {
|
|
5235
|
+
safe('sweepKeepProcesses', () => {
|
|
5236
|
+
const { sweepKeepProcesses } = require('./engine/keep-process-sweep');
|
|
5237
|
+
const stats = sweepKeepProcesses();
|
|
5238
|
+
if (stats.scanned > 0 && (stats.expiredFiles || stats.deadFiles || stats.malformed)) {
|
|
5239
|
+
log('info', `keep-processes sweep: scanned=${stats.scanned} expired=${stats.expiredFiles} dead=${stats.deadFiles} malformed=${stats.malformed} killed=${stats.killedPids}`);
|
|
5240
|
+
}
|
|
5241
|
+
});
|
|
5242
|
+
if (_isTickStale(myGeneration)) return;
|
|
4740
5243
|
}
|
|
4741
5244
|
|
|
4742
5245
|
// 2.55. Check persistent watches (3 tick-equivalents, default ~3 minutes)
|
|
@@ -4831,7 +5334,9 @@ async function tickInner() {
|
|
|
4831
5334
|
log('info', '[gh] PR status poll skipped — throttled');
|
|
4832
5335
|
}
|
|
4833
5336
|
if (statusPolls.length) await Promise.allSettled(statusPolls);
|
|
5337
|
+
if (_isTickStale(myGeneration)) return;
|
|
4834
5338
|
try { await processPendingRebases(config); } catch (err) { log('warn', `Pending rebase processing error: ${err?.message || err}`); }
|
|
5339
|
+
if (_isTickStale(myGeneration)) return;
|
|
4835
5340
|
// Sync PR status back to PRD items (missing → done when active PR exists)
|
|
4836
5341
|
try { syncPrdFromPrs(config); } catch (err) { log('warn', `PRD sync error: ${err?.message || err}`); }
|
|
4837
5342
|
// Check if any plans can be marked completed (all features done/in-pr)
|
|
@@ -4875,12 +5380,14 @@ async function tickInner() {
|
|
|
4875
5380
|
log('info', '[gh] PR comment poll skipped — throttled');
|
|
4876
5381
|
}
|
|
4877
5382
|
if (commentPolls.length) await Promise.allSettled(commentPolls);
|
|
5383
|
+
if (_isTickStale(myGeneration)) return;
|
|
4878
5384
|
// Reconciliation runs regardless of poll flags — it's a recovery sweep, not a convenience poll
|
|
4879
5385
|
// Reconciliation also parallelized — ADO and GitHub reconciliation are independent
|
|
4880
5386
|
const reconcilePolls = [];
|
|
4881
5387
|
reconcilePolls.push(reconcilePrs(config).catch(err => { log('warn', `ADO PR reconciliation error: ${err?.message || err}${err?.stack ? ' | ' + err.stack.split('\n')[1]?.trim() : ''}`); }));
|
|
4882
5388
|
reconcilePolls.push(ghReconcilePrs(config).catch(err => { log('warn', `GitHub PR reconciliation error: ${err?.message || err}${err?.stack ? ' | ' + err.stack.split('\n')[1]?.trim() : ''}`); }));
|
|
4883
5389
|
await Promise.allSettled(reconcilePolls);
|
|
5390
|
+
if (_isTickStale(myGeneration)) return;
|
|
4884
5391
|
}
|
|
4885
5392
|
|
|
4886
5393
|
// 2.9. Stalled dispatch detection — auto-retry failed items blocking the graph (every 20 ticks = ~10 min)
|
|
@@ -4903,6 +5410,7 @@ async function tickInner() {
|
|
|
4903
5410
|
const dispatchKeysToClear = [];
|
|
4904
5411
|
const cooldownKeysToClear = [];
|
|
4905
5412
|
|
|
5413
|
+
if (_isTickStale(myGeneration)) return;
|
|
4906
5414
|
mutateWorkItems(wiPath, items => {
|
|
4907
5415
|
let changed = false;
|
|
4908
5416
|
const failedIds = new Set(items.filter(w => w.status === WI_STATUS.FAILED).map(w => w.id));
|
|
@@ -4959,6 +5467,7 @@ async function tickInner() {
|
|
|
4959
5467
|
// Clear dispatch entries AFTER work-items lock is released (no nested locks)
|
|
4960
5468
|
for (const key of dispatchKeysToClear) {
|
|
4961
5469
|
try {
|
|
5470
|
+
if (_isTickStale(myGeneration)) return;
|
|
4962
5471
|
mutateDispatch((dp) => {
|
|
4963
5472
|
dp.completed = dp.completed.filter(d => d.meta?.dispatchKey !== key);
|
|
4964
5473
|
return dp;
|
|
@@ -4997,6 +5506,7 @@ async function tickInner() {
|
|
|
4997
5506
|
try { pruneStalePrDispatches(config); } catch (e) { log('warn', 'prune stale PR dispatches: ' + e.message); }
|
|
4998
5507
|
let discoveryOk = true;
|
|
4999
5508
|
try { await discoverWork(config); } catch (e) { log('warn', 'discoverWork: ' + e.message); discoveryOk = false; }
|
|
5509
|
+
if (_isTickStale(myGeneration)) return;
|
|
5000
5510
|
|
|
5001
5511
|
// 4. Update snapshot
|
|
5002
5512
|
safe('updateSnapshot', () => updateSnapshot(config));
|
|
@@ -5022,6 +5532,7 @@ async function tickInner() {
|
|
|
5022
5532
|
const pa = itemPriority[a.meta?.item?.priority] ?? 1, pb = itemPriority[b.meta?.item?.priority] ?? 1;
|
|
5023
5533
|
return pa - pb;
|
|
5024
5534
|
});
|
|
5535
|
+
if (_isTickStale(myGeneration)) return;
|
|
5025
5536
|
mutateDispatch((dp) => {
|
|
5026
5537
|
dp.pending = dispatch.pending;
|
|
5027
5538
|
dp.active = dispatch.active || dp.active;
|
|
@@ -5069,6 +5580,7 @@ async function tickInner() {
|
|
|
5069
5580
|
delete item.skipReason;
|
|
5070
5581
|
refreshDeferredWorkItemPrompt(item, config);
|
|
5071
5582
|
try {
|
|
5583
|
+
if (_isTickStale(myGeneration)) return;
|
|
5072
5584
|
mutateDispatch((dp) => {
|
|
5073
5585
|
const p = (dp.pending || []).find(d => d.id === item.id);
|
|
5074
5586
|
if (p) {
|
|
@@ -5185,6 +5697,7 @@ async function tickInner() {
|
|
|
5185
5697
|
log('error', `spawnAgent exception for ${item.id}: ${spawnErr.message}`);
|
|
5186
5698
|
proc = null;
|
|
5187
5699
|
}
|
|
5700
|
+
if (_isTickStale(myGeneration)) return;
|
|
5188
5701
|
if (proc === null) {
|
|
5189
5702
|
// spawnAgent failed (e.g., worktree creation error). It already called
|
|
5190
5703
|
// completeDispatch internally which handles retry logic, but log at the
|
|
@@ -5198,6 +5711,7 @@ async function tickInner() {
|
|
|
5198
5711
|
? path.join(ENGINE_DIR, '..', 'work-items.json')
|
|
5199
5712
|
: item.meta.project?.name ? projectWorkItemsPath({ name: item.meta.project.name, localPath: item.meta.project.localPath }) : null;
|
|
5200
5713
|
if (wiPath) {
|
|
5714
|
+
if (_isTickStale(myGeneration)) return;
|
|
5201
5715
|
mutateWorkItems(wiPath, items => {
|
|
5202
5716
|
const wi = items.find(i => i.id === item.meta.item.id);
|
|
5203
5717
|
if (wi && wi.status === WI_STATUS.DISPATCHED) {
|
|
@@ -5267,6 +5781,7 @@ async function tickInner() {
|
|
|
5267
5781
|
}
|
|
5268
5782
|
}
|
|
5269
5783
|
if (skipReasonChanged) {
|
|
5784
|
+
if (_isTickStale(myGeneration)) return;
|
|
5270
5785
|
mutateDispatch((dp) => { dp.pending = postDispatch.pending; return dp; });
|
|
5271
5786
|
}
|
|
5272
5787
|
}
|
|
@@ -5300,6 +5815,7 @@ module.exports = {
|
|
|
5300
5815
|
// Discovery
|
|
5301
5816
|
discoverWork, discoverFromPrs, discoverFromWorkItems, discoverCentralWorkItems,
|
|
5302
5817
|
materializePlansAsWorkItems,
|
|
5818
|
+
reservePrdFilename, // exported for testing (P-9b7e5d3c)
|
|
5303
5819
|
sweepStaleArchivedPrdBackups, // exported for testing
|
|
5304
5820
|
|
|
5305
5821
|
// Shared helpers (used by lifecycle.js and tests)
|
|
@@ -5333,6 +5849,14 @@ module.exports = {
|
|
|
5333
5849
|
// Tick
|
|
5334
5850
|
tick,
|
|
5335
5851
|
resolveMaxConcurrent, _pollIntervalMsFromTicks, _shouldRunPeriodicPhase, // exported for testing
|
|
5852
|
+
// P-c2e5a1d9-a — exported for testing the tick-generation force-release path
|
|
5853
|
+
_isTickStale,
|
|
5854
|
+
get tickGeneration() { return tickGeneration; },
|
|
5855
|
+
set tickGeneration(v) { tickGeneration = v; },
|
|
5856
|
+
get tickRunning() { return tickRunning; },
|
|
5857
|
+
set tickRunning(v) { tickRunning = v; },
|
|
5858
|
+
get _tickStartedAt() { return _tickStartedAt; },
|
|
5859
|
+
set _tickStartedAt(v) { _tickStartedAt = v; },
|
|
5336
5860
|
};
|
|
5337
5861
|
|
|
5338
5862
|
// ─── Entrypoint ─────────────────────────────────────────────────────────────
|