@yemi33/minions 0.1.2044 → 0.1.2046
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dashboard/js/command-center.js +64 -7
- package/dashboard/js/fre.js +3 -2
- package/dashboard/js/refresh.js +143 -2
- package/dashboard/js/render-prs.js +43 -9
- package/dashboard/js/settings.js +9 -5
- package/dashboard/styles.css +21 -0
- package/dashboard.js +308 -164
- package/docs/auto-discovery.md +3 -1
- package/docs/qa-runbook-lifecycle.md +71 -0
- package/docs/qa-runbooks.md +6 -5
- package/docs/runtime-adapters.md +9 -4
- package/docs/security.md +2 -1
- package/docs/watches.md +19 -19
- package/engine/cc-worker-pool.js +87 -11
- package/engine/cleanup.js +84 -2
- package/engine/dispatch.js +6 -0
- package/engine/kb-sweep.js +127 -0
- package/engine/lifecycle.js +18 -0
- package/engine/llm.js +148 -2
- package/engine/preflight.js +5 -5
- package/engine/queries.js +133 -27
- package/engine/shared.js +40 -3
- package/engine/timeout.js +4 -0
- package/engine.js +240 -11
- package/package.json +1 -1
package/engine/queries.js
CHANGED
|
@@ -528,7 +528,7 @@ function getAgents(config) {
|
|
|
528
528
|
|
|
529
529
|
return roster.map(a => {
|
|
530
530
|
// Resolve which CLI runtime this agent dispatches to: per-agent override
|
|
531
|
-
// → engine.defaultCli → '
|
|
531
|
+
// → engine.defaultCli → 'copilot'. Surfaced so the dashboard can show a
|
|
532
532
|
// runtime tag next to the agent name.
|
|
533
533
|
const runtime = shared.resolveAgentCli(a, config.engine || {});
|
|
534
534
|
const inboxFiles = allInboxFiles.filter(f => f.includes(a.id));
|
|
@@ -1770,10 +1770,18 @@ function _projectGitStatusEqual(a, b) {
|
|
|
1770
1770
|
function _scheduleProjectGitStatusRefresh(localPath, key, configuredMainBranch) {
|
|
1771
1771
|
const existing = _projectGitStatusCache.get(key);
|
|
1772
1772
|
if (existing && existing.promise) return existing.promise;
|
|
1773
|
-
const entry = existing || { ts: 0, value: PROJECT_GIT_STATUS_PENDING, promise: null };
|
|
1773
|
+
const entry = existing || { ts: 0, value: PROJECT_GIT_STATUS_PENDING, promise: null, refMtimes: null };
|
|
1774
1774
|
const prevValue = entry.value;
|
|
1775
|
+
// Snapshot ref mtimes BEFORE the probe so the next call compares against
|
|
1776
|
+
// an exact baseline rather than a Date.now() timestamp. On Windows
|
|
1777
|
+
// Date.now() can have ~15ms granularity while NTFS mtime is sub-ms, so
|
|
1778
|
+
// a file written shortly before the probe could appear `mtimeMs > ts`
|
|
1779
|
+
// even when nothing actually changed.
|
|
1780
|
+
const probeStartTs = Date.now();
|
|
1781
|
+
const probeStartRefMtimes = _snapshotProjectGitRefMtimes(localPath, configuredMainBranch);
|
|
1775
1782
|
entry.promise = _probeProjectGitStatus(localPath, configuredMainBranch).then(value => {
|
|
1776
|
-
entry.ts =
|
|
1783
|
+
entry.ts = probeStartTs;
|
|
1784
|
+
entry.refMtimes = probeStartRefMtimes;
|
|
1777
1785
|
entry.value = value;
|
|
1778
1786
|
entry.promise = null;
|
|
1779
1787
|
if (_onProjectGitStatusChanged && !_projectGitStatusEqual(prevValue, value)) {
|
|
@@ -1824,31 +1832,89 @@ function _resolveGitDir(localPath) {
|
|
|
1824
1832
|
return null;
|
|
1825
1833
|
}
|
|
1826
1834
|
|
|
1827
|
-
//
|
|
1828
|
-
//
|
|
1829
|
-
//
|
|
1830
|
-
//
|
|
1831
|
-
//
|
|
1832
|
-
//
|
|
1833
|
-
//
|
|
1834
|
-
//
|
|
1835
|
-
//
|
|
1836
|
-
|
|
1835
|
+
// Map a (possibly per-worktree) gitdir to the COMMON gitdir shared by every
// linked worktree of the same repository.
//
// Git keeps per-worktree state (HEAD, logs/HEAD, index) in the worktree's own
// gitdir, while repo-wide state (objects, refs/remotes/, FETCH_HEAD) lives in
// the common gitdir. A linked worktree records where that common gitdir is in
// `<per-worktree-gitdir>/commondir`: a one-line file holding either an
// absolute path or a path relative to the per-worktree gitdir (typically
// `../..`, i.e. `<main>/.git/worktrees/<name>` → `<main>/.git`).
//
// Returns:
//   - null when `gitDir` is falsy;
//   - `gitDir` unchanged when the `commondir` file cannot be read or is
//     blank (the main worktree has no such file, so its gitdir already IS
//     the common gitdir);
//   - otherwise the recorded location, resolved against `gitDir` when it is
//     relative.
//
// Callers should track FETCH_HEAD and refs/remotes/origin/<comparator> under
// the returned directory, and keep logs/HEAD on the per-worktree gitdir so
// per-worktree HEAD moves still register.
function _resolveCommonGitDir(gitDir) {
  if (!gitDir) return null;

  const pointerFile = path.join(gitDir, 'commondir');
  let recorded;
  try {
    recorded = fs.readFileSync(pointerFile, { encoding: 'utf8', flag: 'r' }).trim();
  } catch {
    // Unreadable / missing pointer file: treat gitDir as the common gitdir.
    return gitDir;
  }

  if (recorded === '') return gitDir;
  if (path.isAbsolute(recorded)) return recorded;
  return path.resolve(gitDir, recorded);
}
|
|
1858
|
+
|
|
1859
|
+
// List the git ref files whose mtimes are watched to bust the per-project
// git-status cache:
//   - `logs/HEAD` under the per-worktree gitdir (from `_resolveGitDir`);
//   - `FETCH_HEAD` under the common gitdir (from `_resolveCommonGitDir`);
//   - `refs/remotes/origin/<comparator>` under the common gitdir, only when
//     `configuredMainBranch` is non-blank after trimming.
// `logs/HEAD` and `FETCH_HEAD` are resolved the same way as in the status
// mtime trackers; the remote-tracking ref is additional to that pair.
//
// Returns null when no gitdir can be resolved for `localPath`, otherwise an
// array of absolute-or-relative paths in the order listed above.
function _projectGitRefFiles(localPath, configuredMainBranch) {
  const worktreeGitDir = _resolveGitDir(localPath);
  if (!worktreeGitDir) return null;

  const sharedGitDir = _resolveCommonGitDir(worktreeGitDir);
  const headLog = path.join(worktreeGitDir, 'logs', 'HEAD');
  const fetchHead = path.join(sharedGitDir, 'FETCH_HEAD');

  // Falsy branch values are passed through untouched; anything else is
  // stringified and trimmed, and only a non-empty result adds the third path.
  const branch = configuredMainBranch && String(configuredMainBranch).trim();
  if (!branch) return [headLog, fetchHead];

  return [headLog, fetchHead, path.join(sharedGitDir, 'refs', 'remotes', 'origin', branch)];
}
|
|
1877
|
+
|
|
1878
|
+
// Capture `mtimeMs` for every file returned by `_projectGitRefFiles`, keyed by
// path, in a prototype-less object. A file that cannot be stat'ed is recorded
// as `null`, so a later transition from missing to present shows up as a
// difference. Returns null when `_projectGitRefFiles` yields no file list.
//
// The snapshot is meant to be compared by strict inequality against a later
// snapshot rather than against a wall-clock timestamp, which keeps the check
// independent of clock/mtime resolution mismatches (the motivating case noted
// by the original author: coarse `Date.now()` vs sub-millisecond NTFS mtimes
// on Windows).
function _snapshotProjectGitRefMtimes(localPath, configuredMainBranch) {
  const tracked = _projectGitRefFiles(localPath, configuredMainBranch);
  if (!tracked) return null;

  return tracked.reduce((mtimes, refPath) => {
    let stamp = null; // stays null when stat fails (e.g. ENOENT)
    try {
      stamp = fs.statSync(refPath).mtimeMs;
    } catch {
      // Deliberately recorded as null rather than skipped.
    }
    mtimes[refPath] = stamp;
    return mtimes;
  }, Object.create(null));
}
|
|
1894
|
+
|
|
1895
|
+
// Report whether any tracked git ref file differs — by mtime or by existence —
// from `snapshot`, the mtime map captured around the last probe.
//
// Used by `getProjectGitStatus` to bypass its TTL after operations such as
// `git pull`, `git fetch` or `git checkout`, so the next status poll reflects
// the new HEAD / ahead-behind instead of waiting for the TTL to lapse
// (W-mphdmr8c00030124). The cost is one fresh snapshot per call, i.e. one
// stat per tracked file.
//
// Returns false when there is nothing to compare against:
//   - `snapshot` is falsy (first call, or a cache entry created before
//     snapshots were recorded) — the cached value is kept and the TTL alone
//     decides freshness;
//   - the current snapshot cannot be taken (no resolvable gitdir).
function _projectGitRefsAdvancedSince(localPath, configuredMainBranch, snapshot) {
  if (!snapshot) return false;

  const latest = _snapshotProjectGitRefMtimes(localPath, configuredMainBranch);
  if (!latest) return false;

  // Any previously tracked file whose recorded value changed (including
  // present <-> missing, or no longer tracked at all).
  const drifted = Object.keys(snapshot).some((refPath) => latest[refPath] !== snapshot[refPath]);
  if (drifted) return true;

  // Any file tracked now that the snapshot never knew about.
  return Object.keys(latest).some((refPath) => !(refPath in snapshot));
}
|
|
@@ -1865,7 +1931,7 @@ function getProjectGitStatus(localPath, configuredMainBranch = null) {
|
|
|
1865
1931
|
// user-visible lag) because the rebuilt fast-state still hits this
|
|
1866
1932
|
// cache and never schedules a refresh until the TTL itself expires.
|
|
1867
1933
|
if (cached && cached.ts && (now - cached.ts) < PROJECT_GIT_STATUS_TTL
|
|
1868
|
-
&& !_projectGitRefsAdvancedSince(localPath, cached.
|
|
1934
|
+
&& !_projectGitRefsAdvancedSince(localPath, configuredMainBranch, cached.refMtimes)) {
|
|
1869
1935
|
return cached.value;
|
|
1870
1936
|
}
|
|
1871
1937
|
// Cheap synchronous existsSync — short-circuits a path that just disappeared
|
|
@@ -2052,8 +2118,16 @@ function getStatusFastStateMtimePaths(config) {
|
|
|
2052
2118
|
files.push(shared.projectPrPath(p));
|
|
2053
2119
|
if (p && p.localPath) {
|
|
2054
2120
|
const gitDir = _resolveGitDir(p.localPath) || path.join(p.localPath, '.git');
|
|
2121
|
+
// logs/HEAD is per-worktree (HEAD moves, commits, checkouts).
|
|
2122
|
+
// FETCH_HEAD lives in the COMMON gitdir — `git fetch` from a linked
|
|
2123
|
+
// worktree writes to `<main>/.git/FETCH_HEAD`, not to the
|
|
2124
|
+
// per-worktree subdir. Tracking only the per-worktree path here
|
|
2125
|
+
// would leave linked-worktree projects stuck after `git fetch`
|
|
2126
|
+
// (the file at `<main>/.git/worktrees/<name>/FETCH_HEAD` never
|
|
2127
|
+
// exists — verified empirically).
|
|
2128
|
+
const commonGitDir = _resolveCommonGitDir(gitDir);
|
|
2055
2129
|
files.push(path.join(gitDir, 'logs', 'HEAD'));
|
|
2056
|
-
files.push(path.join(
|
|
2130
|
+
files.push(path.join(commonGitDir, 'FETCH_HEAD'));
|
|
2057
2131
|
}
|
|
2058
2132
|
}
|
|
2059
2133
|
return files;
|
|
@@ -2100,9 +2174,25 @@ function getStatusFastStateMtimePaths(config) {
|
|
|
2100
2174
|
* Files intentionally NOT tracked here:
|
|
2101
2175
|
* - version, autoMode, installId — change only on human/CLI edits, which
|
|
2102
2176
|
* already pop the slow-state via reloadConfig + the 60 s TTL.
|
|
2103
|
-
*
|
|
2104
|
-
*
|
|
2105
|
-
*
|
|
2177
|
+
*
|
|
2178
|
+
* Per-project `.git/logs/HEAD` + `.git/FETCH_HEAD` ARE tracked here in
|
|
2179
|
+
* addition to the fast-state tracker. The project payload (`projects:`
|
|
2180
|
+
* with ahead/behind counts) lives in the slow-state slice, and
|
|
2181
|
+
* `getProjectGitStatus` is called only from `_buildStatusSlowState`.
|
|
2182
|
+
* Without tracking the git refs here, a fresh `git pull` busts
|
|
2183
|
+
* fast-state but leaves slow-state cached for up to 60 s —
|
|
2184
|
+
* `getProjectGitStatus` is never called, so the background probe never
|
|
2185
|
+
* runs, so the `_setOnProjectGitStatusChanged` callback never fires,
|
|
2186
|
+
* so the cache stays stale. Tracking the refs in BOTH tiers means any
|
|
2187
|
+
* HEAD-moving / fetching git operation busts slow-state on the next
|
|
2188
|
+
* poll, the rebuild calls `getProjectGitStatus` which then schedules
|
|
2189
|
+
* the probe, and the probe's invalidation callback finishes the
|
|
2190
|
+
* round-trip within ~1 poll cycle. `logs/HEAD` is resolved via
|
|
2191
|
+
* `_resolveGitDir` (per-worktree); `FETCH_HEAD` is resolved via
|
|
2192
|
+
* `_resolveCommonGitDir` (shared across all linked worktrees of the
|
|
2193
|
+
* same repo — `git fetch` from a linked worktree writes to the main
|
|
2194
|
+
* gitdir, not the per-worktree subdir, so tracking the per-worktree
|
|
2195
|
+
* path would silently miss every fetch).
|
|
2106
2196
|
*
|
|
2107
2197
|
* NOTE: Detecting a change here busts the dashboard's slow-state cache, but
|
|
2108
2198
|
* the inner per-source caches (`queries._skillsCache` 30 s, dashboard's
|
|
@@ -2170,6 +2260,22 @@ function getStatusSlowStateMtimePaths(config) {
|
|
|
2170
2260
|
}
|
|
2171
2261
|
}
|
|
2172
2262
|
|
|
2263
|
+
// Per-project git refs — see the "Per-project .git/logs/HEAD" note in
|
|
2264
|
+
// the header. Same pair the fast-state tracker watches; `logs/HEAD` is
|
|
2265
|
+
// per-worktree (resolved via `_resolveGitDir`) while `FETCH_HEAD` lives
|
|
2266
|
+
// in the COMMON gitdir (resolved via `_resolveCommonGitDir`) — git
|
|
2267
|
+
// fetches from a linked worktree write to `<main>/.git/FETCH_HEAD`,
|
|
2268
|
+
// not into the per-worktree subdir. For the main worktree both
|
|
2269
|
+
// resolvers return the same gitdir, so the behavior collapses to the
|
|
2270
|
+
// expected `<localPath>/.git/{logs/HEAD,FETCH_HEAD}` pair.
|
|
2271
|
+
for (const project of projects) {
|
|
2272
|
+
if (!project || !project.localPath) continue;
|
|
2273
|
+
const gitDir = _resolveGitDir(project.localPath) || path.join(project.localPath, '.git');
|
|
2274
|
+
const commonGitDir = _resolveCommonGitDir(gitDir);
|
|
2275
|
+
files.push(path.join(gitDir, 'logs', 'HEAD'));
|
|
2276
|
+
files.push(path.join(commonGitDir, 'FETCH_HEAD'));
|
|
2277
|
+
}
|
|
2278
|
+
|
|
2173
2279
|
return files;
|
|
2174
2280
|
}
|
|
2175
2281
|
|
package/engine/shared.js
CHANGED
|
@@ -1779,6 +1779,7 @@ const ENGINE_DEFAULTS = {
|
|
|
1779
1779
|
autoReReviewPrs: true, // auto-dispatch review agents after a PR fix is pushed
|
|
1780
1780
|
autoFixReviewFeedback: true, // auto-dispatch fix agents for minions review changes-requested verdicts
|
|
1781
1781
|
autoFixHumanComments: true, // auto-dispatch fix agents for actionable human PR comments
|
|
1782
|
+
autoConsolidateMemory: false, // opt-in: periodically spawn engine/kb-sweep-runner.js from the tick loop (4h cadence). Inbox→notes consolidation already runs every tick via consolidateInbox; this flag only controls the KB sweep.
|
|
1782
1783
|
prNoOpFixPauseAttempts: 2, // pause one PR automation cause after repeated no-op fixes for unchanged evidence
|
|
1783
1784
|
completionReportRetentionDays: 90, // retain completion report sidecars beyond capped dispatch history
|
|
1784
1785
|
completionReportMaxFiles: 5000, // hard cap for completion report sidecars during cleanup
|
|
@@ -1787,6 +1788,7 @@ const ENGINE_DEFAULTS = {
|
|
|
1787
1788
|
evalLoop: true, // enable review→fix loop after implementation completes
|
|
1788
1789
|
evalMaxCost: null, // USD ceiling per work item across all eval iterations; null = no limit (gather baseline data first)
|
|
1789
1790
|
maxRetries: 3, // max dispatch retries before marking work item as failed
|
|
1791
|
+
maxRetriesPerAgent: 2, // W-mpmwxn1j — per-agent retry cap. When the SAME agent fails the same WI this many times, the next retry MUST reassign to a different eligible agent (consults routing.md + agent availability). Falls back to the same agent only when no alternate is available. Counted separately from `maxRetries` (which caps total retries across all agents) and tracked on the WI as `_retriesByAgent: { agentId: count }`. Hard-pinned agents bypass reassignment (operator intent wins).
|
|
1790
1792
|
maxPhantomRetries: 3, // max retries for "phantom completion" (runtime crashed before emitting type:"result"); tracked separately from _retryCount so phantom retries don't pollute the normal PR-attachment retry budget. See engine/lifecycle.markMissingPrAttachment + detectNonTerminalResultSummary.
|
|
1791
1793
|
minRetryGapMs: 120000, // 2min — minimum gap between retry dispatches for the same work item; prevents tight retry loops when an idempotent agent (e.g. review bailing out on a duplicate) cannot produce the expected output (#1770)
|
|
1792
1794
|
pipelineApiRetries: 2, // max attempts for pipeline API calls
|
|
@@ -1854,7 +1856,7 @@ const ENGINE_DEFAULTS = {
|
|
|
1854
1856
|
// Engine code MUST go through the resolveAgent*/resolveCc* helpers below;
|
|
1855
1857
|
// never read these fields directly. New runtimes are added by registering
|
|
1856
1858
|
// an adapter in engine/runtimes/index.js — these defaults stay stable.
|
|
1857
|
-
defaultCli: '
|
|
1859
|
+
defaultCli: 'copilot', // fleet-wide CLI runtime (must be a key in engine/runtimes/index.js); flipped from 'claude' in W-mpmwxkk40007c995 — Copilot is now the primary runtime, Claude remains supported as an opt-in
|
|
1858
1860
|
defaultModel: undefined, // fleet-wide model; undefined = let the runtime adapter pick its own default
|
|
1859
1861
|
ccCli: undefined, // CC/doc-chat CLI override; undefined = inherit defaultCli (independent of agent path)
|
|
1860
1862
|
ccModel: undefined, // CC/doc-chat model override; undefined = inherit defaultModel
|
|
@@ -1877,6 +1879,7 @@ const ENGINE_DEFAULTS = {
|
|
|
1877
1879
|
removeWorktreeFailureTtlMs: 24 * 60 * 60 * 1000, // stale failed paths are forgotten after a day
|
|
1878
1880
|
removeWorktreeFailureMaxEntries: 1000, // bound failed-worktree retry suppression cache
|
|
1879
1881
|
ccMaxTurns: 50, // max tool-use turns per CC/doc-chat call before CLI stops (per response, not per session)
|
|
1882
|
+
ccTurnTimeoutMs: 300000, // W-mpmwxni2000c25c7-b: wall-clock cap per CC/doc-chat turn; on expiry the in-flight LLM call is aborted and the handler surfaces `{code:'cc-turn-timeout', retriable:true}` instead of hanging the UI
|
|
1880
1883
|
docSessionMaxEntries: 200, // cap doc-chat session map/disk store by least-recent activity (LRU; sessions are non-expiring otherwise)
|
|
1881
1884
|
ccLiveStreamMaxAgeMs: 30 * 60 * 1000, // hard cap reconnect buffers if abort/cleanup stalls
|
|
1882
1885
|
metricsFlushIntervalMs: 10000, // batch trackEngineUsage writes to metrics.json — flushed every 10s instead of per-call to cut lock contention and dashboard mtime churn
|
|
@@ -2082,7 +2085,7 @@ function _isMeaningful(v) {
|
|
|
2082
2085
|
* Resolve the CLI runtime for a per-agent spawn. Priority:
|
|
2083
2086
|
* 1. `agent.cli` — per-agent override
|
|
2084
2087
|
* 2. `engine.defaultCli` — fleet default
|
|
2085
|
-
* 3. `ENGINE_DEFAULTS.defaultCli` ('
|
|
2088
|
+
* 3. `ENGINE_DEFAULTS.defaultCli` ('copilot') — hardcoded fallback
|
|
2086
2089
|
*
|
|
2087
2090
|
* Does NOT fall through to `engine.ccCli`. CC and agents are independent paths.
|
|
2088
2091
|
*/
|
|
@@ -2096,7 +2099,7 @@ function resolveAgentCli(agent, engine) {
|
|
|
2096
2099
|
* Resolve the CLI runtime for the Command Center / doc-chat. Priority:
|
|
2097
2100
|
* 1. `engine.ccCli` — CC-only override
|
|
2098
2101
|
* 2. `engine.defaultCli` — fleet default
|
|
2099
|
-
* 3. `ENGINE_DEFAULTS.defaultCli` ('
|
|
2102
|
+
* 3. `ENGINE_DEFAULTS.defaultCli` ('copilot') — hardcoded fallback
|
|
2100
2103
|
*
|
|
2101
2104
|
* Does NOT inspect any agent overrides. CC has no notion of "which agent" —
|
|
2102
2105
|
* it's a fleet-wide singleton.
|
|
@@ -4752,6 +4755,39 @@ function safeSlugComponent(text, maxLen = 80) {
|
|
|
4752
4755
|
return `${base}-${hash}`.slice(0, maxLen);
|
|
4753
4756
|
}
|
|
4754
4757
|
|
|
4758
|
+
// W-mpmwxn1j — Per-agent retry tracking.
//
// Increment the failure counter for `agentId` on work item `wi` and return the
// new count. Counters live on the work item itself as
// `_retriesByAgent: { agentId: count }`; a missing or malformed value
// (non-object, or an array) is replaced with a fresh empty object first.
// A stored count that does not coerce to a non-zero number is treated as 0.
//
// `wi` is mutated in place, so callers are expected to pass the live object
// from inside their work-item mutation callback. When either `wi` or
// `agentId` is falsy nothing is written and 0 is returned — this keeps an
// anonymous failure from adding an `undefined` key to the map.
//
// The cap this counter is compared against is `maxRetriesPerAgent`
// (see `resolveMaxRetriesPerAgent`).
function bumpAgentRetryCount(wi, agentId) {
  if (!wi || !agentId) return 0;

  const existing = wi._retriesByAgent;
  const isUsableMap = Boolean(existing) && typeof existing === 'object' && !Array.isArray(existing);
  if (!isUsableMap) {
    wi._retriesByAgent = {};
  }

  const counts = wi._retriesByAgent;
  const previous = Number(counts[agentId]) || 0;
  const next = previous + 1;
  counts[agentId] = next;
  return next;
}
|
|
4776
|
+
|
|
4777
|
+
// Read the per-agent failure counter recorded on work item `wi` for `agentId`.
// Returns 0 when `wi` or `agentId` is falsy, when `wi._retriesByAgent` is
// missing or not a plain (non-array) object, or when the stored value does
// not coerce to a non-zero number. Never mutates `wi`.
function getAgentRetryCount(wi, agentId) {
  if (!wi || !agentId) return 0;

  const counts = wi._retriesByAgent;
  const isUsableMap = Boolean(counts) && typeof counts === 'object' && !Array.isArray(counts);
  if (!isUsableMap) return 0;

  const recorded = Number(counts[agentId]);
  return recorded || 0;
}
|
|
4783
|
+
|
|
4784
|
+
// Resolve the per-agent retry cap from `config.engine.maxRetriesPerAgent`.
// The configured value is coerced with `Number(...)` and used only when the
// result is finite and strictly positive; otherwise (missing config, zero,
// negative, NaN, Infinity) the built-in `ENGINE_DEFAULTS.maxRetriesPerAgent`
// is returned.
function resolveMaxRetriesPerAgent(config) {
  const configured = Number(config?.engine?.maxRetriesPerAgent);
  const isUsable = Number.isFinite(configured) && configured > 0;
  return isUsable ? configured : ENGINE_DEFAULTS.maxRetriesPerAgent;
}
|
|
4790
|
+
|
|
4755
4791
|
const PR_AUTOMATION_CAUSE_LIMIT = 50;
|
|
4756
4792
|
|
|
4757
4793
|
function getPrAutomationCauses(pr) {
|
|
@@ -4912,6 +4948,7 @@ module.exports = {
|
|
|
4912
4948
|
tailTextBytes,
|
|
4913
4949
|
appendTextTail,
|
|
4914
4950
|
writeToInbox, parseNoteId,
|
|
4951
|
+
bumpAgentRetryCount, getAgentRetryCount, resolveMaxRetriesPerAgent,
|
|
4915
4952
|
exec,
|
|
4916
4953
|
execAsync,
|
|
4917
4954
|
execSilent,
|
package/engine/timeout.js
CHANGED
|
@@ -587,6 +587,10 @@ function checkTimeouts(config) {
|
|
|
587
587
|
log('info', `Reconcile: work item ${item.id} agent died — auto-retry ${retries + 1}/${maxRetries}`);
|
|
588
588
|
item.status = WI_STATUS.PENDING;
|
|
589
589
|
item._retryCount = retries + 1;
|
|
590
|
+
// W-mpmwxn1j — bump per-agent retry count BEFORE deleting
|
|
591
|
+
// dispatched_to so the next dispatch can reassign away from the
|
|
592
|
+
// agent that died. Anonymous (no dispatched_to) failures don't bump.
|
|
593
|
+
if (item.dispatched_to) shared.bumpAgentRetryCount(item, item.dispatched_to);
|
|
590
594
|
delete item.dispatched_at;
|
|
591
595
|
delete item.dispatched_to;
|
|
592
596
|
delete item._pendingReason;
|