@yemi33/minions 0.1.2044 → 0.1.2046

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/engine/queries.js CHANGED
@@ -528,7 +528,7 @@ function getAgents(config) {
528
528
 
529
529
  return roster.map(a => {
530
530
  // Resolve which CLI runtime this agent dispatches to: per-agent override
531
- // → engine.defaultCli → 'claude'. Surfaced so the dashboard can show a
531
+ // → engine.defaultCli → 'copilot'. Surfaced so the dashboard can show a
532
532
  // runtime tag next to the agent name.
533
533
  const runtime = shared.resolveAgentCli(a, config.engine || {});
534
534
  const inboxFiles = allInboxFiles.filter(f => f.includes(a.id));
@@ -1770,10 +1770,18 @@ function _projectGitStatusEqual(a, b) {
1770
1770
  function _scheduleProjectGitStatusRefresh(localPath, key, configuredMainBranch) {
1771
1771
  const existing = _projectGitStatusCache.get(key);
1772
1772
  if (existing && existing.promise) return existing.promise;
1773
- const entry = existing || { ts: 0, value: PROJECT_GIT_STATUS_PENDING, promise: null };
1773
+ const entry = existing || { ts: 0, value: PROJECT_GIT_STATUS_PENDING, promise: null, refMtimes: null };
1774
1774
  const prevValue = entry.value;
1775
+ // Snapshot ref mtimes BEFORE the probe so the next call compares against
1776
+ // an exact baseline rather than a Date.now() timestamp. On Windows
1777
+ // Date.now() can have ~15ms granularity while NTFS mtime is sub-ms, so
1778
+ // a file written shortly before the probe could appear `mtimeMs > ts`
1779
+ // even when nothing actually changed.
1780
+ const probeStartTs = Date.now();
1781
+ const probeStartRefMtimes = _snapshotProjectGitRefMtimes(localPath, configuredMainBranch);
1775
1782
  entry.promise = _probeProjectGitStatus(localPath, configuredMainBranch).then(value => {
1776
- entry.ts = Date.now();
1783
+ entry.ts = probeStartTs;
1784
+ entry.refMtimes = probeStartRefMtimes;
1777
1785
  entry.value = value;
1778
1786
  entry.promise = null;
1779
1787
  if (_onProjectGitStatusChanged && !_projectGitStatusEqual(prevValue, value)) {
@@ -1824,31 +1832,89 @@ function _resolveGitDir(localPath) {
1824
1832
  return null;
1825
1833
  }
1826
1834
 
1827
- // Return true when any of the per-project git ref files (logs/HEAD,
1828
- // FETCH_HEAD, refs/remotes/origin/<comparator>) have mtimeMs > cachedTs.
1829
- // Lets `getProjectGitStatus` bypass its 15s TTL after `git pull`, `git
1830
- // fetch`, `git checkout`, etc. so the next /api/status reflects the new
1831
- // HEAD / ahead-behind within one SPA poll instead of waiting out the TTL
1832
- // (W-mphdmr8c00030124). Tolerates ENOENT on FETCH_HEAD / refs (never-
1833
- // fetched repos simply haven't moved those files yet). Cost ≤3 statSync
1834
- // per project per /api/status build — well under the 'cheap' budget
1835
- // called out in getStatusFastStateMtimePaths's docstring.
1836
- function _projectGitRefsAdvancedSince(localPath, cachedTs, configuredMainBranch) {
1835
+ // For a per-worktree gitdir, resolve the COMMON gitdir shared across all
1836
+ // linked worktrees of the same repo. Git writes per-worktree state
1837
+ // (HEAD, logs/HEAD, index) into the worktree-specific gitdir, but state
1838
+ // that's logically per-repo (objects, refs/remotes/, FETCH_HEAD) into
1839
+ // the common gitdir. The common location is recorded in
1840
+ // `<per-worktree-gitdir>/commondir`, a file containing either an
1841
+ // absolute path or a path relative to the per-worktree gitdir
1842
+ // (typically `../..`, which resolves from `<main>/.git/worktrees/<name>` to
1843
+ // `<main>/.git`). For the main worktree there is no commondir file and
1844
+ // `gitDir` already IS the common gitdir, so this function returns
1845
+ // `gitDir` unchanged. Callers should track FETCH_HEAD and
1846
+ // refs/remotes/origin/<comparator> against the COMMON gitdir; logs/HEAD
1847
+ // must stay on the per-worktree gitdir so per-worktree HEAD moves still
1848
+ // register.
1849
+ function _resolveCommonGitDir(gitDir) {
1850
+ if (!gitDir) return null;
1851
+ const commondirPath = path.join(gitDir, 'commondir');
1852
+ let raw = '';
1853
+ try { raw = fs.readFileSync(commondirPath, { encoding: 'utf8', flag: 'r' }).trim(); }
1854
+ catch { return gitDir; }
1855
+ if (!raw) return gitDir;
1856
+ return path.isAbsolute(raw) ? raw : path.resolve(gitDir, raw);
1857
+ }
1858
+
1859
+ // Enumerate the per-project git ref files we watch for cache-busting:
1860
+ // logs/HEAD (per-worktree gitdir), FETCH_HEAD + refs/remotes/origin/* (common
1861
+ // gitdir for linked worktrees). Same paths as the fast-state mtime tracker
1862
+ // so callers see a coherent view across surfaces.
1863
+ function _projectGitRefFiles(localPath, configuredMainBranch) {
1837
1864
  const gitDir = _resolveGitDir(localPath);
1838
- if (!gitDir) return false;
1839
- const candidates = [
1865
+ if (!gitDir) return null;
1866
+ const commonGitDir = _resolveCommonGitDir(gitDir);
1867
+ const files = [
1840
1868
  path.join(gitDir, 'logs', 'HEAD'),
1841
- path.join(gitDir, 'FETCH_HEAD'),
1869
+ path.join(commonGitDir, 'FETCH_HEAD'),
1842
1870
  ];
1843
1871
  const comparator = configuredMainBranch && String(configuredMainBranch).trim();
1844
1872
  if (comparator) {
1845
- candidates.push(path.join(gitDir, 'refs', 'remotes', 'origin', comparator));
1873
+ files.push(path.join(commonGitDir, 'refs', 'remotes', 'origin', comparator));
1846
1874
  }
1847
- for (const file of candidates) {
1848
- try {
1849
- const st = fs.statSync(file);
1850
- if (st.mtimeMs > cachedTs) return true;
1851
- } catch { /* ENOENT / EPERM file just hasn't moved */ }
1875
+ return files;
1876
+ }
1877
+
1878
+ // Snapshot mtimeMs for each ref file. Missing files record `null`. Used as
1879
+ // the baseline that the next `getProjectGitStatus` call compares against using
1880
+ // inequality, not threshold-vs-timestamp, so the result is precision-
1881
+ // independent (Windows `Date.now()` can be 15ms coarse while NTFS mtime is
1882
+ // sub-millisecond, which used to make threshold checks fire spuriously on
1883
+ // freshly-written files).
1884
+ function _snapshotProjectGitRefMtimes(localPath, configuredMainBranch) {
1885
+ const files = _projectGitRefFiles(localPath, configuredMainBranch);
1886
+ if (!files) return null;
1887
+ const out = Object.create(null);
1888
+ for (const f of files) {
1889
+ try { out[f] = fs.statSync(f).mtimeMs; }
1890
+ catch { out[f] = null; /* ENOENT recorded as null — flipping to present must bust */ }
1891
+ }
1892
+ return out;
1893
+ }
1894
+
1895
+ // Return true when ANY tracked ref file's mtime (or existence) differs from
1896
+ // the snapshot captured during the last probe. Replaces the older threshold-
1897
+ // vs-cachedTs check that suffered from `Date.now()`/`mtimeMs` resolution
1898
+ // races on Windows. Lets `getProjectGitStatus` bypass its 15s TTL after
1899
+ // `git pull`, `git fetch`, `git checkout`, etc. so the next /api/status
1900
+ // reflects the new HEAD / ahead-behind within one SPA poll instead of
1901
+ // waiting out the TTL (W-mphdmr8c00030124). Cost: 2-3 statSync per call —
1902
+ // well under the 'cheap' budget.
1903
+ function _projectGitRefsAdvancedSince(localPath, configuredMainBranch, snapshot) {
1904
+ // No snapshot yet (legacy entry shape OR first call) — preserve the
1905
+ // current cached value so the TTL-only fast-path still works. A real
1906
+ // change still surfaces on the next /api/status because the fast-state
1907
+ // mtime tracker watches the same files and will bust the upstream cache.
1908
+ if (!snapshot) return false;
1909
+ const current = _snapshotProjectGitRefMtimes(localPath, configuredMainBranch);
1910
+ if (!current) return false;
1911
+ for (const file of Object.keys(snapshot)) {
1912
+ if (current[file] !== snapshot[file]) return true;
1913
+ }
1914
+ // Also catch a file that appeared since the snapshot (e.g. first `git
1915
+ // fetch` materialises FETCH_HEAD).
1916
+ for (const file of Object.keys(current)) {
1917
+ if (!(file in snapshot)) return true;
1852
1918
  }
1853
1919
  return false;
1854
1920
  }
@@ -1865,7 +1931,7 @@ function getProjectGitStatus(localPath, configuredMainBranch = null) {
1865
1931
  // user-visible lag) because the rebuilt fast-state still hits this
1866
1932
  // cache and never schedules a refresh until the TTL itself expires.
1867
1933
  if (cached && cached.ts && (now - cached.ts) < PROJECT_GIT_STATUS_TTL
1868
- && !_projectGitRefsAdvancedSince(localPath, cached.ts, configuredMainBranch)) {
1934
+ && !_projectGitRefsAdvancedSince(localPath, configuredMainBranch, cached.refMtimes)) {
1869
1935
  return cached.value;
1870
1936
  }
1871
1937
  // Cheap synchronous existsSync — short-circuits a path that just disappeared
@@ -2052,8 +2118,16 @@ function getStatusFastStateMtimePaths(config) {
2052
2118
  files.push(shared.projectPrPath(p));
2053
2119
  if (p && p.localPath) {
2054
2120
  const gitDir = _resolveGitDir(p.localPath) || path.join(p.localPath, '.git');
2121
+ // logs/HEAD is per-worktree (HEAD moves, commits, checkouts).
2122
+ // FETCH_HEAD lives in the COMMON gitdir — `git fetch` from a linked
2123
+ // worktree writes to `<main>/.git/FETCH_HEAD`, not to the
2124
+ // per-worktree subdir. Tracking only the per-worktree path here
2125
+ // would leave linked-worktree projects stuck after `git fetch`
2126
+ // (the file at `<main>/.git/worktrees/<name>/FETCH_HEAD` never
2127
+ // exists — verified empirically).
2128
+ const commonGitDir = _resolveCommonGitDir(gitDir);
2055
2129
  files.push(path.join(gitDir, 'logs', 'HEAD'));
2056
- files.push(path.join(gitDir, 'FETCH_HEAD'));
2130
+ files.push(path.join(commonGitDir, 'FETCH_HEAD'));
2057
2131
  }
2058
2132
  }
2059
2133
  return files;
@@ -2100,9 +2174,25 @@ function getStatusFastStateMtimePaths(config) {
2100
2174
  * Files intentionally NOT tracked here:
2101
2175
  * - version, autoMode, installId — change only on human/CLI edits, which
2102
2176
  * already pop the slow-state via reloadConfig + the 60 s TTL.
2103
- * - project git state — already invalidated via the
2104
- * `_setOnProjectGitStatusChanged` callback into `invalidateStatusCache`
2105
- * (W-mpgrk5cy fix); also tracked in fast-state via `.git/logs/HEAD`.
2177
+ *
2178
+ * Per-project `.git/logs/HEAD` + `.git/FETCH_HEAD` ARE tracked here in
2179
+ * addition to the fast-state tracker. The project payload (`projects:`
2180
+ * with ahead/behind counts) lives in the slow-state slice, and
2181
+ * `getProjectGitStatus` is called only from `_buildStatusSlowState`.
2182
+ * Without tracking the git refs here, a fresh `git pull` busts
2183
+ * fast-state but leaves slow-state cached for up to 60 s —
2184
+ * `getProjectGitStatus` is never called, so the background probe never
2185
+ * runs, so the `_setOnProjectGitStatusChanged` callback never fires,
2186
+ * so the cache stays stale. Tracking the refs in BOTH tiers means any
2187
+ * HEAD-moving / fetching git operation busts slow-state on the next
2188
+ * poll, the rebuild calls `getProjectGitStatus` which then schedules
2189
+ * the probe, and the probe's invalidation callback finishes the
2190
+ * round-trip within ~1 poll cycle. `logs/HEAD` is resolved via
2191
+ * `_resolveGitDir` (per-worktree); `FETCH_HEAD` is resolved via
2192
+ * `_resolveCommonGitDir` (shared across all linked worktrees of the
2193
+ * same repo — `git fetch` from a linked worktree writes to the main
2194
+ * gitdir, not the per-worktree subdir, so tracking the per-worktree
2195
+ * path would silently miss every fetch).
2106
2196
  *
2107
2197
  * NOTE: Detecting a change here busts the dashboard's slow-state cache, but
2108
2198
  * the inner per-source caches (`queries._skillsCache` 30 s, dashboard's
@@ -2170,6 +2260,22 @@ function getStatusSlowStateMtimePaths(config) {
2170
2260
  }
2171
2261
  }
2172
2262
 
2263
+ // Per-project git refs — see the "Per-project .git/logs/HEAD" note in
2264
+ // the header. Same pair the fast-state tracker watches; `logs/HEAD` is
2265
+ // per-worktree (resolved via `_resolveGitDir`) while `FETCH_HEAD` lives
2266
+ // in the COMMON gitdir (resolved via `_resolveCommonGitDir`) — git
2267
+ // fetches from a linked worktree write to `<main>/.git/FETCH_HEAD`,
2268
+ // not into the per-worktree subdir. For the main worktree both
2269
+ // resolvers return the same gitdir, so the behavior collapses to the
2270
+ // expected `<localPath>/.git/{logs/HEAD,FETCH_HEAD}` pair.
2271
+ for (const project of projects) {
2272
+ if (!project || !project.localPath) continue;
2273
+ const gitDir = _resolveGitDir(project.localPath) || path.join(project.localPath, '.git');
2274
+ const commonGitDir = _resolveCommonGitDir(gitDir);
2275
+ files.push(path.join(gitDir, 'logs', 'HEAD'));
2276
+ files.push(path.join(commonGitDir, 'FETCH_HEAD'));
2277
+ }
2278
+
2173
2279
  return files;
2174
2280
  }
2175
2281
 
package/engine/shared.js CHANGED
@@ -1779,6 +1779,7 @@ const ENGINE_DEFAULTS = {
1779
1779
  autoReReviewPrs: true, // auto-dispatch review agents after a PR fix is pushed
1780
1780
  autoFixReviewFeedback: true, // auto-dispatch fix agents for minions review changes-requested verdicts
1781
1781
  autoFixHumanComments: true, // auto-dispatch fix agents for actionable human PR comments
1782
+ autoConsolidateMemory: false, // opt-in: periodically spawn engine/kb-sweep-runner.js from the tick loop (4h cadence). Inbox→notes consolidation already runs every tick via consolidateInbox; this flag only controls the KB sweep.
1782
1783
  prNoOpFixPauseAttempts: 2, // pause one PR automation cause after repeated no-op fixes for unchanged evidence
1783
1784
  completionReportRetentionDays: 90, // retain completion report sidecars beyond capped dispatch history
1784
1785
  completionReportMaxFiles: 5000, // hard cap for completion report sidecars during cleanup
@@ -1787,6 +1788,7 @@ const ENGINE_DEFAULTS = {
1787
1788
  evalLoop: true, // enable review→fix loop after implementation completes
1788
1789
  evalMaxCost: null, // USD ceiling per work item across all eval iterations; null = no limit (gather baseline data first)
1789
1790
  maxRetries: 3, // max dispatch retries before marking work item as failed
1791
+ maxRetriesPerAgent: 2, // W-mpmwxn1j — per-agent retry cap. When the SAME agent fails the same WI this many times, the next retry MUST reassign to a different eligible agent (consults routing.md + agent availability). Falls back to the same agent only when no alternate is available. Counted separately from `maxRetries` (which caps total retries across all agents) and tracked on the WI as `_retriesByAgent: { agentId: count }`. Hard-pinned agents bypass reassignment (operator intent wins).
1790
1792
  maxPhantomRetries: 3, // max retries for "phantom completion" (runtime crashed before emitting type:"result"); tracked separately from _retryCount so phantom retries don't pollute the normal PR-attachment retry budget. See engine/lifecycle.markMissingPrAttachment + detectNonTerminalResultSummary.
1791
1793
  minRetryGapMs: 120000, // 2min — minimum gap between retry dispatches for the same work item; prevents tight retry loops when an idempotent agent (e.g. review bailing out on a duplicate) cannot produce the expected output (#1770)
1792
1794
  pipelineApiRetries: 2, // max attempts for pipeline API calls
@@ -1854,7 +1856,7 @@ const ENGINE_DEFAULTS = {
1854
1856
  // Engine code MUST go through the resolveAgent*/resolveCc* helpers below;
1855
1857
  // never read these fields directly. New runtimes are added by registering
1856
1858
  // an adapter in engine/runtimes/index.js — these defaults stay stable.
1857
- defaultCli: 'claude', // fleet-wide CLI runtime (must be a key in engine/runtimes/index.js)
1859
+ defaultCli: 'copilot', // fleet-wide CLI runtime (must be a key in engine/runtimes/index.js); flipped from 'claude' in W-mpmwxkk40007c995 — Copilot is now the primary runtime, Claude remains supported as an opt-in
1858
1860
  defaultModel: undefined, // fleet-wide model; undefined = let the runtime adapter pick its own default
1859
1861
  ccCli: undefined, // CC/doc-chat CLI override; undefined = inherit defaultCli (independent of agent path)
1860
1862
  ccModel: undefined, // CC/doc-chat model override; undefined = inherit defaultModel
@@ -1877,6 +1879,7 @@ const ENGINE_DEFAULTS = {
1877
1879
  removeWorktreeFailureTtlMs: 24 * 60 * 60 * 1000, // stale failed paths are forgotten after a day
1878
1880
  removeWorktreeFailureMaxEntries: 1000, // bound failed-worktree retry suppression cache
1879
1881
  ccMaxTurns: 50, // max tool-use turns per CC/doc-chat call before CLI stops (per response, not per session)
1882
+ ccTurnTimeoutMs: 300000, // W-mpmwxni2000c25c7-b: wall-clock cap per CC/doc-chat turn; on expiry the in-flight LLM call is aborted and the handler surfaces `{code:'cc-turn-timeout', retriable:true}` instead of hanging the UI
1880
1883
  docSessionMaxEntries: 200, // cap doc-chat session map/disk store by least-recent activity (LRU; sessions are non-expiring otherwise)
1881
1884
  ccLiveStreamMaxAgeMs: 30 * 60 * 1000, // hard cap reconnect buffers if abort/cleanup stalls
1882
1885
  metricsFlushIntervalMs: 10000, // batch trackEngineUsage writes to metrics.json — flushed every 10s instead of per-call to cut lock contention and dashboard mtime churn
@@ -2082,7 +2085,7 @@ function _isMeaningful(v) {
2082
2085
  * Resolve the CLI runtime for a per-agent spawn. Priority:
2083
2086
  * 1. `agent.cli` — per-agent override
2084
2087
  * 2. `engine.defaultCli` — fleet default
2085
- * 3. `ENGINE_DEFAULTS.defaultCli` ('claude') — hardcoded fallback
2088
+ * 3. `ENGINE_DEFAULTS.defaultCli` ('copilot') — hardcoded fallback
2086
2089
  *
2087
2090
  * Does NOT fall through to `engine.ccCli`. CC and agents are independent paths.
2088
2091
  */
@@ -2096,7 +2099,7 @@ function resolveAgentCli(agent, engine) {
2096
2099
  * Resolve the CLI runtime for the Command Center / doc-chat. Priority:
2097
2100
  * 1. `engine.ccCli` — CC-only override
2098
2101
  * 2. `engine.defaultCli` — fleet default
2099
- * 3. `ENGINE_DEFAULTS.defaultCli` ('claude') — hardcoded fallback
2102
+ * 3. `ENGINE_DEFAULTS.defaultCli` ('copilot') — hardcoded fallback
2100
2103
  *
2101
2104
  * Does NOT inspect any agent overrides. CC has no notion of "which agent" —
2102
2105
  * it's a fleet-wide singleton.
@@ -4752,6 +4755,39 @@ function safeSlugComponent(text, maxLen = 80) {
4752
4755
  return `${base}-${hash}`.slice(0, maxLen);
4753
4756
  }
4754
4757
 
4758
+ // W-mpmwxn1j — Per-agent retry tracking. After the same agent has failed a WI
4759
+ // `ENGINE_DEFAULTS.maxRetriesPerAgent` times, the next dispatch must reassign
4760
+ // to a different eligible agent. The counter is stored on the WI itself
4761
+ // (`_retriesByAgent: { agentId: count }`) so engine restart preserves the
4762
+ // state. The counter is cleared on successful completion alongside
4763
+ // `_retryCount`. The caller passes a mutable WI object (inside a
4764
+ // `mutateWorkItems` / `mutateJsonFileLocked` callback) and the failed agent ID.
4765
+ // Anonymous failures (no resolvable agent) skip the bump and return 0 — otherwise
4766
+ // we'd corrupt the shape with an `undefined` key. Returns the new per-agent count for logging.
4767
+ function bumpAgentRetryCount(wi, agentId) {
4768
+ if (!wi || !agentId) return 0;
4769
+ if (!wi._retriesByAgent || typeof wi._retriesByAgent !== 'object' || Array.isArray(wi._retriesByAgent)) {
4770
+ wi._retriesByAgent = {};
4771
+ }
4772
+ const next = (Number(wi._retriesByAgent[agentId]) || 0) + 1;
4773
+ wi._retriesByAgent[agentId] = next;
4774
+ return next;
4775
+ }
4776
+
4777
+ function getAgentRetryCount(wi, agentId) {
4778
+ if (!wi || !agentId) return 0;
4779
+ const map = wi._retriesByAgent;
4780
+ if (!map || typeof map !== 'object' || Array.isArray(map)) return 0;
4781
+ return Number(map[agentId]) || 0;
4782
+ }
4783
+
4784
+ function resolveMaxRetriesPerAgent(config) {
4785
+ const raw = config?.engine?.maxRetriesPerAgent;
4786
+ const val = Number(raw);
4787
+ if (Number.isFinite(val) && val > 0) return val;
4788
+ return ENGINE_DEFAULTS.maxRetriesPerAgent;
4789
+ }
4790
+
4755
4791
  const PR_AUTOMATION_CAUSE_LIMIT = 50;
4756
4792
 
4757
4793
  function getPrAutomationCauses(pr) {
@@ -4912,6 +4948,7 @@ module.exports = {
4912
4948
  tailTextBytes,
4913
4949
  appendTextTail,
4914
4950
  writeToInbox, parseNoteId,
4951
+ bumpAgentRetryCount, getAgentRetryCount, resolveMaxRetriesPerAgent,
4915
4952
  exec,
4916
4953
  execAsync,
4917
4954
  execSilent,
package/engine/timeout.js CHANGED
@@ -587,6 +587,10 @@ function checkTimeouts(config) {
587
587
  log('info', `Reconcile: work item ${item.id} agent died — auto-retry ${retries + 1}/${maxRetries}`);
588
588
  item.status = WI_STATUS.PENDING;
589
589
  item._retryCount = retries + 1;
590
+ // W-mpmwxn1j — bump per-agent retry count BEFORE deleting
591
+ // dispatched_to so the next dispatch can reassign away from the
592
+ // agent that died. Anonymous (no dispatched_to) failures don't bump.
593
+ if (item.dispatched_to) shared.bumpAgentRetryCount(item, item.dispatched_to);
590
594
  delete item.dispatched_at;
591
595
  delete item.dispatched_to;
592
596
  delete item._pendingReason;