@yemi33/minions 0.1.1659 → 0.1.1660

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.1660 (2026-05-01)
4
+
5
+ ### Fixes
6
+ - less rigid agent orphan detection
7
+
3
8
  ## 0.1.1659 (2026-05-01)
4
9
 
5
10
  ### Other
@@ -1,5 +1,5 @@
1
1
  {
2
2
  "runtime": "copilot",
3
3
  "models": null,
4
- "cachedAt": "2026-05-01T05:34:43.680Z"
4
+ "cachedAt": "2026-05-01T05:58:00.505Z"
5
5
  }
package/engine/shared.js CHANGED
@@ -721,7 +721,19 @@ const ENGINE_DEFAULTS = {
721
721
  inboxConsolidateThreshold: 5,
722
722
  agentTimeout: 18000000, // 5h
723
723
  heartbeatTimeout: 300000, // 5min — stale-orphan grace after process tracking is lost
724
- heartbeatTimeouts: {}, // optional per-type stale-orphan overrides; merged at runtime (see timeout.js)
724
+ // Per-type stale-orphan overrides (merged with config.engine.heartbeatTimeouts at runtime; see timeout.js).
725
+ // Heavy work types (multi-file edits, builds, test suites, full verify cycles) routinely go quiet for
726
+ // longer than the 5-min default when the engine has lost their tracked handle (e.g. across an engine
727
+ // restart). We give them headroom up to a typical build+tests cycle. Short-running types
728
+ // (decompose / meeting / etc.) keep the 5-min default by simply not appearing here.
729
+ heartbeatTimeouts: {
730
+ implement: 900000, // 15min — refactors, multi-file edits, builds
731
+ 'implement:large': 900000, // 15min — same class of work, larger scope
732
+ fix: 900000, // 15min — fix runs often include builds + retries
733
+ test: 900000, // 15min — build-and-test against existing PR
734
+ verify: 900000, // 15min — full project verification cycle
735
+ plan: 600000, // 10min — research-heavy
736
+ },
725
737
  maxTurns: 100,
726
738
  worktreeCreateTimeout: 300000, // 5min for git worktree add on large Windows repos
727
739
  worktreeCreateRetries: 1, // retry once on transient timeout/lock races
@@ -785,7 +797,6 @@ const ENGINE_DEFAULTS = {
785
797
  copilotReasoningSummaries: false, // Copilot --enable-reasoning-summaries (Anthropic-family models only)
786
798
  maxBudgetUsd: undefined, // fleet USD ceiling for --max-budget-usd (per-agent override: agents.<id>.maxBudgetUsd). Honors 0 via ?? so a literal cap of $0 works
787
799
  disableModelDiscovery: false, // skip runtime.listModels() REST calls fleet-wide (settings UI falls back to free-text)
788
- heartbeatTimeouts: {},
789
800
  maxPendingContexts: 20, // cap pendingContexts arrays in cooldowns.json to prevent unbounded growth
790
801
  maxPendingContextEntryBytes: 256 * 1024, // 256 KB — cap each pendingContexts entry to prevent huge PR comments from bloating cooldowns.json
791
802
  maxDispatchPromptBytes: 1024 * 1024, // 1 MB — dispatch items with prompts larger than this sidecar to engine/contexts/ to prevent dispatch.json OOM (#1167)
package/engine/timeout.js CHANGED
@@ -9,7 +9,7 @@ const queries = require('./queries');
9
9
  const steering = require('./steering');
10
10
 
11
11
  const { safeRead, safeWrite, safeJson, mutateJsonFileLocked, getProjects, projectWorkItemsPath, log, ts,
12
- ENGINE_DEFAULTS, WI_STATUS, WORK_TYPE, DISPATCH_RESULT, AGENT_STATUS } = shared;
12
+ ENGINE_DEFAULTS, ENGINE_DIR, WI_STATUS, WORK_TYPE, DISPATCH_RESULT, AGENT_STATUS } = shared;
13
13
  const { getDispatch, getAgentStatus } = queries;
14
14
  const AGENTS_DIR = queries.AGENTS_DIR;
15
15
  const MINIONS_DIR = shared.MINIONS_DIR;
@@ -142,6 +142,23 @@ function isTrackedProcessAlive(procInfo) {
142
142
  }
143
143
  }
144
144
 
145
+ // Last-resort liveness check via the on-disk PID file (engine/tmp/pid-<safeId>.pid).
146
+ // Used by orphan detection to avoid false-positive kills when the engine has lost the
147
+ // tracked process handle (engine restart, never-tracked spawn, etc.) but the OS-level
148
+ // child process is still alive and healthy. The safeId here mirrors engine.js spawn
149
+ // (id.replace(/[:\\/*?"<>|]/g, '-')) — same pattern engine/cli.js uses to re-attach.
150
+ function isOsPidAliveForDispatch(itemId) { // returns true only if the PID file is readable, holds a positive integer, and process.kill(pid, 0) does not throw
151
+ const safeId = String(itemId || '').replace(/[:\\/*?"<>|]/g, '-'); // falsy ids become ''; every character in the class is replaced with '-'
152
+ const pidPath = path.join(ENGINE_DIR, 'tmp', `pid-${safeId}.pid`); // <ENGINE_DIR>/tmp/pid-<safeId>.pid
153
+ let raw;
154
+ try { raw = fs.readFileSync(pidPath, 'utf8'); }
155
+ catch { return false; } // any read failure (e.g. missing file) is reported as not alive
156
+ const pid = parseInt(String(raw).trim(), 10);
157
+ if (!Number.isFinite(pid) || pid <= 0) return false; // rejects NaN (non-numeric content) and non-positive values
158
+ try { process.kill(pid, 0); return true; } // signal 0: used here as an existence probe, no signal is meant to be delivered
159
+ catch { return false; } // NOTE(review): every throw counts as "not alive"; confirm whether EPERM (process exists but cannot be signalled) should count as alive instead
160
+ }
161
+
145
162
  function checkTimeouts(config) {
146
163
  const activeProcesses = engine().activeProcesses;
147
164
  const engineRestartGraceUntil = engine().engineRestartGraceUntil;
@@ -335,6 +352,11 @@ function checkTimeouts(config) {
335
352
  } catch { /* ENOENT — keep default */ }
336
353
 
337
354
  if (!processAlive && silentMs > staleOrphanTimeout && (Date.now() > engineRestartGraceUntil || engineRestartGraceExempt?.has(item.id))) {
355
+ // Last-resort PID check: lost tracked handle but OS process may still be alive.
356
+ if (isOsPidAliveForDispatch(item.id)) {
357
+ log('info', `Orphan check: ${item.agent} (${item.id}) silent ${silentSec}s but OS PID is alive — keeping [${_logState}]`);
358
+ continue;
359
+ }
338
360
  // No tracked process AND no recent output past stale-orphan timeout AND (grace period expired OR confirmed-dead at restart) → orphaned
339
361
  log('warn', `Orphan detected: ${item.agent} (${item.id}) — no live process tracked, silent for ${silentSec}s [${_logState}]`);
340
362
  dispatch().updateAgentStatus(item.id, AGENT_STATUS.TIMED_OUT, `Orphaned — no process, silent for ${silentSec}s`);
@@ -424,4 +446,5 @@ module.exports = {
424
446
  checkTimeouts,
425
447
  checkSteering,
426
448
  checkIdleThreshold,
449
+ isOsPidAliveForDispatch,
427
450
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.1659",
3
+ "version": "0.1.1660",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"