npm - @yemi33/minions - Versions diffs - 0.1.1965 → 0.1.1967 - Mend

@yemi33/minions 0.1.1965 → 0.1.1967

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/bin/minions.js +6 -6
package/dashboard/js/refresh.js +5 -0
package/dashboard/js/render-managed.js +261 -0
package/dashboard/js/render-other.js +5 -2
package/dashboard/pages/engine.html +6 -0
package/dashboard/styles.css +21 -4
package/dashboard-build.js +1 -1
package/dashboard.js +250 -1
package/docs/README.md +10 -13
package/docs/managed-spawn.md +259 -0
package/docs/watches.md +47 -20
package/engine/cli.js +39 -0
package/engine/managed-spawn.js +1325 -0
package/engine/playbook.js +34 -0
package/engine/projects.js +13 -0
package/engine/shared.js +118 -0
package/engine.js +264 -14
package/package.json +2 -1

package/engine/playbook.js CHANGED Viewed

@@ -468,6 +468,40 @@ function renderPlaybook(type, vars) {
     } catch (e) { log('warn', `keep_processes hint render failed: ${e.message}`); }
   }
+  // P-1f9c3a45 — opt-in managed_spawn dispatch hint. Mirrors keep_processes:
+  // injected only when the dispatcher set vars.managed_spawn (truthy) from the
+  // work item's `meta.managed_spawn`. Tells the agent how to write the
+  // managed-spawn sidecar so the engine takes over spawn + healthcheck.
+  if (vars.managed_spawn) {
+    try {
+      const managedSpawn = require('./managed-spawn');
+      const hint = managedSpawn.buildManagedSpawnHint({
+        agentId: vars.agent_id,
+        workItemId: vars.item_id || vars.task_id,
+        ttlMinutes: vars.managed_spawn_ttl_minutes,
+        minionsDir: MINIONS_DIR,
+      });
+      if (hint) inertAppendices.push(hint);
+    } catch (e) { log('warn', `managed_spawn hint render failed: ${e.message}`); }
+  }
+  // P-1f9c3a45 — auto-inject live managed processes block, project-scoped.
+  // Unconditional: any healthy+alive spec whose owner_project matches
+  // vars.project_name is surfaced to the dispatched agent so downstream WIs
+  // discover services stood up by earlier WIs without human hand-off. Cap at
+  // ENGINE_DEFAULTS.managedSpawn.promptContextMaxBytes (default 2KB) — the
+  // helper falls back to a compact name+base_url list when over cap. Empty
+  // string is returned when nothing matches, so we short-circuit on falsy.
+  if (vars.project_name) {
+    try {
+      const managedSpawn = require('./managed-spawn');
+      const liveBlock = managedSpawn.buildLiveManagedProcessesBlock({
+        project: vars.project_name,
+      });
+      if (liveBlock) inertAppendices.push(liveBlock);
+    } catch (e) { log('warn', `managed-spawn live-processes inject failed: ${e.message}`); }
+  }
   // Inject KB guardrail
   content += `\n\n---\n\n## Knowledge Base Rules\n\n`;
   content += `**Never delete, move, or overwrite files in \`knowledge/\`.** The sweep (consolidation engine) is the only process that writes to \`knowledge/\`. If you think a KB file is wrong, note it in your learnings file — do not touch \`knowledge/\` directly.\n`;

package/engine/projects.js CHANGED Viewed

@@ -132,6 +132,7 @@ function removeProject(target, options = {}) {
     drainedDispatches: 0, // includes active dispatches whose agent processes were killed
     cleanedWorktrees: 0,
     disabledSchedules: 0,
+    killedManagedProcesses: 0, // P-8a4d6f29 — managed-spawn cleanup
     archivedPlans: [],
     pipelineRefs: [],
     archivedTo: null,
@@ -173,6 +174,18 @@ function removeProject(target, options = {}) {
   );
   _requeueProjectlessCentralWorkItems(projectlessCentralItemIds);
+  // 2.5. Managed-spawn cleanup (P-8a4d6f29). Centralised in managed-spawn.js
+  //      so no other module needs to know about engine/managed-processes.json.
+  //      Kills + unlinks every spec owned by this project, including the .log
+  //      and .log.1 rotated sibling. Best-effort: failures only warn.
+  try {
+    const managedSpawn = require('./managed-spawn');
+    const result = managedSpawn.removeManagedSpecsForProject(project.name);
+    summary.killedManagedProcesses = result.killed || 0;
+  } catch (e) {
+    summary.warnings.push('managed-spawn cleanup: ' + e.message);
+  }
   // 3. Clean up worktrees under this project's worktree root, honoring
   //    config.engine.worktreeRoot (mirrors lifecycle.js cleanupPlanWorktrees).
   if (project.localPath) {

package/engine/shared.js CHANGED Viewed

@@ -650,6 +650,65 @@ function isPidAlive(pid) {
   catch { return false; }
 }
+// P-8a4d6f29 — single helper for detached-process stdio capture with
+// rotate-on-open. Used by bin/minions.js (engine + dashboard stdio logs) and
+// engine/managed-spawn.js openManagedLog. Centralising replaces the previous
+// _openStdioLog in bin/minions.js so rotation is uniform across every long-
+// running log Minions writes.
+//
+// Parameters:
+//   - `name` (string, required): log file name inside `dir`.
+//   - `dir`  (string, required): directory that holds the log.
+//   - `opts.rotateBytes` (number, optional): size threshold for rotation.
+//   - `opts.fallback` (string, optional): `'ignore'` to swallow I/O failures.
+//
+// Behavior:
+//   - Throws synchronously when `name` or `dir` is missing/empty; argument
+//     validation is NOT covered by `opts.fallback`.
+//   - Ensures `dir` exists (mkdir -p).
+//   - If `<dir>/<name>` already exists and its size > `rotateBytes`, rename it
+//     to `<dir>/<name>.1` (overwriting any prior `.1`) before opening. Keeps
+//     exactly one rotated sibling. We rotate first so the freshly opened fd
+//     points at an empty file — preserving the original O_APPEND semantics.
+//   - Opens the (possibly fresh) file in append mode and returns
+//     `{ fd, logPath, rotated }`. `rotated` is `true` when the .1 rename ran.
+//   - `opts.fallback === 'ignore'` makes any I/O failure return
+//     `{ fd: 'ignore', logPath, rotated: false }` instead of throwing — used by
+//     bin/minions.js where a failed log open must not block the restart.
+//   - `rotateBytes` defaults to ENGINE_DEFAULTS.managedSpawn.logRotateBytes
+//     (10 MB). Pass `Infinity` (or any value <= 0) to disable rotation
+//     entirely. Non-numeric / NaN values fall back to the default.
+function openAppendLogFd(name, dir, opts) {
+  opts = opts || {};
+  if (typeof name !== 'string' || name.length === 0) {
+    throw new Error('openAppendLogFd: name required');
+  }
+  if (typeof dir !== 'string' || dir.length === 0) {
+    throw new Error('openAppendLogFd: dir required');
+  }
+  const fallback = opts.fallback || null;
+  // `Infinity` is the documented "never rotate" sentinel. Number.isFinite()
+  // alone rejects it, which used to send the call down the default-cap branch
+  // and rotate at 10 MB anyway — so accept it explicitly here.
+  const requestedCap = opts.rotateBytes;
+  const hasExplicitCap = requestedCap === Infinity || Number.isFinite(requestedCap);
+  const cap = hasExplicitCap
+    ? requestedCap
+    : ((ENGINE_DEFAULTS.managedSpawn && ENGINE_DEFAULTS.managedSpawn.logRotateBytes) || 10 * 1024 * 1024);
+  const logPath = path.join(dir, name);
+  try {
+    try { fs.mkdirSync(dir, { recursive: true }); }
+    catch (e) { if (e && e.code !== 'EEXIST') throw e; }
+    let rotated = false;
+    // An infinite or non-positive cap means rotation is disabled.
+    if (Number.isFinite(cap) && cap > 0) {
+      try {
+        const st = fs.statSync(logPath);
+        if (st && st.size > cap) {
+          const rotatedPath = logPath + '.1';
+          // Drop any previous `.1` first so exactly one rotated sibling remains.
+          try { fs.unlinkSync(rotatedPath); }
+          catch (e) { if (e && e.code !== 'ENOENT') throw e; }
+          fs.renameSync(logPath, rotatedPath);
+          rotated = true;
+        }
+      } catch (e) {
+        if (e && e.code !== 'ENOENT') throw e;
+        // No existing file → nothing to rotate.
+      }
+    }
+    const fd = fs.openSync(logPath, 'a');
+    return { fd: fd, logPath: logPath, rotated: rotated };
+  } catch (e) {
+    // Best-effort mode: callers pass the string 'ignore' straight through as a
+    // child-process stdio entry, so a failed log open cannot block them.
+    if (fallback === 'ignore') return { fd: 'ignore', logPath: logPath, rotated: false };
+    throw e;
+  }
+}
 function withFileLock(lockPath, fn, {
   timeoutMs = 5000,
   retryDelayMs = 25,
@@ -1431,6 +1490,62 @@ const ENGINE_DEFAULTS = {
     // keep_processes use cases.
     requireGitWorkdir: true,
   },
+  // P-7a3b1c92 / plan W-mp7k1r760003b5dd — managed-spawn primitive: agents
+  // describe long-running services in agents/<id>/managed-spawn.json and the
+  // engine owns the spawn + healthcheck lifecycle. These defaults gate the
+  // validator (engine/managed-spawn.js) and later the engine-side spawn loop,
+  // per-tick sweep, playbook hint, and dashboard endpoints. Mirrors the
+  // `keepProcesses` block intentionally — symmetry with the keep-pids
+  // primitive is the documented design convention.
+  managedSpawn: {
+    enabled: true,                  // global kill switch; opt-in per-WI via meta.managed_spawn
+    maxSpecsPerFile: 5,             // ≤5 specs per managed-spawn.json file
+    maxNameLength: 64,              // kebab-case spec name cap
+    maxArgsCount: 64,               // child-process arg-vector cap per spec
+    maxEnvVars: 32,                 // env-object cap per spec
+    maxAttrsBytes: 2048,            // serialized `attrs` blob cap per spec
+    maxTtlMinutes: 1440,            // 24h hard cap on per-spec TTL
+    defaultTtlMinutes: 240,         // 4h default when spec.ttl_minutes omitted
+    sweepEvery: 30,                 // ticks between TTL/dead-PID sweeps
+    defaultHealthIntervalSec: 1,    // healthcheck polling cadence pre-healthy
+    healthBackoffSec: 30,           // healthcheck liveness cadence post-healthy
+    logRotateBytes: 10 * 1024 * 1024, // 10MB rotation threshold for managed-logs/<name>.log
+    bootReconcileMaxMs: 2000,       // boot-time reconcile timeout (don't block engine boot)
+    promptContextMaxBytes: 2048,    // cap on auto-injected `## Live managed processes` block
+    requireGitWorkdir: true,        // reject specs whose `cwd` isn't a real git worktree
+    // Single global executable allowlist. Applies to both `spec.cmd` and any
+    // `command` healthcheck's `cmd`. Keep narrow — adding a binary here lets
+    // any agent's sidecar invoke it under engine ownership.
+    executableAllowlist: [
+      'node', 'bun', 'npm', 'npx', 'pnpm', 'yarn',
+      'python', 'python3', 'pip', 'pip3',
+      'docker', 'podman',
+      'adb', 'emulator',
+      'gradle', 'gradlew', 'mvn',
+      'pwsh', 'powershell', 'bash', 'sh',
+      'curl', 'wget',
+      'git',
+    ],
+    // Env-key allowlist (exact match). Tight by default so a managed spec
+    // can't leak credentials (AWS_*, AZURE_*, GH_TOKEN, etc.). Anything not
+    // here must match one of the allowed prefixes below.
+    envKeyAllowlist: [
+      'NODE_ENV', 'PORT', 'HOST', 'PATH',
+      'DEBUG', 'LOG_LEVEL',
+      'HOME', 'USERPROFILE', 'TMPDIR', 'TEMP', 'TMP',
+      'LANG', 'LC_ALL',
+      'JAVA_HOME', 'ANDROID_HOME', 'ANDROID_SDK_ROOT',
+    ],
+    // Env-key prefix allowlist. Standard ecosystem prefixes that frontends
+    // and tooling depend on (Vite, Next.js, CRA, npm scripts). Extend with
+    // caution; broad prefixes (`AWS_`, `AZURE_`) belong on a deny-list, not
+    // an allow-list.
+    envKeyAllowlistPrefixes: [
+      'VITE_', 'NEXT_', 'REACT_APP_', 'NUXT_', 'GATSBY_',
+      'npm_config_', 'NPM_CONFIG_',
+      'MINIONS_',
+    ],
+  },
   // Backward-compat: keep `engine.claude.*` field family deprecation tracker. Listed here so preflight
   // knows which subkeys to flag as deprecated. Do not consume `claude.*` in new code — use the runtime
   // adapter system (engine/runtimes/) and the resolveAgent*/resolveCc* helpers instead.
@@ -2081,6 +2196,8 @@ const FAILURE_CLASS = {
   WORKTREE_PREFLIGHT: 'worktree-preflight', // Pre-spawn worktree validation rejected (nested-in-project, drive-root collapse) — never retryable
   INVALID_KEEP_PROCESSES_WORKDIR: 'invalid-keep-processes-workdir', // W-mp6k7ywi000fa33c: keep-pids.json declared a cwd that is not a real git worktree (likely a selective copy of the repo) — never retryable; agent must rerun in a real worktree
   INVALID_KEEP_PROCESSES_SCHEMA: 'invalid-keep-processes-schema', // W-mp7i902u000l991f: keep-pids.json failed validation for a reason other than workdir (pids-missing, ttl-too-long, expires_at-missing, pids-too-many, port-invalid, etc.) — agent wrote the wrong shape; never retryable until they fix the file
+  INVALID_MANAGED_SPAWN: 'invalid-managed-spawn', // P-7a3b1c92: agents/<id>/managed-spawn.json failed validator (bad schema, broken workdir, executable/env not on allowlist, healthcheck shape wrong). Engine refuses to spawn any spec — agent must fix file; never retryable as-is.
+  MANAGED_SPAWN_HEALTHCHECK_FAILED: 'managed-spawn-healthcheck-failed', // P-7a3b1c92: at least one managed-spawn spec was spawned but failed its healthcheck within timeout_s. Engine killed the failing PIDs; siblings stay alive. Dispatch ERROR with the failing spec name + log tail surfaced in the inbox alert.
   UNKNOWN: 'unknown',                     // Unclassified failure
 };
 const ESCALATION_POLICY = {
@@ -4297,6 +4414,7 @@ module.exports = {
   _WIN_RESERVED_NAMES, // exported for testing
   LOCK_STALE_MS,
   isPidAlive,
+  openAppendLogFd,
   flushLogs,
   redactSecrets,
   slugify,

package/engine.js CHANGED Viewed

@@ -2224,6 +2224,213 @@ async function spawnAgent(dispatchItem, config) {
       }
     }
+    // P-2d5e8f04 — managed-spawn acceptance gate. Symmetric to the
+    // keep-processes block above but for `agents/<id>/managed-spawn.json`:
+    // engine owns the spawn + lifecycle of the long-running services the
+    // agent described in its sidecar. This gate (a) rejects malformed
+    // sidecars as a hard non-retryable failure with a dedicated failure
+    // class + inbox alert, and (b) on success spawns each spec detached and
+    // batch-records them in engine/managed-processes.json. Healthcheck loops
+    // + dispatch ERROR-on-healthcheck-failure land in the follow-up item;
+    // for now a spec that spawns successfully is recorded with
+    // healthy:false, alive:true and the engine sweep / item-3 healthcheck
+    // loop will drive its state from there.
+    let managedSpawnAcceptanceFailure = null;
+    let managedSpawnSpawned = []; // [{name, pid, started_at, log_path}]
+    {
+      const _wiMeta = dispatchItem.meta?.item?.meta || {};
+      const _msEnabled = !!_wiMeta.managed_spawn
+        || !!dispatchItem.meta?.managed_spawn;
+      if (_msEnabled) {
+        try {
+          const managedSpawn = require('./engine/managed-spawn');
+          const evalResult = managedSpawn.evaluateManagedSpawnAcceptance(agentId);
+          if (evalResult.exists && !evalResult.accepted) {
+            managedSpawnAcceptanceFailure = {
+              reason: evalResult.reason,
+              filePath: evalResult.filePath,
+              isWorkdirRejection: !!evalResult.isWorkdirRejection,
+              parsedRaw: evalResult.parsedRaw || null,
+            };
+            try { fs.unlinkSync(evalResult.filePath); } catch (_e) { /* gone or busy */ }
+            log('warn', `managed-spawn acceptance: REJECTED ${agentId} (${id}) — ${evalResult.reason}; sidecar deleted`);
+            try {
+              const wiId = dispatchItem.meta?.item?.id || '';
+              const canonicalHint = (() => {
+                try {
+                  return managedSpawn.buildManagedSpawnHint({
+                    agentId,
+                    workItemId: wiId,
+                    minionsDir: shared.MINIONS_DIR,
+                  });
+                } catch (_hintErr) { return ''; }
+              })();
+              let parsedSnippet = '';
+              if (evalResult.parsedRaw) {
+                try { parsedSnippet = JSON.stringify(evalResult.parsedRaw, null, 2); }
+                catch (_jsonErr) { parsedSnippet = String(evalResult.parsedRaw); }
+                if (parsedSnippet.length > 500) parsedSnippet = parsedSnippet.slice(0, 500) + '\n... (truncated)';
+              }
+              const alertBody = [
+                `# managed_spawn setup REJECTED for ${agentId}`,
+                '',
+                `Your \`agents/${agentId}/managed-spawn.json\` failed validation: \`${evalResult.reason}\`.`,
+                'No services were spawned and the dispatch was marked ERROR (non-retryable).',
+                '',
+                wiId ? `Work item: ${wiId}` : '',
+                `Agent: ${agentId}`,
+                `Dispatch: ${id}`,
+                '',
+                parsedSnippet ? '## What you wrote\n\n```json\n' + parsedSnippet + '\n```\n' : '',
+                '## Canonical shape',
+                '',
+                canonicalHint || '(see `engine/managed-spawn.js` `buildManagedSpawnHint` for the canonical shape.)',
+                '',
+              ].filter(Boolean).join('\n');
+              writeInboxAlert(`managed-spawn-${agentId}`, alertBody);
+            } catch (alertErr) {
+              log('warn', `managed-spawn acceptance: failed to emit inbox alert for ${agentId}: ${alertErr.message}`);
+            }
+          } else if (evalResult.exists && evalResult.accepted && evalResult.record) {
+            // Valid sidecar — spawn each spec detached and batch-record.
+            // Per-spec failure here (e.g., binary missing on PATH despite
+            // passing the allowlist) marks the whole gate failed so the
+            // dispatch fails ERROR. Surviving siblings get killed for
+            // consistency — the agent should not exit green with a partial
+            // service set up.
+            const ctx = {
+              owner_agent: agentId,
+              owner_wi: dispatchItem.meta?.item?.id || '',
+              owner_project: project?.name || '',
+            };
+            const spawnedItems = [];
+            let spawnFailureReason = null;
+            for (const spec of evalResult.record.specs) {
+              try {
+                const runtime = managedSpawn.spawnManagedSpec(spec, ctx);
+                spawnedItems.push({ spec, runtime });
+                managedSpawnSpawned.push({ name: spec.name, pid: runtime.pid, started_at: runtime.started_at, log_path: runtime.log_path });
+              } catch (specErr) {
+                spawnFailureReason = `spawn failed for ${spec.name}: ${specErr.message}`;
+                log('warn', `managed-spawn: ${spawnFailureReason}`);
+                break;
+              }
+            }
+            if (spawnFailureReason) {
+              // Roll back: kill anything we just spawned, leave no dangling
+              // state. This is consistent with the "all healthy or fail"
+              // contract item 3 will enforce on healthcheck timeout.
+              for (const item of spawnedItems) {
+                try { shared.killByPidImmediate(item.runtime.pid); } catch (_e) {}
+              }
+              managedSpawnSpawned = [];
+              managedSpawnAcceptanceFailure = {
+                reason: spawnFailureReason,
+                filePath: evalResult.filePath,
+                isWorkdirRejection: false,
+                parsedRaw: null,
+              };
+              try { fs.unlinkSync(evalResult.filePath); } catch (_e) {}
+            } else {
+              try {
+                managedSpawn.recordManagedBatch(spawnedItems, ctx);
+              } catch (recErr) {
+                log('warn', `managed-spawn: state-file write failed for ${agentId}: ${recErr.message}`);
+              }
+              // The sidecar has been ingested into the state file; unlink
+              // it so a future dispatch for this agent does not re-spawn
+              // the same specs (the state file is the source of truth).
+              try { fs.unlinkSync(evalResult.filePath); } catch (_e) {}
+              log('info', `managed-spawn accepted: ${agentId} (${id}) spawned ${managedSpawnSpawned.length} spec(s)`);
+            }
+          }
+        } catch (e) {
+          log('warn', `managed-spawn acceptance check failed for ${agentId} (${id}): ${e.message}`);
+        }
+      }
+    }
+    // P-9c1f47a6 — managed-spawn healthcheck gate (item 3). After all specs
+    // are spawned + persisted, wait for each spec's first healthcheck to
+    // pass (or fail) within its declared timeout_s. Any failure here forces
+    // the dispatch to ERROR with FAILURE_CLASS.MANAGED_SPAWN_HEALTHCHECK_FAILED,
+    // kills the failing spec's PID + removes its state entry, attaches a
+    // log tail to the inbox alert, and leaves surviving siblings alone (a
+    // partial-failure spec set is more useful than an all-or-nothing wipe
+    // — the agent's later dispatch can inspect /api/managed-processes and
+    // either restart the failed one or remove the survivors itself).
+    let managedSpawnHealthcheckFailure = null;
+    if (managedSpawnSpawned.length > 0) {
+      try {
+        const managedSpawn = require('./engine/managed-spawn');
+        const items = managedSpawnSpawned;
+        // Re-read the specs from the state file (recordManagedBatch normalised
+        // them — healthcheck shape lives there now).
+        const liveSpecs = managedSpawn.listManagedSpecs();
+        const byName = new Map(liveSpecs.map(s => [s.name, s]));
+        const results = await Promise.allSettled(items.map((spawned) => {
+          const spec = byName.get(spawned.name);
+          if (!spec || !spec.healthcheck) {
+            return Promise.resolve({ healthy: false, error: 'no healthcheck recorded for ' + spawned.name, _name: spawned.name });
+          }
+          return managedSpawn.waitForFirstHealth(spec).then(r => Object.assign({ _name: spec.name, _pid: spawned.pid }, r));
+        }));
+        const failed = [];
+        for (let i = 0; i < results.length; i++) {
+          const r = results[i];
+          if (r.status === 'rejected') {
+            failed.push({ name: items[i].name, pid: items[i].pid, error: 'healthcheck threw: ' + (r.reason && r.reason.message ? r.reason.message : String(r.reason)) });
+            continue;
+          }
+          if (!r.value.healthy) {
+            failed.push({ name: r.value._name || items[i].name, pid: r.value._pid || items[i].pid, error: r.value.error || 'unhealthy' });
+          }
+        }
+        if (failed.length > 0) {
+          // Kill failing PIDs + drop their state entries. Surviving siblings
+          // stay alive intentionally (see comment above).
+          for (const f of failed) {
+            try { managedSpawn.removeManagedSpec(f.name); }
+            catch (e) { log('warn', `managed-spawn healthcheck: cleanup failed for ${f.name}: ${e.message}`); }
+          }
+          managedSpawnHealthcheckFailure = {
+            failed: failed,
+            survivedNames: items.filter(it => !failed.some(f => f.name === it.name)).map(it => it.name),
+          };
+          log('warn', `managed-spawn healthcheck: ${failed.length}/${items.length} spec(s) failed for ${agentId} (${id}); ` +
+            failed.map(f => `${f.name}=${f.error}`).join('; '));
+          try {
+            const wiId = dispatchItem.meta?.item?.id || '';
+            const logTails = failed.map(f => {
+              const tail = managedSpawn.tailManagedLog(f.name, 50) || '(log empty or unreadable)';
+              return '### ' + f.name + ' (pid ' + (f.pid || '?') + ')\n\nReason: `' + f.error + '`\n\n```\n' + tail.slice(-2000) + '\n```';
+            }).join('\n\n');
+            const alertBody = [
+              `# managed_spawn healthcheck FAILED for ${agentId}`,
+              '',
+              `${failed.length} of ${items.length} spec(s) failed their first healthcheck within \`timeout_s\`. The failing PIDs were killed and their state entries removed; surviving siblings (${managedSpawnHealthcheckFailure.survivedNames.join(', ') || 'none'}) stay alive.`,
+              '',
+              wiId ? `Work item: ${wiId}` : '',
+              `Agent: ${agentId}`,
+              `Dispatch: ${id}`,
+              '',
+              '## Failure detail + log tails',
+              '',
+              logTails,
+              '',
+            ].filter(Boolean).join('\n');
+            writeInboxAlert(`managed-spawn-healthcheck-${agentId}`, alertBody);
+          } catch (alertErr) {
+            log('warn', `managed-spawn healthcheck: failed to emit inbox alert for ${agentId}: ${alertErr.message}`);
+          }
+        } else {
+          log('info', `managed-spawn healthcheck: ${items.length} spec(s) healthy for ${agentId} (${id})`);
+        }
+      } catch (e) {
+        log('warn', `managed-spawn healthcheck check failed for ${agentId} (${id}): ${e.message}`);
+      }
+    }
     // Move from active to completed in dispatch (single source of truth for agent status)
     // autoRecovered: agent failed after creating PRs — treat as success
     const hardContractFail = completionContractFailure?.severity === 'hard'
@@ -2239,7 +2446,15 @@ async function spawnAgent(dispatchItem, config) {
     // not silently treated as success even when exit code is 0. Both
     // workdir and schema rejections route here; the failure_class differs.
     const keepProcessesAcceptanceFail = !!keepProcessesAcceptanceFailure;
-    const effectiveResult = (hardContractFail || nonceFail || keepProcessesAcceptanceFail)
+    // P-2d5e8f04 — managed-spawn acceptance failure is also a hard failure
+    // (same reasoning). Maps to FAILURE_CLASS.INVALID_MANAGED_SPAWN.
+    const managedSpawnAcceptanceFail = !!managedSpawnAcceptanceFailure;
+    // P-9c1f47a6 — managed-spawn healthcheck failure is also a hard failure:
+    // the agent claims the service is set up but it never became healthy
+    // within the declared timeout. Maps to
+    // FAILURE_CLASS.MANAGED_SPAWN_HEALTHCHECK_FAILED.
+    const managedSpawnHealthcheckFail = !!managedSpawnHealthcheckFailure;
+    const effectiveResult = (hardContractFail || nonceFail || keepProcessesAcceptanceFail || managedSpawnAcceptanceFail || managedSpawnHealthcheckFail)
       ? DISPATCH_RESULT.ERROR
       : (((code === 0 && !agentReportedFailure) || autoRecovered) ? DISPATCH_RESULT.SUCCESS : DISPATCH_RESULT.ERROR);
     const finalCompletionReportPath = structuredCompletion?._path || dispatchItem.meta?.completionReportPath || shared.dispatchCompletionReportPath(id);
@@ -2252,21 +2467,30 @@ async function spawnAgent(dispatchItem, config) {
           ? FAILURE_CLASS.INVALID_KEEP_PROCESSES_WORKDIR
           : FAILURE_CLASS.INVALID_KEEP_PROCESSES_SCHEMA)
       : null;
-    const completeOpts = keepProcessesAcceptanceFail
-      ? { ...completionOpts, failureClass: _kpFailureClass, agentRetryable: false }
-      : (nonceFail
-          ? { ...completionOpts, failureClass: nonceMismatch.failureClass, agentRetryable: false }
-          : (hardContractFail
-              ? { ...completionOpts, processWorkItemFailure: false }
-              : (effectiveResult === DISPATCH_RESULT.ERROR ? {
-                  ...completionOpts,
-                  ...(failureClass ? { failureClass } : {}),
-                  ...(typeof retryableDecision === 'boolean' ? { agentRetryable: retryableDecision } : {}),
-                  ...(structuredCompletion?.failure_class ? { failureClass: structuredCompletion.failure_class } : {}),
-                } : completionOpts)));
+    const completeOpts = managedSpawnHealthcheckFail
+      ? { ...completionOpts, failureClass: FAILURE_CLASS.MANAGED_SPAWN_HEALTHCHECK_FAILED, agentRetryable: false }
+      : (managedSpawnAcceptanceFail
+        ? { ...completionOpts, failureClass: FAILURE_CLASS.INVALID_MANAGED_SPAWN, agentRetryable: false }
+        : (keepProcessesAcceptanceFail
+          ? { ...completionOpts, failureClass: _kpFailureClass, agentRetryable: false }
+          : (nonceFail
+              ? { ...completionOpts, failureClass: nonceMismatch.failureClass, agentRetryable: false }
+              : (hardContractFail
+                  ? { ...completionOpts, processWorkItemFailure: false }
+                  : (effectiveResult === DISPATCH_RESULT.ERROR ? {
+                      ...completionOpts,
+                      ...(failureClass ? { failureClass } : {}),
+                      ...(typeof retryableDecision === 'boolean' ? { agentRetryable: retryableDecision } : {}),
+                      ...(structuredCompletion?.failure_class ? { failureClass: structuredCompletion.failure_class } : {}),
+                    } : completionOpts)))));
     // Extract last 5 non-empty stderr lines as error context when exit code is non-zero
     let errorReason = '';
-    if (keepProcessesAcceptanceFail) {
+    if (managedSpawnHealthcheckFail) {
+      const failNames = managedSpawnHealthcheckFailure.failed.map(f => f.name).join(',');
+      errorReason = `managed_spawn_healthcheck_failed: ${failNames} (${managedSpawnHealthcheckFailure.failed.length}/${managedSpawnSpawned.length})`.slice(0, 300);
+    } else if (managedSpawnAcceptanceFail) {
+      errorReason = `invalid_managed_spawn: ${managedSpawnAcceptanceFailure.reason}`.slice(0, 300);
+    } else if (keepProcessesAcceptanceFail) {
       if (keepProcessesAcceptanceFailure.isWorkdirRejection) {
         errorReason = `invalid_keep_processes_workdir: ${keepProcessesAcceptanceFailure.reason} (cwd=${keepProcessesAcceptanceFailure.cwd || '<unknown>'})`.slice(0, 300);
       } else {
@@ -4015,6 +4239,14 @@ function renderProjectWorkItemPromptForAgent(item, workType, agentId, config, pr
     keep_processes_ttl_minutes: item.meta && Number.isFinite(Number(item.meta.keep_processes_ttl_minutes))
       ? Math.floor(Number(item.meta.keep_processes_ttl_minutes))
       : '',
+    // P-1f9c3a45 — opt-in managed_spawn hint plumbed via item.meta. Same
+    // default-off shape as keep_processes; truthy fires the agent-side
+    // sidecar instructions in renderPlaybook. Live-processes auto-inject is
+    // project-scoped and unconditional (not gated on this flag).
+    managed_spawn: !!(item.meta && item.meta.managed_spawn),
+    managed_spawn_ttl_minutes: item.meta && Number.isFinite(Number(item.meta.managed_spawn_ttl_minutes))
+      ? Math.floor(Number(item.meta.managed_spawn_ttl_minutes))
+      : '',
   };
   const cpResult = buildWorkItemDispatchVars(item, vars, config, {
     worktreePath: vars.worktree_path || root,
@@ -5425,6 +5657,24 @@ async function tickInner() {
     if (_isTickStale(myGeneration)) return;
   }
+  // 2.53. managed-spawn TTL/dead-PID sweep + log rotation (P-8a4d6f29). Walks
+  // engine/managed-processes.json, kills TTL-expired specs, drops dead-PID
+  // rows, rotates managed-logs/<name>.log past ENGINE_DEFAULTS.managedSpawn
+  // .logRotateBytes. Mirrors the keep-processes sweep cadence (sweepEvery=30)
+  // so the engine never iterates per-spec on every tick. Healthcheck loops
+  // remain per-spec / self-scheduled and are NOT driven from here.
+  const managedSweepEvery = Math.max(1, ENGINE_DEFAULTS.managedSpawn?.sweepEvery || 30);
+  if (ENGINE_DEFAULTS.managedSpawn?.enabled !== false && tickCount % managedSweepEvery === 0) {
+    safe('sweepManagedSpawn', () => {
+      const { sweepManagedSpawn } = require('./engine/managed-spawn');
+      const stats = sweepManagedSpawn();
+      if (stats.scanned > 0 && (stats.ttlExpired || stats.deadDropped || stats.rotatedLogs || stats.malformed)) {
+        log('info', `managed-spawn sweep: scanned=${stats.scanned} ttl=${stats.ttlExpired} dead=${stats.deadDropped} killed=${stats.killedPids} rotated=${stats.rotatedLogs} malformed=${stats.malformed}`);
+      }
+    });
+    if (_isTickStale(myGeneration)) return;
+  }
   // 2.55. Check persistent watches (3 tick-equivalents, default ~3 minutes)
   const watchPollIntervalMs = _pollIntervalMsFromTicks(3, tickIntervalMs);
   if (_shouldRunPeriodicPhase(now, lastWatchCheckAt, watchPollIntervalMs)) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yemi33/minions",
-  "version": "0.1.1965",
+  "version": "0.1.1967",
   "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
   "bin": {
     "minions": "bin/minions.js"
@@ -20,6 +20,7 @@
     "test:e2e:report": "npx playwright show-report test/playwright/report",
     "test:e2e:video": "npx playwright test --video=on --headed",
     "test:all": "node test/run-parallel.js && node test/minions-tests.js && node test/integration/run.js",
+    "test:perf": "node test/perf/managed-spawn-load.test.js",
     "test:e2e:accept": "node test/playwright/accept-baseline.js",
     "test:e2e:accept-force": "node test/playwright/accept-baseline.js --force",
     "test:setup": "npx playwright install chromium"