npm - @yemi33/minions - Versions diffs - 0.1.1860 → 0.1.1862 - Mend

@yemi33/minions 0.1.1860 → 0.1.1862

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/CHANGELOG.md +10 -0
package/engine/lifecycle.js +29 -3
package/engine/spawn-agent.js +34 -4
package/package.json +1 -1
package/playbooks/shared-rules.md +31 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,15 @@
 # Changelog
+## 0.1.1862 (2026-05-11)
+### Fixes
+- use 'exit' event for OS exit code + dedup sentinel; warn agents off .cmd shims
+## 0.1.1861 (2026-05-10)
+### Fixes
+- surface phantom-completion as the failReason instead of "no PR detected"
 ## 0.1.1860 (2026-05-10)
 ### Fixes

package/engine/lifecycle.js CHANGED Viewed

@@ -1098,6 +1098,21 @@ function _outputContainsPrUrl(output) {
   return prUrlPattern.test(output);
 }
+// Reports whether the runtime's terminating `{"type":"result"}` event landed
+// in the raw runtime output. Returns true when `output` is a non-empty string
+// containing `"type":"result"` (whitespace allowed after the colon) and false
+// otherwise, including for empty or non-string input. A false return is the
+// phantom-completion signature: the runtime CLI died before finishing. Used by
+// enforcePrAttachmentContract to pick a truthful failReason on a hard failure.
+function _outputHasRuntimeResultEvent(output) {
+  if (!output || typeof output !== 'string') return false;
+  // Both Claude and Copilot emit a top-level `{"type":"result"…}` JSONL line as
+  // the conversation terminator. A plain regex scan is enough: inside a JSON
+  // string value the quotes are escaped (`\"type\":\"result\"`), so quoted text
+  // cannot match. NOTE(review): a nested object of that type would — confirm.
+  return /"type":\s*"result"/.test(output);
+}
 function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity) {
   const noPrWiPath = resolveWorkItemPath(meta);
   const isHard = severity !== 'soft';
@@ -1239,9 +1254,20 @@ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSu
   // was designed to catch) from "agent claimed a PR but engine couldn't attach
   // it canonically" (soft — verification gap, not a failure).
   const severity = _outputContainsPrUrl(output) ? 'soft' : 'hard';
-  const reason = severity === 'hard'
-    ? `${meta.item.id} completed but no PR URL was detected in the agent's output. Expected a PR — verify the agent didn't fail silently. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`
-    : `${meta.item.id} completed and a PR URL was found in the agent's output, but it couldn't be canonically attached. The work likely succeeded — verify by checking the PR list. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`;
+  // Hard-fail messaging: if the runtime never emitted its terminating result
+  // event, the failure is a phantom completion (runtime CLI crashed), not the
+  // agent silently skipping work. Surface that truthfully so operators don't
+  // chase "the agent didn't open a PR" when the real cause is "the runtime
+  // process died mid-conversation." Soft cases keep the original wording —
+  // they imply the runtime DID finish and a URL is in the stream.
+  let reason;
+  if (severity === 'hard') {
+    reason = _outputHasRuntimeResultEvent(output)
+      ? `${meta.item.id} completed but no PR URL was detected in the agent's output. Expected a PR — verify the agent didn't fail silently. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`
+      : `${meta.item.id} runtime exited without emitting a terminating result event — likely a phantom completion (the runtime CLI crashed mid-conversation, not a true agent failure). No PR was created. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`;
+  } else {
+    reason = `${meta.item.id} completed and a PR URL was found in the agent's output, but it couldn't be canonically attached. The work likely succeeded — verify by checking the PR list. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`;
+  }
   markMissingPrAttachment(meta, agentId, reason, resultSummary, severity);
   log(severity === 'hard' ? 'warn' : 'info', reason);
   return { reason, itemId: meta.item.id, severity };

package/engine/spawn-agent.js CHANGED Viewed

@@ -409,11 +409,38 @@ function main() {
   }, MCP_STARTUP_TIMEOUT);
   proc.stdout.once('data', () => { gotFirstOutput = true; clearTimeout(startupTimer); });
+  // Track the real OS exit code via the 'exit' event. Node's 'close' event
+  // can report code=0 on Windows when the OS-level exit was non-zero
+  // (observed empirically with both Claude Code CLI and Copilot CLI exiting
+  // with OS exit code 1 silently during long PowerShell tool calls — procdump
+  // captured the 1, but the engine's onAgentClose saw the spawn-agent's parent
+  // pipe report code=0). The 'exit' event fires earlier and carries the OS code
+  // more reliably; 'close' waits for stdio teardown which can race.
+  let realExitFromEvent = null;
+  let realSignalFromEvent = null;
+  let sentinelWritten = false;
+  proc.on('exit', (code, signal) => {
+    if (Number.isInteger(code)) realExitFromEvent = code;
+    if (signal) realSignalFromEvent = signal;
+  });
   proc.on('close', (code, signal) => {
     clearTimeout(startupTimer);
-    const exitCode = normalizeRuntimeExit(code, signal);
-    const sentinelResult = writeProcessExitSentinel({ exitCode, signal });
-    fs.appendFileSync(debugPath, `EXIT: code=${exitCode}${signal ? ` signal=${signal}` : ''}\nSTDERR: ${stderrBuf.slice(0, 500)}\n`);
+    // Prefer the 'exit' event's code/signal when present — see note above.
+    const effectiveCode = (realExitFromEvent != null) ? realExitFromEvent : code;
+    const effectiveSignal = realSignalFromEvent || signal;
+    const exitCode = normalizeRuntimeExit(effectiveCode, effectiveSignal);
+    if (sentinelWritten) {
+      // Defense-in-depth: never write a duplicate sentinel. We observed pairs
+      // of [process-exit] code=0 lines in live-output.log across many failed
+      // runs, which suggests close has fired twice in some edge cases (e.g.,
+      // shim re-launch on Windows). One sentinel per spawn is the contract.
+      fs.appendFileSync(debugPath, `EXIT (duplicate close, skipping sentinel): code=${exitCode}${effectiveSignal ? ` signal=${effectiveSignal}` : ''}\n`);
+      process.exit(exitCode);
+      return;
+    }
+    sentinelWritten = true;
+    const sentinelResult = writeProcessExitSentinel({ exitCode, signal: effectiveSignal });
+    fs.appendFileSync(debugPath, `EXIT: code=${exitCode}${effectiveSignal ? ` signal=${effectiveSignal}` : ''} (close=${code} exit=${realExitFromEvent})\nSTDERR: ${stderrBuf.slice(0, 500)}\n`);
     if (!sentinelResult.fileWritten) {
       fs.appendFileSync(debugPath, `EXIT SENTINEL: file write failed for ${process.env.MINIONS_LIVE_OUTPUT_PATH}\n`);
     }
@@ -421,7 +448,10 @@ function main() {
   });
   proc.on('error', (err) => {
     fs.appendFileSync(debugPath, `ERROR: ${err.message}\n`);
-    writeProcessExitSentinel({ exitCode: 1 });
+    if (!sentinelWritten) {
+      sentinelWritten = true;
+      writeProcessExitSentinel({ exitCode: 1 });
+    }
     process.exit(1);
   });
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yemi33/minions",
-  "version": "0.1.1860",
+  "version": "0.1.1862",
   "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
   "bin": {
     "minions": "bin/minions.js"

package/playbooks/shared-rules.md CHANGED Viewed

@@ -74,6 +74,37 @@ Use `status: "failed"` plus an accurate `failure_class`, `retryable`, and `needs
 **No-op completions:** when you correctly decline to do the work — the change was already shipped on master, the dispatch premise is wrong, the flagged review comment is your own author-notes, etc. — write `status: "success"`, `pr: "N/A"`, AND add `"noop": true`. The engine treats `noop: true` as the canonical signal that no PR was expected, marks the work item done with the rationale surfaced in `_noopReason` for the dashboard, and skips the missing-PR-attachment failure. Without `noop: true`, an empty PR will still be flagged as a silent failure and auto-retried up to `maxRetries` times.
+## Test Invocation — direct binary, not `.cmd` shims
+When running tests via the shell/PowerShell tool, invoke the underlying
+runner binary **directly** instead of going through a package-manager
+wrapper. The runtime CLIs (Copilot CLI, Claude Code CLI) deterministically
+crash on Windows when invoking `.cmd` shims (`npm`, `yarn`, `pnpm`, `npx`,
+`mocha`, `jest`, `vitest`) for substantial test commands inside a real
+agent-prompt session (confirmed 2026-05-11 by controlled reproduction).
+The crash signature: silent exit with OS code 1, no result event, no error
+message — the engine surfaces it as "phantom completion."
+For test verification, prefer:
+- ❌ `npm test`, `yarn test`, `pnpm test`, `npx vitest`, `npx jest`, `npx mocha`
+- ✅ `node test/<file>.test.js` (Node projects with custom runners)
+- ✅ `node node_modules/<runner>/bin/<runner>.js <args>` (resolve the runner manually)
+- ✅ `python -m pytest tests/test_foo.py` (Python — `.exe`, not `.cmd`)
+- ✅ `cargo test --test foo` (Rust)
+- ✅ `dotnet test path/to/Project.Tests.csproj` (.NET)
+- ✅ `go test ./...` (Go)
+To map a package-manager wrapper to its direct invocation, check the
+project's `package.json` `scripts` section (Node), `pyproject.toml` (Python),
+`Cargo.toml` (Rust), or the project's CLAUDE.md / README. Direct invocations
+are also significantly faster (no shim overhead).
+If the project's test setup *requires* the wrapper (e.g., it sets env vars
+or installs dependencies in a hook), report this in your completion block
+and use targeted node invocations for verification anyway — the wrapper
+crashes the agent before the test results land.
 ## Long-Running Commands
 Builds, dependency installs, tests, and local servers can be quiet for long periods. Run the repo's normal CLI commands and let them finish; do not add artificial progress output, heartbeat loops, or command-specific workarounds just to keep Minions active.