@yemi33/minions 0.1.1860 → 0.1.1862
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/engine/lifecycle.js +29 -3
- package/engine/spawn-agent.js +34 -4
- package/package.json +1 -1
- package/playbooks/shared-rules.md +31 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.1862 (2026-05-11)
|
|
4
|
+
|
|
5
|
+
### Fixes
|
|
6
|
+
- use 'exit' event for OS exit code + dedup sentinel; warn agents off .cmd shims
|
|
7
|
+
|
|
8
|
+
## 0.1.1861 (2026-05-10)
|
|
9
|
+
|
|
10
|
+
### Fixes
|
|
11
|
+
- surface phantom-completion as the failReason instead of "no PR detected"
|
|
12
|
+
|
|
3
13
|
## 0.1.1860 (2026-05-10)
|
|
4
14
|
|
|
5
15
|
### Fixes
|
package/engine/lifecycle.js
CHANGED
|
@@ -1098,6 +1098,21 @@ function _outputContainsPrUrl(output) {
|
|
|
1098
1098
|
return prUrlPattern.test(output);
|
|
1099
1099
|
}
|
|
1100
1100
|
|
|
1101
|
+
// Detects the phantom-completion signature in the raw runtime output: the
// runtime's terminating `{"type":"result"}` event never landed. When this
// returns false and the PR-attachment contract is about to hard-fail for "no PR
// detected," it's far more accurate to say "runtime crashed before emitting a
// result event" — the agent didn't fail silently, the runtime CLI did. Used by
// enforcePrAttachmentContract to pick a truthful failReason.
//
// Parameters:
//   output — raw captured runtime output; expected to be a JSONL string. Any
//            non-string or empty value yields false.
// Returns:
//   true when a terminating result event is present, false otherwise.
function _outputHasRuntimeResultEvent(output) {
  if (!output || typeof output !== 'string') return false;

  // JSON permits whitespace on either side of the colon, so accept both
  // `"type":"result"` and `"type" : "result"`.
  const resultTypePattern = /"type"\s*:\s*"result"/;

  // Cheap pre-filter: without the substring there is nothing to inspect and we
  // avoid splitting/parsing large captures.
  if (!resultTypePattern.test(output)) return false;

  for (const rawLine of output.split(/\r?\n/)) {
    if (!resultTypePattern.test(rawLine)) continue;

    let parsed;
    try {
      parsed = JSON.parse(rawLine.trim());
    } catch (_parseErr) {
      // Not a clean JSON line (log prefix, truncated write, concatenated
      // chunks). Keep the substring verdict here so this check never becomes
      // stricter than a plain match on output we cannot parse.
      return true;
    }

    // Only a TOP-LEVEL event of type "result" is the conversation terminator.
    // A nested `{"type":"result"}` inside some other event's payload must not
    // count, otherwise a crashed run would be reported as a completed one.
    // A single-line JSON array of events is treated as a list of top-level events.
    const events = Array.isArray(parsed) ? parsed : [parsed];
    const hasTerminator = events.some(
      (event) => event !== null && typeof event === 'object' && event.type === 'result'
    );
    if (hasTerminator) return true;
  }

  return false;
}
|
|
1115
|
+
|
|
1101
1116
|
function markMissingPrAttachment(meta, agentId, reason, resultSummary, severity) {
|
|
1102
1117
|
const noPrWiPath = resolveWorkItemPath(meta);
|
|
1103
1118
|
const isHard = severity !== 'soft';
|
|
@@ -1239,9 +1254,20 @@ async function enforcePrAttachmentContract(type, meta, agentId, config, resultSu
|
|
|
1239
1254
|
// was designed to catch) from "agent claimed a PR but engine couldn't attach
|
|
1240
1255
|
// it canonically" (soft — verification gap, not a failure).
|
|
1241
1256
|
const severity = _outputContainsPrUrl(output) ? 'soft' : 'hard';
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1257
|
+
// Hard-fail messaging: if the runtime never emitted its terminating result
|
|
1258
|
+
// event, the failure is a phantom completion (runtime CLI crashed), not the
|
|
1259
|
+
// agent silently skipping work. Surface that truthfully so operators don't
|
|
1260
|
+
// chase "the agent didn't open a PR" when the real cause is "the runtime
|
|
1261
|
+
// process died mid-conversation." Soft cases keep the original wording —
|
|
1262
|
+
// they imply the runtime DID finish and a URL is in the stream.
|
|
1263
|
+
let reason;
|
|
1264
|
+
if (severity === 'hard') {
|
|
1265
|
+
reason = _outputHasRuntimeResultEvent(output)
|
|
1266
|
+
? `${meta.item.id} completed but no PR URL was detected in the agent's output. Expected a PR — verify the agent didn't fail silently. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`
|
|
1267
|
+
: `${meta.item.id} runtime exited without emitting a terminating result event — likely a phantom completion (the runtime CLI crashed mid-conversation, not a true agent failure). No PR was created. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`;
|
|
1268
|
+
} else {
|
|
1269
|
+
reason = `${meta.item.id} completed and a PR URL was found in the agent's output, but it couldn't be canonically attached. The work likely succeeded — verify by checking the PR list. (Branch: ${meta.branch || '(none)'}, agent: ${agentId})`;
|
|
1270
|
+
}
|
|
1245
1271
|
markMissingPrAttachment(meta, agentId, reason, resultSummary, severity);
|
|
1246
1272
|
log(severity === 'hard' ? 'warn' : 'info', reason);
|
|
1247
1273
|
return { reason, itemId: meta.item.id, severity };
|
package/engine/spawn-agent.js
CHANGED
|
@@ -409,11 +409,38 @@ function main() {
|
|
|
409
409
|
}, MCP_STARTUP_TIMEOUT);
|
|
410
410
|
proc.stdout.once('data', () => { gotFirstOutput = true; clearTimeout(startupTimer); });
|
|
411
411
|
|
|
412
|
+
// Track the real OS exit code via the 'exit' event. Node's 'close' event
|
|
413
|
+
// can report code=0 on Windows when the OS-level exit was non-zero
|
|
414
|
+
// (observed empirically with both Claude Code CLI and Copilot CLI exiting
|
|
415
|
+
// with OS exit code 1 silently during long PowerShell tool calls — procdump
|
|
416
|
+
// captured the 1, but the engine's onAgentClose saw the spawn-agent's parent
|
|
417
|
+
// pipe report code=0). The 'exit' event fires earlier and carries the OS code
|
|
418
|
+
// more reliably; 'close' waits for stdio teardown which can race.
|
|
419
|
+
let realExitFromEvent = null;
|
|
420
|
+
let realSignalFromEvent = null;
|
|
421
|
+
let sentinelWritten = false;
|
|
422
|
+
proc.on('exit', (code, signal) => {
|
|
423
|
+
if (Number.isInteger(code)) realExitFromEvent = code;
|
|
424
|
+
if (signal) realSignalFromEvent = signal;
|
|
425
|
+
});
|
|
412
426
|
proc.on('close', (code, signal) => {
|
|
413
427
|
clearTimeout(startupTimer);
|
|
414
|
-
|
|
415
|
-
const
|
|
416
|
-
|
|
428
|
+
// Prefer the 'exit' event's code/signal when present — see note above.
|
|
429
|
+
const effectiveCode = (realExitFromEvent != null) ? realExitFromEvent : code;
|
|
430
|
+
const effectiveSignal = realSignalFromEvent || signal;
|
|
431
|
+
const exitCode = normalizeRuntimeExit(effectiveCode, effectiveSignal);
|
|
432
|
+
if (sentinelWritten) {
|
|
433
|
+
// Defense-in-depth: never write a duplicate sentinel. We observed pairs
|
|
434
|
+
// of [process-exit] code=0 lines in live-output.log across many failed
|
|
435
|
+
// runs, which suggests close has fired twice in some edge cases (e.g.,
|
|
436
|
+
// shim re-launch on Windows). One sentinel per spawn is the contract.
|
|
437
|
+
fs.appendFileSync(debugPath, `EXIT (duplicate close, skipping sentinel): code=${exitCode}${effectiveSignal ? ` signal=${effectiveSignal}` : ''}\n`);
|
|
438
|
+
process.exit(exitCode);
|
|
439
|
+
return;
|
|
440
|
+
}
|
|
441
|
+
sentinelWritten = true;
|
|
442
|
+
const sentinelResult = writeProcessExitSentinel({ exitCode, signal: effectiveSignal });
|
|
443
|
+
fs.appendFileSync(debugPath, `EXIT: code=${exitCode}${effectiveSignal ? ` signal=${effectiveSignal}` : ''} (close=${code} exit=${realExitFromEvent})\nSTDERR: ${stderrBuf.slice(0, 500)}\n`);
|
|
417
444
|
if (!sentinelResult.fileWritten) {
|
|
418
445
|
fs.appendFileSync(debugPath, `EXIT SENTINEL: file write failed for ${process.env.MINIONS_LIVE_OUTPUT_PATH}\n`);
|
|
419
446
|
}
|
|
@@ -421,7 +448,10 @@ function main() {
|
|
|
421
448
|
});
|
|
422
449
|
proc.on('error', (err) => {
|
|
423
450
|
fs.appendFileSync(debugPath, `ERROR: ${err.message}\n`);
|
|
424
|
-
|
|
451
|
+
if (!sentinelWritten) {
|
|
452
|
+
sentinelWritten = true;
|
|
453
|
+
writeProcessExitSentinel({ exitCode: 1 });
|
|
454
|
+
}
|
|
425
455
|
process.exit(1);
|
|
426
456
|
});
|
|
427
457
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.1860",
|
|
3
|
+
"version": "0.1.1862",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|
package/playbooks/shared-rules.md
CHANGED
|
@@ -74,6 +74,37 @@ Use `status: "failed"` plus an accurate `failure_class`, `retryable`, and `needs
|
|
|
74
74
|
|
|
75
75
|
**No-op completions:** when you correctly decline to do the work — the change was already shipped on master, the dispatch premise is wrong, the flagged review comment is your own author-notes, etc. — write `status: "success"`, `pr: "N/A"`, AND add `"noop": true`. The engine treats `noop: true` as the canonical signal that no PR was expected, marks the work item done with the rationale surfaced in `_noopReason` for the dashboard, and skips the missing-PR-attachment failure. Without `noop: true`, an empty PR will still be flagged as a silent failure and auto-retried up to `maxRetries` times.
|
|
76
76
|
|
|
77
|
+
## Test Invocation — direct binary, not `.cmd` shims
|
|
78
|
+
|
|
79
|
+
When running tests via the shell/PowerShell tool, invoke the underlying
|
|
80
|
+
runner binary **directly** instead of going through a package-manager
|
|
81
|
+
wrapper. The runtime CLIs (Copilot CLI, Claude Code CLI) deterministically
|
|
82
|
+
crash on Windows when invoking `.cmd` shims (`npm`, `yarn`, `pnpm`, `npx`,
|
|
83
|
+
`mocha`, `jest`, `vitest`) for substantial test commands inside a real
|
|
84
|
+
agent-prompt session (confirmed 2026-05-11 by controlled reproduction).
|
|
85
|
+
The crash signature: silent exit with OS code 1, no result event, no error
|
|
86
|
+
message — the engine surfaces it as "phantom completion."
|
|
87
|
+
|
|
88
|
+
For test verification, prefer:
|
|
89
|
+
|
|
90
|
+
- ❌ `npm test`, `yarn test`, `pnpm test`, `npx vitest`, `npx jest`, `npx mocha`
|
|
91
|
+
- ✅ `node test/<file>.test.js` (Node projects with custom runners)
|
|
92
|
+
- ✅ `node node_modules/<runner>/bin/<runner>.js <args>` (resolve the runner manually)
|
|
93
|
+
- ✅ `python -m pytest tests/test_foo.py` (Python — `.exe`, not `.cmd`)
|
|
94
|
+
- ✅ `cargo test --test foo` (Rust)
|
|
95
|
+
- ✅ `dotnet test path/to/Project.Tests.csproj` (.NET)
|
|
96
|
+
- ✅ `go test ./...` (Go)
|
|
97
|
+
|
|
98
|
+
To map a package-manager wrapper to its direct invocation, check the
|
|
99
|
+
project's `package.json` `scripts` section (Node), `pyproject.toml` (Python),
|
|
100
|
+
`Cargo.toml` (Rust), or the project's CLAUDE.md / README. Direct invocations
|
|
101
|
+
are also significantly faster (no shim overhead).
|
|
102
|
+
|
|
103
|
+
If the project's test setup *requires* the wrapper (e.g., it sets env vars
|
|
104
|
+
or installs dependencies in a hook), report this in your completion block
|
|
105
|
+
and use targeted node invocations for verification anyway — the wrapper
|
|
106
|
+
crashes the agent before the test results land.
|
|
107
|
+
|
|
77
108
|
## Long-Running Commands
|
|
78
109
|
|
|
79
110
|
Builds, dependency installs, tests, and local servers can be quiet for long periods. Run the repo's normal CLI commands and let them finish; do not add artificial progress output, heartbeat loops, or command-specific workarounds just to keep Minions active.
|