npm - @yemi33/minions - Versions diffs - 0.1.1632 → 0.1.1634 - Mend

@yemi33/minions 0.1.1632 → 0.1.1634

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/CHANGELOG.md +17 -2
package/dashboard.js +46 -0
package/engine/copilot-models.json +1 -1
package/engine/dispatch.js +102 -73
package/engine/lifecycle.js +12 -0
package/engine/playbook.js +2 -1
package/engine.js +2 -0
package/package.json +1 -1
package/playbooks/docs.md +113 -0
package/playbooks/fix.md +17 -19
package/playbooks/implement-shared.md +17 -16
package/playbooks/implement.md +19 -14
package/playbooks/shared-rules.md +11 -0
package/playbooks/work-item.md +26 -23
package/prompts/cc-system.md +2 -0
package/routing.md +1 -0

package/CHANGELOG.md CHANGED Viewed

@@ -1,9 +1,24 @@
 # Changelog
-## 0.1.1632 (2026-04-30)
+## 0.1.1634 (2026-04-30)
+### Features
+- build-and-test CC action + docs playbook
+## 0.1.1633 (2026-04-30)
+### Features
+- prevent completed stderr redispatch
+### Fixes
+- yemi33/minions#1890
+### Other
+- docs: relax minions playbook contracts
+## 0.1.1631 (2026-04-30)
 ### Features
-- guard malformed PR context
 - clear stale pending reason on retry
 ## 0.1.1629 (2026-04-29)

package/dashboard.js CHANGED Viewed

@@ -25,6 +25,9 @@ const ado = require('./engine/ado');
 const gh = require('./engine/github');
 const issues = require('./engine/issues');
 const watchesMod = require('./engine/watches');
+const routing = require('./engine/routing');
+const playbook = require('./engine/playbook');
+const dispatchMod = require('./engine/dispatch');
 const os = require('os');
 const { safeRead, safeReadDir, safeWrite, safeJson, safeJsonObj, safeJsonArr, safeUnlink, mutateJsonFileLocked, mutateWorkItems, getProjects: _getProjects, DONE_STATUSES, WI_STATUS, reopenWorkItem } = shared;
@@ -1238,6 +1241,49 @@ async function executeCCActions(actions) {
           results.push({ type: action.type, id, ok: true });
           break;
         }
+        case 'build-and-test': {
+          // Resolve PR by number, ID, or URL — same lookup that drives the link-pr / PR-row paths.
+          const allPrs = getPullRequests().filter(p => !p._ghost);
+          const pr = shared.findPrRecord(allPrs, action.pr) || null;
+          if (!pr) {
+            results.push({ type: 'build-and-test', error: `PR not found: ${action.pr}` });
+            break;
+          }
+          // Resolve project: explicit param wins, else the PR's _project; if neither names a configured project, report an error (no default-project fallback).
+          const projectName = action.project || pr._project || null;
+          const project = projectName
+            ? PROJECTS.find(p => p.name?.toLowerCase() === String(projectName).toLowerCase())
+            : null;
+          if (!project) {
+            results.push({ type: 'build-and-test', error: `Project not found for PR ${pr.id}: ${projectName || '(none)'}` });
+            break;
+          }
+          // Pick agent: explicit param wins; else routing for 'test' work type.
+          let agentId = action.agent && CONFIG.agents?.[action.agent] ? action.agent : null;
+          if (!agentId) {
+            agentId = routing.resolveAgent('test', CONFIG, { authorAgent: pr.agent });
+          }
+          if (!agentId) {
+            results.push({ type: 'build-and-test', error: 'No available agent for test routing' });
+            break;
+          }
+          const prNumber = shared.getPrNumber(pr);
+          const dispatchKey = `cc-bt-${project.name}-${pr.id}`;
+          const item = playbook.buildPrDispatch(agentId, CONFIG, project, pr, 'test', {
+            pr_id: pr.id, pr_number: prNumber, pr_title: pr.title || '', pr_branch: pr.branch || '',
+            pr_author: pr.agent || '', pr_url: pr.url || '',
+            project_path: project.localPath || '',
+            task: `Build & test ${pr.id}: ${pr.title || ''}`,
+          }, `Build & test ${pr.id}: ${pr.title || ''}`,
+          { dispatchKey, source: 'cc-build-and-test', pr, branch: pr.branch, project: { name: project.name, localPath: project.localPath } });
+          if (!item) {
+            results.push({ type: 'build-and-test', error: 'Failed to render build-and-test playbook' });
+            break;
+          }
+          const id = dispatchMod.addToDispatch(item);
+          results.push({ type: 'build-and-test', id, agent: agentId, pr: pr.id, ok: true });
+          break;
+        }
         case 'note': {
           shared.writeToInbox('command-center', shared.slugify(action.title || 'note'), `# ${action.title || 'Note'}\n\n${action.content || action.description || ''}`);
           results.push({ type: 'note', ok: true });

package/engine/copilot-models.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
   "runtime": "copilot",
   "models": null,
-  "cachedAt": "2026-04-30T08:44:52.884Z"
+  "cachedAt": "2026-04-30T10:08:32.077Z"
 }

package/engine/dispatch.js CHANGED Viewed

@@ -184,6 +184,22 @@ function isRetryableFailureReason(reason = '', failureClass = '') {
   return !nonRetryable.some(s => r.includes(s));
 }
+function isCompletedWorkItemForFailure(item) {
+  return !!item && (
+    item.status === WI_STATUS.DONE ||
+    (!!item.completedAt && (!!item._pr || !!item._prUrl))
+  );
+}
+function readLiveWorkItem(meta) {
+  const itemId = meta?.item?.id;
+  if (!itemId) return null;
+  const wiPath = lifecycle().resolveWorkItemPath(meta);
+  if (!wiPath) return null;
+  const items = safeJson(wiPath) || [];
+  return Array.isArray(items) ? items.find(i => i.id === itemId) || null : null;
+}
 // ─── Complete Dispatch ───────────────────────────────────────────────────────
 function completeDispatch(id, result = DISPATCH_RESULT.SUCCESS, reason = '', resultSummary = '', opts = {}) {
@@ -225,83 +241,96 @@ function completeDispatch(id, result = DISPATCH_RESULT.SUCCESS, reason = '', res
     // Update source work item status on failure + auto-retry with backoff
     const retryableFailure = isRetryableFailureReason(reason, failureClass);
-    if (result === DISPATCH_RESULT.ERROR && item.meta?.dispatchKey && retryableFailure) setCooldownFailure(item.meta.dispatchKey);
+    let completedWorkItemFailure = false;
     if (processWorkItemFailure && result === DISPATCH_RESULT.ERROR && item.meta?.item?.id) {
-      let retries = (item.meta.item._retryCount || 0);
+      // If the live item cannot be resolved, keep the existing retry path.
       try {
-        const wi = queries.getWorkItems().find(i => i.id === item.meta.item.id);
-        if (wi) retries = wi._retryCount || 0;
-      } catch (e) { log('warn', 'read retry count: ' + e.message); }
-      const maxRetries = ENGINE_DEFAULTS.maxRetries;
-      // Use per-class retry limits from recovery.js when failureClass is available
-      const classAllowsRetry = failureClass ? recovery().shouldRetry(failureClass, retries) : (retries < maxRetries);
-      if (retryableFailure && classAllowsRetry) {
-        log('info', `Dispatch error for ${item.meta.item.id} — auto-retry ${retries + 1}/${maxRetries}${failureClass ? ' [' + failureClass + ']' : ''}`);
-        lifecycle().updateWorkItemStatus(item.meta, WI_STATUS.PENDING, '');
-        // Remove this dispatch key from completed so dedupe doesn't block immediate redispatch.
-        if (item.meta?.dispatchKey) {
-          try {
-            mutateDispatch((dp) => {
-              dp.completed = Array.isArray(dp.completed) ? dp.completed.filter(d => d.meta?.dispatchKey !== item.meta.dispatchKey) : [];
-              return dp;
-            });
-          } catch (e) { log('warn', 'clear dispatch for retry: ' + e.message); }
-        }
-        // Increment retry counter on the source work item
-        try {
-          const wiPath = lifecycle().resolveWorkItemPath(item.meta);
-          if (wiPath) {
-            mutateWorkItems(wiPath, items => {
-              const wi = items.find(i => i.id === item.meta.item.id);
-              if (wi && wi.status !== WI_STATUS.PAUSED && wi.status !== WI_STATUS.DONE && !wi.completedAt) {
-                wi._retryCount = retries + 1;
-                wi.status = WI_STATUS.PENDING;
-                wi._lastRetryReason = reason || '';
-                wi._lastRetryAt = ts();
-                delete wi.failReason;
-                delete wi.failedAt;
-                delete wi.dispatched_at;
-                delete wi.dispatched_to;
-                delete wi._pendingReason;
-              }
-            });
-          }
-        } catch (e) { log('warn', 'increment retry counter: ' + e.message); }
+        completedWorkItemFailure = isCompletedWorkItemForFailure(readLiveWorkItem(item.meta));
+      } catch (e) { log('warn', 'read live work item before retry: ' + e.message); }
+    }
+    if (result === DISPATCH_RESULT.ERROR && item.meta?.dispatchKey && retryableFailure && !completedWorkItemFailure) {
+      setCooldownFailure(item.meta.dispatchKey);
+    }
+    if (processWorkItemFailure && result === DISPATCH_RESULT.ERROR && item.meta?.item?.id) {
+      if (completedWorkItemFailure) {
+        log('info', `Dispatch error for ${item.meta.item.id} ignored — work item is already completed`);
       } else {
-        // Human-readable labels for each failure class — used as fallback when reason is empty
-        const CLASS_LABELS = {
-          [FAILURE_CLASS.EMPTY_OUTPUT]: 'agent produced no output \u2014 likely crashed on startup',
-          [FAILURE_CLASS.BUILD_FAILURE]: 'build/test/lint failure in output',
-          [FAILURE_CLASS.MERGE_CONFLICT]: 'merge conflict',
-          [FAILURE_CLASS.MAX_TURNS]: 'reached max turn limit',
-          [FAILURE_CLASS.TIMEOUT]: 'timed out waiting for agent',
-          [FAILURE_CLASS.SPAWN_ERROR]: 'agent process failed to start',
-          [FAILURE_CLASS.NETWORK_ERROR]: 'network or API error',
-          [FAILURE_CLASS.OUT_OF_CONTEXT]: 'context window exhausted',
-          [FAILURE_CLASS.CONFIG_ERROR]: 'configuration error',
-          [FAILURE_CLASS.PERMISSION_BLOCKED]: 'permission or auth failure',
-          [FAILURE_CLASS.UNKNOWN]: 'unknown error',
-        };
-        const classLabel = failureClass ? (CLASS_LABELS[failureClass] || failureClass) : '';
-        const effectiveReason = reason || classLabel || 'Unknown error';
-        const classSuffix = failureClass ? ` [${failureClass.toUpperCase().replace(/-/g, '_')}]` : '';
-        const finalReason = !retryableFailure
-          ? `Non-retryable failure: ${effectiveReason}${classSuffix}`
-          : (reason || `Failed after ${maxRetries} retries${classSuffix}`);
-        lifecycle().updateWorkItemStatus(item.meta, WI_STATUS.FAILED, finalReason);
-        // Surface blocked dependents in logs without creating failure inbox noise.
+        let retries = (item.meta.item._retryCount || 0);
         try {
-          const config = getConfig();
-          const failedId = item.meta.item.id;
-          const blockedItems = [];
-          const allItems = queries.getWorkItems(config);
-          allItems.filter(w => w.status === WI_STATUS.PENDING && (w.depends_on || []).includes(failedId))
-            .forEach(w => blockedItems.push(`- \`${w.id}\` — ${w.title}`));
-          log('warn', `Work item ${failedId} failed: ${finalReason}` +
-            (blockedItems.length > 0 ? `; blocked dependents: ${blockedItems.map(line => line.replace(/^- `([^`]+)`.*/, '$1')).join(', ')}` : '; no downstream items blocked'));
-        } catch (e) { log('warn', 'summarize failure dependents: ' + e.message); }
+          const wi = queries.getWorkItems().find(i => i.id === item.meta.item.id);
+          if (wi) retries = wi._retryCount || 0;
+        } catch (e) { log('warn', 'read retry count: ' + e.message); }
+        const maxRetries = ENGINE_DEFAULTS.maxRetries;
+        // Use per-class retry limits from recovery.js when failureClass is available
+        const classAllowsRetry = failureClass ? recovery().shouldRetry(failureClass, retries) : (retries < maxRetries);
+        if (retryableFailure && classAllowsRetry) {
+          log('info', `Dispatch error for ${item.meta.item.id} — auto-retry ${retries + 1}/${maxRetries}${failureClass ? ' [' + failureClass + ']' : ''}`);
+          lifecycle().updateWorkItemStatus(item.meta, WI_STATUS.PENDING, '');
+          // Remove this dispatch key from completed so dedupe doesn't block immediate redispatch.
+          if (item.meta?.dispatchKey) {
+            try {
+              mutateDispatch((dp) => {
+                dp.completed = Array.isArray(dp.completed) ? dp.completed.filter(d => d.meta?.dispatchKey !== item.meta.dispatchKey) : [];
+                return dp;
+              });
+            } catch (e) { log('warn', 'clear dispatch for retry: ' + e.message); }
+          }
+          // Increment retry counter on the source work item
+          try {
+            const wiPath = lifecycle().resolveWorkItemPath(item.meta);
+            if (wiPath) {
+              mutateWorkItems(wiPath, items => {
+                const wi = items.find(i => i.id === item.meta.item.id);
+                if (wi && wi.status !== WI_STATUS.PAUSED && wi.status !== WI_STATUS.DONE && !wi.completedAt) {
+                  wi._retryCount = retries + 1;
+                  wi.status = WI_STATUS.PENDING;
+                  wi._lastRetryReason = reason || '';
+                  wi._lastRetryAt = ts();
+                  delete wi.failReason;
+                  delete wi.failedAt;
+                  delete wi.dispatched_at;
+                  delete wi.dispatched_to;
+                  delete wi._pendingReason;
+                }
+              });
+            }
+          } catch (e) { log('warn', 'increment retry counter: ' + e.message); }
+        } else {
+          // Human-readable labels for each failure class — used as fallback when reason is empty
+          const CLASS_LABELS = {
+            [FAILURE_CLASS.EMPTY_OUTPUT]: 'agent produced no output \u2014 likely crashed on startup',
+            [FAILURE_CLASS.BUILD_FAILURE]: 'build/test/lint failure in output',
+            [FAILURE_CLASS.MERGE_CONFLICT]: 'merge conflict',
+            [FAILURE_CLASS.MAX_TURNS]: 'reached max turn limit',
+            [FAILURE_CLASS.TIMEOUT]: 'timed out waiting for agent',
+            [FAILURE_CLASS.SPAWN_ERROR]: 'agent process failed to start',
+            [FAILURE_CLASS.NETWORK_ERROR]: 'network or API error',
+            [FAILURE_CLASS.OUT_OF_CONTEXT]: 'context window exhausted',
+            [FAILURE_CLASS.CONFIG_ERROR]: 'configuration error',
+            [FAILURE_CLASS.PERMISSION_BLOCKED]: 'permission or auth failure',
+            [FAILURE_CLASS.UNKNOWN]: 'unknown error',
+          };
+          const classLabel = failureClass ? (CLASS_LABELS[failureClass] || failureClass) : '';
+          const effectiveReason = reason || classLabel || 'Unknown error';
+          const classSuffix = failureClass ? ` [${failureClass.toUpperCase().replace(/-/g, '_')}]` : '';
+          const finalReason = !retryableFailure
+            ? `Non-retryable failure: ${effectiveReason}${classSuffix}`
+            : (reason || `Failed after ${maxRetries} retries${classSuffix}`);
+          lifecycle().updateWorkItemStatus(item.meta, WI_STATUS.FAILED, finalReason);
+          // Surface blocked dependents in logs without creating failure inbox noise.
+          try {
+            const config = getConfig();
+            const failedId = item.meta.item.id;
+            const blockedItems = [];
+            const allItems = queries.getWorkItems(config);
+            allItems.filter(w => w.status === WI_STATUS.PENDING && (w.depends_on || []).includes(failedId))
+              .forEach(w => blockedItems.push(`- \`${w.id}\` — ${w.title}`));
+            log('warn', `Work item ${failedId} failed: ${finalReason}` +
+              (blockedItems.length > 0 ? `; blocked dependents: ${blockedItems.map(line => line.replace(/^- `([^`]+)`.*/, '$1')).join(', ')}` : '; no downstream items blocked'));
+          } catch (e) { log('warn', 'summarize failure dependents: ' + e.message); }
+        }
       }
     }

package/engine/lifecycle.js CHANGED Viewed

@@ -560,10 +560,18 @@ function updateWorkItemStatus(meta, status, reason) {
   const wiPath = resolveWorkItemPath(meta);
   if (!wiPath) return;
+  let completionGuarded = false;
   mutateJsonFileLocked(wiPath, (items) => {
     if (!items || !Array.isArray(items)) return items;
     const target = items.find(i => i.id === itemId);
     if (!target) return items;
+    if (status !== WI_STATUS.DONE && (
+      target.status === WI_STATUS.DONE ||
+      (!!target.completedAt && (!!target._pr || !!target._prUrl))
+    )) {
+      completionGuarded = true;
+      return items;
+    }
     if (meta.source === 'central-work-item-fanout') {
       if (!target.agentResults) target.agentResults = {};
@@ -609,6 +617,10 @@ function updateWorkItemStatus(meta, status, reason) {
     return items;
   }, { defaultValue: [], skipWriteIfUnchanged: true });
+  if (completionGuarded) {
+    log('info', `Work item ${itemId} already completed — ignoring ${status} status update`);
+    return;
+  }
   log('info', `Work item ${itemId} → ${status}${reason ? ': ' + reason : ''}`);
   syncPrdItemStatus(itemId, status, meta.item?.sourcePlan);
 }

package/engine/playbook.js CHANGED Viewed

@@ -278,6 +278,7 @@ const PLAYBOOK_REQUIRED_VARS = {
   'decompose':            ['item_id', 'item_description', 'project_path'],
   'verify':               ['task_description'],
   'test':                 ['item_name'],
+  'docs':                 ['item_id', 'item_name'],
   'work-item':            ['item_id', 'item_name'],
   'meeting-investigate':  ['meeting_title', 'agenda'],
   'meeting-debate':       ['meeting_title', 'agenda'],
@@ -630,7 +631,7 @@ function selectPlaybook(workType, item) {
   if (workType === WORK_TYPE.REVIEW && !item?._pr && !item?.pr_id) {
     return 'work-item';
   }
-  const typeSpecificPlaybooks = ['explore', 'review', 'test', 'plan-to-prd', 'plan', 'ask', 'verify', 'decompose', 'meeting-investigate', 'meeting-debate', 'meeting-conclude'];
+  const typeSpecificPlaybooks = ['explore', 'review', 'test', 'plan-to-prd', 'plan', 'ask', 'verify', 'decompose', 'docs', 'meeting-investigate', 'meeting-debate', 'meeting-conclude'];
   return typeSpecificPlaybooks.includes(workType) ? workType : 'work-item';
 }

package/engine.js CHANGED Viewed

@@ -2411,6 +2411,7 @@ function discoverFromWorkItems(config, project) {
   for (const item of items) {
     try {
+    if (isItemCompleted(item)) continue;
     // Re-evaluate failed items: if deps have recovered, reset to pending
     if (item.status === WI_STATUS.FAILED && !isItemCompleted(item) && item.failReason === 'Dependency failed — cannot proceed') {
       const depStatus = areDependenciesMet(item, config);
@@ -2908,6 +2909,7 @@ function discoverCentralWorkItems(config) {
   for (const item of items) {
     try {
+    if (isItemCompleted(item)) continue;
     if (item.status !== WI_STATUS.QUEUED && item.status !== WI_STATUS.PENDING) continue;
     const key = `central-work-${item.id}`;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yemi33/minions",
-  "version": "0.1.1632",
+  "version": "0.1.1634",
   "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
   "bin": {
     "minions": "bin/minions.js"

package/playbooks/docs.md ADDED Viewed

@@ -0,0 +1,113 @@
+# Docs Playbook
+> Agent: {{agent_name}} ({{agent_role}}) | Task: {{item_name}} | ID: {{item_id}}
+## Context
+Repo: {{repo_name}} | Org: {{ado_org}} | Project: {{ado_project}}
+Team root: {{team_root}}
+Project path: {{project_path}}
+## Mission
+Update, expand, or rewrite project documentation. Targets include READMEs, CLAUDE.md,
+files under `docs/`, JSDoc/TSDoc on exported APIs, and inline comments where they add
+real WHY value (not WHAT — the code already says what). Keep voice consistent with the
+project's existing docs.
+## Task
+**{{item_name}}**
+{{item_description}}
+{{additional_context}}
+{{references}}
+{{acceptance_criteria}}
+## Steps
+### 1. Read the doc(s) and the code they describe
+- Open the doc(s) being changed end-to-end before writing.
+- Read the source they describe — function signatures, exported symbols, config keys,
+  CLI flags, file paths. Don't trust the existing doc; trust the code.
+- For project-level docs (README, CLAUDE.md, /docs/*.md), skim adjacent docs so your
+  voice and structure match.
+### 2. Confirm doc reflects current code
+For every claim in the doc you're touching, verify it against current code:
+- Does the function still exist with that signature?
+- Are the file paths correct?
+- Are the listed flags / config keys still accepted?
+- Are removed features still being documented?
+- Are new features (visible in code) missing from the doc?
+If the doc describes vapor, delete the section. If real features are missing, add them.
+### 3. Write or update concisely
+- Match the project's existing voice — read 2-3 nearby docs to calibrate tone.
+- Prefer concrete examples over abstract description.
+- For code comments: follow the project's "Default to writing no comments" rule
+  (CLAUDE.md). Add comments only where they explain WHY a non-obvious choice was made,
+  never to restate WHAT the code does.
+- For project-level docs: the bar is "would a new contributor understand this?"
+- Keep tables, lists, and code blocks formatted consistently with surrounding docs.
+### 4. Verify
+- Re-read the changed doc end-to-end after editing — does it still flow?
+- If the project has doc-validation tests (lint, link-check, snippet-execution), run
+  them. Otherwise run `npm test` (or the project's documented test command) to make
+  sure nothing else broke.
+- For docs with embedded code samples, mentally execute each sample against current
+  code — stale samples are worse than missing ones.
+## Acceptance
+- Doc accurately reflects current code (no vapor, no missing features).
+- Voice and structure match the rest of the project's docs.
+- For inline code comments: follow project conventions; add comments only where they
+  explain WHY, never WHAT.
+- For project-level docs: a new contributor could read it and understand the topic.
+- Existing tests still pass; any doc-validation tests pass.
+## Git Workflow
+You are already running in a git worktree on branch `{{branch_name}}`. Do NOT create
+additional worktrees — the engine pre-created one for you. Do NOT remove the worktree —
+the engine handles cleanup automatically.
+Commit only the doc files (and any helper assets they reference). Do not bundle
+unrelated code changes into a docs PR.
+```bash
+git add <doc files>
+git commit -m "{{commit_message}}"
+git push -u origin {{branch_name}}
+```
+PR creation is MANDATORY for docs tasks — docs go through the same review flow as code.
+Use the appropriate repo-host tooling for PR creation. For Azure DevOps, prefer the
+`az` CLI first and use the ADO MCP only as a fallback.
+## Rules
+- Do NOT modify product code unless the task explicitly asks for it.
+- Do NOT add comments that restate what the code does.
+- Do NOT invent features that don't exist; verify against current code.
+- Read `notes.md` for all team rules before starting.
+## When to Stop
+Your task is complete once the doc accurately reflects current code, the PR is created
+with the changed doc files, and any doc-validation tests pass. Do not continue editing
+adjacent docs that weren't part of the task.
+## Team Decisions
+{{notes_content}}

package/playbooks/fix.md CHANGED Viewed

@@ -25,33 +25,31 @@ Before starting work, run `git status` and verify the worktree is clean and on t
 Use subagents only for genuinely parallel, independent tasks. For sequential work, single-file edits, searches, and file reads, work directly — do not spawn subagents.
-## How to Fix
+## Delivery Contract
-1. You are already in the correct worktree on branch `{{pr_branch}}`. Do NOT create additional worktrees.
+Handle this like the PR author responding directly from a CLI:
-2. For each issue listed above, use your judgment:
-   - **Fix it** if the feedback is valid and improves the code
-   - **Explain your rationale** if you believe the current approach is correct — reply on the review thread explaining why, with specific reasoning (e.g., performance, consistency with codebase patterns, intentional design choice). Do NOT silently ignore feedback — always respond.
+- You are already in the correct worktree on branch `{{pr_branch}}`. Do NOT create additional worktrees.
+- For each review finding, use engineering judgment:
+  - Fix it if the feedback is valid and improves correctness, safety, maintainability, or test coverage.
+  - If the current approach is intentionally correct, reply with specific rationale instead of silently changing code or ignoring the thread.
+- Handle merge conflicts when needed, preserving the PR's intended changes while keeping the branch reviewable.
+- Do not add unrelated cleanups or broaden the PR beyond the review feedback unless that is necessary to make the fix correct.
-3. Handle merge conflicts if any:
-   - If `git pull` or the PR shows conflicts, resolve them in the worktree
-   - Prefer the PR branch changes, commit the resolution
+## Validation
-## Build & Test (MANDATORY before pushing)
+Before pushing, prove the review fix did not break the branch:
-Before pushing, verify the fix doesn't break anything:
-1. **Build** the project using its build system (check CLAUDE.md, README, package.json, Makefile). If the build fails, fix it before proceeding.
-2. **Run the full test suite** using whatever command the project specifies (check CLAUDE.md, agent.md, README, or package.json scripts).
-3. If any tests fail due to your changes, fix them before pushing.
-4. If the build fails 3 times, report the errors in your PR comment and stop.
-5. Do NOT push code that breaks existing tests or the build.
+- Use the project's source of truth for commands: `CLAUDE.md`, README, package scripts, Makefile, or equivalent build config.
+- Run checks that are relevant to the addressed findings. Prefer the full suite when practical.
+- Fix regressions you introduced. If failures are pre-existing or unrelated, capture the evidence and include it in the PR comment.
+- Do not push code that breaks existing tests or the build because of your changes.
 > ⚠️ **Long builds (Gradle, MSBuild, dotnet, fresh `npm install`)**: any command that may stay silent for more than ~4 minutes will be killed by the heartbeat monitor. Run it via `Bash(run_in_background: true)` then `Monitor` to stream stdout, OR pass an explicit `timeout` (max 600000 ms). See **Long-Running Build / Test Commands** below for the full pattern.
-## Push & Comment on PR
+## Publish & Comment on PR
-Only after build and tests pass:
+After the fix is validated or any unavoidable limitation is clearly documented, commit only relevant files and push:
 ```bash
 git add <specific files>
@@ -76,7 +74,7 @@ After pushing, respond to each review comment/thread:
 ## When to Stop
-Your task is complete once you have: (1) confirmed build and tests pass, (2) pushed the fix, (3) commented on the PR, and (4) resolved addressed review threads. Do NOT continue exploring unrelated code or making additional improvements. Stop immediately.
+Your task is complete when each review finding has either been fixed or answered with rationale, the validation story is truthful and sufficient for review, the fix is pushed if code changed, the PR is commented, and addressed threads are resolved. Do NOT continue into unrelated improvements.
 **NEVER run `gh pr merge` or any merge command on this PR.** The engine handles merging after review approval. Self-merging bypasses the review cycle and is prohibited.

package/playbooks/implement-shared.md CHANGED Viewed

@@ -47,29 +47,30 @@ Before starting work, run `git status` and verify the worktree is clean and on t
 Use subagents only for genuinely parallel, independent tasks. For sequential work, single-file edits, searches, and file reads, work directly — do not spawn subagents.
-## Instructions
+## Delivery Contract
-1. Read relevant source code and reference implementations before writing anything
-2. Check what prior plan items already committed on this branch (`git log {{main_branch}}..HEAD`)
-3. Follow existing patterns exactly — check `CLAUDE.md` for conventions
-4. Build on existing work — don't duplicate or conflict with prior commits
+Deliver this as if the user asked you directly in a CLI, with the added constraint that this branch may already contain related work:
-## Build & Test (MANDATORY before pushing)
+- Understand the requested behavior and how prior commits on `{{branch_name}}` affect it.
+- Read the smallest useful set of source, tests, docs, and comparable implementations needed to make the change correctly.
+- Follow existing project conventions from `CLAUDE.md` and nearby code.
+- Build on previous plan-item work instead of duplicating or conflicting with it.
+- Make the complete change required by this item; do not add unrelated cleanups or speculative improvements.
-After implementation, verify everything works:
+## Validation
-1. **Build** the project using its build system (check CLAUDE.md, package.json, README, Makefile)
-2. Verify the build succeeds with your changes AND all prior commits on this branch
-3. **Run the full test suite** — fix any regressions you introduced
-4. **Run any other checks** the repo defines (linting, type checking, formatting)
-5. If the build fails 3 times, report the errors in your findings and stop
-6. Do NOT push code with a broken build or failing tests that you introduced
+Before publishing, prove the shared branch still works with your change included:
+- Use the project's source of truth for commands: `CLAUDE.md`, README, package scripts, Makefile, or equivalent build config.
+- Run checks that are relevant to this item and to the integrated branch state. Prefer the full suite when practical.
+- Fix regressions you introduced. If failures are pre-existing or caused by earlier branch work, capture the evidence and say so clearly.
+- Do not push code with a broken build or failing tests that you introduced.
 > ⚠️ **Long builds (Gradle, MSBuild, dotnet, fresh `npm install`)**: any command that may stay silent for more than ~4 minutes will be killed by the heartbeat monitor. Run it via `Bash(run_in_background: true)` then `Monitor` to stream stdout, OR pass an explicit `timeout` (max 600000 ms). See **Long-Running Build / Test Commands** below for the full pattern.
-## Push
+## Publish
-Only after build and tests pass:
+After the change is validated or any unavoidable limitation is clearly documented, commit only the relevant files and push to the shared branch:
 ```bash
 git add <specific files>
@@ -79,7 +80,7 @@ git push origin {{branch_name}}
 ## When to Stop
-Your task is complete once you have: (1) confirmed build and tests pass, and (2) pushed to the shared branch. Do NOT create a PR — the engine creates one when all plan items are done. Stop after pushing.
+Your task is complete when the requested implementation is delivered, the validation story is truthful and sufficient for review, and your commit is pushed to the shared branch. Do NOT create a PR — the engine creates one when all plan items are done.
 ## Completion

package/playbooks/implement.md CHANGED Viewed

@@ -38,32 +38,35 @@ Before starting work, run `git status` and verify the worktree is clean and on t
 Use subagents only for genuinely parallel, independent tasks (e.g., editing files in unrelated modules simultaneously). For sequential work, single-file edits, searches, and file reads, work directly — do not spawn subagents.
-## Instructions
+## Delivery Contract
-1. Read relevant source code and reference implementations before writing anything
-2. Follow existing patterns exactly — check `agents/create-agent/` or the closest comparable agent
-3. Follow the project's logging and coding conventions (check CLAUDE.md)
+Deliver this as if the user asked you directly in a CLI:
+- Understand the requested behavior and relevant acceptance criteria before editing.
+- Read the smallest useful set of source, tests, docs, and comparable implementations needed to make the change correctly.
+- Follow existing project conventions, including logging, typing, error handling, and test structure.
+- Make the complete change required by the task; do not add unrelated cleanups or speculative improvements.
+- Keep working through failures you introduced until the implementation is either correct or honestly blocked with concrete evidence.
 ## Git Workflow
 You are already running in a git worktree on branch `{{branch_name}}`. Do NOT create additional worktrees — the engine pre-created one for you.
 Do NOT remove the worktree — the engine handles cleanup automatically.
-## Build & Test (MANDATORY before pushing)
+## Validation
-Build and test before pushing:
+Before publishing, prove the change with the repo's own documented checks:
-1. **Build** the project using its build system (check CLAUDE.md, package.json, README, or Makefile). Retry up to 3 times; if it still fails, report the errors and stop.
-2. **Run the full test suite** using the command the project defines. Fix regressions you introduced and re-run until your changes are green.
-3. If tests were already failing before your changes, note that in the PR description but do not block on pre-existing failures.
-4. **Run any other checks** the repo defines (linting, type checking, formatting).
-5. Do NOT push code with a broken build or failing tests that you introduced.
+- Use the project's source of truth for commands: `CLAUDE.md`, README, package scripts, Makefile, or equivalent build config.
+- Run the checks that are relevant to this task, including tests that cover the changed behavior. Prefer the full suite when practical.
+- Fix regressions you introduced. If failures are pre-existing or outside the task, capture the evidence and make that explicit in the PR.
+- Do not publish changes with a broken build or failing tests that you introduced.
 > ⚠️ **Long builds (Gradle, MSBuild, dotnet, fresh `npm install`)**: any command that may stay silent for more than ~4 minutes will be killed by the heartbeat monitor. Run it via `Bash(run_in_background: true)` then `Monitor` to stream stdout, OR pass an explicit `timeout` (max 600000 ms). See **Long-Running Build / Test Commands** below for the full pattern.
-## Push & Create PR
+## Publish
-Only after build and tests pass:
+After the change is validated or any unavoidable limitation is clearly documented, commit only the relevant files and push this branch:
 ```bash
 git add <specific files>
@@ -73,10 +76,12 @@ git push -u origin {{branch_name}}
 {{pr_section}}
+PR creation is MANDATORY for implement tasks because the engine tracks review and completion from the PR.
 Include build/test status and run instructions in the PR description. If the project has a runnable app, include the localhost URL.
 ## When to Stop
-Your task is complete once you have: (1) confirmed build and tests pass, (2) pushed your branch, and (3) created the PR. Your final message MUST include the PR URL so the engine can track it. Stop immediately after.
+Your task is complete when the requested implementation is delivered, the validation story is truthful and sufficient for review, the branch is pushed, and the PR exists. Your final message MUST include the PR URL so the engine can track it.
 Do NOT run `gh pr merge` or any other merge command on your own PR. The engine reviews and merges PRs through a separate review cycle. Self-merging is prohibited.

package/playbooks/shared-rules.md CHANGED Viewed

@@ -17,6 +17,17 @@ Codex will review your changes — make sure your implementation is thorough and
 Your context window may be compacted or summarized mid-task by Claude's automatic context management. This is normal and expected for long-running tasks. Do NOT interpret compacted or truncated context as a signal to stop early, wrap up prematurely, or skip remaining work. Continue working toward your stated objective regardless of context window state — re-read key files if needed to recover context.
+## Delegated Task Contract
+Treat a Minions assignment as if the user had typed the same task directly into a capable CLI agent. Preserve the user's actual task contract first; the playbook adds orchestration guardrails, not a rigid script for thinking or implementation.
+- Optimize for the requested outcome, not for mechanically completing checklist steps.
+- Use judgment to choose the smallest reliable workflow that fully satisfies the task.
+- Read only the context needed to make correct decisions; do not perform broad archaeology unless the task requires it.
+- Validate with the repo's own documented commands and acceptance criteria. If full validation is impossible or pre-existing failures block it, explain that precisely instead of inventing a green result.
+- Prefer direct work over ceremony. Branches, PRs, inbox notes, completion blocks, and status comments exist for traceability; they should not change what "done" means for the user.
+- Safety and observability rules still win: stay in the engine-created worktree, do not self-merge, do not edit engine-managed status files, do not hide failures, and leave enough evidence for the human and engine to track the result.
 ## Engine Rules (apply to all tasks)
 **Context compaction:** Your context window may be compacted mid-task by Claude's infrastructure. If you notice your earlier conversation history appears truncated or summarized, this is normal and expected. Do not interpret compaction as a signal to stop early or wrap up. Continue working toward your task objective — all relevant instructions and state remain available.

package/playbooks/work-item.md CHANGED Viewed

@@ -20,28 +20,31 @@ Team root: {{team_root}}
 Branch format: `feat/{{item_id}}-<short-description>`
 Keep branch names lowercase, use hyphens, max 60 chars.
-## Steps
-1. **Understand the task** — read the description carefully, explore relevant code
-2. **Navigate** to the correct project directory: `{{project_path}}`
-3. You are already in a worktree on branch `{{branch_name}}`. Do NOT create additional worktrees.
-4. **Implement** the changes
-5. **Build** — using the repo's build system (check CLAUDE.md, package.json, README, Makefile). If it fails, fix and retry (up to 3 times).
-6. **Run the full test suite** — find the test command from project docs. Fix any regressions you introduced. Do NOT push with failing tests.
-7. **Run any other checks** the repo defines (linting, type checking, formatting)
-8. **Commit and push** (only after build and tests pass):
-   ```bash
-   git add <specific files>
-   git commit -m "feat({{item_id}}): <description>"
-   git push -u origin {{branch_name}}
-   ```
-9. **Create a PR:**
-   {{pr_create_instructions}}
-   - sourceRefName: `refs/heads/feat/{{item_id}}-<short-desc>`
-   - targetRefName: `refs/heads/{{main_branch}}`
-   - title: `feat({{item_id}}): <description>`
-10. **Post implementation notes** as a PR thread comment:
-    {{pr_comment_instructions}}
+## Delivery Contract
+Treat this as if the user had typed the task directly into a CLI agent:
+- Work in the correct project directory: `{{project_path}}`.
+- You are already in a worktree on branch `{{branch_name}}`. Do NOT create additional worktrees.
+- Understand the requested outcome, inspect the relevant source/tests/docs, and make the complete change needed.
+- Follow existing repo conventions and avoid unrelated cleanups.
+- Validate with the repo's documented build/test/check commands. Fix regressions you introduced; if failures are pre-existing or outside the task, document the evidence.
+- Do NOT publish code with a broken build or failing tests that you introduced.
+After the change is ready for review, commit only relevant files, push `{{branch_name}}`, create the PR, and post implementation notes with the validation result:
+```bash
+git add <specific files>
+git commit -m "feat({{item_id}}): <description>"
+git push -u origin {{branch_name}}
+```
+{{pr_create_instructions}}
+- sourceRefName: `refs/heads/{{branch_name}}`
+- targetRefName: `refs/heads/{{main_branch}}`
+- title: `feat({{item_id}}): <description>`
+{{pr_comment_instructions}}
 Do NOT remove the worktree — the engine handles cleanup automatically.
@@ -59,6 +62,6 @@ If you encounter merge conflicts during push or PR creation:
 ## When to Stop
-Your task is complete once you have: (1) confirmed build and tests pass, (2) pushed your branch, and (3) created the PR. Do NOT continue beyond the task description. Stop immediately.
+Your task is complete when the requested work item is delivered, the validation story is truthful and sufficient for review, the branch is pushed, and the PR exists. Do NOT continue into unrelated improvements.
 Do NOT run `gh pr merge` or any other merge command on your own PR. The engine reviews and merges PRs through a separate review cycle. Self-merging is prohibited.

package/prompts/cc-system.md CHANGED Viewed

@@ -77,6 +77,8 @@ Core action types:
   workTypes: `explore` (research/report only, NO PR), `ask` (answer/report, NO PR), `implement` (new code, PR REQUIRED), `fix` (bug fix, PR REQUIRED), `review` (code review, NO PR), `test` (tests, PR if new), `verify` (merge/build/maintenance, NO PR)
   If the user wants a design/architecture artifact committed through a PR, dispatch `implement` or `docs` rather than `explore`.
   When the user names a specific agent ("assign this to lambert"), put exactly that one name in `agents` (e.g. `"agents": ["lambert"]`). A single-agent assignment is hard-pinned by the server — it will queue for that agent only and skip the routing table. Use multi-agent arrays only when the user names multiple agents or asks for fan-out.
+- **build-and-test**: pr, project (optional), agent (optional) — Run the build-and-test playbook against a PR. The agent will check out the PR branch, run the project's build/test commands, and report results. Use when the user asks to "run tests on PR X" or "build PR X", or after a fix to verify that nothing regressed.
+  Example: user says "run build and test on PR 1834" → `{"type":"build-and-test","pr":"1834"}`
 - **note**: title, content — save to inbox
 - **knowledge**: title, content, category (architecture/conventions/project-notes/build-reports/reviews) — create new KB entry or copy existing doc to KB
 - **pin-to-pinned**: title, content, level (critical/warning) — write to pinned.md, force-injected into ALL agent prompts (rarely needed)

package/routing.md CHANGED Viewed

@@ -25,6 +25,7 @@ Notes:
 - `_any_` means route to any available idle agent (lowest error rate first)
 - `implement:large` is for items with `estimated_complexity: "large"`
 - Engine falls back to any idle agent if both preferred and fallback are busy
+- Routing selects an owner; it should not narrow the user's task contract. The assigned agent should behave as if the user had typed the same task directly into a CLI, with Minions adding only safety, status, and review guardrails.
 ## Rules