npm - @yemi33/minions - Versions diffs - 0.1.2070 → 0.1.2072 - Mend

@yemi33/minions 0.1.2070 → 0.1.2072

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/dashboard/js/qa.js +358 -0
package/dashboard/js/state.js +2 -1
package/dashboard/pages/qa.html +72 -0
package/dashboard/styles.css +102 -0
package/dashboard.js +410 -6
package/docs/qa-runbook-lifecycle.md +232 -0
package/engine/cleanup.js +4 -1
package/engine/comment-classifier.js +8 -1
package/engine/cooldown.js +6 -2
package/engine/gh-comment.js +74 -3
package/engine/gh-token.js +7 -9
package/engine/lifecycle.js +100 -0
package/engine/pipeline.js +9 -1
package/engine/playbook.js +39 -0
package/engine/qa-runners/maestro.js +152 -0
package/engine/qa-runners/playwright.js +149 -0
package/engine/qa-runners.js +323 -0
package/engine/qa-sessions.js +1008 -0
package/engine/shared.js +71 -12
package/engine.js +140 -0
package/package.json +1 -1
package/playbooks/qa-session-draft.md +158 -0
package/playbooks/qa-session-execute.md +165 -0
package/playbooks/qa-session-setup.md +154 -0
package/prompts/cc-system.md +43 -0
package/routing.md +3 -0

package/engine/shared.js CHANGED Viewed

@@ -467,6 +467,22 @@ function safeReadDir(dir) {
   try { return fs.readdirSync(dir); } catch { return []; }
 }
+/**
+ * Read a JSON file with **automatic restore from `.backup` sidecar** on
+ * missing/corrupt primary. Intended for live, mutable state files
+ * (work-items.json, dispatch.json, pull-requests.json, etc.) that are paired
+ * with a `.backup` sidecar written by `safeWrite`. Returns the parsed JSON,
+ * or null when both primary and backup are missing/unparseable.
+ *
+ * **Restore semantics:** If the primary is missing or unparseable but a valid
+ * `.backup` exists, the backup is parsed, returned, AND atomically rewritten
+ * to the primary path (best-effort). This protects live state from torn
+ * writes / interrupted saves.
+ *
+ * Counterpart: `safeJsonNoRestore` for terminal artifacts and "missing == gone"
+ * reads (cooldowns, archived PRDs, ephemeral session state) where reviving a
+ * stale `.backup` is actively harmful. See its JSDoc for selection guidance.
+ */
 function safeJson(p) {
   // Split the read from the parse so we can distinguish "file missing" (normal
   // pre-create state — silent) from "file present but corrupt JSON" (real
@@ -524,22 +540,42 @@ function safeJsonObj(p) { return safeJson(p) || {}; }
 function safeJsonArr(p) { return safeJson(p) || []; }
 /**
- * Sibling of safeJson for terminal-artifact reads (PRDs in `prd/`, archived
- * plans, anything where a missing primary should NOT auto-restore from a
- * stale `.backup` sidecar). Returns the parsed JSON on success, or null when
- * the primary is missing or unparseable.
+ * Sibling of safeJson for terminal-artifact and "missing == gone" reads
+ * (PRDs in `prd/`, archived plans, cooldowns, ephemeral session state —
+ * anything where a missing primary should NOT auto-restore from a stale
+ * `.backup` sidecar). Returns the parsed JSON on success, or `defaultValue`
+ * (default `null`) on **any** failure: missing file, unparseable JSON, or
+ * IO error. The `.backup` sidecar is never consulted.
  *
  * Why a separate primitive: safeJson's restore-on-miss is correct for live
  * state files (work-items.json, dispatch.json, pull-requests.json, etc.) but
- * actively harmful for terminal artifacts. Archived PRDs leave a `.backup`
- * sidecar in `prd/`; if any caller reads the active path with safeJson, the
- * .backup is silently restored and the dashboard sees a phantom "active" PRD
- * (W-mouptdh1000h9f39). PRDs are end-state — no automatic resurrection.
+ * actively harmful for terminal artifacts. Examples of misuse and the bugs
+ * they hide:
+ *   - Archived PRDs leave a `.backup` sidecar in `prd/`; reading the active
+ *     path with safeJson silently restores it and the dashboard sees a
+ *     phantom "active" PRD (W-mouptdh1000h9f39). PRDs are end-state — no
+ *     resurrection.
+ *   - Cooldowns are time-bounded ephemeral state (24h TTL). Restoring a
+ *     stale `cooldowns.json.backup` could resurrect expired entries that
+ *     should already have been pruned, suppressing legitimate dispatches.
+ *   - Restoring a corrupt primary from `.backup` masks the underlying
+ *     write-integrity failure and breaks State Integrity tests.
+ *
+ * **When to use which:**
+ *   - `safeJson(p)` — live mutable state paired with safeWrite-managed `.backup`.
+ *     Restore-on-miss is protective against torn writes.
+ *   - `safeJsonNoRestore(p, defaultValue)` — terminal artifacts, time-bounded
+ *     ephemeral state, or any read where "missing/corrupt" should mean "gone".
  *
  * Parse errors are logged so silent corruption still surfaces (mirrors
  * safeJson's contract). Read errors other than ENOENT are also logged.
+ *
+ * @param {string} p - Absolute path to the JSON file.
+ * @param {*} [defaultValue=null] - Value returned on any failure (missing,
+ *   parse error, IO error). Pass `{}` / `[]` to mirror safeJsonObj/safeJsonArr.
+ * @returns {*} Parsed JSON on success, otherwise `defaultValue`.
  */
-function safeJsonNoRestore(p) {
+function safeJsonNoRestore(p, defaultValue = null) {
   let raw;
   try {
     raw = fs.readFileSync(p, 'utf8');
@@ -547,13 +583,13 @@ function safeJsonNoRestore(p) {
     if (e && e.code !== 'ENOENT') {
       console.warn(`[safeJsonNoRestore] read failed for ${path.basename(p)}: ${e.message}`);
     }
-    return null;
+    return defaultValue;
   }
   try {
     return JSON.parse(raw);
   } catch (parseErr) {
     console.error(`[safeJsonNoRestore] parse failure for ${path.basename(p)}: ${parseErr.message}`);
-    return null;
+    return defaultValue;
   }
 }
@@ -1144,10 +1180,20 @@ function mutateJsonFileLocked(filePath, mutateFn, {
     let data = safeJson(filePath);
     const parsedInvalid = fileExists && data === null;
     if (data === null || typeof data !== 'object') data = Array.isArray(defaultValue) ? [...defaultValue] : { ...defaultValue };
-    const beforeSerialized = skipWriteIfUnchanged ? JSON.stringify(data) : null;
+    // Normalize BEFORE taking the baseline snapshot so that both `beforeSerialized`
+    // and the post-mutator snapshot reflect post-normalize state. Capturing the
+    // baseline before normalize breaks the `skipWriteIfUnchanged` optimization for
+    // pull-requests.json files: a no-op mutator on a denormalized file would
+    // always trip the write path because normalization itself shifted serialized
+    // bytes between the two snapshots (P-bfa1c-skipwrite-timing). The trade-off
+    // is intentional: when normalization is the ONLY change, we deliberately
+    // leave the on-disk file denormalized — readers re-run normalizePrRecords on
+    // load (see getPrLinks, engine/queries.js:670-674), so the in-memory contract
+    // is preserved without the per-poll mtime bump.
     if (path.basename(filePath) === 'pull-requests.json' && Array.isArray(data)) {
       normalizePrRecords(data, resolveProjectForPrPath(filePath));
     }
+    const beforeSerialized = skipWriteIfUnchanged ? JSON.stringify(data) : null;
     const next = mutateFn(data);
     const finalData = next === undefined ? data : next;
     const shouldWrite = !skipWriteIfUnchanged || parsedInvalid || JSON.stringify(finalData) !== beforeSerialized;
@@ -1767,6 +1813,14 @@ function parseStreamJsonOutput(raw, runtimeName, opts) {
 const KB_CATEGORIES = ['architecture', 'conventions', 'project-notes', 'build-reports', 'reviews'];
+// P-bfa2b-kb-path-traversal — read-side whitelist for /api/knowledge/:category/:file.
+// Superset of KB_CATEGORIES: adds 'agents' because per-agent personal memory is
+// served from knowledge/agents/<id>.md (see engine/consolidation.js +
+// engine/playbook.js) but is NOT a destination for inbox classification, so
+// KB_CATEGORIES intentionally excludes it. Frozen so handlers can rely on the
+// list being immutable across the process lifetime.
+const KB_READABLE_CATEGORIES = Object.freeze([...KB_CATEGORIES, 'agents']);
 /**
  * Classify an inbox item into a knowledge base category.
  * Single source of truth — used by consolidation.js (both LLM and regex paths).
@@ -4763,6 +4817,10 @@ function mutatePullRequests(filePath, mutator) {
     return mutator(data) || data;
   }, {
     defaultValue: [],
+    skipWriteIfUnchanged: true,
+    // Emit only when an actual write happened. skipWriteIfUnchanged can
+    // short-circuit no-op mutations; suppress the event in that case so the
+    // dashboard cache-version doesn't bump for nothing.
     onWrote: () => {
       try { require('./db-events').emitStateEvent('pull_requests'); } catch { /* optional */ }
     },
@@ -5158,6 +5216,7 @@ module.exports = {
   gitEnv,
   parseStreamJsonOutput,
   KB_CATEGORIES,
+  KB_READABLE_CATEGORIES,
   classifyInboxItem,
   ENGINE_DEFAULTS,
   resolveAgentCli, resolveCcCli, resolveCcUseWorkerPool, resolveAgentModel, resolveCcModel,

package/engine.js CHANGED Viewed

@@ -5021,6 +5021,88 @@ async function discoverFromPrs(config, project) {
   return newWork;
 }
+/**
+ * P-f9a2e1b4 — Compute runner_brief / runner_execute_brief / test_file for
+ * QA Session DRAFT and EXECUTE dispatches.
+ *
+ * Lazy-requires `./engine/qa-sessions`, `./engine/qa-runners`, and
+ * `./engine/managed-spawn` so non-QA dispatches don't pay the load cost
+ * and so test isolation (createTestMinionsDir → ISOLATED_MODULES) gets a
+ * fresh module instance per test.
+ *
+ * Returns `{ runner_brief: '', runner_execute_brief: '', test_file: '' }`
+ * for:
+ *   - non-QA-session items (no item.meta.sessionId)
+ *   - SETUP phase (the SETUP playbook doesn't read these vars; the runner
+ *     adapter contract is N/A until the managed-spawn is healthy)
+ *   - a missing session, an undetectable runner, or an unexpected throw in
+ *     the lookup chain — each is surfaced via a WARN log so the render
+ *     still succeeds and the playbook's empty-brief failure path catches
+ *     it. (A missing managed-spawn entry is not fatal: `spawnInfo` is null.)
+ */
+function _buildRunnerBriefVars(item, project) {
+  const empty = { runner_brief: '', runner_execute_brief: '', test_file: '' };
+  const meta = item && item.meta;
+  if (!meta || !meta.sessionId) return empty;
+  const phase = meta.sessionPhase;
+  if (phase !== 'draft' && phase !== 'execute') return empty;
+  try {
+    const qaSessions = require('./engine/qa-sessions');
+    const qaRunners = require('./engine/qa-runners');
+    const managedSpawn = require('./engine/managed-spawn');
+    const session = qaSessions.getSession(meta.sessionId);
+    if (!session) {
+      log('warn', `qa-session render: session ${meta.sessionId} not found — runner brief empty`);
+      return empty;
+    }
+    const target = (meta.qaSession && meta.qaSession.target) || session.spec.target || {};
+    const explicit = (meta.qaSession && meta.qaSession.runner) || session.spec.runner || '';
+    const runner = qaRunners.detectRunner(target, project || null, explicit);
+    if (!runner) {
+      log('warn', `qa-session render: no runner detected for session ${meta.sessionId} (target.kind=${target.kind}, explicit=${explicit || 'none'}) — runner brief empty`);
+      return empty;
+    }
+    // Live managed-spawn snapshot (port / base_url / health). listManagedSpecs()
+    // returns [] when the state file is missing or unreadable; the `|| []`
+    // guard and the null-entry check inside find() are purely defensive.
+    let spawnInfo = null;
+    try {
+      const specs = managedSpawn.listManagedSpecs();
+      spawnInfo = (specs || []).find(s => s && s.name === session.managedSpawnName) || null;
+    } catch (spawnErr) {
+      log('warn', `qa-session render: managed-spawn lookup failed for ${session.managedSpawnName}: ${spawnErr.message}`);
+    }
+    const briefOpts = {
+      session,
+      sessionId: session.id,
+      spawnInfo,
+      flowsRaw: (meta.qaSession && meta.qaSession.flowsRaw) || session.spec.flowsRaw || '',
+      capture: (meta.qaSession && meta.qaSession.capture) || session.spec.capture || {},
+      testFile: session.testFile || null,
+    };
+    const out = { runner_brief: '', runner_execute_brief: '', test_file: session.testFile || '' };
+    if (phase === 'draft') {
+      try {
+        const brief = runner.generateBrief(briefOpts);
+        out.runner_brief = typeof brief === 'string' ? brief : (brief && brief.markdown) || '';
+      } catch (briefErr) {
+        log('warn', `qa-session render: runner ${runner.name} generateBrief threw: ${briefErr.message}`);
+      }
+    } else if (phase === 'execute') {
+      try {
+        const brief = runner.executeBrief(briefOpts);
+        out.runner_execute_brief = typeof brief === 'string' ? brief : (brief && brief.markdown) || '';
+      } catch (briefErr) {
+        log('warn', `qa-session render: runner ${runner.name} executeBrief threw: ${briefErr.message}`);
+      }
+    }
+    return out;
+  } catch (err) {
+    log('warn', `qa-session render: _buildRunnerBriefVars failed for ${meta.sessionId} (${phase}): ${err.message}`);
+    return empty;
+  }
+}
 /**
  * Scan work-items.json for manually queued tasks
  */
@@ -5079,6 +5161,64 @@ function renderProjectWorkItemPromptForAgent(item, workType, agentId, config, pr
     qa_artifacts_dir: item.meta && item.meta.qaRunId
       ? path.posix.join('engine', 'qa-artifacts', String(item.meta.qaRunId))
       : '',
+    // P-e6b3c2d8 — QA Session template vars. The qa-sessions chain helpers
+    // (engine/qa-sessions.js#_baseWorkItem) stamp meta.sessionId,
+    // meta.sessionPhase, and meta.qaSession.{target,flowsRaw,mode,capture,runner}
+    // on each SETUP/DRAFT/EXECUTE WI; renderProjectWorkItemPromptForAgent
+    // surfaces them as named template vars so the qa-session-* playbooks
+    // can reference them by literal {{name}} without re-resolving from
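+    // item.meta. Each target_* var is populated only for its matching
+    // target.kind (pr → target_pr_id, branch → target_branch, commit →
+    // target_sha, current → target_worktree); the rest resolve to empty
+    // strings (filtered out of unresolved-var warnings via
+    // PLAYBOOK_OPTIONAL_VARS).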
+    session_id: (item.meta && item.meta.sessionId) || '',
+    session_phase: (item.meta && item.meta.sessionPhase) || '',
+    managed_spawn_name: item.meta && item.meta.sessionId
+      ? 'qa-session-' + String(item.meta.sessionId)
+      : '',
+    target_kind: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind) || '',
+    target_pr_id: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'pr'
+      ? String(item.meta.qaSession.target.prId || '')
+      : ''),
+    target_branch: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'branch'
+      ? String(item.meta.qaSession.target.branch || '')
+      : ''),
+    target_sha: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'commit'
+      ? String(item.meta.qaSession.target.sha || '')
+      : ''),
+    target_worktree: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'current'
+      ? String(item.meta.qaSession.target.worktree || '')
+      : ''),
+    target_json: (item.meta && item.meta.qaSession && item.meta.qaSession.target)
+      ? JSON.stringify(item.meta.qaSession.target)
+      : '',
+    flows_raw: (item.meta && item.meta.qaSession && item.meta.qaSession.flowsRaw) || '',
+    runner_hint: (item.meta && item.meta.qaSession && item.meta.qaSession.runner) || '',
+    capture: (item.meta && item.meta.qaSession && item.meta.qaSession.capture)
+      ? Object.entries(item.meta.qaSession.capture)
+          .filter(([, v]) => !!v)
+          .map(([k]) => k)
+          .join(',')
+      : '',
+    session_mode: (item.meta && item.meta.qaSession && item.meta.qaSession.mode) || '',
+    // P-f9a2e1b4 — Runner adapter briefs. The DRAFT playbook consumes
+    // {{runner_brief}} (runner.generateBrief() output); EXECUTE consumes
+    // {{runner_execute_brief}} (runner.executeBrief() output) plus
+    // {{test_file}} (session.testFile, set after DRAFT). For non-QA-session
+    // items and for the SETUP phase, all three resolve to empty strings;
+    // PLAYBOOK_OPTIONAL_VARS keeps them out of unresolved-var warnings.
+    //
+    // We lazy-require qa-sessions + qa-runners + managed-spawn so non-QA
+    // dispatches don't pay the load cost, and so test isolation (which
+    // busts these modules from require.cache via createTestMinionsDir →
+    // ISOLATED_MODULES) picks up a fresh module instance per test.
+    //
+    // Defensive failure mode: any throw inside the brief computation
+    // resolves to an empty string and surfaces as a warn log. Renders
+    // must never blow up because a runner adapter misbehaved — the agent
+    // gets a "no runner brief available" cue and reports a setup
+    // failure via the qa-session-draft-failed / qa-session-execute-failed
+    // path. (See playbooks/qa-session-draft.md → "Failure path" section.)
+    ..._buildRunnerBriefVars(item, project),
   };
   const cpResult = buildWorkItemDispatchVars(item, vars, config, {
     worktreePath: vars.worktree_path || root,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@yemi33/minions",
-  "version": "0.1.2070",
+  "version": "0.1.2072",
   "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
   "bin": {
     "minions": "bin/minions.js"

package/playbooks/qa-session-draft.md ADDED Viewed

@@ -0,0 +1,158 @@
+---
+requiresProjectContext: true
+---
+# Playbook: QA Session DRAFT
+You are {{agent_name}}, the {{agent_role}} on the {{project_name}} project.
+TEAM ROOT: {{team_root}}
+## Your Task
+QA Session **DRAFT** phase for session **{{session_id}}** (work item {{item_id}}).
+A user asked Minions to QA the following target and flows; the SETUP phase
+has already resolved the target into a worktree and the engine has spawned
+the dev-up command as a managed-spawn. Your job is to translate the
+natural-language flows into a runner-native test file.
+- **Session id:** `{{session_id}}`
+- **Session phase:** `{{session_phase}}`
+- **Target kind:** `{{target_kind}}`
+- **Target PR id:** `{{target_pr_id}}`
+- **Target branch:** `{{target_branch}}`
+- **Target commit SHA:** `{{target_sha}}`
+- **Target worktree (kind=current):** `{{target_worktree}}`
+- **Raw target JSON:** `{{target_json}}`
+- **Flows (natural language):** {{flows_raw}}
+- **Runner hint (optional explicit runner):** `{{runner_hint}}`
+- **Capture:** `{{capture}}`
+- **Mode:** `{{session_mode}}`
+- **Managed-spawn target:** `{{managed_spawn_name}}` (live — query
+  `http://localhost:7331/api/managed-processes/by-name?name={{managed_spawn_name}}`
+  for the freshest port / base URL / health).
+{{additional_context}}
+## What "qa-session-draft" means
+A `qa-session-draft` task is the **second** of three chained work items the
+engine dispatches for each QA Session (SETUP → DRAFT → EXECUTE). The SETUP
+agent already produced a managed-spawn sidecar and the engine spawned the
+dev-up command; the EXECUTE agent will run your drafted test against that
+live spawn. Your only deliverable is the **test file itself**, written in
+the runner's native format under
+`engine/qa-tests/{{session_id}}/` (relative to the Minions root).
+The engine resolved a concrete **runner adapter** for this session
+(Playwright, Maestro, or a project plugin) and its `generateBrief()` hook
+already produced the precise authoring instructions you need. Read the
+runner brief below, then implement exactly the file it describes.
+### Runner brief
+{{runner_brief}}
+### Reporting the test file path
+When you exit, your completion JSON MUST include a `testFile` field with
+the **relative path inside `engine/qa-tests/{{session_id}}/`** of the file
+you wrote (e.g. `test.spec.js`, `flow.yaml`). The engine reads this and
+stores it on the session record so the EXECUTE prompt can reference it
+directly. Without `testFile`, EXECUTE falls back to a generic
+`test.<ext>` hint and the agent may pick the wrong file.
+Example:
+```json
+{
+  "status": "success",
+  "summary": "Drafted Playwright spec covering login + redirect flow",
+  "testFile": "test.spec.js",
+  "nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
+  "artifacts": [
+    { "type": "file", "path": "engine/qa-tests/{{session_id}}/test.spec.js", "title": "Drafted Playwright spec" }
+  ]
+}
+```
+## No PR, no commit
+`qa-session-draft` is a test-authoring task. **Do not**:
+- commit, push, or open a pull request — sessions are tracked by the
+  session record, not a merged PR
+- modify project source — the only file you should write is the test
+  file under `engine/qa-tests/{{session_id}}/`
+- start the managed-spawn yourself — it is already running; query
+  `/api/managed-processes/by-name?name={{managed_spawn_name}}` for the
+  live port / base URL / health snapshot
+## Failure path (REQUIRED)
+If the runner brief is empty (no runner could be detected and none was
+specified), if you cannot translate the flows into a runner-native file,
+or if the managed-spawn is not healthy enough to draft against, **do not
+write a partial test file**. Instead, write your completion report with:
+```json
+{
+  "status": "failed",
+  "summary": "<one-line human-readable explanation of what blocked DRAFT>",
+  "failure_class": "qa-session-draft-failed",
+  "retryable": false,
+  "needs_rerun": false,
+  "nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
+  "artifacts": []
+}
+```
+The `engine/qa-sessions.js#handleDraftComplete` hook reads `failure_class`
+and the summary, transitions the session to `failed`, and surfaces the
+explanation in the dashboard session card so the human knows exactly why
+DRAFT gave up.
+Examples of legitimate failure summaries:
+- `"No QA runner detected and none specified — install Playwright or Maestro and re-run with runner=<name>."`
+- `"Flows reference a feature that does not exist in the spawn (e.g. /admin route returns 404)."`
+- `"Managed-spawn {{managed_spawn_name}} not healthy — base URL unreachable from the agent."`
+## Working directory
+```bash
+# PowerShell
+echo $env:MINIONS_AGENT_CWD
+pwd
+# bash/zsh
+echo "$MINIONS_AGENT_CWD"
+pwd
+```
+`MINIONS_AGENT_CWD` is the engine-resolved worktree root. Prefer it over
+`pwd` for any cwd-sensitive command. The test file lives **under the
+Minions root**, not the project worktree — write it to
+`<MINIONS_ROOT>/engine/qa-tests/{{session_id}}/` and report only the
+path relative to that directory as `testFile`. The Minions root is the
+parent of the project worktree (one level above `MINIONS_AGENT_CWD` for
+project-scoped sessions; equal to `MINIONS_AGENT_CWD` for central
+sessions).
+## Findings
+Write findings to `{{team_root}}/notes/inbox/{{agent_id}}-{{item_id}}-{{date}}.md`
+only after successful completion. Include:
+- Session id + target summary
+- Runner adapter chosen
+- Test file path + line count
+- Notes for future drafts on the same project (flaky selectors, env-vars
+  needed, runner gotchas)
+## Constraints
+- Do not modify production code unless explicitly asked.
+- Do not remove worktrees; the engine handles cleanup automatically.
+- Do not start or restart the managed-spawn — the engine owns it.
+- The test file is the deliverable — without it (or without a `testFile`
+  pointer in completion JSON), the EXECUTE phase has nothing to run.

package/playbooks/qa-session-execute.md ADDED Viewed

@@ -0,0 +1,165 @@
+---
+requiresProjectContext: true
+---
+# Playbook: QA Session EXECUTE
+You are {{agent_name}}, the {{agent_role}} on the {{project_name}} project.
+TEAM ROOT: {{team_root}}
+## Your Task
+QA Session **EXECUTE** phase for session **{{session_id}}** (work item {{item_id}}).
+The SETUP and DRAFT phases have already finished: the engine spawned the
+dev-up command as a managed-spawn, and the DRAFT agent wrote a
+runner-native test file under `engine/qa-tests/{{session_id}}/`. Your
+job is to **invoke that test against the live managed-spawn**, capture
+the configured artifacts, and write the result sidecar the engine
+ingests.
+- **Session id:** `{{session_id}}`
+- **Session phase:** `{{session_phase}}`
+- **Managed-spawn target:** `{{managed_spawn_name}}` (live — query
+  `http://localhost:7331/api/managed-processes/by-name?name={{managed_spawn_name}}`
+  for the freshest port / base URL / health).
+- **Test file (relative to `engine/qa-tests/{{session_id}}/`):** `{{test_file}}`
+- **Flows (for context):** {{flows_raw}}
+- **Runner hint (optional explicit runner):** `{{runner_hint}}`
+- **Capture:** `{{capture}}`
+- **Mode:** `{{session_mode}}`
+- **qa-runs record id (use this in the sidecar's `runId` field):** `{{qa_run_id}}`
+{{additional_context}}
+## What "qa-session-execute" means
+A `qa-session-execute` task is the **third** of three chained work items
+the engine dispatches for each QA Session (SETUP → DRAFT → EXECUTE). The
+engine resolved the same runner adapter the DRAFT phase used; its
+`executeBrief()` hook produced the precise invocation command + flags
+below.
+### Runner execute brief
+{{runner_execute_brief}}
+### Result sidecar (REQUIRED)
+Before exit, write the result sidecar at
+`agents/{{agent_id}}/qa-run-result.json` with this exact shape:
+```json
+{
+  "runId": "{{qa_run_id}}",
+  "status": "passed",
+  "summary": "1 sentence rollup the dashboard will render",
+  "artifacts": [
+    {
+      "type": "screenshot",
+      "path": "engine/qa-artifacts/{{session_id}}/01-login-form.png",
+      "label": "Login form rendered",
+      "capturedAt": "2026-05-20T20:42:00.000Z"
+    }
+  ]
+}
+```
+Valid `status` values:
+- `passed` — every step in the drafted test ran green and every required
+  capture artifact was produced.
+- `failed` — at least one assertion failed. Still write the sidecar with
+  whatever artifacts you captured plus the failing-step summary.
+- `errored` — the runner itself crashed or the managed-spawn went
+  unreachable mid-run (use this sparingly — distinguishes infra failure
+  from real product-level failure).
+The engine consumes this sidecar in `engine/lifecycle.js` and calls
+`qaRuns.completeRun({{qa_run_id}}, …)`. **If the sidecar is missing when
+you exit, the engine marks the run `errored`** — always write it, even on
+bail-out.
+The `engine/qa-sessions.js#handleExecuteComplete` hook then reads the
+qa-runs terminal status and transitions the session to `done` / `failed`
+accordingly.
+## No PR, no commit
+`qa-session-execute` is a verification task. **Do not**:
+- commit, push, or open a pull request — sessions are tracked by the
+  session record + qa-runs record, not a merged PR
+- modify project source — if a test step requires a code change, stop,
+  leave changes uncommitted, and document the gap in the result summary
+- start or restart the managed-spawn — the engine owns it
+- modify the drafted test file — re-drafting belongs to the DRAFT phase
+  (the human invokes it via POST `/api/qa/sessions/<id>/edit`)
+## Failure path (REQUIRED)
+If the managed-spawn is unhealthy, the runner CLI is missing, or you
+cannot even attempt the test invocation, **do not silently exit
+green**. Write:
+```json
+{
+  "status": "failed",
+  "summary": "<one-line human-readable explanation of what blocked EXECUTE>",
+  "failure_class": "qa-session-execute-failed",
+  "retryable": false,
+  "needs_rerun": false,
+  "nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
+  "artifacts": []
+}
+```
+…AND write a matching `qa-run-result.json` sidecar with `status: "errored"`
+so the qa-runs record terminalizes correctly. The session will transition
+to `failed` with `failureClass: qa-session-execute-failed`.
+## Working directory
+```bash
+# PowerShell
+echo $env:MINIONS_AGENT_CWD
+pwd
+# bash/zsh
+echo "$MINIONS_AGENT_CWD"
+pwd
+```
+`MINIONS_AGENT_CWD` is the engine-resolved worktree root. Prefer it over
+`pwd` for any cwd-sensitive command. The test file lives **under the
+Minions root**, not the project worktree: its full path is
+`<MINIONS_ROOT>/engine/qa-tests/{{session_id}}/{{test_file}}`. Write
+captured artifacts to `<MINIONS_ROOT>/engine/qa-artifacts/{{session_id}}/`.
+## Long-Running Commands
+Playwright runs, Maestro flows, and webdriver waits can be silent for
+minutes. Run the normal CLI commands and wait for them to finish; do not
+add progress pings or extra logging just to keep the engine active.
+## Findings
+Write findings to `{{team_root}}/notes/inbox/{{agent_id}}-{{item_id}}-{{date}}.md`
+only after successful completion. Include:
+- Session id + target summary
+- Test file + runner adapter
+- Per-step pass/fail
+- Artifact paths (relative to `{{team_root}}`)
+- Notes for the next EXECUTE on the same target (flaky selectors, env
+  quirks, runner gotchas)
+## Constraints
+- Do not modify production code unless explicitly asked.
+- Do not remove worktrees; the engine handles cleanup automatically.
+- Do not start or restart the managed-spawn — the engine owns it.
+- Always emit the `qa-run-result.json` sidecar before exit — even a
+  minimal
+  `{"runId": "{{qa_run_id}}", "status": "errored", "summary": "...", "artifacts": []}`
+  is better than an absent file.