@yemi33/minions 0.1.2071 → 0.1.2072

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/engine/shared.js CHANGED
@@ -467,6 +467,22 @@ function safeReadDir(dir) {
467
467
  try { return fs.readdirSync(dir); } catch { return []; }
468
468
  }
469
469
 
470
+ /**
471
+ * Read a JSON file with **automatic restore from `.backup` sidecar** on
472
+ * missing/corrupt primary. Intended for live, mutable state files
473
+ * (work-items.json, dispatch.json, pull-requests.json, etc.) that are paired
474
+ * with a `.backup` sidecar written by `safeWrite`. Returns the parsed JSON,
475
+ * or null when both primary and backup are missing/unparseable.
476
+ *
477
+ * **Restore semantics:** If the primary is missing or unparseable but a valid
478
+ * `.backup` exists, the backup is parsed, returned, AND atomically rewritten
479
+ * to the primary path (best-effort). This protects live state from torn
480
+ * writes / interrupted saves.
481
+ *
482
+ * Counterpart: `safeJsonNoRestore` for terminal artifacts and "missing == gone"
483
+ * reads (cooldowns, archived PRDs, ephemeral session state) where reviving a
484
+ * stale `.backup` is actively harmful. See its JSDoc for selection guidance.
485
+ */
470
486
  function safeJson(p) {
471
487
  // Split the read from the parse so we can distinguish "file missing" (normal
472
488
  // pre-create state — silent) from "file present but corrupt JSON" (real
@@ -524,22 +540,42 @@ function safeJsonObj(p) { return safeJson(p) || {}; }
524
540
  function safeJsonArr(p) { return safeJson(p) || []; }
525
541
 
526
542
  /**
527
- * Sibling of safeJson for terminal-artifact reads (PRDs in `prd/`, archived
528
- * plans, anything where a missing primary should NOT auto-restore from a
529
- * stale `.backup` sidecar). Returns the parsed JSON on success, or null when
530
- * the primary is missing or unparseable.
543
+ * Sibling of safeJson for terminal-artifact and "missing == gone" reads
544
+ * (PRDs in `prd/`, archived plans, cooldowns, ephemeral session state —
545
+ * anything where a missing primary should NOT auto-restore from a stale
546
+ * `.backup` sidecar). Returns the parsed JSON on success, or `defaultValue`
547
+ * (default `null`) on **any** failure: missing file, unparseable JSON, or
548
+ * IO error. The `.backup` sidecar is never consulted.
531
549
  *
532
550
  * Why a separate primitive: safeJson's restore-on-miss is correct for live
533
551
  * state files (work-items.json, dispatch.json, pull-requests.json, etc.) but
534
- * actively harmful for terminal artifacts. Archived PRDs leave a `.backup`
535
- * sidecar in `prd/`; if any caller reads the active path with safeJson, the
536
- * .backup is silently restored and the dashboard sees a phantom "active" PRD
537
- * (W-mouptdh1000h9f39). PRDs are end-state no automatic resurrection.
552
+ * actively harmful for terminal artifacts. Examples of misuse and the bugs
553
+ * they hide:
554
+ * - Archived PRDs leave a `.backup` sidecar in `prd/`; reading the active
555
+ * path with safeJson silently restores it and the dashboard sees a
556
+ * phantom "active" PRD (W-mouptdh1000h9f39). PRDs are end-state — no
557
+ * resurrection.
558
+ * - Cooldowns are time-bounded ephemeral state (24h TTL). Restoring a
559
+ * stale `cooldowns.json.backup` could resurrect expired entries that
560
+ * should already have been pruned, suppressing legitimate dispatches.
561
+ * - Restoring corrupt-primary scenarios from `.backup` masks the underlying
562
+ * write integrity failure and breaks State Integrity tests.
563
+ *
564
+ * **When to use which:**
565
+ * - `safeJson(p)` — live mutable state paired with safeWrite-managed `.backup`.
566
+ * Restore-on-miss is protective against torn writes.
567
+ * - `safeJsonNoRestore(p, defaultValue)` — terminal artifacts, time-bounded
568
+ * ephemeral state, or any read where "missing/corrupt" should mean "gone".
538
569
  *
539
570
  * Parse errors are logged so silent corruption still surfaces (mirrors
540
571
  * safeJson's contract). Read errors other than ENOENT are also logged.
572
+ *
573
+ * @param {string} p - Absolute path to the JSON file.
574
+ * @param {*} [defaultValue=null] - Value returned on any failure (missing,
575
+ * parse error, IO error). Pass `{}` / `[]` to mirror safeJsonObj/safeJsonArr.
576
+ * @returns {*} Parsed JSON on success, otherwise `defaultValue`.
541
577
  */
542
- function safeJsonNoRestore(p) {
578
+ function safeJsonNoRestore(p, defaultValue = null) {
543
579
  let raw;
544
580
  try {
545
581
  raw = fs.readFileSync(p, 'utf8');
@@ -547,13 +583,13 @@ function safeJsonNoRestore(p) {
547
583
  if (e && e.code !== 'ENOENT') {
548
584
  console.warn(`[safeJsonNoRestore] read failed for ${path.basename(p)}: ${e.message}`);
549
585
  }
550
- return null;
586
+ return defaultValue;
551
587
  }
552
588
  try {
553
589
  return JSON.parse(raw);
554
590
  } catch (parseErr) {
555
591
  console.error(`[safeJsonNoRestore] parse failure for ${path.basename(p)}: ${parseErr.message}`);
556
- return null;
592
+ return defaultValue;
557
593
  }
558
594
  }
559
595
 
@@ -1144,10 +1180,20 @@ function mutateJsonFileLocked(filePath, mutateFn, {
1144
1180
  let data = safeJson(filePath);
1145
1181
  const parsedInvalid = fileExists && data === null;
1146
1182
  if (data === null || typeof data !== 'object') data = Array.isArray(defaultValue) ? [...defaultValue] : { ...defaultValue };
1147
- const beforeSerialized = skipWriteIfUnchanged ? JSON.stringify(data) : null;
1183
+ // Normalize BEFORE taking the baseline snapshot so that both `beforeSerialized`
1184
+ // and the post-mutator snapshot reflect post-normalize state. Capturing the
1185
+ // baseline before normalize breaks the `skipWriteIfUnchanged` optimization for
1186
+ // pull-requests.json files: a no-op mutator on a denormalized file would
1187
+ // always trip the write path because normalization itself shifted serialized
1188
+ // bytes between the two snapshots (P-bfa1c-skipwrite-timing). The trade-off
1189
+ // is intentional: when normalization is the ONLY change, we deliberately
1190
+ // leave the on-disk file denormalized — readers re-run normalizePrRecords on
1191
+ // load (see getPrLinks, engine/queries.js:670-674), so the in-memory contract
1192
+ // is preserved without the per-poll mtime bump.
1148
1193
  if (path.basename(filePath) === 'pull-requests.json' && Array.isArray(data)) {
1149
1194
  normalizePrRecords(data, resolveProjectForPrPath(filePath));
1150
1195
  }
1196
+ const beforeSerialized = skipWriteIfUnchanged ? JSON.stringify(data) : null;
1151
1197
  const next = mutateFn(data);
1152
1198
  const finalData = next === undefined ? data : next;
1153
1199
  const shouldWrite = !skipWriteIfUnchanged || parsedInvalid || JSON.stringify(finalData) !== beforeSerialized;
@@ -1767,6 +1813,14 @@ function parseStreamJsonOutput(raw, runtimeName, opts) {
1767
1813
 
1768
1814
  const KB_CATEGORIES = ['architecture', 'conventions', 'project-notes', 'build-reports', 'reviews'];
1769
1815
 
1816
+ // P-bfa2b-kb-path-traversal — read-side whitelist for /api/knowledge/:category/:file.
1817
+ // Superset of KB_CATEGORIES: adds 'agents' because per-agent personal memory is
1818
+ // served from knowledge/agents/<id>.md (see engine/consolidation.js +
1819
+ // engine/playbook.js) but is NOT a destination for inbox classification, so
1820
+ // KB_CATEGORIES intentionally excludes it. Frozen so handlers can rely on the
1821
+ // list being immutable across the process lifetime.
1822
+ const KB_READABLE_CATEGORIES = Object.freeze([...KB_CATEGORIES, 'agents']);
1823
+
1770
1824
  /**
1771
1825
  * Classify an inbox item into a knowledge base category.
1772
1826
  * Single source of truth — used by consolidation.js (both LLM and regex paths).
@@ -4763,6 +4817,10 @@ function mutatePullRequests(filePath, mutator) {
4763
4817
  return mutator(data) || data;
4764
4818
  }, {
4765
4819
  defaultValue: [],
4820
+ skipWriteIfUnchanged: true,
4821
+ // Emit only when an actual write happened. skipWriteIfUnchanged can
4822
+ // short-circuit no-op mutations; suppress the event in that case so the
4823
+ // dashboard cache-version doesn't bump for nothing.
4766
4824
  onWrote: () => {
4767
4825
  try { require('./db-events').emitStateEvent('pull_requests'); } catch { /* optional */ }
4768
4826
  },
@@ -5158,6 +5216,7 @@ module.exports = {
5158
5216
  gitEnv,
5159
5217
  parseStreamJsonOutput,
5160
5218
  KB_CATEGORIES,
5219
+ KB_READABLE_CATEGORIES,
5161
5220
  classifyInboxItem,
5162
5221
  ENGINE_DEFAULTS,
5163
5222
  resolveAgentCli, resolveCcCli, resolveCcUseWorkerPool, resolveAgentModel, resolveCcModel,
package/engine.js CHANGED
@@ -5021,6 +5021,88 @@ async function discoverFromPrs(config, project) {
5021
5021
  return newWork;
5022
5022
  }
5023
5023
 
5024
+ /**
5025
+ * P-f9a2e1b4 — Compute runner_brief / runner_execute_brief / test_file for
5026
+ * QA Session DRAFT and EXECUTE dispatches.
5027
+ *
5028
+ * Lazy-requires `./engine/qa-sessions`, `./engine/qa-runners`, and
5029
+ * `./engine/managed-spawn` so non-QA dispatches don't pay the load cost
5030
+ * and so test isolation (createTestMinionsDir → ISOLATED_MODULES) gets a
5031
+ * fresh module instance per test.
5032
+ *
5033
+ * Returns `{ runner_brief: '', runner_execute_brief: '', test_file: '' }`
5034
+ * for:
5035
+ * - non-QA-session items (no item.meta.sessionId)
5036
+ * - SETUP phase (the SETUP playbook doesn't read these vars; the runner
5037
+ * adapter contract is N/A until the managed-spawn is healthy)
5038
+ * - any failure inside the lookup chain (session missing, runner
5039
+ * missing, or an unexpected throw; a missing managed-spawn only nulls spawnInfo) — failures are surfaced via a WARN log so
5040
+ * the render still succeeds and the playbook's empty-brief failure
5041
+ * path catches it.
5042
+ */
5043
+ function _buildRunnerBriefVars(item, project) {
5044
+ const empty = { runner_brief: '', runner_execute_brief: '', test_file: '' };
5045
+ const meta = item && item.meta;
5046
+ if (!meta || !meta.sessionId) return empty;
5047
+ const phase = meta.sessionPhase;
5048
+ if (phase !== 'draft' && phase !== 'execute') return empty;
5049
+ try {
5050
+ const qaSessions = require('./engine/qa-sessions');
5051
+ const qaRunners = require('./engine/qa-runners');
5052
+ const managedSpawn = require('./engine/managed-spawn');
5053
+ const session = qaSessions.getSession(meta.sessionId);
5054
+ if (!session) {
5055
+ log('warn', `qa-session render: session ${meta.sessionId} not found — runner brief empty`);
5056
+ return empty;
5057
+ }
5058
+ const target = (meta.qaSession && meta.qaSession.target) || session.spec.target || {};
5059
+ const explicit = (meta.qaSession && meta.qaSession.runner) || session.spec.runner || '';
5060
+ const runner = qaRunners.detectRunner(target, project || null, explicit);
5061
+ if (!runner) {
5062
+ log('warn', `qa-session render: no runner detected for session ${meta.sessionId} (target.kind=${target.kind}, explicit=${explicit || 'none'}) — runner brief empty`);
5063
+ return empty;
5064
+ }
5065
+ // Live managed-spawn snapshot (port / base_url / health). listManagedSpecs()
5066
+ // returns [] when the state file is missing or unreadable; defensive
5067
+ // filter is fine here.
5068
+ let spawnInfo = null;
5069
+ try {
5070
+ const specs = managedSpawn.listManagedSpecs();
5071
+ spawnInfo = (specs || []).find(s => s && s.name === session.managedSpawnName) || null;
5072
+ } catch (spawnErr) {
5073
+ log('warn', `qa-session render: managed-spawn lookup failed for ${session.managedSpawnName}: ${spawnErr.message}`);
5074
+ }
5075
+ const briefOpts = {
5076
+ session,
5077
+ sessionId: session.id,
5078
+ spawnInfo,
5079
+ flowsRaw: (meta.qaSession && meta.qaSession.flowsRaw) || session.spec.flowsRaw || '',
5080
+ capture: (meta.qaSession && meta.qaSession.capture) || session.spec.capture || {},
5081
+ testFile: session.testFile || null,
5082
+ };
5083
+ const out = { runner_brief: '', runner_execute_brief: '', test_file: session.testFile || '' };
5084
+ if (phase === 'draft') {
5085
+ try {
5086
+ const brief = runner.generateBrief(briefOpts);
5087
+ out.runner_brief = typeof brief === 'string' ? brief : (brief && brief.markdown) || '';
5088
+ } catch (briefErr) {
5089
+ log('warn', `qa-session render: runner ${runner.name} generateBrief threw: ${briefErr.message}`);
5090
+ }
5091
+ } else if (phase === 'execute') {
5092
+ try {
5093
+ const brief = runner.executeBrief(briefOpts);
5094
+ out.runner_execute_brief = typeof brief === 'string' ? brief : (brief && brief.markdown) || '';
5095
+ } catch (briefErr) {
5096
+ log('warn', `qa-session render: runner ${runner.name} executeBrief threw: ${briefErr.message}`);
5097
+ }
5098
+ }
5099
+ return out;
5100
+ } catch (err) {
5101
+ log('warn', `qa-session render: _buildRunnerBriefVars failed for ${meta.sessionId} (${phase}): ${err.message}`);
5102
+ return empty;
5103
+ }
5104
+ }
5105
+
5024
5106
  /**
5025
5107
  * Scan work-items.json for manually queued tasks
5026
5108
  */
@@ -5079,6 +5161,64 @@ function renderProjectWorkItemPromptForAgent(item, workType, agentId, config, pr
5079
5161
  qa_artifacts_dir: item.meta && item.meta.qaRunId
5080
5162
  ? path.posix.join('engine', 'qa-artifacts', String(item.meta.qaRunId))
5081
5163
  : '',
5164
+ // P-e6b3c2d8 — QA Session template vars. The qa-sessions chain helpers
5165
+ // (engine/qa-sessions.js#_baseWorkItem) stamp meta.sessionId,
5166
+ // meta.sessionPhase, and meta.qaSession.{target,flowsRaw,mode,capture,runner}
5167
+ // on each SETUP/DRAFT/EXECUTE WI; renderProjectWorkItemPromptForAgent
5168
+ // surfaces them as named template vars so the qa-session-* playbooks
5169
+ // can reference them by literal {{name}} without re-resolving from
5170
+ // item.meta. Only the var matching target.kind is populated (pr → target_pr_id, branch → target_branch, commit → target_sha, current → target_worktree); the rest
5171
+ // resolve to empty strings (filtered out of unresolved-var warnings via
5172
+ // PLAYBOOK_OPTIONAL_VARS).
5173
+ session_id: (item.meta && item.meta.sessionId) || '',
5174
+ session_phase: (item.meta && item.meta.sessionPhase) || '',
5175
+ managed_spawn_name: item.meta && item.meta.sessionId
5176
+ ? 'qa-session-' + String(item.meta.sessionId)
5177
+ : '',
5178
+ target_kind: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind) || '',
5179
+ target_pr_id: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'pr'
5180
+ ? String(item.meta.qaSession.target.prId || '')
5181
+ : ''),
5182
+ target_branch: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'branch'
5183
+ ? String(item.meta.qaSession.target.branch || '')
5184
+ : ''),
5185
+ target_sha: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'commit'
5186
+ ? String(item.meta.qaSession.target.sha || '')
5187
+ : ''),
5188
+ target_worktree: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'current'
5189
+ ? String(item.meta.qaSession.target.worktree || '')
5190
+ : ''),
5191
+ target_json: (item.meta && item.meta.qaSession && item.meta.qaSession.target)
5192
+ ? JSON.stringify(item.meta.qaSession.target)
5193
+ : '',
5194
+ flows_raw: (item.meta && item.meta.qaSession && item.meta.qaSession.flowsRaw) || '',
5195
+ runner_hint: (item.meta && item.meta.qaSession && item.meta.qaSession.runner) || '',
5196
+ capture: (item.meta && item.meta.qaSession && item.meta.qaSession.capture)
5197
+ ? Object.entries(item.meta.qaSession.capture)
5198
+ .filter(([, v]) => !!v)
5199
+ .map(([k]) => k)
5200
+ .join(',')
5201
+ : '',
5202
+ session_mode: (item.meta && item.meta.qaSession && item.meta.qaSession.mode) || '',
5203
+ // P-f9a2e1b4 — Runner adapter briefs. The DRAFT playbook consumes
5204
+ // {{runner_brief}} (runner.generateBrief() output); EXECUTE consumes
5205
+ // {{runner_execute_brief}} (runner.executeBrief() output) plus
5206
+ // {{test_file}} (session.testFile, set after DRAFT). For non-QA-session
5207
+ // items and for the SETUP phase, all three resolve to empty strings;
5208
+ // PLAYBOOK_OPTIONAL_VARS keeps them out of unresolved-var warnings.
5209
+ //
5210
+ // We lazy-require qa-sessions + qa-runners + managed-spawn so non-QA
5211
+ // dispatches don't pay the load cost, and so test isolation (which
5212
+ // busts these modules from require.cache via createTestMinionsDir →
5213
+ // ISOLATED_MODULES) picks up a fresh module instance per test.
5214
+ //
5215
+ // Defensive failure mode: any throw inside the brief computation
5216
+ // resolves to an empty string and surfaces as a warn log. Renders
5217
+ // must never blow up because a runner adapter misbehaved — the agent
5218
+ // gets a "no runner brief available" cue and reports a setup
5219
+ // failure via the qa-session-draft-failed / qa-session-execute-failed
5220
+ // path. (See playbooks/qa-session-draft.md → "Failure path" section.)
5221
+ ..._buildRunnerBriefVars(item, project),
5082
5222
  };
5083
5223
  const cpResult = buildWorkItemDispatchVars(item, vars, config, {
5084
5224
  worktreePath: vars.worktree_path || root,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yemi33/minions",
3
- "version": "0.1.2071",
3
+ "version": "0.1.2072",
4
4
  "description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
5
5
  "bin": {
6
6
  "minions": "bin/minions.js"
@@ -0,0 +1,158 @@
1
+ ---
2
+ requiresProjectContext: true
3
+ ---
4
+
5
+ # Playbook: QA Session DRAFT
6
+
7
+ You are {{agent_name}}, the {{agent_role}} on the {{project_name}} project.
8
+ TEAM ROOT: {{team_root}}
9
+
10
+ ## Your Task
11
+
12
+ QA Session **DRAFT** phase for session **{{session_id}}** (work item {{item_id}}).
13
+
14
+ A user asked Minions to QA the following target and flows; the SETUP phase
15
+ has already resolved the target into a worktree and the engine has spawned
16
+ the dev-up command as a managed-spawn. Your job is to translate the
17
+ natural-language flows into a runner-native test file.
18
+
19
+ - **Session id:** `{{session_id}}`
20
+ - **Session phase:** `{{session_phase}}`
21
+ - **Target kind:** `{{target_kind}}`
22
+ - **Target PR id:** `{{target_pr_id}}`
23
+ - **Target branch:** `{{target_branch}}`
24
+ - **Target commit SHA:** `{{target_sha}}`
25
+ - **Target worktree (kind=current):** `{{target_worktree}}`
26
+ - **Raw target JSON:** `{{target_json}}`
27
+ - **Flows (natural language):** {{flows_raw}}
28
+ - **Runner hint (optional explicit runner):** `{{runner_hint}}`
29
+ - **Capture:** `{{capture}}`
30
+ - **Mode:** `{{session_mode}}`
31
+ - **Managed-spawn target:** `{{managed_spawn_name}}` (live — query
32
+ `http://localhost:7331/api/managed-processes/by-name?name={{managed_spawn_name}}`
33
+ for the freshest port / base URL / health).
34
+
35
+ {{additional_context}}
36
+
37
+ ## What "qa-session-draft" means
38
+
39
+ A `qa-session-draft` task is the **second** of three chained work items the
40
+ engine dispatches for each QA Session (SETUP → DRAFT → EXECUTE). The SETUP
41
+ agent already produced a managed-spawn sidecar and the engine spawned the
42
+ dev-up command; the EXECUTE agent will run your drafted test against that
43
+ live spawn. Your only deliverable is the **test file itself**, written in
44
+ the runner's native format under
45
+ `engine/qa-tests/{{session_id}}/` (relative to the Minions root).
46
+
47
+ The engine resolved a concrete **runner adapter** for this session
48
+ (Playwright, Maestro, or a project plugin) and its `generateBrief()` hook
49
+ already produced the precise authoring instructions you need. Read the
50
+ runner brief below, then implement exactly the file it describes.
51
+
52
+ ### Runner brief
53
+
54
+ {{runner_brief}}
55
+
56
+ ### Reporting the test file path
57
+
58
+ When you exit, your completion JSON MUST include a `testFile` field with
59
+ the **relative path inside `engine/qa-tests/{{session_id}}/`** of the file
60
+ you wrote (e.g. `test.spec.js`, `flow.yaml`). The engine reads this and
61
+ stores it on the session record so the EXECUTE prompt can reference it
62
+ directly. Without `testFile`, EXECUTE falls back to a generic
63
+ `test.<ext>` hint and the agent may pick the wrong file.
64
+
65
+ Example:
66
+
67
+ ```json
68
+ {
69
+ "status": "success",
70
+ "summary": "Drafted Playwright spec covering login + redirect flow",
71
+ "testFile": "test.spec.js",
72
+ "nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
73
+ "artifacts": [
74
+ { "type": "file", "path": "engine/qa-tests/{{session_id}}/test.spec.js", "title": "Drafted Playwright spec" }
75
+ ]
76
+ }
77
+ ```
78
+
79
+ ## No PR, no commit
80
+
81
+ `qa-session-draft` is a test-authoring task. **Do not**:
82
+
83
+ - commit, push, or open a pull request — sessions are tracked by the
84
+ session record, not a merged PR
85
+ - modify project source — the only file you should write is the test
86
+ file under `engine/qa-tests/{{session_id}}/`
87
+ - start the managed-spawn yourself — it is already running; query
88
+ `/api/managed-processes/by-name?name={{managed_spawn_name}}` for the
89
+ live port / base URL / health snapshot
90
+
91
+ ## Failure path (REQUIRED)
92
+
93
+ If the runner brief is empty (no runner could be detected and none was
94
+ specified), if you cannot translate the flows into a runner-native file,
95
+ or if the managed-spawn is not healthy enough to draft against, **do not
96
+ write a partial test file**. Instead, write your completion report with:
97
+
98
+ ```json
99
+ {
100
+ "status": "failed",
101
+ "summary": "<one-line human-readable explanation of what blocked DRAFT>",
102
+ "failure_class": "qa-session-draft-failed",
103
+ "retryable": false,
104
+ "needs_rerun": false,
105
+ "nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
106
+ "artifacts": []
107
+ }
108
+ ```
109
+
110
+ The `engine/qa-sessions.js#handleDraftComplete` hook reads `failure_class`
111
+ and the summary, transitions the session to `failed`, and surfaces the
112
+ explanation in the dashboard session card so the human knows exactly why
113
+ DRAFT gave up.
114
+
115
+ Examples of legitimate failure summaries:
116
+
117
+ - `"No QA runner detected and none specified — install Playwright or Maestro and re-run with runner=<name>."`
118
+ - `"Flows reference a feature that does not exist in the spawn (e.g. /admin route returns 404)."`
119
+ - `"Managed-spawn {{managed_spawn_name}} not healthy — base URL unreachable from the agent."`
120
+
121
+ ## Working directory
122
+
123
+ ```bash
124
+ # PowerShell
125
+ echo $env:MINIONS_AGENT_CWD
126
+ pwd
127
+
128
+ # bash/zsh
129
+ echo "$MINIONS_AGENT_CWD"
130
+ pwd
131
+ ```
132
+
133
+ `MINIONS_AGENT_CWD` is the engine-resolved worktree root. Prefer it over
134
+ `pwd` for any cwd-sensitive command. The test file path is **relative to
135
+ the Minions root**, not the project worktree — write to
136
+ `<MINIONS_ROOT>/engine/qa-tests/{{session_id}}/`. The Minions root is the
137
+ parent of the project worktree (one level above `MINIONS_AGENT_CWD` for
138
+ project-scoped sessions; equal to `MINIONS_AGENT_CWD` for central
139
+ sessions).
140
+
141
+ ## Findings
142
+
143
+ Write findings to `{{team_root}}/notes/inbox/{{agent_id}}-{{item_id}}-{{date}}.md`
144
+ only after successful completion. Include:
145
+
146
+ - Session id + target summary
147
+ - Runner adapter chosen
148
+ - Test file path + line count
149
+ - Notes for future drafts on the same project (flaky selectors, env-vars
150
+ needed, runner gotchas)
151
+
152
+ ## Constraints
153
+
154
+ - Do not modify production code unless explicitly asked.
155
+ - Do not remove worktrees; the engine handles cleanup automatically.
156
+ - Do not start or restart the managed-spawn — the engine owns it.
157
+ - The test file is the deliverable — without it (or without a `testFile`
158
+ pointer in completion JSON), the EXECUTE phase has nothing to run.
@@ -0,0 +1,165 @@
1
+ ---
2
+ requiresProjectContext: true
3
+ ---
4
+
5
+ # Playbook: QA Session EXECUTE
6
+
7
+ You are {{agent_name}}, the {{agent_role}} on the {{project_name}} project.
8
+ TEAM ROOT: {{team_root}}
9
+
10
+ ## Your Task
11
+
12
+ QA Session **EXECUTE** phase for session **{{session_id}}** (work item {{item_id}}).
13
+
14
+ The SETUP and DRAFT phases have already finished: the engine spawned the
15
+ dev-up command as a managed-spawn, and the DRAFT agent wrote a
16
+ runner-native test file under `engine/qa-tests/{{session_id}}/`. Your
17
+ job is to **invoke that test against the live managed-spawn**, capture
18
+ the configured artifacts, and write the result sidecar the engine
19
+ ingests.
20
+
21
+ - **Session id:** `{{session_id}}`
22
+ - **Session phase:** `{{session_phase}}`
23
+ - **Managed-spawn target:** `{{managed_spawn_name}}` (live — query
24
+ `http://localhost:7331/api/managed-processes/by-name?name={{managed_spawn_name}}`
25
+ for the freshest port / base URL / health).
26
+ - **Test file (relative to `engine/qa-tests/{{session_id}}/`):** `{{test_file}}`
27
+ - **Flows (for context):** {{flows_raw}}
28
+ - **Runner hint (optional explicit runner):** `{{runner_hint}}`
29
+ - **Capture:** `{{capture}}`
30
+ - **Mode:** `{{session_mode}}`
31
+ - **qa-runs record id (use this in the sidecar's `runId` field):** `{{qa_run_id}}`
32
+
33
+ {{additional_context}}
34
+
35
+ ## What "qa-session-execute" means
36
+
37
+ A `qa-session-execute` task is the **third** of three chained work items
38
+ the engine dispatches for each QA Session (SETUP → DRAFT → EXECUTE). The
39
+ engine resolved the same runner adapter the DRAFT phase used; its
40
+ `executeBrief()` hook produced the precise invocation command + flags
41
+ below.
42
+
43
+ ### Runner execute brief
44
+
45
+ {{runner_execute_brief}}
46
+
47
+ ### Result sidecar (REQUIRED)
48
+
49
+ Before exit, write the result sidecar at
50
+ `agents/{{agent_id}}/qa-run-result.json` with this exact shape:
51
+
52
+ ```json
53
+ {
54
+ "runId": "{{qa_run_id}}",
55
+ "status": "passed",
56
+ "summary": "1 sentence rollup the dashboard will render",
57
+ "artifacts": [
58
+ {
59
+ "type": "screenshot",
60
+ "path": "engine/qa-artifacts/{{session_id}}/01-login-form.png",
61
+ "label": "Login form rendered",
62
+ "capturedAt": "2026-05-20T20:42:00.000Z"
63
+ }
64
+ ]
65
+ }
66
+ ```
67
+
68
+ Valid `status` values:
69
+
70
+ - `passed` — every step in the drafted test ran green and every required
71
+ capture artifact was produced.
72
+ - `failed` — at least one assertion failed. Still write the sidecar with
73
+ whatever artifacts you captured plus the failing-step summary.
74
+ - `errored` — the runner itself crashed or the managed-spawn went
75
+ unreachable mid-run (use this sparingly — distinguishes infra failure
76
+ from real product-level failure).
77
+
78
+ The engine consumes this sidecar in `engine/lifecycle.js` and calls
79
+ `qaRuns.completeRun({{qa_run_id}}, …)`. **If the sidecar is missing when
80
+ you exit, the engine marks the run `errored`** — always write it, even on
81
+ bail-out.
82
+
83
+ The `engine/qa-sessions.js#handleExecuteComplete` hook then reads the
84
+ qa-runs terminal status and transitions the session to `done` / `failed`
85
+ accordingly.
86
+
87
+ ## No PR, no commit
88
+
89
+ `qa-session-execute` is a verification task. **Do not**:
90
+
91
+ - commit, push, or open a pull request — sessions are tracked by the
92
+ session record + qa-runs record, not a merged PR
93
+ - modify project source — if a test step requires a code change, stop,
94
+ leave changes uncommitted, and document the gap in the result summary
95
+ - start or restart the managed-spawn — the engine owns it
96
+ - modify the drafted test file — re-drafting belongs to the DRAFT phase
97
+ (the human invokes it via POST `/api/qa/sessions/<id>/edit`)
98
+
99
+ ## Failure path (REQUIRED)
100
+
101
+ If the managed-spawn is unhealthy, the runner CLI is missing, or you
102
+ cannot even attempt the test invocation, **do not silently exit
103
+ green**. Write your completion report with:
104
+
105
+ ```json
106
+ {
107
+ "status": "failed",
108
+ "summary": "<one-line human-readable explanation of what blocked EXECUTE>",
109
+ "failure_class": "qa-session-execute-failed",
110
+ "retryable": false,
111
+ "needs_rerun": false,
112
+ "nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
113
+ "artifacts": []
114
+ }
115
+ ```
116
+
117
+ …AND write a matching `qa-run-result.json` sidecar with `status: "errored"`
118
+ so the qa-runs record terminalizes correctly. The session will transition
119
+ to `failed` with `failureClass: qa-session-execute-failed`.
120
+
121
+ ## Working directory
122
+
123
+ ```bash
124
+ # PowerShell
125
+ echo $env:MINIONS_AGENT_CWD
126
+ pwd
127
+
128
+ # bash/zsh
129
+ echo "$MINIONS_AGENT_CWD"
130
+ pwd
131
+ ```
132
+
133
+ `MINIONS_AGENT_CWD` is the engine-resolved worktree root. Prefer it over
134
+ `pwd` for any cwd-sensitive command. The test file path is **relative to
135
+ the Minions root**: full path is
136
+ `<MINIONS_ROOT>/engine/qa-tests/{{session_id}}/{{test_file}}`. Capture
137
+ artifacts to `<MINIONS_ROOT>/engine/qa-artifacts/{{session_id}}/`.
138
+
139
+ ## Long-Running Commands
140
+
141
+ Playwright runs, Maestro flows, and webdriver waits can be silent for
142
+ minutes. Run the normal CLI commands and wait for them to finish; do not
143
+ add progress pings or extra logging just to keep the engine active.
144
+
145
+ ## Findings
146
+
147
+ Write findings to `{{team_root}}/notes/inbox/{{agent_id}}-{{item_id}}-{{date}}.md`
148
+ only after successful completion. Include:
149
+
150
+ - Session id + target summary
151
+ - Test file + runner adapter
152
+ - Per-step pass/fail
153
+ - Artifact paths (relative to `{{team_root}}`)
154
+ - Notes for the next EXECUTE on the same target (flaky selectors, env
155
+ quirks, runner gotchas)
156
+
157
+ ## Constraints
158
+
159
+ - Do not modify production code unless explicitly asked.
160
+ - Do not remove worktrees; the engine handles cleanup automatically.
161
+ - Do not start or restart the managed-spawn — the engine owns it.
162
+ - Always emit the `qa-run-result.json` sidecar before exit — even a
163
+ minimal
164
+ `{"runId": "{{qa_run_id}}", "status": "errored", "summary": "...", "artifacts": []}`
165
+ is better than an absent file.