@yemi33/minions 0.1.2071 → 0.1.2073
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/js/qa.js +358 -0
- package/dashboard/js/state.js +2 -1
- package/dashboard/pages/qa.html +72 -0
- package/dashboard/styles.css +102 -0
- package/dashboard.js +410 -6
- package/docs/README.md +1 -0
- package/docs/auto-discovery.md +2 -1
- package/docs/kb-sweep.md +8 -0
- package/docs/qa-runbook-lifecycle.md +232 -0
- package/engine/cleanup.js +4 -1
- package/engine/comment-classifier.js +8 -1
- package/engine/cooldown.js +6 -2
- package/engine/db/migrations/007-watches.js +95 -0
- package/engine/gh-comment.js +74 -3
- package/engine/lifecycle.js +100 -0
- package/engine/metrics-store.js +0 -0
- package/engine/pipeline.js +9 -1
- package/engine/playbook.js +39 -0
- package/engine/pull-requests-store.js +30 -22
- package/engine/qa-runners/maestro.js +152 -0
- package/engine/qa-runners/playwright.js +149 -0
- package/engine/qa-runners.js +323 -0
- package/engine/qa-sessions.js +1008 -0
- package/engine/shared.js +109 -13
- package/engine/watches-store.js +259 -0
- package/engine/watches.js +12 -16
- package/engine/work-items-store.js +33 -35
- package/engine.js +140 -0
- package/package.json +1 -1
- package/playbooks/qa-session-draft.md +158 -0
- package/playbooks/qa-session-execute.md +165 -0
- package/playbooks/qa-session-setup.md +154 -0
- package/prompts/cc-system.md +43 -0
- package/routing.md +3 -0
package/engine.js
CHANGED
|
@@ -5021,6 +5021,88 @@ async function discoverFromPrs(config, project) {
|
|
|
5021
5021
|
return newWork;
|
|
5022
5022
|
}
|
|
5023
5023
|
|
|
5024
|
+
/**
|
|
5025
|
+
* P-f9a2e1b4 — Compute runner_brief / runner_execute_brief / test_file for
|
|
5026
|
+
* QA Session DRAFT and EXECUTE dispatches.
|
|
5027
|
+
*
|
|
5028
|
+
* Lazy-requires `./engine/qa-sessions`, `./engine/qa-runners`, and
|
|
5029
|
+
* `./engine/managed-spawn` so non-QA dispatches don't pay the load cost
|
|
5030
|
+
* and so test isolation (createTestMinionsDir → ISOLATED_MODULES) gets a
|
|
5031
|
+
* fresh module instance per test.
|
|
5032
|
+
*
|
|
5033
|
+
* Returns `{ runner_brief: '', runner_execute_brief: '', test_file: '' }`
|
|
5034
|
+
* for:
|
|
5035
|
+
* - non-QA-session items (no item.meta.sessionId)
|
|
5036
|
+
* - SETUP phase (the SETUP playbook doesn't read these vars; the runner
|
|
5037
|
+
* adapter contract is N/A until the managed-spawn is healthy)
|
|
5038
|
+
* - any failure inside the lookup chain (session missing, runner
|
|
5039
|
+
* missing, spawn missing) — failures are surfaced via a WARN log so
|
|
5040
|
+
* the render still succeeds and the playbook's empty-brief failure
|
|
5041
|
+
* path catches it.
|
|
5042
|
+
*/
|
|
5043
|
+
function _buildRunnerBriefVars(item, project) {
|
|
5044
|
+
const empty = { runner_brief: '', runner_execute_brief: '', test_file: '' };
|
|
5045
|
+
const meta = item && item.meta;
|
|
5046
|
+
if (!meta || !meta.sessionId) return empty;
|
|
5047
|
+
const phase = meta.sessionPhase;
|
|
5048
|
+
if (phase !== 'draft' && phase !== 'execute') return empty;
|
|
5049
|
+
try {
|
|
5050
|
+
const qaSessions = require('./engine/qa-sessions');
|
|
5051
|
+
const qaRunners = require('./engine/qa-runners');
|
|
5052
|
+
const managedSpawn = require('./engine/managed-spawn');
|
|
5053
|
+
const session = qaSessions.getSession(meta.sessionId);
|
|
5054
|
+
if (!session) {
|
|
5055
|
+
log('warn', `qa-session render: session ${meta.sessionId} not found — runner brief empty`);
|
|
5056
|
+
return empty;
|
|
5057
|
+
}
|
|
5058
|
+
const target = (meta.qaSession && meta.qaSession.target) || session.spec.target || {};
|
|
5059
|
+
const explicit = (meta.qaSession && meta.qaSession.runner) || session.spec.runner || '';
|
|
5060
|
+
const runner = qaRunners.detectRunner(target, project || null, explicit);
|
|
5061
|
+
if (!runner) {
|
|
5062
|
+
log('warn', `qa-session render: no runner detected for session ${meta.sessionId} (target.kind=${target.kind}, explicit=${explicit || 'none'}) — runner brief empty`);
|
|
5063
|
+
return empty;
|
|
5064
|
+
}
|
|
5065
|
+
// Live managed-spawn snapshot (port / base_url / health). listManagedSpecs()
|
|
5066
|
+
// returns [] when the state file is missing or unreadable; defensive
|
|
5067
|
+
// filter is fine here.
|
|
5068
|
+
let spawnInfo = null;
|
|
5069
|
+
try {
|
|
5070
|
+
const specs = managedSpawn.listManagedSpecs();
|
|
5071
|
+
spawnInfo = (specs || []).find(s => s && s.name === session.managedSpawnName) || null;
|
|
5072
|
+
} catch (spawnErr) {
|
|
5073
|
+
log('warn', `qa-session render: managed-spawn lookup failed for ${session.managedSpawnName}: ${spawnErr.message}`);
|
|
5074
|
+
}
|
|
5075
|
+
const briefOpts = {
|
|
5076
|
+
session,
|
|
5077
|
+
sessionId: session.id,
|
|
5078
|
+
spawnInfo,
|
|
5079
|
+
flowsRaw: (meta.qaSession && meta.qaSession.flowsRaw) || session.spec.flowsRaw || '',
|
|
5080
|
+
capture: (meta.qaSession && meta.qaSession.capture) || session.spec.capture || {},
|
|
5081
|
+
testFile: session.testFile || null,
|
|
5082
|
+
};
|
|
5083
|
+
const out = { runner_brief: '', runner_execute_brief: '', test_file: session.testFile || '' };
|
|
5084
|
+
if (phase === 'draft') {
|
|
5085
|
+
try {
|
|
5086
|
+
const brief = runner.generateBrief(briefOpts);
|
|
5087
|
+
out.runner_brief = typeof brief === 'string' ? brief : (brief && brief.markdown) || '';
|
|
5088
|
+
} catch (briefErr) {
|
|
5089
|
+
log('warn', `qa-session render: runner ${runner.name} generateBrief threw: ${briefErr.message}`);
|
|
5090
|
+
}
|
|
5091
|
+
} else if (phase === 'execute') {
|
|
5092
|
+
try {
|
|
5093
|
+
const brief = runner.executeBrief(briefOpts);
|
|
5094
|
+
out.runner_execute_brief = typeof brief === 'string' ? brief : (brief && brief.markdown) || '';
|
|
5095
|
+
} catch (briefErr) {
|
|
5096
|
+
log('warn', `qa-session render: runner ${runner.name} executeBrief threw: ${briefErr.message}`);
|
|
5097
|
+
}
|
|
5098
|
+
}
|
|
5099
|
+
return out;
|
|
5100
|
+
} catch (err) {
|
|
5101
|
+
log('warn', `qa-session render: _buildRunnerBriefVars failed for ${meta.sessionId} (${phase}): ${err.message}`);
|
|
5102
|
+
return empty;
|
|
5103
|
+
}
|
|
5104
|
+
}
|
|
5105
|
+
|
|
5024
5106
|
/**
|
|
5025
5107
|
* Scan work-items.json for manually queued tasks
|
|
5026
5108
|
*/
|
|
@@ -5079,6 +5161,64 @@ function renderProjectWorkItemPromptForAgent(item, workType, agentId, config, pr
|
|
|
5079
5161
|
qa_artifacts_dir: item.meta && item.meta.qaRunId
|
|
5080
5162
|
? path.posix.join('engine', 'qa-artifacts', String(item.meta.qaRunId))
|
|
5081
5163
|
: '',
|
|
5164
|
+
// P-e6b3c2d8 — QA Session template vars. The qa-sessions chain helpers
|
|
5165
|
+
// (engine/qa-sessions.js#_baseWorkItem) stamp meta.sessionId,
|
|
5166
|
+
// meta.sessionPhase, and meta.qaSession.{target,flowsRaw,mode,capture,runner}
|
|
5167
|
+
// on each SETUP/DRAFT/EXECUTE WI; renderProjectWorkItemPromptForAgent
|
|
5168
|
+
// surfaces them as named template vars so the qa-session-* playbooks
|
|
5169
|
+
// can reference them by literal {{name}} without re-resolving from
|
|
5170
|
+
// item.meta. Only target.kind === <X> populates target_<X>; the rest
|
|
5171
|
+
// resolve to empty strings (filtered out of unresolved-var warnings via
|
|
5172
|
+
// PLAYBOOK_OPTIONAL_VARS).
|
|
5173
|
+
session_id: (item.meta && item.meta.sessionId) || '',
|
|
5174
|
+
session_phase: (item.meta && item.meta.sessionPhase) || '',
|
|
5175
|
+
managed_spawn_name: item.meta && item.meta.sessionId
|
|
5176
|
+
? 'qa-session-' + String(item.meta.sessionId)
|
|
5177
|
+
: '',
|
|
5178
|
+
target_kind: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind) || '',
|
|
5179
|
+
target_pr_id: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'pr'
|
|
5180
|
+
? String(item.meta.qaSession.target.prId || '')
|
|
5181
|
+
: ''),
|
|
5182
|
+
target_branch: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'branch'
|
|
5183
|
+
? String(item.meta.qaSession.target.branch || '')
|
|
5184
|
+
: ''),
|
|
5185
|
+
target_sha: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'commit'
|
|
5186
|
+
? String(item.meta.qaSession.target.sha || '')
|
|
5187
|
+
: ''),
|
|
5188
|
+
target_worktree: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'current'
|
|
5189
|
+
? String(item.meta.qaSession.target.worktree || '')
|
|
5190
|
+
: ''),
|
|
5191
|
+
target_json: (item.meta && item.meta.qaSession && item.meta.qaSession.target)
|
|
5192
|
+
? JSON.stringify(item.meta.qaSession.target)
|
|
5193
|
+
: '',
|
|
5194
|
+
flows_raw: (item.meta && item.meta.qaSession && item.meta.qaSession.flowsRaw) || '',
|
|
5195
|
+
runner_hint: (item.meta && item.meta.qaSession && item.meta.qaSession.runner) || '',
|
|
5196
|
+
capture: (item.meta && item.meta.qaSession && item.meta.qaSession.capture)
|
|
5197
|
+
? Object.entries(item.meta.qaSession.capture)
|
|
5198
|
+
.filter(([, v]) => !!v)
|
|
5199
|
+
.map(([k]) => k)
|
|
5200
|
+
.join(',')
|
|
5201
|
+
: '',
|
|
5202
|
+
session_mode: (item.meta && item.meta.qaSession && item.meta.qaSession.mode) || '',
|
|
5203
|
+
// P-f9a2e1b4 — Runner adapter briefs. The DRAFT playbook consumes
|
|
5204
|
+
// {{runner_brief}} (runner.generateBrief() output); EXECUTE consumes
|
|
5205
|
+
// {{runner_execute_brief}} (runner.executeBrief() output) plus
|
|
5206
|
+
// {{test_file}} (session.testFile, set after DRAFT). For non-QA-session
|
|
5207
|
+
// items and for the SETUP phase, all three resolve to empty strings;
|
|
5208
|
+
// PLAYBOOK_OPTIONAL_VARS keeps them out of unresolved-var warnings.
|
|
5209
|
+
//
|
|
5210
|
+
// We lazy-require qa-sessions + qa-runners + managed-spawn so non-QA
|
|
5211
|
+
// dispatches don't pay the load cost, and so test isolation (which
|
|
5212
|
+
// busts these modules from require.cache via createTestMinionsDir →
|
|
5213
|
+
// ISOLATED_MODULES) picks up a fresh module instance per test.
|
|
5214
|
+
//
|
|
5215
|
+
// Defensive failure mode: any throw inside the brief computation
|
|
5216
|
+
// resolves to an empty string and surfaces as a warn log. Renders
|
|
5217
|
+
// must never blow up because a runner adapter misbehaved — the agent
|
|
5218
|
+
// gets a "no runner brief available" cue and reports a setup
|
|
5219
|
+
// failure via the qa-session-draft-failed / qa-session-execute-failed
|
|
5220
|
+
// path. (See playbooks/qa-session-draft.md → "Failure path" section.)
|
|
5221
|
+
..._buildRunnerBriefVars(item, project),
|
|
5082
5222
|
};
|
|
5083
5223
|
const cpResult = buildWorkItemDispatchVars(item, vars, config, {
|
|
5084
5224
|
worktreePath: vars.worktree_path || root,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.2071",
|
|
3
|
+
"version": "0.1.2073",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
---
|
|
2
|
+
requiresProjectContext: true
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Playbook: QA Session DRAFT
|
|
6
|
+
|
|
7
|
+
You are {{agent_name}}, the {{agent_role}} on the {{project_name}} project.
|
|
8
|
+
TEAM ROOT: {{team_root}}
|
|
9
|
+
|
|
10
|
+
## Your Task
|
|
11
|
+
|
|
12
|
+
QA Session **DRAFT** phase for session **{{session_id}}** (work item {{item_id}}).
|
|
13
|
+
|
|
14
|
+
A user asked Minions to QA the following target and flows; the SETUP phase
|
|
15
|
+
has already resolved the target into a worktree and the engine has spawned
|
|
16
|
+
the dev-up command as a managed-spawn. Your job is to translate the
|
|
17
|
+
natural-language flows into a runner-native test file.
|
|
18
|
+
|
|
19
|
+
- **Session id:** `{{session_id}}`
|
|
20
|
+
- **Session phase:** `{{session_phase}}`
|
|
21
|
+
- **Target kind:** `{{target_kind}}`
|
|
22
|
+
- **Target PR id:** `{{target_pr_id}}`
|
|
23
|
+
- **Target branch:** `{{target_branch}}`
|
|
24
|
+
- **Target commit SHA:** `{{target_sha}}`
|
|
25
|
+
- **Target worktree (kind=current):** `{{target_worktree}}`
|
|
26
|
+
- **Raw target JSON:** `{{target_json}}`
|
|
27
|
+
- **Flows (natural language):** {{flows_raw}}
|
|
28
|
+
- **Runner hint (optional explicit runner):** `{{runner_hint}}`
|
|
29
|
+
- **Capture:** `{{capture}}`
|
|
30
|
+
- **Mode:** `{{session_mode}}`
|
|
31
|
+
- **Managed-spawn target:** `{{managed_spawn_name}}` (live — query
|
|
32
|
+
`http://localhost:7331/api/managed-processes/by-name?name={{managed_spawn_name}}`
|
|
33
|
+
for the freshest port / base URL / health).
|
|
34
|
+
|
|
35
|
+
{{additional_context}}
|
|
36
|
+
|
|
37
|
+
## What "qa-session-draft" means
|
|
38
|
+
|
|
39
|
+
A `qa-session-draft` task is the **second** of three chained work items the
|
|
40
|
+
engine dispatches for each QA Session (SETUP → DRAFT → EXECUTE). The SETUP
|
|
41
|
+
agent already produced a managed-spawn sidecar and the engine spawned the
|
|
42
|
+
dev-up command; the EXECUTE agent will run your drafted test against that
|
|
43
|
+
live spawn. Your only deliverable is the **test file itself**, written in
|
|
44
|
+
the runner's native format under
|
|
45
|
+
`engine/qa-tests/{{session_id}}/` (relative to the Minions root).
|
|
46
|
+
|
|
47
|
+
The engine resolved a concrete **runner adapter** for this session
|
|
48
|
+
(Playwright, Maestro, or a project plugin) and its `generateBrief()` hook
|
|
49
|
+
already produced the precise authoring instructions you need. Read the
|
|
50
|
+
runner brief below, then implement exactly the file it describes.
|
|
51
|
+
|
|
52
|
+
### Runner brief
|
|
53
|
+
|
|
54
|
+
{{runner_brief}}
|
|
55
|
+
|
|
56
|
+
### Reporting the test file path
|
|
57
|
+
|
|
58
|
+
When you exit, your completion JSON MUST include a `testFile` field with
|
|
59
|
+
the **relative path inside `engine/qa-tests/{{session_id}}/`** of the file
|
|
60
|
+
you wrote (e.g. `test.spec.js`, `flow.yaml`). The engine reads this and
|
|
61
|
+
stores it on the session record so the EXECUTE prompt can reference it
|
|
62
|
+
directly. Without `testFile`, EXECUTE falls back to a generic
|
|
63
|
+
`test.<ext>` hint and the agent may pick the wrong file.
|
|
64
|
+
|
|
65
|
+
Example:
|
|
66
|
+
|
|
67
|
+
```json
|
|
68
|
+
{
|
|
69
|
+
"status": "success",
|
|
70
|
+
"summary": "Drafted Playwright spec covering login + redirect flow",
|
|
71
|
+
"testFile": "test.spec.js",
|
|
72
|
+
"nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
|
|
73
|
+
"artifacts": [
|
|
74
|
+
{ "type": "file", "path": "engine/qa-tests/{{session_id}}/test.spec.js", "title": "Drafted Playwright spec" }
|
|
75
|
+
]
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## No PR, no commit
|
|
80
|
+
|
|
81
|
+
`qa-session-draft` is a test-authoring task. **Do not**:
|
|
82
|
+
|
|
83
|
+
- commit, push, or open a pull request — sessions are tracked by the
|
|
84
|
+
session record, not a merged PR
|
|
85
|
+
- modify project source — the only file you should write is the test
|
|
86
|
+
file under `engine/qa-tests/{{session_id}}/`
|
|
87
|
+
- start the managed-spawn yourself — it is already running; query
|
|
88
|
+
`/api/managed-processes/by-name?name={{managed_spawn_name}}` for the
|
|
89
|
+
live port / base URL / health snapshot
|
|
90
|
+
|
|
91
|
+
## Failure path (REQUIRED)
|
|
92
|
+
|
|
93
|
+
If the runner brief is empty (no runner could be detected and none was
|
|
94
|
+
specified), if you cannot translate the flows into a runner-native file,
|
|
95
|
+
or if the managed-spawn is not healthy enough to draft against, **do not
|
|
96
|
+
write a partial test file**. Instead, write your completion report with:
|
|
97
|
+
|
|
98
|
+
```json
|
|
99
|
+
{
|
|
100
|
+
"status": "failed",
|
|
101
|
+
"summary": "<one-line human-readable explanation of what blocked DRAFT>",
|
|
102
|
+
"failure_class": "qa-session-draft-failed",
|
|
103
|
+
"retryable": false,
|
|
104
|
+
"needs_rerun": false,
|
|
105
|
+
"nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
|
|
106
|
+
"artifacts": []
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
The `engine/qa-sessions.js#handleDraftComplete` hook reads `failure_class`
|
|
111
|
+
and the summary, transitions the session to `failed`, and surfaces the
|
|
112
|
+
explanation in the dashboard session card so the human knows exactly why
|
|
113
|
+
DRAFT gave up.
|
|
114
|
+
|
|
115
|
+
Examples of legitimate failure summaries:
|
|
116
|
+
|
|
117
|
+
- `"No QA runner detected and none specified — install Playwright or Maestro and re-run with runner=<name>."`
|
|
118
|
+
- `"Flows reference a feature that does not exist in the spawn (e.g. /admin route returns 404)."`
|
|
119
|
+
- `"Managed-spawn {{managed_spawn_name}} not healthy — base URL unreachable from the agent."`
|
|
120
|
+
|
|
121
|
+
## Working directory
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# PowerShell
|
|
125
|
+
echo $env:MINIONS_AGENT_CWD
|
|
126
|
+
pwd
|
|
127
|
+
|
|
128
|
+
# bash/zsh
|
|
129
|
+
echo "$MINIONS_AGENT_CWD"
|
|
130
|
+
pwd
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
`MINIONS_AGENT_CWD` is the engine-resolved worktree root. Prefer it over
|
|
134
|
+
`pwd` for any cwd-sensitive command. The test file lives **under
|
|
135
|
+
the Minions root**, not the project worktree — write to
|
|
136
|
+
`<MINIONS_ROOT>/engine/qa-tests/{{session_id}}/`. The Minions root is the
|
|
137
|
+
parent of the project worktree (one level above `MINIONS_AGENT_CWD` for
|
|
138
|
+
project-scoped sessions; equal to `MINIONS_AGENT_CWD` for central
|
|
139
|
+
sessions).
|
|
140
|
+
|
|
141
|
+
## Findings
|
|
142
|
+
|
|
143
|
+
Write findings to `{{team_root}}/notes/inbox/{{agent_id}}-{{item_id}}-{{date}}.md`
|
|
144
|
+
only after successful completion. Include:
|
|
145
|
+
|
|
146
|
+
- Session id + target summary
|
|
147
|
+
- Runner adapter chosen
|
|
148
|
+
- Test file path + line count
|
|
149
|
+
- Notes for future drafts on the same project (flaky selectors, env-vars
|
|
150
|
+
needed, runner gotchas)
|
|
151
|
+
|
|
152
|
+
## Constraints
|
|
153
|
+
|
|
154
|
+
- Do not modify production code unless explicitly asked.
|
|
155
|
+
- Do not remove worktrees; the engine handles cleanup automatically.
|
|
156
|
+
- Do not start or restart the managed-spawn — the engine owns it.
|
|
157
|
+
- The test file is the deliverable — without it (or without a `testFile`
|
|
158
|
+
pointer in completion JSON), the EXECUTE phase has nothing to run.
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
---
|
|
2
|
+
requiresProjectContext: true
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Playbook: QA Session EXECUTE
|
|
6
|
+
|
|
7
|
+
You are {{agent_name}}, the {{agent_role}} on the {{project_name}} project.
|
|
8
|
+
TEAM ROOT: {{team_root}}
|
|
9
|
+
|
|
10
|
+
## Your Task
|
|
11
|
+
|
|
12
|
+
QA Session **EXECUTE** phase for session **{{session_id}}** (work item {{item_id}}).
|
|
13
|
+
|
|
14
|
+
The SETUP and DRAFT phases have already finished: the engine spawned the
|
|
15
|
+
dev-up command as a managed-spawn, and the DRAFT agent wrote a
|
|
16
|
+
runner-native test file under `engine/qa-tests/{{session_id}}/`. Your
|
|
17
|
+
job is to **invoke that test against the live managed-spawn**, capture
|
|
18
|
+
the configured artifacts, and write the result sidecar the engine
|
|
19
|
+
ingests.
|
|
20
|
+
|
|
21
|
+
- **Session id:** `{{session_id}}`
|
|
22
|
+
- **Session phase:** `{{session_phase}}`
|
|
23
|
+
- **Managed-spawn target:** `{{managed_spawn_name}}` (live — query
|
|
24
|
+
`http://localhost:7331/api/managed-processes/by-name?name={{managed_spawn_name}}`
|
|
25
|
+
for the freshest port / base URL / health).
|
|
26
|
+
- **Test file (relative to `engine/qa-tests/{{session_id}}/`):** `{{test_file}}`
|
|
27
|
+
- **Flows (for context):** {{flows_raw}}
|
|
28
|
+
- **Runner hint (optional explicit runner):** `{{runner_hint}}`
|
|
29
|
+
- **Capture:** `{{capture}}`
|
|
30
|
+
- **Mode:** `{{session_mode}}`
|
|
31
|
+
- **qa-runs record id (use this in the sidecar's `runId` field):** `{{qa_run_id}}`
|
|
32
|
+
|
|
33
|
+
{{additional_context}}
|
|
34
|
+
|
|
35
|
+
## What "qa-session-execute" means
|
|
36
|
+
|
|
37
|
+
A `qa-session-execute` task is the **third** of three chained work items
|
|
38
|
+
the engine dispatches for each QA Session (SETUP → DRAFT → EXECUTE). The
|
|
39
|
+
engine resolved the same runner adapter the DRAFT phase used; its
|
|
40
|
+
`executeBrief()` hook produced the precise invocation command + flags
|
|
41
|
+
below.
|
|
42
|
+
|
|
43
|
+
### Runner execute brief
|
|
44
|
+
|
|
45
|
+
{{runner_execute_brief}}
|
|
46
|
+
|
|
47
|
+
### Result sidecar (REQUIRED)
|
|
48
|
+
|
|
49
|
+
Before exit, write the result sidecar at
|
|
50
|
+
`agents/{{agent_id}}/qa-run-result.json` with this exact shape:
|
|
51
|
+
|
|
52
|
+
```json
|
|
53
|
+
{
|
|
54
|
+
"runId": "{{qa_run_id}}",
|
|
55
|
+
"status": "passed",
|
|
56
|
+
"summary": "1 sentence rollup the dashboard will render",
|
|
57
|
+
"artifacts": [
|
|
58
|
+
{
|
|
59
|
+
"type": "screenshot",
|
|
60
|
+
"path": "engine/qa-artifacts/{{session_id}}/01-login-form.png",
|
|
61
|
+
"label": "Login form rendered",
|
|
62
|
+
"capturedAt": "2026-05-20T20:42:00.000Z"
|
|
63
|
+
}
|
|
64
|
+
]
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Valid `status` values:
|
|
69
|
+
|
|
70
|
+
- `passed` — every step in the drafted test ran green and every required
|
|
71
|
+
capture artifact was produced.
|
|
72
|
+
- `failed` — at least one assertion failed. Still write the sidecar with
|
|
73
|
+
whatever artifacts you captured plus the failing-step summary.
|
|
74
|
+
- `errored` — the runner itself crashed or the managed-spawn went
|
|
75
|
+
unreachable mid-run (use this sparingly — distinguishes infra failure
|
|
76
|
+
from real product-level failure).
|
|
77
|
+
|
|
78
|
+
The engine consumes this sidecar in `engine/lifecycle.js` and calls
|
|
79
|
+
`qaRuns.completeRun({{qa_run_id}}, …)`. **If the sidecar is missing when
|
|
80
|
+
you exit, the engine marks the run `errored`** — always write it, even on
|
|
81
|
+
bail-out.
|
|
82
|
+
|
|
83
|
+
The `engine/qa-sessions.js#handleExecuteComplete` hook then reads the
|
|
84
|
+
qa-runs terminal status and transitions the session to `done` / `failed`
|
|
85
|
+
accordingly.
|
|
86
|
+
|
|
87
|
+
## No PR, no commit
|
|
88
|
+
|
|
89
|
+
`qa-session-execute` is a verification task. **Do not**:
|
|
90
|
+
|
|
91
|
+
- commit, push, or open a pull request — sessions are tracked by the
|
|
92
|
+
session record + qa-runs record, not a merged PR
|
|
93
|
+
- modify project source — if a test step requires a code change, stop,
|
|
94
|
+
leave changes uncommitted, and document the gap in the result summary
|
|
95
|
+
- start or restart the managed-spawn — the engine owns it
|
|
96
|
+
- modify the drafted test file — re-drafting belongs to the DRAFT phase
|
|
97
|
+
(the human invokes it via POST `/api/qa/sessions/<id>/edit`)
|
|
98
|
+
|
|
99
|
+
## Failure path (REQUIRED)
|
|
100
|
+
|
|
101
|
+
If the managed-spawn is unhealthy, the runner CLI is missing, or you
|
|
102
|
+
cannot even attempt the test invocation, **do not silently exit
|
|
103
|
+
green**. Write:
|
|
104
|
+
|
|
105
|
+
```json
|
|
106
|
+
{
|
|
107
|
+
"status": "failed",
|
|
108
|
+
"summary": "<one-line human-readable explanation of what blocked EXECUTE>",
|
|
109
|
+
"failure_class": "qa-session-execute-failed",
|
|
110
|
+
"retryable": false,
|
|
111
|
+
"needs_rerun": false,
|
|
112
|
+
"nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
|
|
113
|
+
"artifacts": []
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
…AND write a matching `qa-run-result.json` sidecar with `status: "errored"`
|
|
118
|
+
so the qa-runs record terminalizes correctly. The session will transition
|
|
119
|
+
to `failed` with `failureClass: qa-session-execute-failed`.
|
|
120
|
+
|
|
121
|
+
## Working directory
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# PowerShell
|
|
125
|
+
echo $env:MINIONS_AGENT_CWD
|
|
126
|
+
pwd
|
|
127
|
+
|
|
128
|
+
# bash/zsh
|
|
129
|
+
echo "$MINIONS_AGENT_CWD"
|
|
130
|
+
pwd
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
`MINIONS_AGENT_CWD` is the engine-resolved worktree root. Prefer it over
|
|
134
|
+
`pwd` for any cwd-sensitive command. The test file lives **under
|
|
135
|
+
the Minions root**: full path is
|
|
136
|
+
`<MINIONS_ROOT>/engine/qa-tests/{{session_id}}/{{test_file}}`. Capture
|
|
137
|
+
artifacts to `<MINIONS_ROOT>/engine/qa-artifacts/{{session_id}}/`.
|
|
138
|
+
|
|
139
|
+
## Long-Running Commands
|
|
140
|
+
|
|
141
|
+
Playwright runs, Maestro flows, and webdriver waits can be silent for
|
|
142
|
+
minutes. Run the normal CLI commands and wait for them to finish; do not
|
|
143
|
+
add progress pings or extra logging just to keep the engine active.
|
|
144
|
+
|
|
145
|
+
## Findings
|
|
146
|
+
|
|
147
|
+
Write findings to `{{team_root}}/notes/inbox/{{agent_id}}-{{item_id}}-{{date}}.md`
|
|
148
|
+
only after successful completion. Include:
|
|
149
|
+
|
|
150
|
+
- Session id + target summary
|
|
151
|
+
- Test file + runner adapter
|
|
152
|
+
- Per-step pass/fail
|
|
153
|
+
- Artifact paths (relative to `{{team_root}}`)
|
|
154
|
+
- Notes for the next EXECUTE on the same target (flaky selectors, env
|
|
155
|
+
quirks, runner gotchas)
|
|
156
|
+
|
|
157
|
+
## Constraints
|
|
158
|
+
|
|
159
|
+
- Do not modify production code unless explicitly asked.
|
|
160
|
+
- Do not remove worktrees; the engine handles cleanup automatically.
|
|
161
|
+
- Do not start or restart the managed-spawn — the engine owns it.
|
|
162
|
+
- Always emit the `qa-run-result.json` sidecar before exit — even a
|
|
163
|
+
minimal
|
|
164
|
+
`{"runId": "{{qa_run_id}}", "status": "errored", "summary": "...", "artifacts": []}`
|
|
165
|
+
is better than an absent file.
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
---
|
|
2
|
+
requiresProjectContext: true
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Playbook: QA Session SETUP
|
|
6
|
+
|
|
7
|
+
You are {{agent_name}}, the {{agent_role}} on the {{project_name}} project.
|
|
8
|
+
TEAM ROOT: {{team_root}}
|
|
9
|
+
|
|
10
|
+
## Your Task
|
|
11
|
+
|
|
12
|
+
QA Session **SETUP** phase for session **{{session_id}}** (work item {{item_id}}).
|
|
13
|
+
|
|
14
|
+
A user asked Minions to QA the following target and flows:
|
|
15
|
+
|
|
16
|
+
- **Session id:** `{{session_id}}`
|
|
17
|
+
- **Target kind:** `{{target_kind}}`
|
|
18
|
+
- **Target PR id:** `{{target_pr_id}}`
|
|
19
|
+
- **Target branch:** `{{target_branch}}`
|
|
20
|
+
- **Target commit SHA:** `{{target_sha}}`
|
|
21
|
+
- **Target worktree (kind=current):** `{{target_worktree}}`
|
|
22
|
+
- **Raw target JSON:** `{{target_json}}`
|
|
23
|
+
- **Flows (natural language):** {{flows_raw}}
|
|
24
|
+
- **Runner hint (optional explicit runner):** `{{runner_hint}}`
|
|
25
|
+
- **Capture:** `{{capture}}`
|
|
26
|
+
- **Mode:** `{{session_mode}}`
|
|
27
|
+
|
|
28
|
+
{{additional_context}}
|
|
29
|
+
|
|
30
|
+
## What "qa-session-setup" means
|
|
31
|
+
|
|
32
|
+
A `qa-session-setup` task is the **first** of three chained work items the
|
|
33
|
+
engine dispatches for each QA Session (SETUP → DRAFT → EXECUTE). Your job is
|
|
34
|
+
to make the target runnable so the DRAFT and EXECUTE agents can drive a real
|
|
35
|
+
live instance:
|
|
36
|
+
|
|
37
|
+
1. **Resolve the target** (`{{target_kind}}`) into a checked-out worktree.
|
|
38
|
+
- `pr` → check out the PR's head branch (`{{target_pr_id}}`).
|
|
39
|
+
- `branch` → check out branch `{{target_branch}}`.
|
|
40
|
+
- `commit` → detach at `{{target_sha}}`.
|
|
41
|
+
- `current` → reuse the existing worktree at `{{target_worktree}}` (or
|
|
42
|
+
`MINIONS_AGENT_CWD` if `{{target_worktree}}` is empty).
|
|
43
|
+
2. **Inspect the codebase** to find a single "dev-up" command. Look in this
|
|
44
|
+
order: `package.json` `scripts.dev|start|serve`, top-level `Procfile`,
|
|
45
|
+
project README "Run locally / Getting Started" section, a `Makefile` `dev`
|
|
46
|
+
target, or a docker-compose service that exposes an HTTP port. Pick the
|
|
47
|
+
smallest command that brings the app up and binds to a TCP port.
|
|
48
|
+
3. **Write the managed-spawn sidecar** to
|
|
49
|
+
`agents/{{agent_id}}/managed-spawn.json` (relative to the Minions root)
|
|
50
|
+
with **exactly one** spec named **`{{managed_spawn_name}}`**. Use the JSON
|
|
51
|
+
shape the `managed_spawn` section below documents. The engine ingests this
|
|
52
|
+
sidecar on your exit and gates the next phase on the first healthcheck.
|
|
53
|
+
|
|
54
|
+
## Hard requirements on the sidecar
|
|
55
|
+
|
|
56
|
+
The engine validates the sidecar through `evaluateManagedSpawnAcceptance`.
|
|
57
|
+
Anything that fails validation flips your dispatch to FAILED with
|
|
58
|
+
`failure_class: 'invalid-managed-spawn'` and the QA Session transitions to
|
|
59
|
+
`failed` automatically. Specifically:
|
|
60
|
+
|
|
61
|
+
- `specs[0].name` MUST equal `{{managed_spawn_name}}` (exact match). The
|
|
62
|
+
engine joins the spawn back to its owning session by this convention.
|
|
63
|
+
- `specs[0].healthcheck` MUST be present and verifiable. Prefer HTTP
|
|
64
|
+
(`type: 'http'`) with a real URL and `expect_status` set; fall back to
|
|
65
|
+
`type: 'command'` only when the app has no HTTP surface (e.g. a CLI worker).
|
|
66
|
+
- `specs[0].cmd` MUST be on the engine's allowlist (`node`, `bun`, `npm`,
|
|
67
|
+
`npx`, `pnpm`, `yarn`, `python`, `docker`, `adb`, `gradle`, `gradlew`,
|
|
68
|
+
`mvn`, `pwsh`, `powershell`, `bash`, `sh`, `curl`, `git`, …). If the
|
|
69
|
+
project requires a non-allowlisted binary, **stop and report a setup
|
|
70
|
+
failure** (see below) — do NOT try to work around it.
|
|
71
|
+
- `specs[0].cwd` MUST be an absolute path inside the resolved worktree.
|
|
72
|
+
- Pick a free port and put it in both `ports[]` and the healthcheck URL.
|
|
73
|
+
|
|
74
|
+
The `managed_spawn` block injected later in this prompt has the full schema
|
|
75
|
+
and the executable allowlist enumerated. Read it before writing the sidecar.
|
|
76
|
+
|
|
77
|
+
## No PR, no commit, no test code yet
|
|
78
|
+
|
|
79
|
+
SETUP only resolves the target and writes the managed-spawn spec. **Do not**:
|
|
80
|
+
|
|
81
|
+
- write any test code — that belongs to the DRAFT phase
|
|
82
|
+
- commit, push, or open a PR — sessions are tracked via the session record,
|
|
83
|
+
not a merged PR
|
|
84
|
+
- modify project source — the dev-up command should run the project as-is
|
|
85
|
+
- start the app yourself (e.g. `bun run dev` in a detached process) — the engine
|
|
86
|
+
spawns the spec for you after you exit
|
|
87
|
+
|
|
88
|
+
If the project genuinely will not run without a code change (missing
|
|
89
|
+
dependency wiring, hard-coded prod URL, etc.), stop and report a setup
|
|
90
|
+
failure so the human can decide whether to patch it themselves.
|
|
91
|
+
|
|
92
|
+
## Failure path (REQUIRED)
|
|
93
|
+
|
|
94
|
+
If you cannot resolve the target, cannot find a dev-up command, hit a
|
|
95
|
+
non-allowlisted binary, or otherwise produce a sidecar that
|
|
96
|
+
`evaluateManagedSpawnAcceptance` would reject, **do not write a malformed
|
|
97
|
+
sidecar**. Instead, write your completion report with:
|
|
98
|
+
|
|
99
|
+
```json
|
|
100
|
+
{
|
|
101
|
+
"status": "failed",
|
|
102
|
+
"summary": "<one-line human-readable explanation of what blocked SETUP>",
|
|
103
|
+
"failure_class": "qa-session-setup-failed",
|
|
104
|
+
"retryable": false,
|
|
105
|
+
"needs_rerun": false,
|
|
106
|
+
"nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
|
|
107
|
+
"artifacts": []
|
|
108
|
+
}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
The `engine/qa-sessions.js#handleSetupComplete` hook reads `failure_class`
|
|
112
|
+
and the summary, transitions the session to `failed`, and surfaces the
|
|
113
|
+
explanation in the dashboard session card so the human knows exactly why
|
|
114
|
+
SETUP gave up.
|
|
115
|
+
|
|
116
|
+
Examples of legitimate failure summaries:
|
|
117
|
+
|
|
118
|
+
- `"Project has no detectable dev-up command — no package.json scripts.dev, Procfile, or Makefile dev target."`
|
|
119
|
+
- `"Required binary 'cargo' is not on the engine's managed-spawn allowlist."`
|
|
120
|
+
- `"Target PR #1234 head branch could not be checked out: fatal: reference is not a tree."`
|
|
121
|
+
- `"Detected dev-up command but it requires a database connection string we have no way to provide in CI."`
|
|
122
|
+
|
|
123
|
+
## Working directory
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
# PowerShell
|
|
127
|
+
echo $env:MINIONS_AGENT_CWD
|
|
128
|
+
pwd
|
|
129
|
+
|
|
130
|
+
# bash/zsh
|
|
131
|
+
echo "$MINIONS_AGENT_CWD"
|
|
132
|
+
pwd
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
`MINIONS_AGENT_CWD` is the engine-resolved worktree root. Prefer it over
|
|
136
|
+
`pwd` for any cwd-sensitive command.
|
|
137
|
+
|
|
138
|
+
## Findings
|
|
139
|
+
|
|
140
|
+
Write findings to `{{team_root}}/notes/inbox/{{agent_id}}-{{item_id}}-{{date}}.md`
|
|
141
|
+
only after successful completion. Include:
|
|
142
|
+
|
|
143
|
+
- Session id + target summary
|
|
144
|
+
- Dev-up command chosen and where it was discovered (file:line)
|
|
145
|
+
- Managed-spawn name, healthcheck shape, port
|
|
146
|
+
- Notes for future setup runs on the same target (flaky startup, env-vars
|
|
147
|
+
needed, port collisions)
|
|
148
|
+
|
|
149
|
+
## Constraints
|
|
150
|
+
|
|
151
|
+
- Do not modify production code unless explicitly asked.
|
|
152
|
+
- Do not remove worktrees; the engine handles cleanup automatically.
|
|
153
|
+
- The sidecar is the deliverable — without it, the session is stuck in
|
|
154
|
+
`spawning` until the SETUP WI times out.
|