@yemi33/minions 0.1.2070 → 0.1.2072
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/js/qa.js +358 -0
- package/dashboard/js/state.js +2 -1
- package/dashboard/pages/qa.html +72 -0
- package/dashboard/styles.css +102 -0
- package/dashboard.js +410 -6
- package/docs/qa-runbook-lifecycle.md +232 -0
- package/engine/cleanup.js +4 -1
- package/engine/comment-classifier.js +8 -1
- package/engine/cooldown.js +6 -2
- package/engine/gh-comment.js +74 -3
- package/engine/gh-token.js +7 -9
- package/engine/lifecycle.js +100 -0
- package/engine/pipeline.js +9 -1
- package/engine/playbook.js +39 -0
- package/engine/qa-runners/maestro.js +152 -0
- package/engine/qa-runners/playwright.js +149 -0
- package/engine/qa-runners.js +323 -0
- package/engine/qa-sessions.js +1008 -0
- package/engine/shared.js +71 -12
- package/engine.js +140 -0
- package/package.json +1 -1
- package/playbooks/qa-session-draft.md +158 -0
- package/playbooks/qa-session-execute.md +165 -0
- package/playbooks/qa-session-setup.md +154 -0
- package/prompts/cc-system.md +43 -0
- package/routing.md +3 -0
package/engine/shared.js
CHANGED
|
@@ -467,6 +467,22 @@ function safeReadDir(dir) {
|
|
|
467
467
|
try { return fs.readdirSync(dir); } catch { return []; }
|
|
468
468
|
}
|
|
469
469
|
|
|
470
|
+
/**
|
|
471
|
+
* Read a JSON file with **automatic restore from `.backup` sidecar** on
|
|
472
|
+
* missing/corrupt primary. Intended for live, mutable state files
|
|
473
|
+
* (work-items.json, dispatch.json, pull-requests.json, etc.) that are paired
|
|
474
|
+
* with a `.backup` sidecar written by `safeWrite`. Returns the parsed JSON,
|
|
475
|
+
* or null when both primary and backup are missing/unparseable.
|
|
476
|
+
*
|
|
477
|
+
* **Restore semantics:** If the primary is missing or unparseable but a valid
|
|
478
|
+
* `.backup` exists, the backup is parsed, returned, AND atomically rewritten
|
|
479
|
+
* to the primary path (best-effort). This protects live state from torn
|
|
480
|
+
* writes / interrupted saves.
|
|
481
|
+
*
|
|
482
|
+
* Counterpart: `safeJsonNoRestore` for terminal artifacts and "missing == gone"
|
|
483
|
+
* reads (cooldowns, archived PRDs, ephemeral session state) where reviving a
|
|
484
|
+
* stale `.backup` is actively harmful. See its JSDoc for selection guidance.
|
|
485
|
+
*/
|
|
470
486
|
function safeJson(p) {
|
|
471
487
|
// Split the read from the parse so we can distinguish "file missing" (normal
|
|
472
488
|
// pre-create state — silent) from "file present but corrupt JSON" (real
|
|
@@ -524,22 +540,42 @@ function safeJsonObj(p) { return safeJson(p) || {}; }
|
|
|
524
540
|
function safeJsonArr(p) { return safeJson(p) || []; }
|
|
525
541
|
|
|
526
542
|
/**
|
|
527
|
-
* Sibling of safeJson for terminal-artifact
|
|
528
|
-
*
|
|
529
|
-
*
|
|
530
|
-
* the
|
|
543
|
+
* Sibling of safeJson for terminal-artifact and "missing == gone" reads
|
|
544
|
+
* (PRDs in `prd/`, archived plans, cooldowns, ephemeral session state —
|
|
545
|
+
* anything where a missing primary should NOT auto-restore from a stale
|
|
546
|
+
* `.backup` sidecar). Returns the parsed JSON on success, or `defaultValue`
|
|
547
|
+
* (default `null`) on **any** failure: missing file, unparseable JSON, or
|
|
548
|
+
* IO error. The `.backup` sidecar is never consulted.
|
|
531
549
|
*
|
|
532
550
|
* Why a separate primitive: safeJson's restore-on-miss is correct for live
|
|
533
551
|
* state files (work-items.json, dispatch.json, pull-requests.json, etc.) but
|
|
534
|
-
* actively harmful for terminal artifacts.
|
|
535
|
-
*
|
|
536
|
-
*
|
|
537
|
-
*
|
|
552
|
+
* actively harmful for terminal artifacts. Examples of misuse and the bugs
|
|
553
|
+
* they hide:
|
|
554
|
+
* - Archived PRDs leave a `.backup` sidecar in `prd/`; reading the active
|
|
555
|
+
* path with safeJson silently restores it and the dashboard sees a
|
|
556
|
+
* phantom "active" PRD (W-mouptdh1000h9f39). PRDs are end-state — no
|
|
557
|
+
* resurrection.
|
|
558
|
+
* - Cooldowns are time-bounded ephemeral state (24h TTL). Restoring a
|
|
559
|
+
* stale `cooldowns.json.backup` could resurrect expired entries that
|
|
560
|
+
* should already have been pruned, suppressing legitimate dispatches.
|
|
561
|
+
* - Restoring corrupt-primary scenarios from `.backup` masks the underlying
|
|
562
|
+
* write integrity failure and breaks State Integrity tests.
|
|
563
|
+
*
|
|
564
|
+
* **When to use which:**
|
|
565
|
+
* - `safeJson(p)` — live mutable state paired with safeWrite-managed `.backup`.
|
|
566
|
+
* Restore-on-miss is protective against torn writes.
|
|
567
|
+
* - `safeJsonNoRestore(p, defaultValue)` — terminal artifacts, time-bounded
|
|
568
|
+
* ephemeral state, or any read where "missing/corrupt" should mean "gone".
|
|
538
569
|
*
|
|
539
570
|
* Parse errors are logged so silent corruption still surfaces (mirrors
|
|
540
571
|
* safeJson's contract). Read errors other than ENOENT are also logged.
|
|
572
|
+
*
|
|
573
|
+
* @param {string} p - Absolute path to the JSON file.
|
|
574
|
+
* @param {*} [defaultValue=null] - Value returned on any failure (missing,
|
|
575
|
+
* parse error, IO error). Pass `{}` / `[]` to mirror safeJsonObj/safeJsonArr.
|
|
576
|
+
* @returns {*} Parsed JSON on success, otherwise `defaultValue`.
|
|
541
577
|
*/
|
|
542
|
-
function safeJsonNoRestore(p) {
|
|
578
|
+
function safeJsonNoRestore(p, defaultValue = null) {
|
|
543
579
|
let raw;
|
|
544
580
|
try {
|
|
545
581
|
raw = fs.readFileSync(p, 'utf8');
|
|
@@ -547,13 +583,13 @@ function safeJsonNoRestore(p) {
|
|
|
547
583
|
if (e && e.code !== 'ENOENT') {
|
|
548
584
|
console.warn(`[safeJsonNoRestore] read failed for ${path.basename(p)}: ${e.message}`);
|
|
549
585
|
}
|
|
550
|
-
return
|
|
586
|
+
return defaultValue;
|
|
551
587
|
}
|
|
552
588
|
try {
|
|
553
589
|
return JSON.parse(raw);
|
|
554
590
|
} catch (parseErr) {
|
|
555
591
|
console.error(`[safeJsonNoRestore] parse failure for ${path.basename(p)}: ${parseErr.message}`);
|
|
556
|
-
return
|
|
592
|
+
return defaultValue;
|
|
557
593
|
}
|
|
558
594
|
}
|
|
559
595
|
|
|
@@ -1144,10 +1180,20 @@ function mutateJsonFileLocked(filePath, mutateFn, {
|
|
|
1144
1180
|
let data = safeJson(filePath);
|
|
1145
1181
|
const parsedInvalid = fileExists && data === null;
|
|
1146
1182
|
if (data === null || typeof data !== 'object') data = Array.isArray(defaultValue) ? [...defaultValue] : { ...defaultValue };
|
|
1147
|
-
|
|
1183
|
+
// Normalize BEFORE taking the baseline snapshot so that both `beforeSerialized`
|
|
1184
|
+
// and the post-mutator snapshot reflect post-normalize state. Capturing the
|
|
1185
|
+
// baseline before normalize breaks the `skipWriteIfUnchanged` optimization for
|
|
1186
|
+
// pull-requests.json files: a no-op mutator on a denormalized file would
|
|
1187
|
+
// always trip the write path because normalization itself shifted serialized
|
|
1188
|
+
// bytes between the two snapshots (P-bfa1c-skipwrite-timing). The trade-off
|
|
1189
|
+
// is intentional: when normalization is the ONLY change, we deliberately
|
|
1190
|
+
// leave the on-disk file denormalized — readers re-run normalizePrRecords on
|
|
1191
|
+
// load (see getPrLinks, engine/queries.js:670-674), so the in-memory contract
|
|
1192
|
+
// is preserved without the per-poll mtime bump.
|
|
1148
1193
|
if (path.basename(filePath) === 'pull-requests.json' && Array.isArray(data)) {
|
|
1149
1194
|
normalizePrRecords(data, resolveProjectForPrPath(filePath));
|
|
1150
1195
|
}
|
|
1196
|
+
const beforeSerialized = skipWriteIfUnchanged ? JSON.stringify(data) : null;
|
|
1151
1197
|
const next = mutateFn(data);
|
|
1152
1198
|
const finalData = next === undefined ? data : next;
|
|
1153
1199
|
const shouldWrite = !skipWriteIfUnchanged || parsedInvalid || JSON.stringify(finalData) !== beforeSerialized;
|
|
@@ -1767,6 +1813,14 @@ function parseStreamJsonOutput(raw, runtimeName, opts) {
|
|
|
1767
1813
|
|
|
1768
1814
|
const KB_CATEGORIES = ['architecture', 'conventions', 'project-notes', 'build-reports', 'reviews'];
|
|
1769
1815
|
|
|
1816
|
+
// P-bfa2b-kb-path-traversal — read-side whitelist for /api/knowledge/:category/:file.
|
|
1817
|
+
// Superset of KB_CATEGORIES: adds 'agents' because per-agent personal memory is
|
|
1818
|
+
// served from knowledge/agents/<id>.md (see engine/consolidation.js +
|
|
1819
|
+
// engine/playbook.js) but is NOT a destination for inbox classification, so
|
|
1820
|
+
// KB_CATEGORIES intentionally excludes it. Frozen so handlers can rely on the
|
|
1821
|
+
// list being immutable across the process lifetime.
|
|
1822
|
+
const KB_READABLE_CATEGORIES = Object.freeze([...KB_CATEGORIES, 'agents']);
|
|
1823
|
+
|
|
1770
1824
|
/**
|
|
1771
1825
|
* Classify an inbox item into a knowledge base category.
|
|
1772
1826
|
* Single source of truth — used by consolidation.js (both LLM and regex paths).
|
|
@@ -4763,6 +4817,10 @@ function mutatePullRequests(filePath, mutator) {
|
|
|
4763
4817
|
return mutator(data) || data;
|
|
4764
4818
|
}, {
|
|
4765
4819
|
defaultValue: [],
|
|
4820
|
+
skipWriteIfUnchanged: true,
|
|
4821
|
+
// Emit only when an actual write happened. skipWriteIfUnchanged can
|
|
4822
|
+
// short-circuit no-op mutations; suppress the event in that case so the
|
|
4823
|
+
// dashboard cache-version doesn't bump for nothing.
|
|
4766
4824
|
onWrote: () => {
|
|
4767
4825
|
try { require('./db-events').emitStateEvent('pull_requests'); } catch { /* optional */ }
|
|
4768
4826
|
},
|
|
@@ -5158,6 +5216,7 @@ module.exports = {
|
|
|
5158
5216
|
gitEnv,
|
|
5159
5217
|
parseStreamJsonOutput,
|
|
5160
5218
|
KB_CATEGORIES,
|
|
5219
|
+
KB_READABLE_CATEGORIES,
|
|
5161
5220
|
classifyInboxItem,
|
|
5162
5221
|
ENGINE_DEFAULTS,
|
|
5163
5222
|
resolveAgentCli, resolveCcCli, resolveCcUseWorkerPool, resolveAgentModel, resolveCcModel,
|
package/engine.js
CHANGED
|
@@ -5021,6 +5021,88 @@ async function discoverFromPrs(config, project) {
|
|
|
5021
5021
|
return newWork;
|
|
5022
5022
|
}
|
|
5023
5023
|
|
|
5024
|
+
/**
|
|
5025
|
+
* P-f9a2e1b4 — Compute runner_brief / runner_execute_brief / test_file for
|
|
5026
|
+
* QA Session DRAFT and EXECUTE dispatches.
|
|
5027
|
+
*
|
|
5028
|
+
* Lazy-requires `./engine/qa-sessions`, `./engine/qa-runners`, and
|
|
5029
|
+
* `./engine/managed-spawn` so non-QA dispatches don't pay the load cost
|
|
5030
|
+
* and so test isolation (createTestMinionsDir → ISOLATED_MODULES) gets a
|
|
5031
|
+
* fresh module instance per test.
|
|
5032
|
+
*
|
|
5033
|
+
* Returns `{ runner_brief: '', runner_execute_brief: '', test_file: '' }`
|
|
5034
|
+
* for:
|
|
5035
|
+
* - non-QA-session items (no item.meta.sessionId)
|
|
5036
|
+
* - SETUP phase (the SETUP playbook doesn't read these vars; the runner
|
|
5037
|
+
* adapter contract is N/A until the managed-spawn is healthy)
|
|
5038
|
+
* - session missing, no runner detected, or any other throw in the lookup
|
|
5039
|
+
* chain — each is surfaced via a WARN log so the render still succeeds
|
|
5040
|
+
* and the playbook's empty-brief failure path catches it. A failed
|
|
5041
|
+
* managed-spawn lookup or a throwing brief hook only WARNs; test_file is still returned.
|
|
5042
|
+
*/
|
|
5043
|
+
function _buildRunnerBriefVars(item, project) {
|
|
5044
|
+
const empty = { runner_brief: '', runner_execute_brief: '', test_file: '' };
|
|
5045
|
+
const meta = item && item.meta;
|
|
5046
|
+
if (!meta || !meta.sessionId) return empty;
|
|
5047
|
+
const phase = meta.sessionPhase;
|
|
5048
|
+
if (phase !== 'draft' && phase !== 'execute') return empty;
|
|
5049
|
+
try {
|
|
5050
|
+
const qaSessions = require('./engine/qa-sessions');
|
|
5051
|
+
const qaRunners = require('./engine/qa-runners');
|
|
5052
|
+
const managedSpawn = require('./engine/managed-spawn');
|
|
5053
|
+
const session = qaSessions.getSession(meta.sessionId);
|
|
5054
|
+
if (!session) {
|
|
5055
|
+
log('warn', `qa-session render: session ${meta.sessionId} not found — runner brief empty`);
|
|
5056
|
+
return empty;
|
|
5057
|
+
}
|
|
5058
|
+
const target = (meta.qaSession && meta.qaSession.target) || session.spec.target || {};
|
|
5059
|
+
const explicit = (meta.qaSession && meta.qaSession.runner) || session.spec.runner || '';
|
|
5060
|
+
const runner = qaRunners.detectRunner(target, project || null, explicit);
|
|
5061
|
+
if (!runner) {
|
|
5062
|
+
log('warn', `qa-session render: no runner detected for session ${meta.sessionId} (target.kind=${target.kind}, explicit=${explicit || 'none'}) — runner brief empty`);
|
|
5063
|
+
return empty;
|
|
5064
|
+
}
|
|
5065
|
+
// Live managed-spawn snapshot (port / base_url / health). listManagedSpecs()
|
|
5066
|
+
// returns [] when the state file is missing or unreadable; defensive
|
|
5067
|
+
// filter is fine here.
|
|
5068
|
+
let spawnInfo = null;
|
|
5069
|
+
try {
|
|
5070
|
+
const specs = managedSpawn.listManagedSpecs();
|
|
5071
|
+
spawnInfo = (specs || []).find(s => s && s.name === session.managedSpawnName) || null;
|
|
5072
|
+
} catch (spawnErr) {
|
|
5073
|
+
log('warn', `qa-session render: managed-spawn lookup failed for ${session.managedSpawnName}: ${spawnErr.message}`);
|
|
5074
|
+
}
|
|
5075
|
+
const briefOpts = {
|
|
5076
|
+
session,
|
|
5077
|
+
sessionId: session.id,
|
|
5078
|
+
spawnInfo,
|
|
5079
|
+
flowsRaw: (meta.qaSession && meta.qaSession.flowsRaw) || session.spec.flowsRaw || '',
|
|
5080
|
+
capture: (meta.qaSession && meta.qaSession.capture) || session.spec.capture || {},
|
|
5081
|
+
testFile: session.testFile || null,
|
|
5082
|
+
};
|
|
5083
|
+
const out = { runner_brief: '', runner_execute_brief: '', test_file: session.testFile || '' };
|
|
5084
|
+
if (phase === 'draft') {
|
|
5085
|
+
try {
|
|
5086
|
+
const brief = runner.generateBrief(briefOpts);
|
|
5087
|
+
out.runner_brief = typeof brief === 'string' ? brief : (brief && brief.markdown) || '';
|
|
5088
|
+
} catch (briefErr) {
|
|
5089
|
+
log('warn', `qa-session render: runner ${runner.name} generateBrief threw: ${briefErr.message}`);
|
|
5090
|
+
}
|
|
5091
|
+
} else if (phase === 'execute') {
|
|
5092
|
+
try {
|
|
5093
|
+
const brief = runner.executeBrief(briefOpts);
|
|
5094
|
+
out.runner_execute_brief = typeof brief === 'string' ? brief : (brief && brief.markdown) || '';
|
|
5095
|
+
} catch (briefErr) {
|
|
5096
|
+
log('warn', `qa-session render: runner ${runner.name} executeBrief threw: ${briefErr.message}`);
|
|
5097
|
+
}
|
|
5098
|
+
}
|
|
5099
|
+
return out;
|
|
5100
|
+
} catch (err) {
|
|
5101
|
+
log('warn', `qa-session render: _buildRunnerBriefVars failed for ${meta.sessionId} (${phase}): ${err.message}`);
|
|
5102
|
+
return empty;
|
|
5103
|
+
}
|
|
5104
|
+
}
|
|
5105
|
+
|
|
5024
5106
|
/**
|
|
5025
5107
|
* Scan work-items.json for manually queued tasks
|
|
5026
5108
|
*/
|
|
@@ -5079,6 +5161,64 @@ function renderProjectWorkItemPromptForAgent(item, workType, agentId, config, pr
|
|
|
5079
5161
|
qa_artifacts_dir: item.meta && item.meta.qaRunId
|
|
5080
5162
|
? path.posix.join('engine', 'qa-artifacts', String(item.meta.qaRunId))
|
|
5081
5163
|
: '',
|
|
5164
|
+
// P-e6b3c2d8 — QA Session template vars. The qa-sessions chain helpers
|
|
5165
|
+
// (engine/qa-sessions.js#_baseWorkItem) stamp meta.sessionId,
|
|
5166
|
+
// meta.sessionPhase, and meta.qaSession.{target,flowsRaw,mode,capture,runner}
|
|
5167
|
+
// on each SETUP/DRAFT/EXECUTE WI; renderProjectWorkItemPromptForAgent
|
|
5168
|
+
// surfaces them as named template vars so the qa-session-* playbooks
|
|
5169
|
+
// can reference them by literal {{name}} without re-resolving from
|
|
5170
|
+
// item.meta. Of target_pr_id / target_branch / target_sha / target_worktree, only the one matching target.kind is populated; the rest
|
|
5171
|
+
// resolve to empty strings (filtered out of unresolved-var warnings via
|
|
5172
|
+
// PLAYBOOK_OPTIONAL_VARS).
|
|
5173
|
+
session_id: (item.meta && item.meta.sessionId) || '',
|
|
5174
|
+
session_phase: (item.meta && item.meta.sessionPhase) || '',
|
|
5175
|
+
managed_spawn_name: item.meta && item.meta.sessionId
|
|
5176
|
+
? 'qa-session-' + String(item.meta.sessionId)
|
|
5177
|
+
: '',
|
|
5178
|
+
target_kind: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind) || '',
|
|
5179
|
+
target_pr_id: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'pr'
|
|
5180
|
+
? String(item.meta.qaSession.target.prId || '')
|
|
5181
|
+
: ''),
|
|
5182
|
+
target_branch: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'branch'
|
|
5183
|
+
? String(item.meta.qaSession.target.branch || '')
|
|
5184
|
+
: ''),
|
|
5185
|
+
target_sha: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'commit'
|
|
5186
|
+
? String(item.meta.qaSession.target.sha || '')
|
|
5187
|
+
: ''),
|
|
5188
|
+
target_worktree: (item.meta && item.meta.qaSession && item.meta.qaSession.target && item.meta.qaSession.target.kind === 'current'
|
|
5189
|
+
? String(item.meta.qaSession.target.worktree || '')
|
|
5190
|
+
: ''),
|
|
5191
|
+
target_json: (item.meta && item.meta.qaSession && item.meta.qaSession.target)
|
|
5192
|
+
? JSON.stringify(item.meta.qaSession.target)
|
|
5193
|
+
: '',
|
|
5194
|
+
flows_raw: (item.meta && item.meta.qaSession && item.meta.qaSession.flowsRaw) || '',
|
|
5195
|
+
runner_hint: (item.meta && item.meta.qaSession && item.meta.qaSession.runner) || '',
|
|
5196
|
+
capture: (item.meta && item.meta.qaSession && item.meta.qaSession.capture)
|
|
5197
|
+
? Object.entries(item.meta.qaSession.capture)
|
|
5198
|
+
.filter(([, v]) => !!v)
|
|
5199
|
+
.map(([k]) => k)
|
|
5200
|
+
.join(',')
|
|
5201
|
+
: '',
|
|
5202
|
+
session_mode: (item.meta && item.meta.qaSession && item.meta.qaSession.mode) || '',
|
|
5203
|
+
// P-f9a2e1b4 — Runner adapter briefs. The DRAFT playbook consumes
|
|
5204
|
+
// {{runner_brief}} (runner.generateBrief() output); EXECUTE consumes
|
|
5205
|
+
// {{runner_execute_brief}} (runner.executeBrief() output) plus
|
|
5206
|
+
// {{test_file}} (session.testFile, set after DRAFT). For non-QA-session
|
|
5207
|
+
// items and for the SETUP phase, all three resolve to empty strings;
|
|
5208
|
+
// PLAYBOOK_OPTIONAL_VARS keeps them out of unresolved-var warnings.
|
|
5209
|
+
//
|
|
5210
|
+
// We lazy-require qa-sessions + qa-runners + managed-spawn so non-QA
|
|
5211
|
+
// dispatches don't pay the load cost, and so test isolation (which
|
|
5212
|
+
// busts these modules from require.cache via createTestMinionsDir →
|
|
5213
|
+
// ISOLATED_MODULES) picks up a fresh module instance per test.
|
|
5214
|
+
//
|
|
5215
|
+
// Defensive failure mode: any throw inside the brief computation
|
|
5216
|
+
// resolves to an empty string and surfaces as a warn log. Renders
|
|
5217
|
+
// must never blow up because a runner adapter misbehaved — the agent
|
|
5218
|
+
// gets a "no runner brief available" cue and reports a setup
|
|
5219
|
+
// failure via the qa-session-draft-failed / qa-session-execute-failed
|
|
5220
|
+
// path. (See playbooks/qa-session-draft.md → "Failure path" section.)
|
|
5221
|
+
..._buildRunnerBriefVars(item, project),
|
|
5082
5222
|
};
|
|
5083
5223
|
const cpResult = buildWorkItemDispatchVars(item, vars, config, {
|
|
5084
5224
|
worktreePath: vars.worktree_path || root,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yemi33/minions",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.2072",
|
|
4
4
|
"description": "Multi-agent AI dev team that runs from ~/.minions/ — five autonomous agents share a single engine, dashboard, and knowledge base",
|
|
5
5
|
"bin": {
|
|
6
6
|
"minions": "bin/minions.js"
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
---
|
|
2
|
+
requiresProjectContext: true
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Playbook: QA Session DRAFT
|
|
6
|
+
|
|
7
|
+
You are {{agent_name}}, the {{agent_role}} on the {{project_name}} project.
|
|
8
|
+
TEAM ROOT: {{team_root}}
|
|
9
|
+
|
|
10
|
+
## Your Task
|
|
11
|
+
|
|
12
|
+
QA Session **DRAFT** phase for session **{{session_id}}** (work item {{item_id}}).
|
|
13
|
+
|
|
14
|
+
A user asked Minions to QA the following target and flows; the SETUP phase
|
|
15
|
+
has already resolved the target into a worktree and the engine has spawned
|
|
16
|
+
the dev-up command as a managed-spawn. Your job is to translate the
|
|
17
|
+
natural-language flows into a runner-native test file.
|
|
18
|
+
|
|
19
|
+
- **Session id:** `{{session_id}}`
|
|
20
|
+
- **Session phase:** `{{session_phase}}`
|
|
21
|
+
- **Target kind:** `{{target_kind}}`
|
|
22
|
+
- **Target PR id:** `{{target_pr_id}}`
|
|
23
|
+
- **Target branch:** `{{target_branch}}`
|
|
24
|
+
- **Target commit SHA:** `{{target_sha}}`
|
|
25
|
+
- **Target worktree (kind=current):** `{{target_worktree}}`
|
|
26
|
+
- **Raw target JSON:** `{{target_json}}`
|
|
27
|
+
- **Flows (natural language):** {{flows_raw}}
|
|
28
|
+
- **Runner hint (optional explicit runner):** `{{runner_hint}}`
|
|
29
|
+
- **Capture:** `{{capture}}`
|
|
30
|
+
- **Mode:** `{{session_mode}}`
|
|
31
|
+
- **Managed-spawn target:** `{{managed_spawn_name}}` (live — query
|
|
32
|
+
`http://localhost:7331/api/managed-processes/by-name?name={{managed_spawn_name}}`
|
|
33
|
+
for the freshest port / base URL / health).
|
|
34
|
+
|
|
35
|
+
{{additional_context}}
|
|
36
|
+
|
|
37
|
+
## What "qa-session-draft" means
|
|
38
|
+
|
|
39
|
+
A `qa-session-draft` task is the **second** of three chained work items the
|
|
40
|
+
engine dispatches for each QA Session (SETUP → DRAFT → EXECUTE). The SETUP
|
|
41
|
+
agent already produced a managed-spawn sidecar and the engine spawned the
|
|
42
|
+
dev-up command; the EXECUTE agent will run your drafted test against that
|
|
43
|
+
live spawn. Your only deliverable is the **test file itself**, written in
|
|
44
|
+
the runner's native format under
|
|
45
|
+
`engine/qa-tests/{{session_id}}/` (relative to the Minions root).
|
|
46
|
+
|
|
47
|
+
The engine resolved a concrete **runner adapter** for this session
|
|
48
|
+
(Playwright, Maestro, or a project plugin) and its `generateBrief()` hook
|
|
49
|
+
already produced the precise authoring instructions you need. Read the
|
|
50
|
+
runner brief below, then implement exactly the file it describes.
|
|
51
|
+
|
|
52
|
+
### Runner brief
|
|
53
|
+
|
|
54
|
+
{{runner_brief}}
|
|
55
|
+
|
|
56
|
+
### Reporting the test file path
|
|
57
|
+
|
|
58
|
+
When you exit, your completion JSON MUST include a `testFile` field with
|
|
59
|
+
the **relative path inside `engine/qa-tests/{{session_id}}/`** of the file
|
|
60
|
+
you wrote (e.g. `test.spec.js`, `flow.yaml`). The engine reads this and
|
|
61
|
+
stores it on the session record so the EXECUTE prompt can reference it
|
|
62
|
+
directly. Without `testFile`, EXECUTE falls back to a generic
|
|
63
|
+
`test.<ext>` hint and the agent may pick the wrong file.
|
|
64
|
+
|
|
65
|
+
Example:
|
|
66
|
+
|
|
67
|
+
```json
|
|
68
|
+
{
|
|
69
|
+
"status": "success",
|
|
70
|
+
"summary": "Drafted Playwright spec covering login + redirect flow",
|
|
71
|
+
"testFile": "test.spec.js",
|
|
72
|
+
"nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
|
|
73
|
+
"artifacts": [
|
|
74
|
+
{ "type": "file", "path": "engine/qa-tests/{{session_id}}/test.spec.js", "title": "Drafted Playwright spec" }
|
|
75
|
+
]
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## No PR, no commit
|
|
80
|
+
|
|
81
|
+
`qa-session-draft` is a test-authoring task. **Do not**:
|
|
82
|
+
|
|
83
|
+
- commit, push, or open a pull request — sessions are tracked by the
|
|
84
|
+
session record, not a merged PR
|
|
85
|
+
- modify project source — the only file you should write is the test
|
|
86
|
+
file under `engine/qa-tests/{{session_id}}/`
|
|
87
|
+
- start the managed-spawn yourself — it is already running; query
|
|
88
|
+
`/api/managed-processes/by-name?name={{managed_spawn_name}}` for the
|
|
89
|
+
live port / base URL / health snapshot
|
|
90
|
+
|
|
91
|
+
## Failure path (REQUIRED)
|
|
92
|
+
|
|
93
|
+
If the runner brief is empty (no runner could be detected and none was
|
|
94
|
+
specified), if you cannot translate the flows into a runner-native file,
|
|
95
|
+
or if the managed-spawn is not healthy enough to draft against, **do not
|
|
96
|
+
write a partial test file**. Instead, write your completion report with:
|
|
97
|
+
|
|
98
|
+
```json
|
|
99
|
+
{
|
|
100
|
+
"status": "failed",
|
|
101
|
+
"summary": "<one-line human-readable explanation of what blocked DRAFT>",
|
|
102
|
+
"failure_class": "qa-session-draft-failed",
|
|
103
|
+
"retryable": false,
|
|
104
|
+
"needs_rerun": false,
|
|
105
|
+
"nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
|
|
106
|
+
"artifacts": []
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
The `engine/qa-sessions.js#handleDraftComplete` hook reads `failure_class`
|
|
111
|
+
and the summary, transitions the session to `failed`, and surfaces the
|
|
112
|
+
explanation in the dashboard session card so the human knows exactly why
|
|
113
|
+
DRAFT gave up.
|
|
114
|
+
|
|
115
|
+
Examples of legitimate failure summaries:
|
|
116
|
+
|
|
117
|
+
- `"No QA runner detected and none specified — install Playwright or Maestro and re-run with runner=<name>."`
|
|
118
|
+
- `"Flows reference a feature that does not exist in the spawn (e.g. /admin route returns 404)."`
|
|
119
|
+
- `"Managed-spawn {{managed_spawn_name}} not healthy — base URL unreachable from the agent."`
|
|
120
|
+
|
|
121
|
+
## Working directory
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# PowerShell
|
|
125
|
+
echo $env:MINIONS_AGENT_CWD
|
|
126
|
+
pwd
|
|
127
|
+
|
|
128
|
+
# bash/zsh
|
|
129
|
+
echo "$MINIONS_AGENT_CWD"
|
|
130
|
+
pwd
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
`MINIONS_AGENT_CWD` is the engine-resolved worktree root. Prefer it over
|
|
134
|
+
`pwd` for any cwd-sensitive command. The test file path is **relative to
|
|
135
|
+
the Minions root**, not the project worktree — write to
|
|
136
|
+
`<MINIONS_ROOT>/engine/qa-tests/{{session_id}}/`. The Minions root is the
|
|
137
|
+
parent of the project worktree (one level above `MINIONS_AGENT_CWD` for
|
|
138
|
+
project-scoped sessions; equal to `MINIONS_AGENT_CWD` for central
|
|
139
|
+
sessions).
|
|
140
|
+
|
|
141
|
+
## Findings
|
|
142
|
+
|
|
143
|
+
Write findings to `{{team_root}}/notes/inbox/{{agent_id}}-{{item_id}}-{{date}}.md`
|
|
144
|
+
only after successful completion. Include:
|
|
145
|
+
|
|
146
|
+
- Session id + target summary
|
|
147
|
+
- Runner adapter chosen
|
|
148
|
+
- Test file path + line count
|
|
149
|
+
- Notes for future drafts on the same project (flaky selectors, env-vars
|
|
150
|
+
needed, runner gotchas)
|
|
151
|
+
|
|
152
|
+
## Constraints
|
|
153
|
+
|
|
154
|
+
- Do not modify production code unless explicitly asked.
|
|
155
|
+
- Do not remove worktrees; the engine handles cleanup automatically.
|
|
156
|
+
- Do not start or restart the managed-spawn — the engine owns it.
|
|
157
|
+
- The test file is the deliverable — without it (or without a `testFile`
|
|
158
|
+
pointer in completion JSON), the EXECUTE phase has nothing to run.
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
---
|
|
2
|
+
requiresProjectContext: true
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Playbook: QA Session EXECUTE
|
|
6
|
+
|
|
7
|
+
You are {{agent_name}}, the {{agent_role}} on the {{project_name}} project.
|
|
8
|
+
TEAM ROOT: {{team_root}}
|
|
9
|
+
|
|
10
|
+
## Your Task
|
|
11
|
+
|
|
12
|
+
QA Session **EXECUTE** phase for session **{{session_id}}** (work item {{item_id}}).
|
|
13
|
+
|
|
14
|
+
The SETUP and DRAFT phases have already finished: the engine spawned the
|
|
15
|
+
dev-up command as a managed-spawn, and the DRAFT agent wrote a
|
|
16
|
+
runner-native test file under `engine/qa-tests/{{session_id}}/`. Your
|
|
17
|
+
job is to **invoke that test against the live managed-spawn**, capture
|
|
18
|
+
the configured artifacts, and write the result sidecar the engine
|
|
19
|
+
ingests.
|
|
20
|
+
|
|
21
|
+
- **Session id:** `{{session_id}}`
|
|
22
|
+
- **Session phase:** `{{session_phase}}`
|
|
23
|
+
- **Managed-spawn target:** `{{managed_spawn_name}}` (live — query
|
|
24
|
+
`http://localhost:7331/api/managed-processes/by-name?name={{managed_spawn_name}}`
|
|
25
|
+
for the freshest port / base URL / health).
|
|
26
|
+
- **Test file (relative to `engine/qa-tests/{{session_id}}/`):** `{{test_file}}`
|
|
27
|
+
- **Flows (for context):** {{flows_raw}}
|
|
28
|
+
- **Runner hint (optional explicit runner):** `{{runner_hint}}`
|
|
29
|
+
- **Capture:** `{{capture}}`
|
|
30
|
+
- **Mode:** `{{session_mode}}`
|
|
31
|
+
- **qa-runs record id (use this in the sidecar's `runId` field):** `{{qa_run_id}}`
|
|
32
|
+
|
|
33
|
+
{{additional_context}}
|
|
34
|
+
|
|
35
|
+
## What "qa-session-execute" means
|
|
36
|
+
|
|
37
|
+
A `qa-session-execute` task is the **third** of three chained work items
|
|
38
|
+
the engine dispatches for each QA Session (SETUP → DRAFT → EXECUTE). The
|
|
39
|
+
engine resolved the same runner adapter the DRAFT phase used; its
|
|
40
|
+
`executeBrief()` hook produced the precise invocation command + flags
|
|
41
|
+
below.
|
|
42
|
+
|
|
43
|
+
### Runner execute brief
|
|
44
|
+
|
|
45
|
+
{{runner_execute_brief}}
|
|
46
|
+
|
|
47
|
+
### Result sidecar (REQUIRED)
|
|
48
|
+
|
|
49
|
+
Before exit, write the result sidecar at
|
|
50
|
+
`agents/{{agent_id}}/qa-run-result.json` with this exact shape:
|
|
51
|
+
|
|
52
|
+
```json
|
|
53
|
+
{
|
|
54
|
+
"runId": "{{qa_run_id}}",
|
|
55
|
+
"status": "passed",
|
|
56
|
+
"summary": "1 sentence rollup the dashboard will render",
|
|
57
|
+
"artifacts": [
|
|
58
|
+
{
|
|
59
|
+
"type": "screenshot",
|
|
60
|
+
"path": "engine/qa-artifacts/{{session_id}}/01-login-form.png",
|
|
61
|
+
"label": "Login form rendered",
|
|
62
|
+
"capturedAt": "2026-05-20T20:42:00.000Z"
|
|
63
|
+
}
|
|
64
|
+
]
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Valid `status` values:
|
|
69
|
+
|
|
70
|
+
- `passed` — every step in the drafted test ran green and every required
|
|
71
|
+
capture artifact was produced.
|
|
72
|
+
- `failed` — at least one assertion failed. Still write the sidecar with
|
|
73
|
+
whatever artifacts you captured plus the failing-step summary.
|
|
74
|
+
- `errored` — the runner itself crashed or the managed-spawn went
|
|
75
|
+
unreachable mid-run (use this sparingly — distinguishes infra failure
|
|
76
|
+
from real product-level failure).
|
|
77
|
+
|
|
78
|
+
The engine consumes this sidecar in `engine/lifecycle.js` and calls
|
|
79
|
+
`qaRuns.completeRun({{qa_run_id}}, …)`. **If the sidecar is missing when
|
|
80
|
+
you exit, the engine marks the run `errored`** — always write it, even on
|
|
81
|
+
bail-out.
|
|
82
|
+
|
|
83
|
+
The `engine/qa-sessions.js#handleExecuteComplete` hook then reads the
|
|
84
|
+
qa-runs terminal status and transitions the session to `done` / `failed`
|
|
85
|
+
accordingly.
|
|
86
|
+
|
|
87
|
+
## No PR, no commit
|
|
88
|
+
|
|
89
|
+
`qa-session-execute` is a verification task. **Do not**:
|
|
90
|
+
|
|
91
|
+
- commit, push, or open a pull request — sessions are tracked by the
|
|
92
|
+
session record + qa-runs record, not a merged PR
|
|
93
|
+
- modify project source — if a test step requires a code change, stop,
|
|
94
|
+
leave changes uncommitted, and document the gap in the result summary
|
|
95
|
+
- start or restart the managed-spawn — the engine owns it
|
|
96
|
+
- modify the drafted test file — re-drafting belongs to the DRAFT phase
|
|
97
|
+
(the human invokes it via POST `/api/qa/sessions/<id>/edit`)
|
|
98
|
+
|
|
99
|
+
## Failure path (REQUIRED)
|
|
100
|
+
|
|
101
|
+
If the managed-spawn is unhealthy, the runner CLI is missing, or you
|
|
102
|
+
cannot even attempt the test invocation, **do not silently exit
|
|
103
|
+
green**. Write:
|
|
104
|
+
|
|
105
|
+
```json
|
|
106
|
+
{
|
|
107
|
+
"status": "failed",
|
|
108
|
+
"summary": "<one-line human-readable explanation of what blocked EXECUTE>",
|
|
109
|
+
"failure_class": "qa-session-execute-failed",
|
|
110
|
+
"retryable": false,
|
|
111
|
+
"needs_rerun": false,
|
|
112
|
+
"nonce": "<value of MINIONS_COMPLETION_NONCE env var>",
|
|
113
|
+
"artifacts": []
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
…AND write a matching `qa-run-result.json` sidecar with `status: "errored"`
|
|
118
|
+
so the qa-runs record terminalizes correctly. The session will transition
|
|
119
|
+
to `failed` with `failureClass: qa-session-execute-failed`.
|
|
120
|
+
|
|
121
|
+
## Working directory
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# PowerShell
|
|
125
|
+
echo $env:MINIONS_AGENT_CWD
|
|
126
|
+
pwd
|
|
127
|
+
|
|
128
|
+
# bash/zsh
|
|
129
|
+
echo "$MINIONS_AGENT_CWD"
|
|
130
|
+
pwd
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
`MINIONS_AGENT_CWD` is the engine-resolved worktree root. Prefer it over
|
|
134
|
+
`pwd` for any cwd-sensitive command. The test file path is **relative to
|
|
135
|
+
the Minions root**: full path is
|
|
136
|
+
`<MINIONS_ROOT>/engine/qa-tests/{{session_id}}/{{test_file}}`. Capture
|
|
137
|
+
artifacts to `<MINIONS_ROOT>/engine/qa-artifacts/{{session_id}}/`.
|
|
138
|
+
|
|
139
|
+
## Long-Running Commands
|
|
140
|
+
|
|
141
|
+
Playwright runs, Maestro flows, and webdriver waits can be silent for
|
|
142
|
+
minutes. Run the normal CLI commands and wait for them to finish; do not
|
|
143
|
+
add progress pings or extra logging just to keep the engine active.
|
|
144
|
+
|
|
145
|
+
## Findings
|
|
146
|
+
|
|
147
|
+
Write findings to `{{team_root}}/notes/inbox/{{agent_id}}-{{item_id}}-{{date}}.md`
|
|
148
|
+
only after successful completion. Include:
|
|
149
|
+
|
|
150
|
+
- Session id + target summary
|
|
151
|
+
- Test file + runner adapter
|
|
152
|
+
- Per-step pass/fail
|
|
153
|
+
- Artifact paths (relative to `{{team_root}}`)
|
|
154
|
+
- Notes for the next EXECUTE on the same target (flaky selectors, env
|
|
155
|
+
quirks, runner gotchas)
|
|
156
|
+
|
|
157
|
+
## Constraints
|
|
158
|
+
|
|
159
|
+
- Do not modify production code unless explicitly asked.
|
|
160
|
+
- Do not remove worktrees; the engine handles cleanup automatically.
|
|
161
|
+
- Do not start or restart the managed-spawn — the engine owns it.
|
|
162
|
+
- Always emit the `qa-run-result.json` sidecar before exit — even a
|
|
163
|
+
minimal
|
|
164
|
+
`{"runId": "{{qa_run_id}}", "status": "errored", "summary": "...", "artifacts": []}`
|
|
165
|
+
is better than an absent file.
|