@yemi33/minions 0.1.2070 → 0.1.2072

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -69,3 +69,235 @@ receives `target`, `steps`, `expectedArtifacts` as template vars; required
69
69
  to write the sidecar above before exit. Routing line in `routing.md` maps
70
70
  the synthetic `qa-validate` task-type to the playbook so manual dispatches
71
71
  work too.
72
+
73
+ # QA Sessions (PL-qa-sessions)
74
+
75
+ Natural-language QA flows: the user describes what they want tested in
76
+ plain prose, the engine sets up a live target, an agent drafts a
77
+ runner-native test file, and (with user approval) a second agent executes
78
+ it. Sessions are a thin orchestration layer on top of the same
79
+ `managed-spawn` + `qa-run-result.json` infrastructure that powers
80
+ runbooks above — they reuse `qa-runs.json`, `engine/qa-artifacts/`, and
81
+ the existing `engine/lifecycle.js#runPostCompletionHooks` qa-run sidecar
82
+ hook. Surfaced on `/qa` (sessions card list above the runbooks/runs
83
+ tables) and proxied by the Command Center natural-language shortcut.
84
+
85
+ ## Use case (vs. runbooks)
86
+
87
+ - **Runbook**: a stable, named, reusable test plan stored as JSON. Same
88
+ steps run repeatedly against the same target. Edit once, dispatch
89
+ many times.
90
+ - **Session**: an ad-hoc, single-shot, natural-language QA prompt. The
91
+ user describes intent ("smoke the homepage after my PR"), engine
92
+ resolves the target + spawns it, an agent drafts a *new* runner-native
93
+ test file just for this run. The drafted file lives at
94
+ `engine/qa-tests/<sessionId>/test.<ext>` and is the audit artifact; the
95
+ session record is the orchestration breadcrumb.
96
+
97
+ If the same flow is going to be re-run repeatedly, promote the drafted
98
+ test file into a saved runbook. Sessions are *not* meant to replace
99
+ runbooks for repeat traffic.
100
+
101
+ ## State machine
102
+
103
+ Eight states, source of truth `engine/qa-sessions.js#QA_SESSION_STATE`:
104
+
105
+ ```
106
+ pending ──▶ spawning ──▶ drafting ──▶ awaiting-approval ──▶ executing ──▶ done
107
+ │ │ │ │ │ ╲
108
+ ╰────────────┴─────────────┴────────────────┴────────────────┴───────────▶ failed
109
+ ╰────────────┴─────────────┴────────────────┴────────────────┴───────────▶ killed
110
+ ```
111
+
112
+ Transitions are forward-only except as noted below; `done` / `failed` / `killed` are terminal (zero
113
+ outgoing edges). Two non-linear edges:
114
+
115
+ - `awaiting-approval → drafting` — `POST /api/qa/sessions/<id>/edit` re-fires
116
+ DRAFT with the user's natural-language feedback as steering.
117
+ - `drafting → executing` — `auto` mode skips approval and chains EXECUTE
118
+ directly from DRAFT-done.
119
+
120
+ Cancellation paths (`pending → killed`, `spawning → killed`, etc.) cover
121
+ the `POST /api/qa/sessions/<id>/cancel` and `/kill` endpoints. Illegal
122
+ transitions throw; `dashboard.js` translates the thrown error to HTTP 409
123
+ via `_qaSessionsErrorToStatus` (see endpoint table).
124
+
125
+ ## Three-phase work-item chain
126
+
127
+ A session orchestrates three sequential work items, each rendered with a
128
+ phase-specific playbook (`meta.playbook` override beats the work-type
129
+ routing default):
130
+
131
+ 1. **SETUP** (`playbooks/qa-session-setup.md`, `meta.managed_spawn: true`)
132
+ resolves `target.kind ∈ {pr, branch, current, commit}` to a worktree,
133
+ inspects the codebase for the dev-up command, and writes
134
+ `agents/<agentId>/managed-spawn.json` with
135
+ `name=qa-session-<sessionId>`. The engine spawn-validates the sidecar
136
+ then awaits `waitForFirstHealth` per the existing managed-spawn
137
+ lifecycle (see `docs/managed-spawn.md`). Healthcheck-passing transitions
138
+ the session `spawning → drafting`. First-health failure → `failed` with
139
+ `failure_class: qa-session-setup-failed`.
140
+
141
+ 2. **DRAFT** (`playbooks/qa-session-draft.md`) reads the live spawn
142
+ metadata via `/api/managed-processes/by-name/qa-session-<id>`, calls the
143
+ resolved runner's `generateBrief({target, flowsRaw, capture})` hook, and
144
+ writes the runner-native test file to
145
+ `engine/qa-tests/<sessionId>/test.<ext>`. The runner's
146
+ `validateOutputDir` hook gates DRAFT success — a missing or malformed
147
+ test file lands the session in `failed`. In `confirm` mode the session
148
+ parks at `awaiting-approval`; in `auto` mode it auto-chains to EXECUTE.
149
+
150
+ 3. **EXECUTE** (`playbooks/qa-session-execute.md`) runs the drafted test
151
+ against the live spawn via the runner's `executeBrief` hook, captures
152
+ artifacts per `spec.capture: { video, screenshots, logs }`, and writes
153
+ `agents/<agentId>/qa-run-result.json` — the same sidecar the
154
+ `qa-validate` runbook flow uses. The existing
155
+ `engine/lifecycle.js#runPostCompletionHooks` `meta.qaRunId` hook ingests
156
+ the sidecar and marks the linked `qa-runs` record terminal; the
157
+ session-level `handleExecuteComplete` then reads the `qa-run` terminal
158
+ status and transitions `executing → done` (or `failed`).
159
+
160
+ Each phase WI carries `meta.sessionId`, `meta.sessionPhase`, `meta.qaSession`
161
+ (target + flowsRaw + mode + capture + runner), and `meta.playbook`. The
162
+ EXECUTE WI additionally carries `meta.qaRunId` so the existing qa-run
163
+ lifecycle hook fires.
164
+
165
+ ## Endpoints
166
+
167
+ Documented in `dashboard.js`; routes are visible at `GET /api/routes`.
168
+
169
+ | Method | Path | Behavior |
170
+ |--------|--------------------------------------------|---------------------------------------------------------------------------------------------------------------------------|
171
+ | POST | `/api/qa/session` | Create session; validates spec, calls `createSession` + `queueSetup` (`pending → spawning`). Returns `sessionId`, `setupWorkItemId`, `managedSpawnName`. |
172
+ | GET | `/api/qa/sessions` | List sessions newest-first. Optional `?limit=N` and `?state=pending\|spawning\|drafting\|awaiting-approval\|executing\|done\|failed\|killed`. |
173
+ | GET | `/api/qa/sessions/<id>` | Fetch a single session record by id. |
174
+ | POST | `/api/qa/sessions/<id>/approve` | `awaiting-approval → executing`. Server-side creates the linked `qa-runs` record (synthetic `runbookId='qa-session-<id>'`), queues EXECUTE WI, stamps `qaRunId` on the session. |
175
+ | POST | `/api/qa/sessions/<id>/edit` | `awaiting-approval → drafting`. Body: `{ feedback }`. Re-fires DRAFT with the reviewer feedback threaded into the prompt. |
176
+ | POST | `/api/qa/sessions/<id>/cancel` | Non-terminal → `killed`. Optional `{ reason }`. Does NOT touch the managed-spawn — use `/kill` for that. |
177
+ | POST | `/api/qa/sessions/<id>/kill` | Non-terminal → `killed` AND `removeManagedSpec(<name>)`. Best-effort on the spawn kill (no-op if no spawn). |
178
+ | POST | `/api/qa/sessions/<id>/dismiss` | Non-terminal → `done`. Accept the draft as final; leaves spawn alive. Optional `{ summary }`. |
179
+ | GET | `/api/qa/runners` | List registered runner adapters (built-ins + `qa-runners.d/` plugins). Metadata only — hooks (functions) are stripped. |
180
+ | POST | `/api/qa/runners/reload` | Clear in-process registry, re-register built-ins, re-scan `qa-runners.d/` for plugin edits. Returns the fresh runner list. |
181
+
182
+ The single-session POSTs share `_qaSessionAction` in `dashboard.js`; module
183
+ errors are mapped to HTTP via `_qaSessionsErrorToStatus`:
184
+
185
+ - `'session not found'` → 404
186
+ - `'unsafe sessionId'` / `'invalid spec'` / `'requires …'` (size cap) → 400
187
+ - `'illegal state transition'` / `'requires state …'` / `'requires non-terminal'` → 409
188
+
189
+ ## File locations
190
+
191
+ - **Session state**: `engine/qa-sessions.json` (single file, all projects,
192
+ capped at `QA_SESSIONS_MAX_RECORDS = 500` via `createSession`-time
193
+ rotation by `createdAt`).
194
+ - **Drafted test files**: `engine/qa-tests/<sessionId>/test.<ext>`. Created
195
+ by `createSession` (idempotent `mkdirSync recursive`); written by the
196
+ DRAFT agent. Path is sandboxed by `_isSafeSessionId` (kebab-case ≤ 64
197
+ chars, no `..`, no separators) before every read/write that maps id →
198
+ path. Gitignored.
199
+ - **Managed-spawn**: `engine/managed-processes.json` entry with
200
+ `name='qa-session-<sessionId>'`. Cleaned up by `/kill`. The session's
201
+ `cancelSession` is the cancel-without-spawn-kill path: it leaves the
202
+ managed-spawn entry in place.
203
+ - **EXECUTE artifacts**: same as runbooks — `engine/qa-artifacts/<runId>/`
204
+ served via `GET /api/qa/artifacts/<runId>/<file>`.
205
+
206
+ ## Runner adapters (P-c4a9e7f3 / P-b8e1d4a6)
207
+
208
+ Pluggable test-runner registry at `engine/qa-runners.js`. Built-in
209
+ adapters: `playwright` (priority 100, detects `playwright.config.*`),
210
+ `maestro` (priority 80, detects `.maestro/`). Each adapter exports five
211
+ members (four hooks plus the `installHint` string):
212
+
213
+ - `detect(target, project)` → `boolean`. Auto-detect for blank-runner
214
+ sessions.
215
+ - `generateBrief({target, flowsRaw, capture, project})` → instructions
216
+ string. Handed to the DRAFT agent so it emits a runner-native test file.
217
+ - `executeBrief({sessionId, target, capture, project})` → instructions /
218
+ command for the EXECUTE agent.
219
+ - `validateOutputDir(dir)` → `{ ok, errors[] }`. Gates the DRAFT → EXECUTE
220
+ transition.
221
+ - `installHint` → string shown when `detect()` returns true but the
222
+ runner CLI is missing.
223
+
224
+ Resolution order in `detectRunner(target, project, explicitRunner)`:
225
+ explicit-name (no `detect` call, unknown names return null), then
226
+ priority-desc iteration. Plugin folder: `<MINIONS_DIR>/qa-runners.d/*.js`
227
+ (same trust level as `playbooks/` and `watches.d/`). Hot-reload via
228
+ `POST /api/qa/runners/reload` (clears registry → re-registers built-ins →
229
+ re-scans plugin dir) so plugin edits take effect without an engine
230
+ restart.
231
+
232
+ ## Fast-state slice
233
+
234
+ `/api/status.qaSessions = { total, sig }` — built by the unsorted summary helper
235
+ `engine/qa-sessions.js#summarizeSessionsForStatus()`. Mirrors `qaRuns` so
236
+ the sidebar activity-dot lights up on any new session or state
237
+ transition within one `/api/status` poll cycle (~4s). Do NOT call
238
+ `listSessions({limit:50})` from this hot path — it sorts O(N log N) on
239
+ every fast-state rebuild.
240
+
241
+ ## Dashboard UI (P-h7e4f9b2)
242
+
243
+ `/qa` page (`dashboard/pages/qa.html`, `dashboard/js/qa.js`) gains a
244
+ **QA Sessions** section above the existing Targets/Runbooks/Runs trio:
245
+
246
+ - **Start QA Session form** — target kind dropdown with conditional
247
+ sub-fields (PR id, branch, commit SHA, worktree path), flows textarea,
248
+ mode toggle (`confirm`|`auto`), capture checkboxes, runner dropdown
249
+ auto-populated from `GET /api/qa/runners`, project input.
250
+ - **Sessions list** — composite cards with phase chips
251
+ (🔧 setup → 📝 draft → ▶ execute → ✅ done). Cards visually flip the
252
+ chip classes (`--done` / `--active` / `--pending` / `--failed` /
253
+ `--killed`) per `session.state`. State-driven left-border color
254
+ (red=failed, green=done, yellow=awaiting-approval, blue=active).
255
+ - **Action buttons** —
256
+ `awaiting-approval` cards show `[Approve & run]` `[Edit]` `[Cancel]`;
257
+ every non-terminal card shows `[Dismiss]` `[Kill spawn]` in the footer;
258
+ terminal cards show no actions.
259
+ - **Polling** — `_startQaSessionsPoll` runs a 3000 ms
260
+ `setInterval(loadQaSessions)`; `_qaAfterSessionsRender` auto-stops the
261
+ poll once every cached session is in
262
+ `QA_SESSION_TERMINAL_STATES = {done, failed, killed}`. Form-submit and
263
+ action handlers each call `_startQaSessionsPoll` to re-activate polling
264
+ when a new non-terminal session appears. Wired into the canonical page
265
+ lifecycle: `loadQaRunners` + `loadQaSessions` + `_startQaSessionsPoll`
266
+ live in `PAGE_LAZY_LOADERS.qa`; `_stopQaSessionsPoll` lives in
267
+ `PAGE_LEAVE_HOOKS`.
268
+
269
+ ## Command Center shortcut (PR12: CC system prompt)
270
+
271
+ The CC system prompt teaches natural-language → `POST /api/qa/session`.
272
+ The user can type "smoke test the home page in PR #2887" and CC builds
273
+ the spec (`target.kind='pr'`, `target.prId='github:yemi33/minions#2887'`,
274
+ `flowsRaw='smoke test the home page'`, default `mode='confirm'`) and
275
+ fires the endpoint. CC must include the
276
+ `X-Minions-Agent: cc-<turn-id>` header so the session's `createdBy`
277
+ field gets the right audit trail.
278
+
279
+ ## When something goes wrong
280
+
281
+ - **SETUP managed-spawn won't validate** → session lands in `failed` with
282
+ `failure_class: 'invalid-managed-spawn'` (from
283
+ `evaluateManagedSpawnAcceptance` in `onAgentClose`) or
284
+ `'managed-spawn-healthcheck-failed'`. The `error` field carries the
285
+ reason; check `engine/managed-logs/qa-session-<sessionId>.log` for the
286
+ failing healthcheck.
287
+ - **DRAFT validateOutputDir rejects** → `failure_class:
288
+ 'qa-session-draft-failed'`. Inspect the drafted file at
289
+ `engine/qa-tests/<sessionId>/`; the runner's `validateOutputDir.errors`
290
+ array tells you what's missing.
291
+ - **EXECUTE qa-run terminal status is `failed`/`errored`** →
292
+ `failure_class: 'qa-session-execute-failed'` /
293
+ `'qa-session-execute-errored'`. The linked `qa-runs` record (joined via
294
+ `session.qaRunId`) carries the agent's `summary` and artifact list.
295
+ - **Want to start over after seeing a bad draft** → POST
296
+ `/api/qa/sessions/<id>/edit` with `{ feedback: "…" }`; do NOT
297
+ `/cancel` + create a new session unless the original spec was wrong
298
+ (the test file from the prior DRAFT round stays on disk under
299
+ `engine/qa-tests/<sessionId>/` as `test.<ext>.bak.<round>` so you can
300
+ diff iterations).
301
+ - **Spawn is wedged but session is still in `executing`** → POST
302
+ `/api/qa/sessions/<id>/kill` (NOT `/cancel` — the latter leaves the
303
+ spawn alive).
package/engine/cleanup.js CHANGED
@@ -1238,7 +1238,10 @@ async function runCleanup(config, verbose = false) {
1238
1238
  cleaned.pendingContextsTrimmed = 0;
1239
1239
  try {
1240
1240
  const cooldownPath = path.join(ENGINE_DIR, 'cooldowns.json');
1241
- const cooldowns = safeJson(cooldownPath);
1241
+ // safeJsonNoRestore — same rationale as engine/cooldown.js loadCooldowns:
1242
+ // resurrecting a stale .backup at cleanup time could re-introduce expired
1243
+ // entries the active code already pruned (P-bfa1d-safejson-no-restore).
1244
+ const cooldowns = safeJsonNoRestore(cooldownPath);
1242
1245
  if (cooldowns && typeof cooldowns === 'object') {
1243
1246
  let dirty = false;
1244
1247
  // Trim oversized pendingContexts arrays (one-time migration + ongoing cap)
@@ -94,8 +94,15 @@ function isPreviewStatusBody(body) {
94
94
  // gh-comment.buildMinionsCommentBody produces (marker, \n\n, body) and
95
95
  // prevents quoted/blockquoted markers in human replies from triggering the
96
96
  // classifier.
97
+ //
98
+ // P-bfa-s1-bom-marker (settlement): strip an optional leading BOM (\uFEFF)
99
+ // followed by any ASCII/Unicode whitespace before applying the regex.
100
+ // Copy-paste flows from Word, VSCode-with-BOM, and editors that auto-insert
101
+ // leading spaces would otherwise be misclassified as un-marked. The
102
+ // blockquote rejection (lines beginning with `> <!--`) is preserved — `>`
103
+ // is not whitespace and is not stripped, so quoted markers remain rejected.
97
104
  function hasMinionsMarker(body) {
98
- const text = String(body || '');
105
+ const text = String(body || '').replace(/^\uFEFF?\s*/, '');
99
106
  if (!text) return false;
100
107
  const m = MINIONS_COMMENT_MARKER_RE.exec(text);
101
108
  if (!m) return false;
@@ -7,7 +7,7 @@ const path = require('path');
7
7
  const shared = require('./shared');
8
8
  const queries = require('./queries');
9
9
 
10
- const { safeJson, mutateCooldowns, log, ENGINE_DEFAULTS } = shared;
10
+ const { safeJson, safeJsonNoRestore, mutateCooldowns, log, ENGINE_DEFAULTS } = shared;
11
11
  const { ENGINE_DIR } = queries;
12
12
 
13
13
  /**
@@ -40,7 +40,11 @@ const dispatchCooldowns = new Map(); // key → { timestamp, failures }
40
40
  let _lastDiskCooldownKeys = new Set();
41
41
 
42
42
  function loadCooldowns() {
43
- const saved = safeJson(COOLDOWN_PATH);
43
+ // safeJsonNoRestore — cooldowns are time-bounded ephemeral state (24h TTL).
44
+ // Restoring a stale `cooldowns.json.backup` could resurrect expired entries
45
+ // that should already have been pruned, suppressing legitimate dispatches
46
+ // (P-bfa1d-safejson-no-restore). Missing/corrupt primary == "no cooldowns".
47
+ const saved = safeJsonNoRestore(COOLDOWN_PATH);
44
48
  if (!saved) return;
45
49
  const now = Date.now();
46
50
  for (const [k, v] of Object.entries(saved)) {
@@ -25,12 +25,28 @@
25
25
  * `gh` invocation: argv form with `--body-file <tmp>` (NOT `--body <inline>`).
26
26
  * Avoids platform-specific shell-quoting bugs for bodies that contain quotes,
27
27
  * backticks, or `$(…)`. Temp files are cleaned up in `finally`.
28
+ *
29
+ * Per-slug PAT routing (P-bfa2a): each public function resolves a per-slug
30
+ * GitHub token via `engine/gh-token.js#resolveTokenForSlug(repo)` and threads
31
+ * it into the spawned `gh` process via `env.GH_TOKEN`. When the slug has no
32
+ * mapping in `engine.ghAccounts`, we fall back to the ambient `gh` identity
33
+ * (inherited via `process.env`) and emit a one-shot `console.warn` per slug
34
+ * so the dual-account audit can spot stragglers. The pinned "Never run
35
+ * `gh auth switch`" policy means we MUST resolve per-call rather than
36
+ * piggy-backing on the global active account.
28
37
  */
29
38
 
30
39
  const fs = require('fs');
31
40
  const path = require('path');
32
41
  const os = require('os');
33
42
  const { execFileSync: _execFileSync } = require('child_process');
43
+ const { resolveTokenForSlug: _defaultResolveTokenForSlug } = require('./gh-token');
44
+
45
+ // Module-level dedupe set so the "no token mapping for <slug>" warning fires
46
+ // at most once per slug per process — keeps the audit signal visible without
47
+ // flooding the log on every comment post. Exported via `_clearTokenWarnings`
48
+ // for tests.
49
+ const _warnedUnresolvedSlugs = new Set();
34
50
 
35
51
  // ── Validation ───────────────────────────────────────────────────────────────
36
52
 
@@ -134,12 +150,56 @@ function _writeTempBodyFile(content) {
134
150
  return file;
135
151
  }
136
152
 
137
- function _runGh(execFileSync, args, timeoutMs) {
138
- return execFileSync('gh', args, {
153
+ function _runGh(execFileSync, args, timeoutMs, env) {
154
+ const opts = {
139
155
  encoding: 'utf8',
140
156
  timeout: timeoutMs,
141
157
  windowsHide: true,
142
- });
158
+ };
159
+ // Only set `env` when we have a token override to thread; leaving the option
160
+ // unset preserves the existing `process.env` inheritance for back-compat with
161
+ // unmapped slugs and tests that don't stub the resolver.
162
+ if (env) opts.env = env;
163
+ return execFileSync('gh', args, opts);
164
+ }
165
+
166
+ /**
167
+ * Resolve the per-slug PAT for `repo` and return an env override suitable for
168
+ * passing to `_runGh`. Returns `undefined` when no mapping exists (caller
169
+ * falls back to the ambient `gh` identity inherited from `process.env`).
170
+ *
171
+ * Emits a one-shot `console.warn` per (slug, reason) so the dual-account audit
172
+ * can spot stragglers without flooding the log on every comment post. Resolver
173
+ * exceptions are swallowed — comment posting must never fail because token
174
+ * resolution did. The pinned "Never run `gh auth switch`" policy survives even
175
+ * when the mapping is missing: we just inherit the ambient identity instead of
176
+ * forcibly mutating it.
177
+ */
178
+ function _resolveTokenEnvForRepo(repo, resolveTokenForSlugFn) {
179
+ const resolve = resolveTokenForSlugFn || _defaultResolveTokenForSlug;
180
+ let token = null;
181
+ try {
182
+ token = resolve(repo);
183
+ } catch (e) {
184
+ const key = `error:${repo}`;
185
+ if (!_warnedUnresolvedSlugs.has(key)) {
186
+ _warnedUnresolvedSlugs.add(key);
187
+ console.warn(
188
+ `gh-comment: resolveTokenForSlug threw for "${repo}" (${e?.message || e}) — falling back to ambient gh identity`,
189
+ );
190
+ }
191
+ return undefined;
192
+ }
193
+ if (!token) {
194
+ if (!_warnedUnresolvedSlugs.has(repo)) {
195
+ _warnedUnresolvedSlugs.add(repo);
196
+ console.warn(
197
+ `gh-comment: no token mapping for repo "${repo}" — falling back to ambient gh identity (audit straggler)`,
198
+ );
199
+ }
200
+ return undefined;
201
+ }
202
+ return { ...process.env, GH_TOKEN: token };
143
203
  }
144
204
 
145
205
  function postPrComment({
@@ -151,16 +211,19 @@ function postPrComment({
151
211
  workItemId,
152
212
  timeoutMs = 30000,
153
213
  execFileSync = _execFileSync,
214
+ resolveTokenForSlug,
154
215
  } = {}) {
155
216
  _validateRepo(repo);
156
217
  _validatePrNumber(prNumber);
157
218
  const finalBody = buildMinionsCommentBody({ agentId, kind, workItemId, body });
158
219
  const file = _writeTempBodyFile(finalBody);
220
+ const env = _resolveTokenEnvForRepo(repo, resolveTokenForSlug);
159
221
  try {
160
222
  const output = _runGh(
161
223
  execFileSync,
162
224
  ['pr', 'comment', String(prNumber), '--repo', repo, '--body-file', file],
163
225
  timeoutMs,
226
+ env,
164
227
  );
165
228
  return { output: String(output || '').trim(), bodyFile: file };
166
229
  } finally {
@@ -177,16 +240,19 @@ function postPrReviewComment({
177
240
  workItemId,
178
241
  timeoutMs = 30000,
179
242
  execFileSync = _execFileSync,
243
+ resolveTokenForSlug,
180
244
  } = {}) {
181
245
  _validateRepo(repo);
182
246
  _validatePrNumber(prNumber);
183
247
  const finalBody = buildMinionsCommentBody({ agentId, kind, workItemId, body });
184
248
  const file = _writeTempBodyFile(finalBody);
249
+ const env = _resolveTokenEnvForRepo(repo, resolveTokenForSlug);
185
250
  try {
186
251
  const output = _runGh(
187
252
  execFileSync,
188
253
  ['pr', 'review', String(prNumber), '--comment', '--repo', repo, '--body-file', file],
189
254
  timeoutMs,
255
+ env,
190
256
  );
191
257
  return { output: String(output || '').trim(), bodyFile: file };
192
258
  } finally {
@@ -210,6 +276,7 @@ function postPrReview({
210
276
  workItemId,
211
277
  timeoutMs = 30000,
212
278
  execFileSync = _execFileSync,
279
+ resolveTokenForSlug,
213
280
  } = {}) {
214
281
  const flag = _REVIEW_EVENT_FLAGS[event];
215
282
  if (!flag) {
@@ -221,11 +288,13 @@ function postPrReview({
221
288
  _validatePrNumber(prNumber);
222
289
  const finalBody = buildMinionsCommentBody({ agentId, kind, workItemId, body });
223
290
  const file = _writeTempBodyFile(finalBody);
291
+ const env = _resolveTokenEnvForRepo(repo, resolveTokenForSlug);
224
292
  try {
225
293
  const output = _runGh(
226
294
  execFileSync,
227
295
  ['pr', 'review', String(prNumber), flag, '--repo', repo, '--body-file', file],
228
296
  timeoutMs,
297
+ env,
229
298
  );
230
299
  return { output: String(output || '').trim(), bodyFile: file };
231
300
  } finally {
@@ -250,4 +319,6 @@ module.exports = {
250
319
  // Internal helpers exported for tests / advanced callers
251
320
  _buildMarker,
252
321
  _writeTempBodyFile,
322
+ _resolveTokenEnvForRepo,
323
+ _clearTokenWarnings: () => _warnedUnresolvedSlugs.clear(),
253
324
  };
@@ -18,7 +18,7 @@
18
18
  * via `_setTokenForTest(slug, token)` and clear it via `_clearTokenCache()`.
19
19
  */
20
20
 
21
- const { execSync } = require('child_process');
21
+ const { execFileSync } = require('child_process');
22
22
  const path = require('path');
23
23
  const shared = require('./shared');
24
24
  const { safeJson, MINIONS_DIR, log } = shared;
@@ -70,14 +70,12 @@ function _fetchTokenForAccount(account, opts = {}) {
70
70
  const cached = _accountTokens.get(account);
71
71
  if (cached && cached.expiresAt > Date.now()) return cached.token;
72
72
 
73
- const run = opts.execSync || execSync;
73
+ const run = opts.execFileSync || execFileSync;
74
74
  try {
75
- // Argv form via `gh` is safer than constructing a shell string when account
76
- // names ever include odd chars; using execSync's command form here for
77
- // consistency with ado-token.js, but the account name flows from a config
78
- // map under our control (validated at write time).
79
- const cmd = `gh auth token --user ${account} --hostname github.com`;
80
- const out = run(cmd, {
75
+ // Argv-array form: `account` is passed as a literal argument and never
76
+ // interpreted by a shell, so shell metacharacters in the configured
77
+ // account name (e.g. `;`, backticks, `$()`) cannot be executed.
78
+ const out = run('gh', ['auth', 'token', '--user', account, '--hostname', 'github.com'], {
81
79
  timeout: FETCH_TIMEOUT_MS,
82
80
  encoding: 'utf8',
83
81
  windowsHide: true,
@@ -102,7 +100,7 @@ function _fetchTokenForAccount(account, opts = {}) {
102
100
  * caller should fall back to the ambient `gh` identity.
103
101
  *
104
102
  * Test seam: `_setTokenForTest(slug, token)` short-circuits the entire chain
105
- * so unit tests do not have to mock execSync nor stand up a config file.
103
+ * so unit tests do not have to mock execFileSync nor stand up a config file.
106
104
  */
107
105
  function resolveTokenForSlug(slug, opts = {}) {
108
106
  if (slug && _slugTokenOverrides.has(slug)) return _slugTokenOverrides.get(slug);
@@ -4393,6 +4393,106 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
4393
4393
  }
4394
4394
  }
4395
4395
 
4396
+ // P-a3f7c1b2 — QA Session lifecycle hook. The SETUP / DRAFT / EXECUTE work
4397
+ // items carry `meta.sessionId` and `meta.sessionPhase` (set by the
4398
+ // qa-sessions chain helpers — see engine/qa-sessions.js). On each agent
4399
+ // exit we delegate to the matching handler, which validates the state
4400
+ // transition and queues the next phase WI when applicable. The qa-validate
4401
+ // qaRunId hook above (line 4340) already handles the qa-runs sidecar for
4402
+ // the EXECUTE phase; we just need to map the qa-runs terminal status onto
4403
+ // the session state.
4404
+ //
4405
+ // The same dispatchItem.meta vs dispatchItem.meta.item.meta dual-location
4406
+ // problem the qaRunId block documents applies here — central-spawned
4407
+ // sessions land their sidecar at the top level, project-routed ones nest
4408
+ // it under `meta.item.meta`. Accept both.
4409
+ const qaSessionId = meta?.sessionId || meta?.item?.meta?.sessionId;
4410
+ const qaSessionPhase = meta?.sessionPhase || meta?.item?.meta?.sessionPhase;
4411
+ if (qaSessionId && qaSessionPhase) {
4412
+ try {
4413
+ const qaSessions = require('./qa-sessions');
4414
+ const wiPath = resolveWorkItemPath(meta);
4415
+ const project = meta?.project?.name || meta?.item?.project || null;
4416
+ const failureReason = typeof resultSummary === 'string' && resultSummary
4417
+ ? resultSummary
4418
+ : '';
4419
+ if (qaSessionPhase === 'setup') {
4420
+ qaSessions.handleSetupComplete(qaSessionId, {
4421
+ success: !!effectiveSuccess,
4422
+ wiPath,
4423
+ project,
4424
+ failureClass: (structuredCompletion && structuredCompletion.failure_class) || null,
4425
+ reason: failureReason,
4426
+ });
4427
+ } else if (qaSessionPhase === 'draft') {
4428
+ // The DRAFT agent reports the relative test file path in its
4429
+ // structured completion as `testFile` (or the playbook can write a
4430
+ // qa-session-draft-result.json sidecar in a future iteration). For
4431
+ // now read it from structuredCompletion when present; the session
4432
+ // record still works without it (EXECUTE prompt falls back to the
4433
+ // generic test.<ext> hint).
4434
+ const testFile = structuredCompletion && typeof structuredCompletion.testFile === 'string'
4435
+ ? structuredCompletion.testFile
4436
+ : null;
4437
+ const session = qaSessions.getSession(qaSessionId);
4438
+ if (session && session.spec && session.spec.mode === 'auto' && effectiveSuccess) {
4439
+ // Auto-mode chains EXECUTE; we need a qa-runs record up-front. The
4440
+ // dashboard endpoint that created the session can't know in
4441
+ // advance whether EXECUTE will fire (mode could be flipped), so
4442
+ // the qa-runs record is created here on the auto path.
4443
+ let qaRunId = null;
4444
+ try {
4445
+ const qaRuns = require('./qa-runs');
4446
+ const run = qaRuns.createRun({
4447
+ runbookId: 'qa-session-' + qaSessionId,
4448
+ targetName: session.managedSpawnName,
4449
+ project: project || session.spec.project || null,
4450
+ });
4451
+ qaRunId = run.id;
4452
+ } catch (createErr) {
4453
+ log('warn', `qa-session auto-mode createRun failed for ${qaSessionId}: ${createErr.message}`);
4454
+ }
4455
+ qaSessions.handleDraftComplete(qaSessionId, {
4456
+ success: true,
4457
+ testFile,
4458
+ wiPath,
4459
+ project,
4460
+ qaRunId,
4461
+ reason: failureReason,
4462
+ });
4463
+ } else {
4464
+ qaSessions.handleDraftComplete(qaSessionId, {
4465
+ success: !!effectiveSuccess,
4466
+ testFile,
4467
+ reason: failureReason,
4468
+ });
4469
+ }
4470
+ } else if (qaSessionPhase === 'execute') {
4471
+ // The qaRunId block above (line 4340) already wrote the terminal
4472
+ // qa-runs record. Re-read it to drive the session state.
4473
+ let qaRunStatus = null;
4474
+ const linkedQaRunId = meta?.qaRunId || meta?.item?.meta?.qaRunId;
4475
+ if (linkedQaRunId) {
4476
+ try {
4477
+ const qaRuns = require('./qa-runs');
4478
+ const run = qaRuns.getRun(linkedQaRunId);
4479
+ if (run && run.status) qaRunStatus = run.status;
4480
+ } catch (readErr) {
4481
+ log('warn', `qa-session execute qa-run lookup failed for ${qaSessionId}: ${readErr.message}`);
4482
+ }
4483
+ }
4484
+ qaSessions.handleExecuteComplete(qaSessionId, {
4485
+ success: !!effectiveSuccess,
4486
+ qaRunStatus,
4487
+ summary: resultSummary || null,
4488
+ reason: failureReason,
4489
+ });
4490
+ }
4491
+ } catch (err) {
4492
+ log('warn', `qa-session completion hook for ${qaSessionId} (${qaSessionPhase}): ${err.message}`);
4493
+ }
4494
+ }
4495
+
4396
4496
  // Plan chaining removed — user must explicitly execute plan-to-prd after reviewing the plan
4397
4497
  if (effectiveSuccess && meta?.item?.sourcePlan) checkPlanCompletion(meta, config);
4398
4498
 
@@ -76,7 +76,9 @@ function getPipelineRuns() {
76
76
  function getActiveRun(pipelineId) {
77
77
  const runs = getPipelineRuns();
78
78
  const pipelineRuns = runs[pipelineId] || [];
79
- return pipelineRuns.find(r => r.status === PIPELINE_STATUS.RUNNING || r.status === PIPELINE_STATUS.PAUSED);
79
+ return pipelineRuns.find(r => r.status === PIPELINE_STATUS.RUNNING
80
+ || r.status === PIPELINE_STATUS.PAUSED
81
+ || r.status === PIPELINE_STATUS.WAITING_HUMAN);
80
82
  }
81
83
 
82
84
  function startRun(pipelineId, pipeline) {
@@ -110,6 +112,12 @@ function startRun(pipelineId, pipeline) {
110
112
  }
111
113
 
112
114
  function updateRunStage(pipelineId, runId, stageId, updates) {
115
+ if (updates && Object.prototype.hasOwnProperty.call(updates, 'status')) {
116
+ const validStatuses = Object.values(PIPELINE_STATUS);
117
+ if (!validStatuses.includes(updates.status)) {
118
+ throw new Error(`updateRunStage: invalid status '${updates.status}' (expected one of: ${validStatuses.join('|')})`);
119
+ }
120
+ }
113
121
  mutateJsonFileLocked(PIPELINE_RUNS_PATH, (data) => {
114
122
  const runs = data[pipelineId] || [];
115
123
  const run = runs.find(r => r.runId === runId);