npm - @yemi33/minions - Versions diffs - 0.1.2070 → 0.1.2072 - Mend

@yemi33/minions 0.1.2070 → 0.1.2072

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/dashboard/js/qa.js +358 -0
package/dashboard/js/state.js +2 -1
package/dashboard/pages/qa.html +72 -0
package/dashboard/styles.css +102 -0
package/dashboard.js +410 -6
package/docs/qa-runbook-lifecycle.md +232 -0
package/engine/cleanup.js +4 -1
package/engine/comment-classifier.js +8 -1
package/engine/cooldown.js +6 -2
package/engine/gh-comment.js +74 -3
package/engine/gh-token.js +7 -9
package/engine/lifecycle.js +100 -0
package/engine/pipeline.js +9 -1
package/engine/playbook.js +39 -0
package/engine/qa-runners/maestro.js +152 -0
package/engine/qa-runners/playwright.js +149 -0
package/engine/qa-runners.js +323 -0
package/engine/qa-sessions.js +1008 -0
package/engine/shared.js +71 -12
package/engine.js +140 -0
package/package.json +1 -1
package/playbooks/qa-session-draft.md +158 -0
package/playbooks/qa-session-execute.md +165 -0
package/playbooks/qa-session-setup.md +154 -0
package/prompts/cc-system.md +43 -0
package/routing.md +3 -0

package/docs/qa-runbook-lifecycle.md CHANGED Viewed

@@ -69,3 +69,235 @@ receives `target`, `steps`, `expectedArtifacts` as template vars; required
 to write the sidecar above before exit. Routing line in `routing.md` maps
 the synthetic `qa-validate` task-type to the playbook so manual dispatches
 work too.
+# QA Sessions (PL-qa-sessions)
+Natural-language QA flows: the user describes what they want tested in
+plain prose, the engine sets up a live target, an agent drafts a
+runner-native test file, and (with user approval) a second agent executes
+it. Sessions are a thin orchestration layer on top of the same
+`managed-spawn` + `qa-run-result.json` infrastructure that powers
+runbooks above — they reuse `qa-runs.json`, `engine/qa-artifacts/`, and
+the existing `engine/lifecycle.js#runPostCompletionHooks` qa-run sidecar
+hook. Surfaced on `/qa` (sessions card list above the runbooks/runs
+tables) and proxied by the Command Center natural-language shortcut.
+## Use case (vs. runbooks)
+- **Runbook**: a stable, named, reusable test plan stored as JSON. Same
+  steps run repeatedly against the same target. Edit once, dispatch
+  many times.
+- **Session**: an ad-hoc, single-shot, natural-language QA prompt. The
+  user describes intent ("smoke the homepage after my PR"), engine
+  resolves the target + spawns it, an agent drafts a *new* runner-native
+  test file just for this run. The drafted file lives at
+  `engine/qa-tests/<sessionId>/test.<ext>` and is the audit artifact; the
+  session record is the orchestration breadcrumb.
+If the same flow is going to be re-run repeatedly, promote the drafted
+test file into a saved runbook. Sessions are *not* meant to replace
+runbooks for repeat traffic.
+## State machine
+Eight states, source of truth `engine/qa-sessions.js#QA_SESSION_STATE`:
+```
+pending ──▶ spawning ──▶ drafting ──▶ awaiting-approval ──▶ executing ──▶ done
+   │            │             │                │                │           ╲
+   ╰────────────┴─────────────┴────────────────┴────────────────┴───────────▶ failed
+   ╰────────────┴─────────────┴────────────────┴────────────────┴───────────▶ killed
+```
+Transitions run forward along the diagram; `done` / `failed` / `killed` are
+terminal (zero outgoing edges). The only exceptions are two non-linear edges:
+- `awaiting-approval → drafting` — `POST /api/qa/sessions/<id>/edit` re-fires
+  DRAFT with the user's natural-language feedback as steering.
+- `drafting → executing` — `auto` mode skips approval and chains EXECUTE
+  directly from DRAFT-done.
+Cancellation paths (`pending → killed`, `spawning → killed`, etc.) cover
+the `POST /api/qa/sessions/<id>/cancel` and `/kill` endpoints. Illegal
+transitions throw; `dashboard.js` translates the thrown error to HTTP 409
+via `_qaSessionsErrorToStatus` (see endpoint table).
+## Three-phase work-item chain
+A session orchestrates three sequential work items, each rendered with a
+phase-specific playbook (`meta.playbook` override beats the work-type
+routing default):
+1. **SETUP** (`playbooks/qa-session-setup.md`, `meta.managed_spawn: true`)
+   resolves `target.kind ∈ {pr, branch, current, commit}` to a worktree,
+   inspects the codebase for the dev-up command, and writes
+   `agents/<agentId>/managed-spawn.json` with
+   `name=qa-session-<sessionId>`. The engine spawn-validates the sidecar
+   then awaits `waitForFirstHealth` per the existing managed-spawn
+   lifecycle (see `docs/managed-spawn.md`). Healthcheck-passing transitions
+   the session `spawning → drafting`. First-health failure → `failed` with
+   `failure_class: qa-session-setup-failed`.
+2. **DRAFT** (`playbooks/qa-session-draft.md`) reads the live spawn
+   metadata via `/api/managed-processes/by-name/qa-session-<id>`, calls the
+   resolved runner's `generateBrief({target, flowsRaw, capture})` hook, and
+   writes the runner-native test file to
+   `engine/qa-tests/<sessionId>/test.<ext>`. The runner's
+   `validateOutputDir` hook gates DRAFT success — a missing or malformed
+   test file lands the session in `failed`. In `confirm` mode the session
+   parks at `awaiting-approval`; in `auto` mode it auto-chains to EXECUTE.
+3. **EXECUTE** (`playbooks/qa-session-execute.md`) runs the drafted test
+   against the live spawn via the runner's `executeBrief` hook, captures
+   artifacts per `spec.capture: { video, screenshots, logs }`, and writes
+   `agents/<agentId>/qa-run-result.json` — the same sidecar the
+   `qa-validate` runbook flow uses. The existing
+   `engine/lifecycle.js#runPostCompletionHooks` `meta.qaRunId` hook ingests
+   the sidecar and marks the linked `qa-runs` record terminal; the
+   session-level `handleExecuteComplete` then reads the `qa-run` terminal
+   status and transitions `executing → done` (or `failed`).
+Each phase WI carries `meta.sessionId`, `meta.sessionPhase`, `meta.qaSession`
+(target + flowsRaw + mode + capture + runner), and `meta.playbook`. The
+EXECUTE WI additionally carries `meta.qaRunId` so the existing qa-run
+lifecycle hook fires.
+## Endpoints
+Documented in `dashboard.js`; routes are visible at `GET /api/routes`.
+| Method | Path                                       | Behavior                                                                                                                  |
+|--------|--------------------------------------------|---------------------------------------------------------------------------------------------------------------------------|
+| POST   | `/api/qa/session`                          | Create session; validates spec, calls `createSession` + `queueSetup` (`pending → spawning`). Returns `sessionId`, `setupWorkItemId`, `managedSpawnName`. |
+| GET    | `/api/qa/sessions`                         | List sessions newest-first. Optional `?limit=N` and `?state=pending\|spawning\|drafting\|awaiting-approval\|executing\|done\|failed\|killed`. |
+| GET    | `/api/qa/sessions/<id>`                    | Fetch a single session record by id.                                                                                      |
+| POST   | `/api/qa/sessions/<id>/approve`            | `awaiting-approval → executing`. Server-side creates the linked `qa-runs` record (synthetic `runbookId='qa-session-<id>'`), queues EXECUTE WI, stamps `qaRunId` on the session. |
+| POST   | `/api/qa/sessions/<id>/edit`               | `awaiting-approval → drafting`. Body: `{ feedback }`. Re-fires DRAFT with the reviewer feedback threaded into the prompt. |
+| POST   | `/api/qa/sessions/<id>/cancel`             | Non-terminal → `killed`. Optional `{ reason }`. Does NOT touch the managed-spawn — use `/kill` for that.                  |
+| POST   | `/api/qa/sessions/<id>/kill`               | Non-terminal → `killed` AND `removeManagedSpec(<name>)`. Best-effort on the spawn kill (no-op if no spawn).                |
+| POST   | `/api/qa/sessions/<id>/dismiss`            | Non-terminal → `done`. Accept the draft as final; leaves spawn alive. Optional `{ summary }`.                              |
+| GET    | `/api/qa/runners`                          | List registered runner adapters (built-ins + `qa-runners.d/` plugins). Metadata only — hooks (functions) are stripped.    |
+| POST   | `/api/qa/runners/reload`                   | Clear in-process registry, re-register built-ins, re-scan `qa-runners.d/` for plugin edits. Returns the fresh runner list. |
+The single-session POSTs share `_qaSessionAction` in `dashboard.js`; module
+errors are mapped to HTTP via `_qaSessionsErrorToStatus`:
+- `'session not found'` → 404
+- `'unsafe sessionId'` / `'invalid spec'` / `'requires …'` (size cap) → 400
+- `'illegal state transition'` / `'requires state …'` / `'requires non-terminal'` → 409
+## File locations
+- **Session state**: `engine/qa-sessions.json` (single file, all projects,
+  capped at `QA_SESSIONS_MAX_RECORDS = 500` via `createSession`-time
+  rotation by `createdAt`).
+- **Drafted test files**: `engine/qa-tests/<sessionId>/test.<ext>`. Created
+  by `createSession` (idempotent `mkdirSync recursive`); written by the
+  DRAFT agent. Path is sandboxed by `_isSafeSessionId` (kebab-case ≤ 64
+  chars, no `..`, no separators) before every read/write that maps id →
+  path. Gitignored.
+- **Managed-spawn**: `engine/managed-processes.json` entry with
+  `name='qa-session-<sessionId>'`. Cleaned up by `/kill`. The session's
+  `cancelSession` (`/cancel`) is the cancel-without-spawn-kill path and
+  leaves the entry in place.
+- **EXECUTE artifacts**: same as runbooks — `engine/qa-artifacts/<runId>/`
+  served via `GET /api/qa/artifacts/<runId>/<file>`.
+## Runner adapters (P-c4a9e7f3 / P-b8e1d4a6)
+Pluggable test-runner registry at `engine/qa-runners.js`. Built-in
+adapters: `playwright` (priority 100, detects `playwright.config.*`),
+`maestro` (priority 80, detects `.maestro/`). Each adapter exports five
+members — four hook functions plus the `installHint` string:
+- `detect(target, project)` → `boolean`. Auto-detect for blank-runner
+  sessions.
+- `generateBrief({target, flowsRaw, capture, project})` → instructions
+  string. Handed to the DRAFT agent so it emits a runner-native test file.
+- `executeBrief({sessionId, target, capture, project})` → instructions /
+  command for the EXECUTE agent.
+- `validateOutputDir(dir)` → `{ ok, errors[] }`. Gates the DRAFT → EXECUTE
+  transition.
+- `installHint` → string shown when `detect()` returns true but the
+  runner CLI is missing.
+Resolution order in `detectRunner(target, project, explicitRunner)`:
+explicit-name (no `detect` call, unknown names return null), then
+priority-desc iteration. Plugin folder: `<MINIONS_DIR>/qa-runners.d/*.js`
+(same trust level as `playbooks/` and `watches.d/`). Hot-reload via
+`POST /api/qa/runners/reload` (clears registry → re-registers built-ins →
+re-scans plugin dir) so plugin edits take effect without an engine
+restart.
+## Fast-state slice
+`/api/status.qaSessions = { total, sig }` — produced by the unsorted summary
+helper `engine/qa-sessions.js#summarizeSessionsForStatus()`. Mirrors `qaRuns` so
+the sidebar activity-dot lights up on any new session or state
+transition within one `/api/status` poll cycle (~4s). Do NOT call
+`listSessions({limit:50})` from this hot path — it sorts O(N log N) on
+every fast-state rebuild.
+## Dashboard UI (P-h7e4f9b2)
+`/qa` page (`dashboard/pages/qa.html`, `dashboard/js/qa.js`) gains a
+**QA Sessions** section above the existing Targets/Runbooks/Runs trio:
+- **Start QA Session form** — target kind dropdown with conditional
+  sub-fields (PR id, branch, commit SHA, worktree path), flows textarea,
+  mode toggle (`confirm`|`auto`), capture checkboxes, runner dropdown
+  auto-populated from `GET /api/qa/runners`, project input.
+- **Sessions list** — composite cards with phase chips
+  (🔧 setup → 📝 draft → ▶ execute → ✅ done). Cards visually flip the
+  chip classes (`--done` / `--active` / `--pending` / `--failed` /
+  `--killed`) per `session.state`. State-driven left-border color
+  (red=failed, green=done, yellow=awaiting-approval, blue=active).
+- **Action buttons** —
+  `awaiting-approval` cards show `[Approve & run]` `[Edit]` `[Cancel]`;
+  every non-terminal card shows `[Dismiss]` `[Kill spawn]` in the footer;
+  terminal cards show no actions.
+- **Polling** — `_startQaSessionsPoll` runs a 3000 ms
+  `setInterval(loadQaSessions)`; `_qaAfterSessionsRender` auto-stops the
+  poll once every cached session is in
+  `QA_SESSION_TERMINAL_STATES = {done, failed, killed}`. Form-submit and
+  action handlers each call `_startQaSessionsPoll` to re-activate polling
+  when a new non-terminal session appears. Wired into the canonical page
+  lifecycle: `loadQaRunners` + `loadQaSessions` + `_startQaSessionsPoll`
+  live in `PAGE_LAZY_LOADERS.qa`; `_stopQaSessionsPoll` lives in
+  `PAGE_LEAVE_HOOKS`.
+## Command Center shortcut (PR12: CC system prompt)
+The CC system prompt teaches natural-language → `POST /api/qa/session`.
+The user can type "smoke test the home page in PR #2887" and CC builds
+the spec (`target.kind='pr'`, `target.prId='github:yemi33/minions#2887'`,
+`flowsRaw='smoke test the home page'`, default `mode='confirm'`) and
+fires the endpoint. CC must include the
+`X-Minions-Agent: cc-<turn-id>` header so the session's `createdBy`
+field gets the right audit trail.
+## When something goes wrong
+- **SETUP managed-spawn won't validate** → session lands in `failed` with
+  `failure_class: 'invalid-managed-spawn'` (from
+  `evaluateManagedSpawnAcceptance` in `onAgentClose`) or
+  `'managed-spawn-healthcheck-failed'`. The `error` field carries the
+  reason; check `engine/managed-logs/qa-session-<sessionId>.log` for the
+  failing healthcheck.
+- **DRAFT validateOutputDir rejects** → `failure_class:
+  'qa-session-draft-failed'`. Inspect the drafted file at
+  `engine/qa-tests/<sessionId>/`; the `errors` array returned by the
+  runner's `validateOutputDir` hook tells you what's missing.
+- **EXECUTE qa-run terminal status is `failed`/`errored`** →
+  `failure_class: 'qa-session-execute-failed'` /
+  `'qa-session-execute-errored'`. The linked `qa-runs` record (joined via
+  `session.qaRunId`) carries the agent's `summary` and artifact list.
+- **Want to start over after seeing a bad draft** → POST
+  `/api/qa/sessions/<id>/edit` with `{ feedback: "…" }`; do NOT
+  `/cancel` + create a new session unless the original spec was wrong.
+  Re-drafting keeps the test file from the prior DRAFT round on disk under
+  `engine/qa-tests/<sessionId>/` as `test.<ext>.bak.<round>` so you can
+  diff iterations.
+- **Spawn is wedged but session is still in `executing`** → POST
+  `/api/qa/sessions/<id>/kill` (NOT `/cancel` — the latter leaves the
+  spawn alive).

package/engine/cleanup.js CHANGED Viewed

@@ -1238,7 +1238,10 @@ async function runCleanup(config, verbose = false) {
   cleaned.pendingContextsTrimmed = 0;
   try {
     const cooldownPath = path.join(ENGINE_DIR, 'cooldowns.json');
-    const cooldowns = safeJson(cooldownPath);
+    // safeJsonNoRestore — same rationale as engine/cooldown.js loadCooldowns:
+    // resurrecting a stale .backup at cleanup time could re-introduce expired
+    // entries the active code already pruned (P-bfa1d-safejson-no-restore).
+    const cooldowns = safeJsonNoRestore(cooldownPath);
     if (cooldowns && typeof cooldowns === 'object') {
       let dirty = false;
       // Trim oversized pendingContexts arrays (one-time migration + ongoing cap)

package/engine/comment-classifier.js CHANGED Viewed

@@ -94,8 +94,15 @@ function isPreviewStatusBody(body) {
 // gh-comment.buildMinionsCommentBody produces (marker, \n\n, body) and
 // prevents quoted/blockquoted markers in human replies from triggering the
 // classifier.
+//
+// P-bfa-s1-bom-marker (settlement): strip an optional leading BOM (\uFEFF)
+// followed by any ASCII/Unicode whitespace before applying the regex.
+// Copy-paste flows from Word, VSCode-with-BOM, and editors that auto-insert
+// leading spaces would otherwise be misclassified as un-marked. The
+// blockquote rejection (lines beginning with `> <!--`) is preserved — `>`
+// is not whitespace and is not stripped, so quoted markers remain rejected.
 function hasMinionsMarker(body) {
-  const text = String(body || '');
+  const text = String(body || '').replace(/^\uFEFF?\s*/, '');
   if (!text) return false;
   const m = MINIONS_COMMENT_MARKER_RE.exec(text);
   if (!m) return false;

package/engine/cooldown.js CHANGED Viewed

@@ -7,7 +7,7 @@ const path = require('path');
 const shared = require('./shared');
 const queries = require('./queries');
-const { safeJson, mutateCooldowns, log, ENGINE_DEFAULTS } = shared;
+const { safeJson, safeJsonNoRestore, mutateCooldowns, log, ENGINE_DEFAULTS } = shared;
 const { ENGINE_DIR } = queries;
 /**
@@ -40,7 +40,11 @@ const dispatchCooldowns = new Map(); // key → { timestamp, failures }
 let _lastDiskCooldownKeys = new Set();
 function loadCooldowns() {
-  const saved = safeJson(COOLDOWN_PATH);
+  // safeJsonNoRestore — cooldowns are time-bounded ephemeral state (24h TTL).
+  // Restoring a stale `cooldowns.json.backup` could resurrect expired entries
+  // that should already have been pruned, suppressing legitimate dispatches
+  // (P-bfa1d-safejson-no-restore). Missing/corrupt primary == "no cooldowns".
+  const saved = safeJsonNoRestore(COOLDOWN_PATH);
   if (!saved) return;
   const now = Date.now();
   for (const [k, v] of Object.entries(saved)) {

package/engine/gh-comment.js CHANGED Viewed

@@ -25,12 +25,28 @@
  * `gh` invocation: argv form with `--body-file <tmp>` (NOT `--body <inline>`).
  * Avoids platform-specific shell-quoting bugs for bodies that contain quotes,
  * backticks, or `$(…)`. Temp files are cleaned up in `finally`.
+ *
+ * Per-slug PAT routing (P-bfa2a): each public function resolves a per-slug
+ * GitHub token via `engine/gh-token.js#resolveTokenForSlug(repo)` and threads
+ * it into the spawned `gh` process via `env.GH_TOKEN`. When the slug has no
+ * mapping in `engine.ghAccounts`, we fall back to the ambient `gh` identity
+ * (inherited via `process.env`) and emit a one-shot `console.warn` per slug
+ * so the dual-account audit can spot stragglers. The pinned "Never run
+ * `gh auth switch`" policy means we MUST resolve per-call rather than
+ * piggy-backing on the global active account.
  */
 const fs = require('fs');
 const path = require('path');
 const os = require('os');
 const { execFileSync: _execFileSync } = require('child_process');
+const { resolveTokenForSlug: _defaultResolveTokenForSlug } = require('./gh-token');
+// Module-level dedupe set so the "no token mapping for <slug>" warning fires
+// at most once per slug per process — keeps the audit signal visible without
+// flooding the log on every comment post. Exported via `_clearTokenWarnings`
+// for tests.
+const _warnedUnresolvedSlugs = new Set();
 // ── Validation ───────────────────────────────────────────────────────────────
@@ -134,12 +150,56 @@ function _writeTempBodyFile(content) {
   return file;
 }
-function _runGh(execFileSync, args, timeoutMs) {
-  return execFileSync('gh', args, {
+function _runGh(execFileSync, args, timeoutMs, env) {
+  const opts = {
     encoding: 'utf8',
     timeout: timeoutMs,
     windowsHide: true,
-  });
+  };
+  // Only set `env` when we have a token override to thread; leaving the option
+  // unset preserves the existing `process.env` inheritance for back-compat with
+  // unmapped slugs and tests that don't stub the resolver.
+  if (env) opts.env = env;
+  return execFileSync('gh', args, opts);
+}
+/**
+ * Resolve the per-slug PAT for `repo` and return an env override suitable for
+ * passing to `_runGh`. Returns `undefined` when no mapping exists (caller
+ * falls back to the ambient `gh` identity inherited from `process.env`).
+ *
+ * Emits a one-shot `console.warn` per (slug, reason) so the dual-account audit
+ * can spot stragglers without flooding the log on every comment post. Resolver
+ * exceptions are swallowed — comment posting must never fail because token
+ * resolution did. The pinned "Never run `gh auth switch`" policy survives even
+ * when the mapping is missing: we just inherit the ambient identity instead of
+ * forcibly mutating it.
+ */
+function _resolveTokenEnvForRepo(repo, resolveTokenForSlugFn) {
+  const resolve = resolveTokenForSlugFn || _defaultResolveTokenForSlug;
+  let token = null;
+  try {
+    token = resolve(repo);
+  } catch (e) {
+    const key = `error:${repo}`;
+    if (!_warnedUnresolvedSlugs.has(key)) {
+      _warnedUnresolvedSlugs.add(key);
+      console.warn(
+        `gh-comment: resolveTokenForSlug threw for "${repo}" (${e?.message || e}) — falling back to ambient gh identity`,
+      );
+    }
+    return undefined;
+  }
+  if (!token) {
+    if (!_warnedUnresolvedSlugs.has(repo)) {
+      _warnedUnresolvedSlugs.add(repo);
+      console.warn(
+        `gh-comment: no token mapping for repo "${repo}" — falling back to ambient gh identity (audit straggler)`,
+      );
+    }
+    return undefined;
+  }
+  return { ...process.env, GH_TOKEN: token };
 }
 function postPrComment({
@@ -151,16 +211,19 @@ function postPrComment({
   workItemId,
   timeoutMs = 30000,
   execFileSync = _execFileSync,
+  resolveTokenForSlug,
 } = {}) {
   _validateRepo(repo);
   _validatePrNumber(prNumber);
   const finalBody = buildMinionsCommentBody({ agentId, kind, workItemId, body });
   const file = _writeTempBodyFile(finalBody);
+  const env = _resolveTokenEnvForRepo(repo, resolveTokenForSlug);
   try {
     const output = _runGh(
       execFileSync,
       ['pr', 'comment', String(prNumber), '--repo', repo, '--body-file', file],
       timeoutMs,
+      env,
     );
     return { output: String(output || '').trim(), bodyFile: file };
   } finally {
@@ -177,16 +240,19 @@ function postPrReviewComment({
   workItemId,
   timeoutMs = 30000,
   execFileSync = _execFileSync,
+  resolveTokenForSlug,
 } = {}) {
   _validateRepo(repo);
   _validatePrNumber(prNumber);
   const finalBody = buildMinionsCommentBody({ agentId, kind, workItemId, body });
   const file = _writeTempBodyFile(finalBody);
+  const env = _resolveTokenEnvForRepo(repo, resolveTokenForSlug);
   try {
     const output = _runGh(
       execFileSync,
       ['pr', 'review', String(prNumber), '--comment', '--repo', repo, '--body-file', file],
       timeoutMs,
+      env,
     );
     return { output: String(output || '').trim(), bodyFile: file };
   } finally {
@@ -210,6 +276,7 @@ function postPrReview({
   workItemId,
   timeoutMs = 30000,
   execFileSync = _execFileSync,
+  resolveTokenForSlug,
 } = {}) {
   const flag = _REVIEW_EVENT_FLAGS[event];
   if (!flag) {
@@ -221,11 +288,13 @@ function postPrReview({
   _validatePrNumber(prNumber);
   const finalBody = buildMinionsCommentBody({ agentId, kind, workItemId, body });
   const file = _writeTempBodyFile(finalBody);
+  const env = _resolveTokenEnvForRepo(repo, resolveTokenForSlug);
   try {
     const output = _runGh(
       execFileSync,
       ['pr', 'review', String(prNumber), flag, '--repo', repo, '--body-file', file],
       timeoutMs,
+      env,
     );
     return { output: String(output || '').trim(), bodyFile: file };
   } finally {
@@ -250,4 +319,6 @@ module.exports = {
   // Internal helpers exported for tests / advanced callers
   _buildMarker,
   _writeTempBodyFile,
+  _resolveTokenEnvForRepo,
+  _clearTokenWarnings: () => _warnedUnresolvedSlugs.clear(),
 };

package/engine/gh-token.js CHANGED Viewed

@@ -18,7 +18,7 @@
  * via `_setTokenForTest(slug, token)` and clear it via `_clearTokenCache()`.
  */
-const { execSync } = require('child_process');
+const { execFileSync } = require('child_process');
 const path = require('path');
 const shared = require('./shared');
 const { safeJson, MINIONS_DIR, log } = shared;
@@ -70,14 +70,12 @@ function _fetchTokenForAccount(account, opts = {}) {
   const cached = _accountTokens.get(account);
   if (cached && cached.expiresAt > Date.now()) return cached.token;
-  const run = opts.execSync || execSync;
+  const run = opts.execFileSync || execFileSync;
   try {
-    // Argv form via `gh` is safer than constructing a shell string when account
-    // names ever include odd chars; using execSync's command form here for
-    // consistency with ado-token.js, but the account name flows from a config
-    // map under our control (validated at write time).
-    const cmd = `gh auth token --user ${account} --hostname github.com`;
-    const out = run(cmd, {
+    // Argv-array form: `account` is passed as a literal argument and never
+    // interpreted by a shell, so shell metacharacters in the configured
+    // account name (e.g. `;`, backticks, `$()`) cannot be executed.
+    const out = run('gh', ['auth', 'token', '--user', account, '--hostname', 'github.com'], {
       timeout: FETCH_TIMEOUT_MS,
       encoding: 'utf8',
       windowsHide: true,
@@ -102,7 +100,7 @@ function _fetchTokenForAccount(account, opts = {}) {
  * caller should fall back to the ambient `gh` identity.
  *
  * Test seam: `_setTokenForTest(slug, token)` short-circuits the entire chain
- * so unit tests do not have to mock execSync nor stand up a config file.
+ * so unit tests do not have to mock execFileSync nor stand up a config file.
  */
 function resolveTokenForSlug(slug, opts = {}) {
   if (slug && _slugTokenOverrides.has(slug)) return _slugTokenOverrides.get(slug);

package/engine/lifecycle.js CHANGED Viewed

@@ -4393,6 +4393,106 @@ async function runPostCompletionHooks(dispatchItem, agentId, code, stdout, confi
     }
   }
+  // P-a3f7c1b2 — QA Session lifecycle hook. The SETUP / DRAFT / EXECUTE work
+  // items carry `meta.sessionId` and `meta.sessionPhase` (set by the
+  // qa-sessions chain helpers — see engine/qa-sessions.js). On each agent
+  // exit we delegate to the matching handler, which validates the state
+  // transition and queues the next phase WI when applicable. The qa-validate
+  // qaRunId hook above (line 4340) already handles the qa-runs sidecar for
+  // the EXECUTE phase; we just need to map the qa-runs terminal status onto
+  // the session state.
+  //
+  // The same dispatchItem.meta vs dispatchItem.meta.item.meta dual-location
+  // problem the qaRunId block documents applies here — central-spawned
+  // sessions land their sidecar at the top level, project-routed ones nest
+  // it under `meta.item.meta`. Accept both.
+  const qaSessionId = meta?.sessionId || meta?.item?.meta?.sessionId;
+  const qaSessionPhase = meta?.sessionPhase || meta?.item?.meta?.sessionPhase;
+  if (qaSessionId && qaSessionPhase) {
+    try {
+      const qaSessions = require('./qa-sessions');
+      const wiPath = resolveWorkItemPath(meta);
+      const project = meta?.project?.name || meta?.item?.project || null;
+      const failureReason = typeof resultSummary === 'string' && resultSummary
+        ? resultSummary
+        : '';
+      if (qaSessionPhase === 'setup') {
+        qaSessions.handleSetupComplete(qaSessionId, {
+          success: !!effectiveSuccess,
+          wiPath,
+          project,
+          failureClass: (structuredCompletion && structuredCompletion.failure_class) || null,
+          reason: failureReason,
+        });
+      } else if (qaSessionPhase === 'draft') {
+        // The DRAFT agent reports the relative test file path in its
+        // structured completion as `testFile` (or the playbook can write a
+        // qa-session-draft-result.json sidecar in a future iteration). For
+        // now read it from structuredCompletion when present; the session
+        // record still works without it (EXECUTE prompt falls back to the
+        // generic test.<ext> hint).
+        const testFile = structuredCompletion && typeof structuredCompletion.testFile === 'string'
+          ? structuredCompletion.testFile
+          : null;
+        const session = qaSessions.getSession(qaSessionId);
+        if (session && session.spec && session.spec.mode === 'auto' && effectiveSuccess) {
+          // Auto-mode chains EXECUTE; we need a qa-runs record up-front. The
+          // dashboard endpoint that created the session can't know in
+          // advance whether EXECUTE will fire (mode could be flipped), so
+          // the qa-runs record is created here on the auto path.
+          let qaRunId = null;
+          try {
+            const qaRuns = require('./qa-runs');
+            const run = qaRuns.createRun({
+              runbookId: 'qa-session-' + qaSessionId,
+              targetName: session.managedSpawnName,
+              project: project || session.spec.project || null,
+            });
+            qaRunId = run.id;
+          } catch (createErr) {
+            log('warn', `qa-session auto-mode createRun failed for ${qaSessionId}: ${createErr.message}`);
+          }
+          qaSessions.handleDraftComplete(qaSessionId, {
+            success: true,
+            testFile,
+            wiPath,
+            project,
+            qaRunId,
+            reason: failureReason,
+          });
+        } else {
+          qaSessions.handleDraftComplete(qaSessionId, {
+            success: !!effectiveSuccess,
+            testFile,
+            reason: failureReason,
+          });
+        }
+      } else if (qaSessionPhase === 'execute') {
+        // The qaRunId block above (line 4340) already wrote the terminal
+        // qa-runs record. Re-read it to drive the session state.
+        let qaRunStatus = null;
+        const linkedQaRunId = meta?.qaRunId || meta?.item?.meta?.qaRunId;
+        if (linkedQaRunId) {
+          try {
+            const qaRuns = require('./qa-runs');
+            const run = qaRuns.getRun(linkedQaRunId);
+            if (run && run.status) qaRunStatus = run.status;
+          } catch (readErr) {
+            log('warn', `qa-session execute qa-run lookup failed for ${qaSessionId}: ${readErr.message}`);
+          }
+        }
+        qaSessions.handleExecuteComplete(qaSessionId, {
+          success: !!effectiveSuccess,
+          qaRunStatus,
+          summary: resultSummary || null,
+          reason: failureReason,
+        });
+      }
+    } catch (err) {
+      log('warn', `qa-session completion hook for ${qaSessionId} (${qaSessionPhase}): ${err.message}`);
+    }
+  }
   // Plan chaining removed — user must explicitly execute plan-to-prd after reviewing the plan
   if (effectiveSuccess && meta?.item?.sourcePlan) checkPlanCompletion(meta, config);

package/engine/pipeline.js CHANGED Viewed

@@ -76,7 +76,9 @@ function getPipelineRuns() {
 function getActiveRun(pipelineId) {
   const runs = getPipelineRuns();
   const pipelineRuns = runs[pipelineId] || [];
-  return pipelineRuns.find(r => r.status === PIPELINE_STATUS.RUNNING || r.status === PIPELINE_STATUS.PAUSED);
+  return pipelineRuns.find(r => r.status === PIPELINE_STATUS.RUNNING
+    || r.status === PIPELINE_STATUS.PAUSED
+    || r.status === PIPELINE_STATUS.WAITING_HUMAN);
 }
 function startRun(pipelineId, pipeline) {
@@ -110,6 +112,12 @@ function startRun(pipelineId, pipeline) {
 }
 function updateRunStage(pipelineId, runId, stageId, updates) {
+  if (updates && Object.prototype.hasOwnProperty.call(updates, 'status')) {
+    const validStatuses = Object.values(PIPELINE_STATUS);
+    if (!validStatuses.includes(updates.status)) {
+      throw new Error(`updateRunStage: invalid status '${updates.status}' (expected one of: ${validStatuses.join('|')})`);
+    }
+  }
   mutateJsonFileLocked(PIPELINE_RUNS_PATH, (data) => {
     const runs = data[pipelineId] || [];
     const run = runs.find(r => r.runId === runId);