npm - @yemi33/minions - Versions diffs - 0.1.2070 → 0.1.2072 - Mend

@yemi33/minions 0.1.2070 → 0.1.2072

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/dashboard/js/qa.js +358 -0
package/dashboard/js/state.js +2 -1
package/dashboard/pages/qa.html +72 -0
package/dashboard/styles.css +102 -0
package/dashboard.js +410 -6
package/docs/qa-runbook-lifecycle.md +232 -0
package/engine/cleanup.js +4 -1
package/engine/comment-classifier.js +8 -1
package/engine/cooldown.js +6 -2
package/engine/gh-comment.js +74 -3
package/engine/gh-token.js +7 -9
package/engine/lifecycle.js +100 -0
package/engine/pipeline.js +9 -1
package/engine/playbook.js +39 -0
package/engine/qa-runners/maestro.js +152 -0
package/engine/qa-runners/playwright.js +149 -0
package/engine/qa-runners.js +323 -0
package/engine/qa-sessions.js +1008 -0
package/engine/shared.js +71 -12
package/engine.js +140 -0
package/package.json +1 -1
package/playbooks/qa-session-draft.md +158 -0
package/playbooks/qa-session-execute.md +165 -0
package/playbooks/qa-session-setup.md +154 -0
package/prompts/cc-system.md +43 -0
package/routing.md +3 -0

package/engine/qa-sessions.js ADDED

@@ -0,0 +1,1008 @@
+/**
+ * engine/qa-sessions.js — Lifecycle + persistence for QA Sessions.
+ *
+ * QA Session = a single end-to-end natural-language QA flow that the engine
+ * orchestrates across THREE chained work items:
+ *
+ *   SETUP   → resolves target (PR / branch / current / commit), checks out a
+ *             worktree, decides the dev-up command, writes a
+ *             managed-spawn.json sidecar. Engine spawns the spec; healthcheck
+ *             gates the transition to DRAFT.
+ *   DRAFT   → uses the active runner adapter to translate the user's
+ *             natural-language flows into a runner-native test file at
+ *             engine/qa-tests/<sessionId>/test.<ext>. In `confirm` mode the
+ *             session parks at `awaiting-approval` for human review; in
+ *             `auto` mode it auto-chains EXECUTE.
+ *   EXECUTE → runs the drafted test against the managed-spawn target, writes
+ *             agents/<id>/qa-run-result.json. The existing qa-runs lifecycle
+ *             hook (engine/lifecycle.js:4340) ingests the sidecar; this
+ *             module then transitions the session done/failed based on the
+ *             resulting qa-run terminal status.
+ *
+ * State machine (8 values):
+ *
+ *   pending ──▶ spawning ──▶ drafting ──▶ awaiting-approval ──▶ executing ──▶ done
+ *      │            │             │                │                │           ╲
+ *      ╰────────────┴─────────────┴────────────────┴────────────────┴───────────▶ failed
+ *      ╰────────────┴─────────────┴────────────────┴────────────────┴───────────▶ killed
+ *
+ *   (awaiting-approval ──▶ drafting on /edit; drafting ──▶ executing on auto mode.)
+ *
+ * Concurrency: every mutation goes through mutateJsonFileLocked per the repo
+ * convention. Callbacks are synchronous and never await. Slow filesystem work
+ * (qa-tests/<id>/ scaffolding, dispatch enqueueing) runs OUTSIDE the lock.
+ *
+ * Path-traversal hardening: sessionId is generated by createSession() with a
+ * uid suffix, but the module still treats every callsite as untrusted —
+ * _isSafeSessionId() is invoked on every public read/write that maps an id to
+ * a filesystem path or a session lookup. Mirrors engine/qa-runbooks.js
+ * _isSafeId (PR #2694 review feedback).
+ *
+ * State file: engine/qa-sessions.json (single file, all sessions across all
+ * projects), capped at QA_SESSIONS_MAX_RECORDS via createSession-time rotation.
+ */
+const fs = require('fs');
+const path = require('path');
+const shared = require('./shared');
+const { mutateJsonFileLocked, uid, ts, log } = shared;
+// Cap engine/qa-sessions.json. Sessions cost more than runs (3 WIs, a
+// managed-spawn, artifacts) so the operational steady state is meaningfully
+// smaller than QA_RUNS_MAX_RECORDS (2000). 500 covers ~2 months of nightly +
+// ad-hoc sessions without ballooning the JSON parse cost on /api/status's
+// fast-state slice (W-mpehsyhv event-loop budget — see qa-runs.js cap notes).
+const QA_SESSIONS_MAX_RECORDS = 500;
+const QA_SESSION_STATE = Object.freeze({
+  PENDING: 'pending',
+  SPAWNING: 'spawning',
+  DRAFTING: 'drafting',
+  AWAITING_APPROVAL: 'awaiting-approval',
+  EXECUTING: 'executing',
+  DONE: 'done',
+  FAILED: 'failed',
+  KILLED: 'killed',
+});
+const TERMINAL_STATES = Object.freeze(new Set([
+  QA_SESSION_STATE.DONE,
+  QA_SESSION_STATE.FAILED,
+  QA_SESSION_STATE.KILLED,
+]));
+const SESSION_PHASE = Object.freeze({
+  SETUP: 'setup',
+  DRAFT: 'draft',
+  EXECUTE: 'execute',
+});
+// Allowed forward transitions. Anything not enumerated here is rejected.
+//
+// Notes:
+//  - pending → killed/failed: a session can be cancelled or fail before the
+//    SETUP WI even starts (e.g., POST /sessions/<id>/cancel right after
+//    POST /api/qa/session, or createSession failed to queue SETUP).
+//  - drafting → executing: auto-mode skips awaiting-approval and goes
+//    straight from DRAFT-done to EXECUTE.
+//  - awaiting-approval → drafting: POST /api/qa/sessions/<id>/edit re-fires
+//    DRAFT with the user's natural-language feedback as steering.
+//  - awaiting-approval → done: POST /api/qa/sessions/<id>/dismiss accepts
+//    the draft as final but doesn't run it (user decided to ship the test
+//    file as-is and stop the session).
+//  - executing → killed: POST /api/qa/sessions/<id>/kill while the EXECUTE
+//    WI is mid-flight terminates the spawn and short-circuits the session.
+//  - Terminal states (done/failed/killed) have NO outgoing transitions.
+const ALLOWED_TRANSITIONS = {
+  [QA_SESSION_STATE.PENDING]: new Set([
+    QA_SESSION_STATE.SPAWNING,
+    QA_SESSION_STATE.FAILED,
+    QA_SESSION_STATE.KILLED,
+  ]),
+  [QA_SESSION_STATE.SPAWNING]: new Set([
+    QA_SESSION_STATE.DRAFTING,
+    QA_SESSION_STATE.FAILED,
+    QA_SESSION_STATE.KILLED,
+  ]),
+  [QA_SESSION_STATE.DRAFTING]: new Set([
+    QA_SESSION_STATE.AWAITING_APPROVAL,
+    QA_SESSION_STATE.EXECUTING,
+    QA_SESSION_STATE.FAILED,
+    QA_SESSION_STATE.KILLED,
+  ]),
+  [QA_SESSION_STATE.AWAITING_APPROVAL]: new Set([
+    QA_SESSION_STATE.DRAFTING,
+    QA_SESSION_STATE.EXECUTING,
+    QA_SESSION_STATE.DONE,
+    QA_SESSION_STATE.FAILED,
+    QA_SESSION_STATE.KILLED,
+  ]),
+  [QA_SESSION_STATE.EXECUTING]: new Set([
+    QA_SESSION_STATE.DONE,
+    QA_SESSION_STATE.FAILED,
+    QA_SESSION_STATE.KILLED,
+  ]),
+  [QA_SESSION_STATE.DONE]: new Set(),
+  [QA_SESSION_STATE.FAILED]: new Set(),
+  [QA_SESSION_STATE.KILLED]: new Set(),
+};
+// Accepted values for spec.target.kind. Insertion order is also the order in
+// which the kinds are listed in the validation error message.
+const VALID_TARGET_KINDS = new Set(['pr', 'branch', 'current', 'commit']);
+// Accepted values for spec.mode ('confirm' is used when mode is unset).
+const VALID_MODES = new Set(['confirm', 'auto']);
+// Length caps measured with String.length. NOTE(review): feedbackMax and
+// summaryMax are not referenced in the part of the file visible here —
+// presumably enforced by callers further down or in dashboard.js; confirm.
+const LIMITS = {
+  idMax: 64, // session id, checked by _isSafeSessionId
+  flowsMax: 4000, // spec.flowsRaw
+  feedbackMax: 4000,
+  runnerNameMax: 64, // spec.runner
+  targetFieldMax: 500, // per-kind target sub-field (prId / branch / sha / worktree)
+  projectMax: 64, // spec.project
+  summaryMax: 2000,
+};
+// Mirrors engine/qa-runbooks.js _isSafeId — kebab-case ≤64 chars, no leading/
+// trailing hyphen, no double hyphen, no path separators / null bytes / `..`.
+// Reject anything that isn't safe BEFORE it can reach a path.join or a session
+// lookup so a hostile sessionId from the dashboard can't read or overwrite an
+// arbitrary file under MINIONS_DIR/engine.
+const _KEBAB_RE = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
+function _isNonEmptyString(v) {
+  return typeof v === 'string' && v.length > 0;
+}
+function _isSafeSessionId(id) {
+  return _isNonEmptyString(id) && id.length <= LIMITS.idMax && _KEBAB_RE.test(id);
+}
+// Dynamic paths — respect MINIONS_TEST_DIR for test isolation. shared.MINIONS_DIR
+// resolves at every call so MINIONS_TEST_DIR=foo flips the resolution
+// without re-requiring this module (mirrors qa-runs.js pattern).
+function qaSessionsPath() {
+  return path.join(shared.MINIONS_DIR, 'engine', 'qa-sessions.json');
+}
+function qaTestsDir() {
+  return path.join(shared.MINIONS_DIR, 'engine', 'qa-tests');
+}
+function qaTestsDirForSession(sessionId) {
+  if (!_isSafeSessionId(sessionId)) {
+    throw new Error('qa-sessions: unsafe sessionId for path: ' + sessionId);
+  }
+  return path.join(qaTestsDir(), sessionId);
+}
+function isValidState(s) {
+  return Object.values(QA_SESSION_STATE).includes(s);
+}
+function validateTransition(from, to) {
+  if (!isValidState(from)) throw new Error(`qa-sessions: invalid source state "${from}"`);
+  if (!isValidState(to)) throw new Error(`qa-sessions: invalid target state "${to}"`);
+  const allowed = ALLOWED_TRANSITIONS[from];
+  if (!allowed.has(to)) {
+    throw new Error(`qa-sessions: illegal state transition ${from} -> ${to}`);
+  }
+}
+// ── Validation helpers ──────────────────────────────────────────────────────
+function _validateTarget(target) {
+  const errors = [];
+  if (!target || typeof target !== 'object' || Array.isArray(target)) {
+    return ['target must be a plain object'];
+  }
+  if (!_isNonEmptyString(target.kind) || !VALID_TARGET_KINDS.has(target.kind)) {
+    errors.push('target.kind must be one of: ' + [...VALID_TARGET_KINDS].join(', '));
+    return errors;
+  }
+  // Per-kind sub-field requirements. Validate each as a length-capped string;
+  // the SETUP playbook is responsible for the semantic checks (PR exists,
+  // branch fetches, etc.) — this layer just guards path/length safety.
+  const requireField = (field) => {
+    const v = target[field];
+    if (!_isNonEmptyString(v)) errors.push(`target.${field} is required when kind=${target.kind}`);
+    else if (v.length > LIMITS.targetFieldMax) {
+      errors.push(`target.${field} exceeds ${LIMITS.targetFieldMax} chars`);
+    }
+  };
+  switch (target.kind) {
+    case 'pr':
+      requireField('prId');
+      break;
+    case 'branch':
+      requireField('branch');
+      break;
+    case 'commit':
+      requireField('sha');
+      break;
+    case 'current':
+      // No required sub-field. `worktree` is optional and validated as string.
+      if (target.worktree !== undefined && target.worktree !== null) {
+        if (typeof target.worktree !== 'string') {
+          errors.push('target.worktree must be a string when present');
+        } else if (target.worktree.length > LIMITS.targetFieldMax) {
+          errors.push(`target.worktree exceeds ${LIMITS.targetFieldMax} chars`);
+        }
+      }
+      break;
+  }
+  return errors;
+}
+function _validateCapture(capture) {
+  if (capture === undefined || capture === null) return [];
+  if (typeof capture !== 'object' || Array.isArray(capture)) {
+    return ['capture must be a plain object'];
+  }
+  const errors = [];
+  for (const field of ['video', 'screenshots', 'logs']) {
+    if (capture[field] !== undefined && typeof capture[field] !== 'boolean') {
+      errors.push(`capture.${field} must be boolean when present`);
+    }
+  }
+  return errors;
+}
+/**
+ * Validate a createSession spec. Returns { ok, errors }. Never throws.
+ */
+function validateSpec(spec) {
+  const errors = [];
+  if (!spec || typeof spec !== 'object' || Array.isArray(spec)) {
+    return { ok: false, errors: ['spec must be a plain object'] };
+  }
+  errors.push(..._validateTarget(spec.target));
+  if (!_isNonEmptyString(spec.flowsRaw)) {
+    errors.push('flowsRaw is required (non-empty string)');
+  } else if (spec.flowsRaw.length > LIMITS.flowsMax) {
+    errors.push(`flowsRaw exceeds ${LIMITS.flowsMax} chars`);
+  }
+  const mode = spec.mode || 'confirm';
+  if (!VALID_MODES.has(mode)) {
+    errors.push('mode must be one of: ' + [...VALID_MODES].join(', '));
+  }
+  errors.push(..._validateCapture(spec.capture));
+  if (spec.runner !== undefined && spec.runner !== null) {
+    if (typeof spec.runner !== 'string') {
+      errors.push('runner must be a string or null when present');
+    } else if (spec.runner.length > LIMITS.runnerNameMax) {
+      errors.push(`runner exceeds ${LIMITS.runnerNameMax} chars`);
+    } else if (spec.runner && !_KEBAB_RE.test(spec.runner)) {
+      errors.push('runner must be kebab-case (a-z, 0-9, hyphens)');
+    }
+  }
+  if (spec.project !== undefined && spec.project !== null) {
+    if (typeof spec.project !== 'string') {
+      errors.push('project must be a string when present');
+    } else if (spec.project.length > LIMITS.projectMax) {
+      errors.push(`project exceeds ${LIMITS.projectMax} chars`);
+    }
+  }
+  return { ok: errors.length === 0, errors };
+}
+// ── CRUD ────────────────────────────────────────────────────────────────────
+/**
+ * Create a session in `pending` state and persist it to qa-sessions.json.
+ *
+ * The caller (POST /api/qa/session handler) is responsible for queuing the
+ * SETUP work item via buildSetupWorkItem() + the standard work-items/dispatch
+ * flow. createSession() intentionally does NOT touch dispatch.json so the
+ * pure persistence layer stays unit-testable without standing up the whole
+ * engine.
+ *
+ * @param {object} spec
+ * @param {object} spec.target   - { kind: 'pr'|'branch'|'current'|'commit', ...sub-fields }
+ * @param {string} spec.flowsRaw - natural-language description of what to test
+ * @param {string} [spec.mode]   - 'confirm' (default) | 'auto'
+ * @param {object} [spec.capture] - { video?, screenshots?, logs? }
+ * @param {string} [spec.runner] - explicit runner name, or null to auto-detect
+ * @param {string} [spec.project] - project name (used to scope artifacts)
+ * @param {string} [spec.createdBy] - operator identity for audit
+ * @returns {object} the created session record
+ */
+function createSession(spec) {
+  const v = validateSpec(spec);
+  if (!v.ok) {
+    const err = new Error('qa-sessions: invalid spec: ' + v.errors.join('; '));
+    err.validationErrors = v.errors;
+    throw err;
+  }
+  const id = 'qas-' + uid();
+  const now = ts();
+  const session = {
+    id,
+    state: QA_SESSION_STATE.PENDING,
+    spec: {
+      target: { ...spec.target },
+      flowsRaw: spec.flowsRaw,
+      mode: spec.mode || 'confirm',
+      capture: {
+        video: !!(spec.capture && spec.capture.video),
+        screenshots: !!(spec.capture && spec.capture.screenshots),
+        logs: !!(spec.capture && spec.capture.logs),
+      },
+      runner: spec.runner || null,
+      project: spec.project || null,
+    },
+    // Per-phase WI links — back-filled by setSessionWorkItem when the
+    // dashboard endpoint or lifecycle hook queues the next phase.
+    workItems: { setup: null, draft: null, execute: null },
+    // The managed-spawn name follows a deterministic convention
+    // (`qa-session-<id>`) so /engine and listManagedSpecs() can join the
+    // spawn back to its owning session.
+    managedSpawnName: 'qa-session-' + id,
+    // qaRunId is the linked qa-runs record id — populated when EXECUTE
+    // queues its WI (the dashboard endpoint creates the qa-runs record and
+    // stamps it onto session.qaRunId). Default null until then.
+    qaRunId: null,
+    testFile: null,    // relative path under engine/qa-tests/<id>/ filled in by DRAFT
+    summary: null,
+    failureClass: null,
+    error: null,
+    createdAt: now,
+    createdBy: typeof spec.createdBy === 'string' ? spec.createdBy : null,
+    updatedAt: now,
+    completedAt: null,
+  };
+  mutateJsonFileLocked(qaSessionsPath(), (sessions) => {
+    if (!Array.isArray(sessions)) sessions = [];
+    sessions.push(session);
+    // Rotation: drop oldest-by-createdAt when over cap. Cheap because it runs
+    // only at createSession — the steady-state read paths skip the sort.
+    if (sessions.length > QA_SESSIONS_MAX_RECORDS) {
+      sessions.sort((a, b) => ((a && a.createdAt) || '').localeCompare((b && b.createdAt) || ''));
+      sessions = sessions.slice(sessions.length - QA_SESSIONS_MAX_RECORDS);
+    }
+    return sessions;
+  }, { defaultValue: [] });
+  // Pre-create the per-session test directory OUTSIDE the lock — directory
+  // creation is idempotent and the slow fs call must not run while holding
+  // the JSON lock (CLAUDE.md convention). DRAFT writes test.<ext> into here.
+  try { fs.mkdirSync(qaTestsDirForSession(id), { recursive: true }); }
+  catch (e) { log('warn', `qa-sessions: mkdir tests dir failed for ${id}: ${e.message}`); }
+  return session;
+}
+/**
+ * Lookup a single session by id, or null if missing / id unsafe.
+ */
+function getSession(id) {
+  if (!_isSafeSessionId(id)) return null;
+  const sessions = shared.safeJsonArr(qaSessionsPath());
+  return sessions.find(s => s && s.id === id) || null;
+}
+/**
+ * List sessions, newest first, optionally filtered by state, capped by limit.
+ */
+function listSessions({ limit, state } = {}) {
+  let sessions = shared.safeJsonArr(qaSessionsPath());
+  if (!Array.isArray(sessions)) return [];
+  if (state) {
+    if (!isValidState(state)) return [];
+    sessions = sessions.filter(s => s && s.state === state);
+  }
+  sessions = sessions.slice().sort((a, b) => {
+    const ac = (a && a.createdAt) || '';
+    const bc = (b && b.createdAt) || '';
+    if (ac === bc) return ((b && b.id) || '').localeCompare((a && a.id) || '');
+    return ac < bc ? 1 : -1;
+  });
+  const n = Number(limit);
+  if (Number.isFinite(n) && n > 0) sessions = sessions.slice(0, Math.floor(n));
+  return sessions;
+}
+/**
+ * Back-fill session.workItems[phase] with the queued WI id. Idempotent —
+ * overwrites any prior value (a re-queued DRAFT after /edit replaces the
+ * stale id). Returns the updated session or null on unknown id / unsafe id /
+ * invalid phase.
+ */
+function setSessionWorkItem(id, phase, workItemId) {
+  if (!_isSafeSessionId(id)) return null;
+  if (!Object.values(SESSION_PHASE).includes(phase)) return null;
+  let captured = null;
+  mutateJsonFileLocked(qaSessionsPath(), (sessions) => {
+    if (!Array.isArray(sessions)) sessions = [];
+    const session = sessions.find(s => s && s.id === id);
+    if (session) {
+      if (!session.workItems || typeof session.workItems !== 'object') {
+        session.workItems = { setup: null, draft: null, execute: null };
+      }
+      session.workItems[phase] = workItemId || null;
+      session.updatedAt = ts();
+      captured = session;
+    }
+    return sessions;
+  }, { defaultValue: [] });
+  return captured;
+}
+/**
+ * Back-fill session.qaRunId with the linked qa-runs record id. Called by the
+ * EXECUTE dispatch endpoint (PR4) after qaRuns.createRun(). Returns the
+ * updated session or null on unknown / unsafe id.
+ */
+function setSessionQaRunId(id, qaRunId) {
+  if (!_isSafeSessionId(id)) return null;
+  let captured = null;
+  mutateJsonFileLocked(qaSessionsPath(), (sessions) => {
+    if (!Array.isArray(sessions)) sessions = [];
+    const session = sessions.find(s => s && s.id === id);
+    if (session) {
+      session.qaRunId = qaRunId || null;
+      session.updatedAt = ts();
+      captured = session;
+    }
+    return sessions;
+  }, { defaultValue: [] });
+  return captured;
+}
+// ── State transitions ──────────────────────────────────────────────────────
+/**
+ * Move a session to `toState`, optionally patching a few result fields.
+ *
+ * Inside the file lock: the transition is validated against
+ * ALLOWED_TRANSITIONS, `state` and `updatedAt` are written, `completedAt` is
+ * stamped when `toState` is terminal, then the patch is applied. A missing
+ * session or an illegal transition leaves the file content unchanged and is
+ * re-thrown after the lock is released.
+ *
+ * Only these patch keys are copied onto the record: summary, error,
+ * failureClass, testFile, qaRunId, managedSpawnHealth. Every other key —
+ * including `state`, spec fields and createdAt — is ignored, so a patch
+ * cannot rewrite immutable fields by mistake.
+ *
+ * @param {string} id
+ * @param {string} toState
+ * @param {object} [patch]
+ * @returns {object} updated session
+ * @throws Error on unknown id, unsafe id, or illegal transition
+ */
+function transitionSession(id, toState, patch = {}) {
+  if (!_isSafeSessionId(id)) throw new Error('qa-sessions: unsafe sessionId: ' + id);
+  if (!isValidState(toState)) throw new Error('qa-sessions: invalid target state: ' + toState);
+  const patchableFields = ['summary', 'error', 'failureClass', 'testFile', 'qaRunId', 'managedSpawnHealth'];
+  const outcome = { session: null, error: null };
+  mutateJsonFileLocked(qaSessionsPath(), (stored) => {
+    const all = Array.isArray(stored) ? stored : [];
+    const record = all.find((entry) => entry && entry.id === id);
+    if (!record) {
+      outcome.error = new Error(`qa-sessions: session not found: ${id}`);
+      return all;
+    }
+    try {
+      validateTransition(record.state, toState);
+    } catch (illegal) {
+      // Defer the throw until after the lock callback has returned.
+      outcome.error = illegal;
+      return all;
+    }
+    record.state = toState;
+    record.updatedAt = ts();
+    if (TERMINAL_STATES.has(toState)) {
+      record.completedAt = ts();
+    }
+    const patchIsObject = patch && typeof patch === 'object' && !Array.isArray(patch);
+    if (patchIsObject) {
+      patchableFields
+        .filter((field) => Object.prototype.hasOwnProperty.call(patch, field))
+        .forEach((field) => { record[field] = patch[field]; });
+    }
+    outcome.session = record;
+    return all;
+  }, { defaultValue: [] });
+  if (outcome.error) throw outcome.error;
+  return outcome.session;
+}
+// Named convenience transitions — thin wrappers over transitionSession for
+// readability at the call sites in lifecycle.js + dashboard.js. Each preserves
+// the throw-on-illegal contract.
+function markSpawning(id, patch) { return transitionSession(id, QA_SESSION_STATE.SPAWNING, patch); }
+function markDrafting(id, patch) { return transitionSession(id, QA_SESSION_STATE.DRAFTING, patch); }
+function markAwaitingApproval(id, patch) { return transitionSession(id, QA_SESSION_STATE.AWAITING_APPROVAL, patch); }
+function markExecuting(id, patch) { return transitionSession(id, QA_SESSION_STATE.EXECUTING, patch); }
+function markDone(id, patch) { return transitionSession(id, QA_SESSION_STATE.DONE, patch); }
+function markFailed(id, patch) { return transitionSession(id, QA_SESSION_STATE.FAILED, patch); }
+function markKilled(id, patch) { return transitionSession(id, QA_SESSION_STATE.KILLED, patch); }
+// ── Work-item builders (pure) ──────────────────────────────────────────────
+//
+// Each builder returns a WI spec ready for mutateWorkItems().push + addToDispatch.
+// Keeping these pure lets the dashboard endpoints (PR4) reuse them without
+// pulling dispatch into the unit test path. They're also called by the
+// lifecycle chain helpers below to queue the next phase.
+function _baseWorkItem(session, phase, { title, description, project }) {
+  const wiId = 'W-' + uid();
+  const wi = {
+    id: wiId,
+    title,
+    // Use TEST as the underlying type — qa-validate's existing dispatch uses
+    // the same pattern (dashboard.js:9962). meta.playbook overrides routing
+    // so the engine renders the qa-session-* playbook bodies (PR5+6).
+    type: shared.WORK_TYPE.TEST,
+    priority: 'medium',
+    description,
+    status: shared.WI_STATUS.PENDING,
+    created: new Date().toISOString(),
+    createdBy: 'qa-session-' + phase,
+    oneShot: true,
+    skipPr: true,
+    meta: {
+      sessionId: session.id,
+      sessionPhase: phase,
+      qaSession: {
+        target: session.spec.target,
+        flowsRaw: session.spec.flowsRaw,
+        mode: session.spec.mode,
+        capture: session.spec.capture,
+        runner: session.spec.runner,
+      },
+      playbook: 'qa-session-' + phase,
+    },
+  };
+  if (project) wi.project = project;
+  if (phase === 'setup') wi.meta.managed_spawn = true;
+  return wi;
+}
+/**
+ * Build the SETUP work item. The agent resolves the target, sets up a
+ * worktree, and writes a managed-spawn.json sidecar. Engine then spawns the
+ * service and the healthcheck gate drives the next transition.
+ */
+function buildSetupWorkItem(session, { project } = {}) {
+  return _baseWorkItem(session, SESSION_PHASE.SETUP, {
+    title: `QA Session SETUP: ${_summarizeTarget(session.spec.target)}`,
+    description: [
+      `QA Session ${session.id} — SETUP phase.`,
+      '',
+      `Target: ${JSON.stringify(session.spec.target)}`,
+      `Flows: ${session.spec.flowsRaw}`,
+      '',
+      'Resolve the target to a worktree, inspect the codebase for the dev-up command,',
+      `and write \`agents/<your-id>/managed-spawn.json\` with name=\`${session.managedSpawnName}\`.`,
+      'See `playbooks/qa-session-setup.md` for the full contract.',
+    ].join('\n'),
+    project,
+  });
+}
+/**
+ * Build the DRAFT work item. The agent reads the live spawn metadata, calls
+ * runner.generateBrief(), and writes the runner-native test file under
+ * engine/qa-tests/<sessionId>/.
+ *
+ * @param {object} session
+ * @param {object} [opts]
+ * @param {string} [opts.project]
+ * @param {string} [opts.feedback] - natural-language feedback from /edit, threaded into the prompt as steering
+ */
+function buildDraftWorkItem(session, { project, feedback } = {}) {
+  const lines = [
+    `QA Session ${session.id} — DRAFT phase.`,
+    '',
+    `Flows: ${session.spec.flowsRaw}`,
+    `Runner: ${session.spec.runner || '(auto-detected)'}`,
+    `Mode: ${session.spec.mode}`,
+    '',
+    `Managed-spawn target: \`${session.managedSpawnName}\` (live; query /api/managed-processes/by-name).`,
+    `Write the test file to \`engine/qa-tests/${session.id}/test.<ext>\` using the runner's native format.`,
+    'See `playbooks/qa-session-draft.md` for the full contract.',
+  ];
+  if (feedback) {
+    lines.push('', '## Reviewer feedback on previous draft', '', String(feedback));
+  }
+  return _baseWorkItem(session, SESSION_PHASE.DRAFT, {
+    title: `QA Session DRAFT: ${_summarizeTarget(session.spec.target)}`,
+    description: lines.join('\n'),
+    project,
+  });
+}
+/**
+ * Build the EXECUTE work item. The agent invokes the runner against the live
+ * spawn, captures artifacts per `capture`, and writes
+ * agents/<id>/qa-run-result.json. The existing qaRunId hook at
+ * engine/lifecycle.js:4340 ingests the sidecar; our own session hook below
+ * transitions done/failed based on the resulting qa-run terminal status.
+ *
+ * @param {object} session
+ * @param {object} opts
+ * @param {string} opts.qaRunId - id from qaRuns.createRun()
+ * @param {string} [opts.project]
+ */
+function buildExecuteWorkItem(session, { qaRunId, project } = {}) {
+  if (!_isNonEmptyString(qaRunId)) {
+    throw new Error('qa-sessions: buildExecuteWorkItem requires qaRunId');
+  }
+  const wi = _baseWorkItem(session, SESSION_PHASE.EXECUTE, {
+    title: `QA Session EXECUTE: ${_summarizeTarget(session.spec.target)}`,
+    description: [
+      `QA Session ${session.id} — EXECUTE phase.`,
+      '',
+      `Run \`engine/qa-tests/${session.id}/${session.testFile || 'test.<ext>'}\` against \`${session.managedSpawnName}\`.`,
+      `qaRunId: ${qaRunId}`,
+      '',
+      'Capture artifacts per session.spec.capture. Write a qa-run-result.json',
+      'sidecar to your agent dir (the engine ingests it and marks the linked',
+      'qa-runs record terminal). See `playbooks/qa-session-execute.md`.',
+    ].join('\n'),
+    project,
+  });
+  // qaRunId on the WI meta routes the existing lifecycle hook (line 4340) so
+  // the qa-runs record gets the same completion semantics as the standalone
+  // qa-validate dispatch path. Keeping it at top level (not nested under
+  // qaSession) matches the dispatchItem.meta.qaRunId convention.
+  wi.meta.qaRunId = qaRunId;
+  return wi;
+}
+function _summarizeTarget(target) {
+  if (!target || typeof target !== 'object') return '(unknown)';
+  switch (target.kind) {
+    case 'pr':      return `PR#${target.prId}`;
+    case 'branch':  return `branch:${target.branch}`;
+    case 'commit':  return `commit:${String(target.sha || '').slice(0, 8)}`;
+    case 'current': return `current:${target.worktree || 'cwd'}`;
+    default:        return target.kind || '(unknown)';
+  }
+}
+// ── Cross-WI dispatch chain helpers ─────────────────────────────────────────
+//
+// These are the integration entry points the lifecycle hook + dashboard
+// endpoints call when an agent finishes or a user takes an action. Each one
+// validates the transition first (so an illegal call throws BEFORE side
+// effects like queueing the next WI), then applies the state change, then
+// queues the next phase via _queueWorkItem when appropriate.
+//
+// dispatch + work-items are lazy-required inside _queueWorkItem to keep
+// `require('./qa-sessions')` cycle-safe at the top of lifecycle.js.
+function _queueWorkItem(wi, wiPath) {
+  // Append the WI to the project (or central) work-items file, then queue a
+  // dispatch entry that wraps it. Mirrors the qa-validate flow at
+  // dashboard.js handleQaRunbookRun (line 9985+). Both writes go through their
+  // module-internal locks so concurrent dashboard calls don't lose entries.
+  shared.mutateWorkItems(wiPath, (items) => {
+    if (!Array.isArray(items)) items = [];
+    if (!items.some(i => i && i.id === wi.id)) items.push(wi);
+    return items;
+  });
+  const dispatch = require('./dispatch');
+  dispatch.addToDispatch({
+    type: wi.type,
+    agent: wi.agent || null,
+    meta: { item: wi, playbook: wi.meta.playbook },
+  });
+  return wi.id;
+}
+/**
+ * Start a session: move it pending → spawning, queue the SETUP work item and
+ * record that WI's id on the session. Called by the POST /api/qa/session
+ * handler immediately after createSession.
+ *
+ * @param {string} sessionId
+ * @param {object} opts
+ * @param {string} opts.wiPath - resolved work-items path (central or per-project)
+ * @param {string} [opts.project] - project name (set on the WI); falls back
+ *   to session.spec.project
+ * @returns {string} the queued SETUP WI id
+ * @throws Error when wiPath is missing, the session is unknown, or the
+ *   session is no longer `pending`
+ */
+function queueSetup(sessionId, { wiPath, project } = {}) {
+  if (!_isNonEmptyString(wiPath)) throw new Error('qa-sessions: queueSetup requires wiPath');
+  const session = getSession(sessionId);
+  if (!session) throw new Error('qa-sessions: session not found: ' + sessionId);
+  // The transition doubles as the double-queue guard: a second call finds the
+  // session past `pending`, markSpawning throws, and the dashboard handler
+  // surfaces that as a 409 instead of silently queueing SETUP twice.
+  markSpawning(sessionId);
+  // NOTE(review): if queueing below throws, the session stays in `spawning`
+  // with no recorded SETUP WI — confirm whether callers handle that.
+  const effectiveProject = project || session.spec.project || null;
+  const setupWi = buildSetupWorkItem(session, { project: effectiveProject });
+  const queuedId = _queueWorkItem(setupWi, wiPath);
+  setSessionWorkItem(sessionId, SESSION_PHASE.SETUP, queuedId);
+  return queuedId;
+}
+/**
+ * Lifecycle hook: SETUP WI completed. On success the managed-spawn was
+ * accepted AND its healthcheck passed (engine.js drives this gating before
+ * marking the dispatch successful), so we advance to drafting and queue the
+ * DRAFT WI. On failure we record the failureClass and mark the session failed.
+ *
+ * @param {string} sessionId
+ * @param {object} opts
+ * @param {boolean} opts.success
+ * @param {string}  [opts.wiPath] - required when success=true
+ * @param {string}  [opts.project]
+ * @param {string}  [opts.failureClass]
+ * @param {string}  [opts.reason]
+ * @returns {string|null} the queued DRAFT WI id on success, null on failure
+ */
+function handleSetupComplete(sessionId, opts = {}) {
+  const session = getSession(sessionId);
+  if (!session) throw new Error('qa-sessions: session not found: ' + sessionId);
+  if (opts.success) {
+    if (!_isNonEmptyString(opts.wiPath)) {
+      throw new Error('qa-sessions: handleSetupComplete success requires wiPath');
+    }
+    markDrafting(sessionId, { managedSpawnHealth: 'healthy' });
+    // Re-read to pick up the state change for the DRAFT WI builder.
+    const updated = getSession(sessionId);
+    const wi = buildDraftWorkItem(updated, { project: opts.project || updated.spec.project || null });
+    _queueWorkItem(wi, opts.wiPath);
+    setSessionWorkItem(sessionId, SESSION_PHASE.DRAFT, wi.id);
+    return wi.id;
+  }
+  markFailed(sessionId, {
+    failureClass: opts.failureClass || 'qa-session-setup-failed',
+    error: opts.reason || null,
+    summary: opts.reason || 'SETUP phase failed',
+  });
+  return null;
+}
+/**
+ * Lifecycle hook: DRAFT WI completed. On success we either park at
+ * awaiting-approval (confirm mode — user must call /approve) or auto-chain
+ * EXECUTE (auto mode). The testFile path is captured for the EXECUTE prompt.
+ *
+ * @param {string} sessionId
+ * @param {object} opts
+ * @param {boolean} opts.success
+ * @param {string}  [opts.testFile] - relative path under qa-tests/<id>/, captured for EXECUTE
+ * @param {string}  [opts.wiPath]   - required when success=true and mode=auto
+ * @param {string}  [opts.project]
+ * @param {string}  [opts.qaRunId]  - required when success=true and mode=auto (caller creates the qa-runs record)
+ * @param {string}  [opts.reason]
+ * @returns {object} { nextState, queuedExecuteWi: string|null }
+ */
+function handleDraftComplete(sessionId, opts = {}) {
+  const session = getSession(sessionId);
+  if (!session) throw new Error('qa-sessions: session not found: ' + sessionId);
+  if (!opts.success) {
+    markFailed(sessionId, {
+      failureClass: 'qa-session-draft-failed',
+      error: opts.reason || null,
+      summary: opts.reason || 'DRAFT phase failed',
+    });
+    return { nextState: QA_SESSION_STATE.FAILED, queuedExecuteWi: null };
+  }
+  const testFilePatch = opts.testFile ? { testFile: opts.testFile } : {};
+  if (session.spec.mode === 'auto') {
+    if (!_isNonEmptyString(opts.wiPath)) {
+      throw new Error('qa-sessions: handleDraftComplete (auto) requires wiPath');
+    }
+    if (!_isNonEmptyString(opts.qaRunId)) {
+      throw new Error('qa-sessions: handleDraftComplete (auto) requires qaRunId');
+    }
+    markExecuting(sessionId, { ...testFilePatch, qaRunId: opts.qaRunId });
+    const updated = getSession(sessionId);
+    const wi = buildExecuteWorkItem(updated, {
+      qaRunId: opts.qaRunId,
+      project: opts.project || updated.spec.project || null,
+    });
+    _queueWorkItem(wi, opts.wiPath);
+    setSessionWorkItem(sessionId, SESSION_PHASE.EXECUTE, wi.id);
+    return { nextState: QA_SESSION_STATE.EXECUTING, queuedExecuteWi: wi.id };
+  }
+  // confirm mode (default)
+  markAwaitingApproval(sessionId, testFilePatch);
+  return { nextState: QA_SESSION_STATE.AWAITING_APPROVAL, queuedExecuteWi: null };
+}
+/**
+ * Lifecycle hook: EXECUTE WI completed. The qa-runs record's terminal status
+ * is the source of truth for done vs failed — the qaRunId hook at
+ * engine/lifecycle.js:4340 has already written it. We just read that record
+ * (when provided) and transition the session accordingly.
+ *
+ * @param {string} sessionId
+ * @param {object} opts
+ * @param {boolean} opts.success - dispatch-level success (whether the agent exited 0)
+ * @param {string}  [opts.qaRunStatus] - 'passed' | 'failed' | 'errored'  (from qa-runs record)
+ * @param {string}  [opts.summary]
+ * @param {string}  [opts.reason]
+ */
+function handleExecuteComplete(sessionId, opts = {}) {
+  const session = getSession(sessionId);
+  if (!session) throw new Error('qa-sessions: session not found: ' + sessionId);
+  // qa-run terminal status (when known) trumps dispatch-level success — a
+  // passing assertion run with an exit-1 wrapper still reports a passed
+  // qa-run; we mark the session done. Conversely, a failed/errored qa-run
+  // overrides a green dispatch.
+  const qaStatus = opts.qaRunStatus;
+  let toState;
+  let patch = { summary: opts.summary || null };
+  if (qaStatus === 'passed') {
+    toState = QA_SESSION_STATE.DONE;
+  } else if (qaStatus === 'failed' || qaStatus === 'errored') {
+    toState = QA_SESSION_STATE.FAILED;
+    patch.failureClass = qaStatus === 'errored' ? 'qa-session-execute-errored' : 'qa-session-execute-failed';
+    patch.error = opts.reason || `qa-run terminal status: ${qaStatus}`;
+  } else if (opts.success) {
+    // No qa-run status reported but the dispatch was successful — assume done.
+    toState = QA_SESSION_STATE.DONE;
+  } else {
+    toState = QA_SESSION_STATE.FAILED;
+    patch.failureClass = 'qa-session-execute-failed';
+    patch.error = opts.reason || 'EXECUTE phase failed';
+  }
+  transitionSession(sessionId, toState, patch);
+  return toState;
+}
+// ── User-initiated actions (called by dashboard endpoints) ─────────────────
+/**
+ * POST /api/qa/sessions/<id>/approve — awaiting-approval → executing, queues
+ * the EXECUTE WI. Caller creates the qa-runs record and passes its id.
+ */
+function approveDraft(sessionId, { wiPath, qaRunId, project } = {}) {
+  if (!_isNonEmptyString(wiPath)) throw new Error('qa-sessions: approveDraft requires wiPath');
+  if (!_isNonEmptyString(qaRunId)) throw new Error('qa-sessions: approveDraft requires qaRunId');
+  const session = getSession(sessionId);
+  if (!session) throw new Error('qa-sessions: session not found: ' + sessionId);
+  if (session.state !== QA_SESSION_STATE.AWAITING_APPROVAL) {
+    throw new Error(`qa-sessions: approveDraft requires state awaiting-approval, got ${session.state}`);
+  }
+  markExecuting(sessionId, { qaRunId });
+  const updated = getSession(sessionId);
+  const wi = buildExecuteWorkItem(updated, {
+    qaRunId,
+    project: project || updated.spec.project || null,
+  });
+  _queueWorkItem(wi, wiPath);
+  setSessionWorkItem(sessionId, SESSION_PHASE.EXECUTE, wi.id);
+  return wi.id;
+}
+/**
+ * POST /api/qa/sessions/<id>/edit — awaiting-approval → drafting, re-queue the
+ * DRAFT WI with the user's natural-language feedback threaded into the prompt.
+ */
+function editDraft(sessionId, { wiPath, feedback, project } = {}) {
+  if (!_isNonEmptyString(wiPath)) throw new Error('qa-sessions: editDraft requires wiPath');
+  if (!_isNonEmptyString(feedback)) throw new Error('qa-sessions: editDraft requires feedback');
+  if (feedback.length > LIMITS.feedbackMax) {
+    throw new Error(`qa-sessions: editDraft feedback exceeds ${LIMITS.feedbackMax} chars`);
+  }
+  const session = getSession(sessionId);
+  if (!session) throw new Error('qa-sessions: session not found: ' + sessionId);
+  if (session.state !== QA_SESSION_STATE.AWAITING_APPROVAL) {
+    throw new Error(`qa-sessions: editDraft requires state awaiting-approval, got ${session.state}`);
+  }
+  markDrafting(sessionId);
+  const updated = getSession(sessionId);
+  const wi = buildDraftWorkItem(updated, {
+    project: project || updated.spec.project || null,
+    feedback,
+  });
+  _queueWorkItem(wi, wiPath);
+  setSessionWorkItem(sessionId, SESSION_PHASE.DRAFT, wi.id);
+  return wi.id;
+}
+/**
+ * POST /api/qa/sessions/<id>/cancel — any non-terminal state → killed. Caller
+ * is responsible for killing the managed-spawn (the dashboard endpoint does
+ * this via managed-spawn.killSpec before calling cancelSession).
+ */
+function cancelSession(sessionId, { reason } = {}) {
+  return markKilled(sessionId, {
+    summary: 'Session cancelled by user',
+    error: reason || null,
+  });
+}
+/**
+ * POST /api/qa/sessions/<id>/kill — same as cancel but explicitly indicates
+ * the spawn should be killed too. Caller does the kill outside this module.
+ */
+function killSession(sessionId, { reason } = {}) {
+  return markKilled(sessionId, {
+    summary: 'Session killed by user',
+    error: reason || null,
+  });
+}
+/**
+ * POST /api/qa/sessions/<id>/dismiss — mark done without running. Valid from
+ * any non-terminal pre-execute state. Caller leaves spawn alive.
+ */
+function dismissSession(sessionId, { summary } = {}) {
+  const session = getSession(sessionId);
+  if (!session) throw new Error('qa-sessions: session not found: ' + sessionId);
+  if (TERMINAL_STATES.has(session.state)) {
+    throw new Error(`qa-sessions: dismissSession requires non-terminal state, got ${session.state}`);
+  }
+  return transitionSession(sessionId, QA_SESSION_STATE.DONE, {
+    summary: summary || 'Session dismissed by user',
+  });
+}
+// ── Status summary (cheap, for /api/status fast-state slice) ───────────────
+/**
+ * Cheap summary for the dashboard /api/status fast-state slice. Mirrors
+ * qa-runs.js summarizeRunsForStatus — no sorting, no extra parses. Used by
+ * the sidebar activity-dot to detect when a session is created or transitions
+ * state without paying for a full read of the session list.
+ *
+ * @returns {{ total: number, sig: string }}
+ */
+function summarizeSessionsForStatus() {
+  const sessions = shared.safeJsonArr(qaSessionsPath());
+  if (!Array.isArray(sessions) || sessions.length === 0) return { total: 0, sig: '' };
+  let sig = '';
+  for (const s of sessions) {
+    if (!s) continue;
+    sig += (s.id || '') + ':' + (s.state || '') + ',';
+  }
+  return { total: sessions.length, sig };
+}
+module.exports = {
+  // Constants — state and phase enums, the transition table, and input limits
+  QA_SESSION_STATE,
+  TERMINAL_STATES,
+  SESSION_PHASE,
+  ALLOWED_TRANSITIONS,
+  VALID_TARGET_KINDS,
+  VALID_MODES,
+  LIMITS,
+  QA_SESSIONS_MAX_RECORDS,
+  // Paths — helpers returning where session data lives on disk
+  qaSessionsPath,
+  qaTestsDir,
+  qaTestsDirForSession,
+  // Validation
+  validateSpec,
+  validateTransition,
+  isValidState,
+  // CRUD — create/read/list session records plus single-field setters
+  createSession,
+  getSession,
+  listSessions,
+  setSessionWorkItem,
+  setSessionQaRunId,
+  // Transitions — the generic transitionSession and the per-state mark* helpers
+  transitionSession,
+  markSpawning,
+  markDrafting,
+  markAwaitingApproval,
+  markExecuting,
+  markDone,
+  markFailed,
+  markKilled,
+  // Work-item builders (pure)
+  buildSetupWorkItem,
+  buildDraftWorkItem,
+  buildExecuteWorkItem,
+  // Chain helpers (impure — call dispatch + work-items); one per phase completion
+  queueSetup,
+  handleSetupComplete,
+  handleDraftComplete,
+  handleExecuteComplete,
+  // User actions — called by the dashboard's /api/qa/sessions/<id>/* endpoints
+  approveDraft,
+  editDraft,
+  cancelSession,
+  killSession,
+  dismissSession,
+  // Status — cheap summary for the /api/status fast-state slice
+  summarizeSessionsForStatus,
+  // Internals (exposed for tests)
+  _isSafeSessionId,
+};