npm - sequant - Versions diffs - 1.20.3 → 2.0.0 - Mend

sequant 1.20.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (137)

package/.claude-plugin/marketplace.json +2 -4
package/.claude-plugin/plugin.json +1 -1
package/README.md +29 -9
package/dist/bin/cli.js +25 -2
package/dist/src/commands/doctor.js +42 -9
package/dist/src/commands/init.d.ts +1 -0
package/dist/src/commands/init.js +52 -0
package/dist/src/commands/logs.d.ts +1 -0
package/dist/src/commands/logs.js +18 -2
package/dist/src/commands/run.d.ts +7 -0
package/dist/src/commands/run.js +235 -68
package/dist/src/commands/serve.d.ts +13 -0
package/dist/src/commands/serve.js +131 -0
package/dist/src/commands/stats.d.ts +1 -0
package/dist/src/commands/stats.js +185 -26
package/dist/src/commands/status.d.ts +2 -0
package/dist/src/commands/status.js +99 -50
package/dist/src/index.d.ts +2 -2
package/dist/src/index.js +4 -1
package/dist/src/lib/ac-parser.d.ts +2 -0
package/dist/src/lib/ac-parser.js +12 -2
package/dist/src/lib/assess-comment-parser.d.ts +137 -0
package/dist/src/lib/assess-comment-parser.js +344 -0
package/dist/src/lib/ci/config.d.ts +22 -0
package/dist/src/lib/ci/config.js +134 -0
package/dist/src/lib/ci/index.d.ts +12 -0
package/dist/src/lib/ci/index.js +10 -0
package/dist/src/lib/ci/inputs.d.ts +29 -0
package/dist/src/lib/ci/inputs.js +103 -0
package/dist/src/lib/ci/labels.d.ts +34 -0
package/dist/src/lib/ci/labels.js +101 -0
package/dist/src/lib/ci/outputs.d.ts +25 -0
package/dist/src/lib/ci/outputs.js +84 -0
package/dist/src/lib/ci/triggers.d.ts +9 -0
package/dist/src/lib/ci/triggers.js +86 -0
package/dist/src/lib/ci/types.d.ts +131 -0
package/dist/src/lib/ci/types.js +47 -0
package/dist/src/lib/mcp-config.d.ts +54 -0
package/dist/src/lib/mcp-config.js +172 -0
package/dist/src/lib/merge-check/index.js +6 -12
package/dist/src/lib/merge-check/types.d.ts +20 -7
package/dist/src/lib/merge-check/types.js +11 -0
package/dist/src/lib/phase-signal.d.ts +3 -3
package/dist/src/lib/phase-signal.js +5 -3
package/dist/src/lib/settings.d.ts +52 -0
package/dist/src/lib/settings.js +41 -0
package/dist/src/lib/shutdown.d.ts +16 -5
package/dist/src/lib/shutdown.js +32 -12
package/dist/src/lib/solve-comment-parser.d.ts +9 -102
package/dist/src/lib/solve-comment-parser.js +13 -248
package/dist/src/lib/stacks.d.ts +8 -0
package/dist/src/lib/stacks.js +34 -0
package/dist/src/lib/system.js +3 -7
package/dist/src/lib/test-tautology-detector.d.ts +10 -0
package/dist/src/lib/test-tautology-detector.js +43 -4
package/dist/src/lib/upstream/assessment.js +9 -59
package/dist/src/lib/upstream/issues.js +12 -75
package/dist/src/lib/version-check.d.ts +2 -2
package/dist/src/lib/version-check.js +6 -3
package/dist/src/lib/version.d.ts +4 -0
package/dist/src/lib/version.js +25 -0
package/dist/src/lib/workflow/batch-executor.d.ts +18 -86
package/dist/src/lib/workflow/batch-executor.js +232 -55
package/dist/src/lib/workflow/drivers/agent-driver.d.ts +56 -0
package/dist/src/lib/workflow/drivers/agent-driver.js +8 -0
package/dist/src/lib/workflow/drivers/aider.d.ts +18 -0
package/dist/src/lib/workflow/drivers/aider.js +160 -0
package/dist/src/lib/workflow/drivers/claude-code.d.ts +17 -0
package/dist/src/lib/workflow/drivers/claude-code.js +165 -0
package/dist/src/lib/workflow/drivers/index.d.ts +20 -0
package/dist/src/lib/workflow/drivers/index.js +27 -0
package/dist/src/lib/workflow/error-classifier.d.ts +16 -0
package/dist/src/lib/workflow/error-classifier.js +90 -0
package/dist/src/lib/workflow/log-writer.d.ts +6 -3
package/dist/src/lib/workflow/log-writer.js +57 -27
package/dist/src/lib/workflow/metrics-schema.d.ts +9 -9
package/dist/src/lib/workflow/phase-detection.d.ts +23 -0
package/dist/src/lib/workflow/phase-detection.js +45 -29
package/dist/src/lib/workflow/phase-executor.d.ts +42 -3
package/dist/src/lib/workflow/phase-executor.js +340 -220
package/dist/src/lib/workflow/phase-mapper.d.ts +1 -1
package/dist/src/lib/workflow/phase-mapper.js +7 -7
package/dist/src/lib/workflow/platforms/github.d.ts +157 -0
package/dist/src/lib/workflow/platforms/github.js +466 -0
package/dist/src/lib/workflow/platforms/index.d.ts +17 -0
package/dist/src/lib/workflow/platforms/index.js +25 -0
package/dist/src/lib/workflow/platforms/platform-provider.d.ts +67 -0
package/dist/src/lib/workflow/platforms/platform-provider.js +8 -0
package/dist/src/lib/workflow/pr-status.d.ts +2 -4
package/dist/src/lib/workflow/pr-status.js +3 -16
package/dist/src/lib/workflow/qa-cache.d.ts +58 -0
package/dist/src/lib/workflow/qa-cache.js +88 -0
package/dist/src/lib/workflow/reconcile.d.ts +69 -0
package/dist/src/lib/workflow/reconcile.js +290 -0
package/dist/src/lib/workflow/ring-buffer.d.ts +17 -0
package/dist/src/lib/workflow/ring-buffer.js +37 -0
package/dist/src/lib/workflow/run-log-schema.d.ts +115 -24
package/dist/src/lib/workflow/run-log-schema.js +47 -12
package/dist/src/lib/workflow/run-reflect.js +1 -1
package/dist/src/lib/workflow/state-cleanup.js +21 -0
package/dist/src/lib/workflow/state-manager.d.ts +34 -3
package/dist/src/lib/workflow/state-manager.js +278 -126
package/dist/src/lib/workflow/state-schema.d.ts +34 -30
package/dist/src/lib/workflow/state-schema.js +35 -25
package/dist/src/lib/workflow/state-utils.d.ts +3 -1
package/dist/src/lib/workflow/state-utils.js +1 -0
package/dist/src/lib/workflow/types.d.ts +208 -6
package/dist/src/lib/workflow/types.js +20 -1
package/dist/src/lib/workflow/worktree-discovery.d.ts +1 -1
package/dist/src/lib/workflow/worktree-discovery.js +6 -14
package/dist/src/lib/workflow/worktree-manager.js +33 -51
package/dist/src/mcp/index.d.ts +4 -0
package/dist/src/mcp/index.js +4 -0
package/dist/src/mcp/resources.d.ts +7 -0
package/dist/src/mcp/resources.js +111 -0
package/dist/src/mcp/run-registry.d.ts +34 -0
package/dist/src/mcp/run-registry.js +42 -0
package/dist/src/mcp/server.d.ts +12 -0
package/dist/src/mcp/server.js +50 -0
package/dist/src/mcp/tools/logs.d.ts +7 -0
package/dist/src/mcp/tools/logs.js +149 -0
package/dist/src/mcp/tools/run.d.ts +121 -0
package/dist/src/mcp/tools/run.js +591 -0
package/dist/src/mcp/tools/status.d.ts +7 -0
package/dist/src/mcp/tools/status.js +127 -0
package/package.json +10 -1
package/templates/hooks/post-tool.sh +19 -8
package/templates/hooks/pre-tool.sh +36 -49
package/templates/mcp.json +6 -0
package/templates/skills/assess/SKILL.md +354 -352
package/templates/skills/exec/SKILL.md +64 -1
package/templates/skills/fullsolve/SKILL.md +35 -4
package/templates/skills/qa/SKILL.md +486 -9
package/templates/skills/qa/scripts/quality-checks.sh +1 -1
package/templates/skills/setup/SKILL.md +386 -0
package/templates/skills/solve/SKILL.md +38 -664
package/templates/skills/spec/SKILL.md +90 -31

package/dist/src/lib/workflow/phase-executor.js CHANGED Viewed

@@ -1,16 +1,19 @@
 /**
  * Phase execution engine for workflow orchestration.
  *
- * Handles executing individual phases via the Claude Agent SDK,
+ * Handles executing individual phases via an AgentDriver interface,
  * including cold-start retry logic and MCP fallback strategies.
+ *
+ * The SDK import has been moved to ClaudeCodeDriver — this module
+ * is agent-agnostic.
  */
 import chalk from "chalk";
-import { query } from "@anthropic-ai/claude-agent-sdk";
-import { getMcpServersConfig } from "../system.js";
+import { execSync } from "child_process";
 import { readAgentsMd } from "../agents-md.js";
+import { getDriver } from "./drivers/index.js";
 /**
- * Natural language prompts for each phase
- * These prompts will invoke the corresponding skills via natural language
+ * Natural language prompts for each phase.
+ * Claude Code invokes the corresponding skills via natural language.
  */
 const PHASE_PROMPTS = {
     spec: "Review GitHub issue #{issue} and create an implementation plan with verification criteria. Run the /spec {issue} workflow.",
@@ -18,8 +21,51 @@ const PHASE_PROMPTS = {
     testgen: "Generate test stubs for GitHub issue #{issue} based on the specification. Run the /testgen {issue} workflow.",
     exec: "Implement the feature for GitHub issue #{issue} following the spec. Run the /exec {issue} workflow.",
     test: "Execute structured browser-based testing for GitHub issue #{issue}. Run the /test {issue} workflow.",
+    verify: "Verify the implementation for GitHub issue #{issue} by running commands and capturing output. Run the /verify {issue} workflow.",
     qa: "Review the implementation for GitHub issue #{issue} against acceptance criteria. Run the /qa {issue} workflow.",
     loop: "Parse test/QA findings for GitHub issue #{issue} and iterate until quality gates pass. Run the /loop {issue} workflow.",
+    merger: "Integrate and merge completed worktrees for GitHub issue #{issue}. Run the /merger {issue} workflow.",
+};
+/**
+ * Self-contained prompts for non-Claude agents (Aider, Codex, etc.).
+ * These agents don't have a skill system, so prompts must include
+ * full instructions rather than skill invocations.
+ */
+const AIDER_PHASE_PROMPTS = {
+    spec: `Read GitHub issue #{issue} using 'gh issue view #{issue}'.
+Create a spec comment on the issue with:
+1. Implementation plan
+2. Acceptance criteria as a checklist
+3. Risk assessment
+Post the comment using 'gh issue comment #{issue} --body "<comment>"'.`,
+    "security-review": `Perform a security review for GitHub issue #{issue}.
+Read the issue with 'gh issue view #{issue}'.
+Check for auth, permissions, injection, and sensitive data issues.
+Post findings as a comment on the issue.`,
+    testgen: `Generate test stubs for GitHub issue #{issue}.
+Read the spec comments on the issue with 'gh issue view #{issue} --comments'.
+Create test files with describe/it blocks covering the acceptance criteria.
+Use the project's existing test framework.`,
+    exec: `Implement the feature described in GitHub issue #{issue}.
+Read the issue and any spec comments with 'gh issue view #{issue} --comments'.
+Follow the implementation plan from the spec.
+Write tests for new functionality.
+Ensure the build passes with 'npm test' and 'npm run build'.`,
+    test: `Test the implementation for GitHub issue #{issue}.
+Run 'npm test' and verify all tests pass.
+Check for edge cases and error handling.`,
+    verify: `Verify the implementation for GitHub issue #{issue}.
+Run relevant commands and capture their output for review.`,
+    qa: `Review the changes for GitHub issue #{issue}.
+Run 'npm test' and 'npm run build' to verify everything works.
+Check each acceptance criterion from the issue comments.
+Output a verdict: READY_FOR_MERGE, AC_MET_BUT_NOT_A_PLUS, or AC_NOT_MET
+with format "### Verdict: <VERDICT>" followed by an explanation.`,
+    loop: `Review test and QA findings for GitHub issue #{issue}.
+Fix any issues identified in the QA feedback.
+Re-run 'npm test' and 'npm run build' until all quality gates pass.`,
+    merger: `Integrate and merge completed worktrees for GitHub issue #{issue}.
+Ensure all branches are up to date and merge cleanly.`,
 };
 /**
  * Phases that require worktree isolation.
@@ -44,6 +90,16 @@ const ISOLATED_PHASES = [
  */
 const COLD_START_THRESHOLD_SECONDS = 60;
 const COLD_START_MAX_RETRIES = 2;
+/**
+ * Spec-specific retry configuration.
+ * The spec phase fails more often (~8.6%) than other phases due to
+ * transient GitHub API issues and rate limits. One extra retry with backoff
+ * recovers most of these without user intervention.
+ */
+/** @internal Exported for testing only */
+export const SPEC_RETRY_BACKOFF_MS = 5000;
+/** @internal Exported for testing only */
+export const SPEC_EXTRA_RETRIES = 1;
 export function parseQaVerdict(output) {
     if (!output)
         return null;
@@ -60,6 +116,95 @@ export function parseQaVerdict(output) {
     const verdict = verdictMatch[1].toUpperCase().replace(/-/g, "_");
     return verdict;
 }
+/**
+ * Parse condensed QA summary from QA phase output (#434).
+ *
+ * Handles multiple AC table formats produced by the QA skill:
+ * - 5-column: | AC-N | source | desc | STATUS | notes |
+ * - 4-column: | AC-N | desc | STATUS | notes |
+ * - 3-column: | AC-N | desc | STATUS |
+ *
+ * Status cells may contain emoji prefixes (✅ MET), shorthand
+ * (PARTIAL), or trailing text (MET — explanation).
+ *
+ * @internal Exported for testing only
+ */
+export function parseQaSummary(output) {
+    if (!output)
+        return null;
+    // Anchored pattern: cell content starts with optional emoji, then status keyword
+    // Uses alternation (not character class) to avoid ESLint no-misleading-character-class
+    const STATUS_CELL = /^(?:\u2705|\u274C|\u26A0\uFE0F|\u2B50|\u2139\uFE0F|\u2753|\u2757)?\s*(MET|NOT_MET|PARTIALLY_MET|PARTIAL|PENDING|N\/A)\b/i;
+    const lines = output.split("\n");
+    const acRows = lines.filter((line) => /^\s*\|\s*\*?\*?AC-\d+/.test(line));
+    if (acRows.length === 0)
+        return null;
+    let acMet = 0;
+    let acTotal = 0;
+    for (const row of acRows) {
+        const cells = row
+            .split("|")
+            .map((c) => c.trim())
+            .filter(Boolean);
+        // Scan cells right-to-left to find the status cell
+        let found = false;
+        for (let i = cells.length - 1; i >= 1; i--) {
+            const match = cells[i].match(STATUS_CELL);
+            if (match) {
+                const status = match[1].toUpperCase();
+                acTotal++;
+                if (status === "MET")
+                    acMet++;
+                found = true;
+                break;
+            }
+        }
+        // Row with AC-N but no parseable status is skipped
+        if (!found)
+            continue;
+    }
+    if (acTotal === 0)
+        return null;
+    const gaps = parseListSection(output, /\*\*(?:Issues|Gaps)/);
+    const suggestions = parseListSection(output, /\*\*Suggestions/);
+    return { acMet, acTotal, gaps, suggestions };
+}
+/**
+ * Parse a markdown bullet list section, filtering out "None" variants.
+ */
+function parseListSection(output, headerPattern) {
+    const items = [];
+    const lines = output.split("\n");
+    let inSection = false;
+    for (const line of lines) {
+        if (headerPattern.test(line)) {
+            // Header line found — start collecting bullets from the lines that follow
+            inSection = true;
+            continue;
+        }
+        if (inSection) {
+            // Section ends at next markdown header or bold label
+            if (/^#{1,4}\s/.test(line) || /^\*\*[^*]+\*\*:/.test(line)) {
+                break;
+            }
+            const bulletMatch = line.match(/^\s*[-*]\s+(.+)/);
+            if (bulletMatch) {
+                const trimmed = bulletMatch[1].trim();
+                // Filter "None", "None found", "None — text", etc.
+                if (trimmed && !/^None\b/i.test(trimmed)) {
+                    items.push(trimmed);
+                }
+            }
+            else if (line.trim() === "") {
+                continue;
+            }
+            else {
+                break;
+            }
+        }
+    }
+    return items;
+}
 /**
  * Format duration in human-readable format
  */
@@ -73,11 +218,15 @@ export function formatDuration(seconds) {
 }
 /**
  * Get the prompt for a phase with the issue number substituted.
+ * Selects self-contained prompts for non-Claude agents.
  * Includes AGENTS.md content as context so non-Claude agents
  * receive project conventions and workflow instructions.
+ *
+ * @internal Exported for testing only
  */
-async function getPhasePrompt(phase, issueNumber) {
-    const basePrompt = PHASE_PROMPTS[phase].replace(/\{issue\}/g, String(issueNumber));
+export async function getPhasePrompt(phase, issueNumber, agent) {
+    const prompts = agent && agent !== "claude-code" ? AIDER_PHASE_PROMPTS : PHASE_PROMPTS;
+    const basePrompt = prompts[phase].replace(/\{issue\}/g, String(issueNumber));
     // Include AGENTS.md content in the prompt context for non-Claude agent compatibility.
     // Claude reads CLAUDE.md natively, but other agents (Aider, Codex, Gemini CLI)
     // rely on AGENTS.md for project context.
@@ -88,22 +237,24 @@ async function getPhasePrompt(phase, issueNumber) {
     return basePrompt;
 }
 /**
- * Execute a single phase for an issue using Claude Agent SDK
+ * Execute a single phase for an issue using the configured AgentDriver.
  */
 async function executePhase(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner) {
     const startTime = Date.now();
+    const prompt = await getPhasePrompt(phase, issueNumber, config.agent);
     if (config.dryRun) {
-        // Dry run - just simulate
+        // Dry run - show the prompt that would be sent, then return
         if (config.verbose) {
             console.log(chalk.gray(`    Would execute: /${phase} ${issueNumber}`));
+            console.log(chalk.gray(`    Prompt: ${prompt}`));
         }
         return {
             phase,
             success: true,
             durationSeconds: 0,
+            output: prompt,
         };
     }
-    const prompt = await getPhasePrompt(phase, issueNumber);
     if (config.verbose) {
         console.log(chalk.gray(`    Prompt: ${prompt}`));
         if (worktreePath && ISOLATED_PHASES.includes(phase)) {
@@ -113,235 +264,170 @@ async function executePhase(issueNumber, phase, config, sessionId, worktreePath,
     // Determine working directory and environment
     const shouldUseWorktree = worktreePath && ISOLATED_PHASES.includes(phase);
     const cwd = shouldUseWorktree ? worktreePath : process.cwd();
-    // Track stderr for error diagnostics (declared outside try for catch access)
-    let capturedStderr = "";
-    try {
-        // Check if shutdown is in progress
-        if (shutdownManager?.shuttingDown) {
-            return {
-                phase,
-                success: false,
-                durationSeconds: 0,
-                error: "Shutdown in progress",
-            };
-        }
-        // Create abort controller for timeout
-        const abortController = new AbortController();
-        const timeoutId = setTimeout(() => {
-            abortController.abort();
-        }, config.phaseTimeout * 1000);
-        // Register abort controller with shutdown manager for graceful shutdown
-        if (shutdownManager) {
-            shutdownManager.setAbortController(abortController);
-        }
-        let resultSessionId;
-        let resultMessage;
-        let lastError;
-        let capturedOutput = "";
-        // Build environment with worktree isolation variables
-        const env = {
-            ...process.env,
-            CLAUDE_HOOKS_SMART_TESTS: config.noSmartTests ? "false" : "true",
-        };
-        // Set worktree isolation environment variables
-        if (shouldUseWorktree) {
-            env.SEQUANT_WORKTREE = worktreePath;
-            env.SEQUANT_ISSUE = String(issueNumber);
-        }
-        // Set orchestration context for skills to detect they're part of a workflow
-        // Skills can check these to skip redundant pre-flight checks
-        env.SEQUANT_ORCHESTRATOR = "sequant-run";
-        env.SEQUANT_PHASE = phase;
-        // Execute using Claude Agent SDK
-        // Safety: never resume a session when worktree isolation is active.
-        // Even if THIS phase doesn't use the worktree, a previous phase may have
-        // created the session there. Resuming from a different cwd crashes the SDK
-        // (exit code 1). ISOLATED_PHASES prevents this by design, but this guard
-        // catches edge cases (e.g. a new phase added without updating ISOLATED_PHASES).
-        const canResume = sessionId && !worktreePath;
-        // Get MCP servers config if enabled
-        // Reads from Claude Desktop config and passes to SDK for headless MCP support
-        const mcpServers = config.mcp ? getMcpServersConfig() : undefined;
-        // Track whether we're actively streaming verbose output
-        // Pausing spinner once per streaming session prevents truncation from rapid pause/resume cycles
-        // (Issue #283: ora's stop() clears the current line, which can truncate output when
-        // pause/resume is called for every chunk in rapid succession)
-        let verboseStreamingActive = false;
-        const queryInstance = query({
-            prompt,
-            options: {
-                abortController,
+    // Resolve file context for file-oriented drivers (e.g., Aider --file)
+    let files;
+    if (config.agent && config.agent !== "claude-code") {
+        try {
+            const output = execSync("git diff --name-only main...HEAD", {
                 cwd,
-                // Load project settings including skills
-                settingSources: ["project"],
-                // Use Claude Code's system prompt and tools
-                systemPrompt: { type: "preset", preset: "claude_code" },
-                tools: { type: "preset", preset: "claude_code" },
-                // Bypass permissions for headless execution
-                permissionMode: "bypassPermissions",
-                allowDangerouslySkipPermissions: true,
-                // Resume from previous session if provided (but not when switching directories)
-                ...(canResume ? { resume: sessionId } : {}),
-                // Configure smart tests and worktree isolation via environment
-                env,
-                // Pass MCP servers for headless mode (AC-2)
-                ...(mcpServers ? { mcpServers } : {}),
-                // Capture stderr for debugging (helps diagnose early exit failures)
-                stderr: (data) => {
-                    capturedStderr += data;
-                    // Write stderr in verbose mode
-                    if (config.verbose) {
-                        // Pause spinner once to avoid truncation (Issue #283)
-                        if (!verboseStreamingActive) {
-                            spinner?.pause();
-                            verboseStreamingActive = true;
-                        }
-                        process.stderr.write(chalk.red(data));
-                    }
-                },
-            },
-        });
-        // Stream and process messages
-        for await (const message of queryInstance) {
-            // Capture session ID from system init message
-            if (message.type === "system" && message.subtype === "init") {
-                resultSessionId = message.session_id;
+                encoding: "utf-8",
+                stdio: ["pipe", "pipe", "pipe"],
+            }).trim();
+            if (output) {
+                files = output.split("\n").filter(Boolean);
             }
-            // Capture output from assistant messages
-            if (message.type === "assistant") {
-                // Extract text content from the message
-                const content = message.message.content;
-                const textContent = content
-                    .filter((c) => c.type === "text" && c.text)
-                    .map((c) => c.text)
-                    .join("");
-                if (textContent) {
-                    capturedOutput += textContent;
-                    // Show streaming output in verbose mode
-                    if (config.verbose) {
-                        // Pause spinner once at start of streaming to avoid truncation
-                        // (Issue #283: repeated pause/resume causes ora to clear lines between chunks)
-                        if (!verboseStreamingActive) {
-                            spinner?.pause();
-                            verboseStreamingActive = true;
-                        }
-                        process.stdout.write(chalk.gray(textContent));
-                    }
-                }
-            }
-            // Capture the final result
-            if (message.type === "result") {
-                resultMessage = message;
-            }
-        }
-        // Resume spinner after streaming completes (if we paused it)
-        if (verboseStreamingActive) {
-            spinner?.resume();
-            verboseStreamingActive = false;
         }
-        clearTimeout(timeoutId);
-        // Clear abort controller from shutdown manager
-        if (shutdownManager) {
-            shutdownManager.clearAbortController();
+        catch {
+            // No changed files or git error — proceed without file context
         }
-        const durationSeconds = (Date.now() - startTime) / 1000;
-        // Check result status
-        if (resultMessage) {
-            if (resultMessage.subtype === "success") {
-                // For QA phase, check the verdict to determine actual success
-                // SDK "success" just means the query completed - we need to parse the verdict
-                if (phase === "qa" && capturedOutput) {
-                    const verdict = parseQaVerdict(capturedOutput);
-                    // Only READY_FOR_MERGE and NEEDS_VERIFICATION are considered passing
-                    // NEEDS_VERIFICATION is external verification, not a code quality issue
-                    if (verdict &&
-                        verdict !== "READY_FOR_MERGE" &&
-                        verdict !== "NEEDS_VERIFICATION") {
-                        return {
-                            phase,
-                            success: false,
-                            durationSeconds,
-                            error: `QA verdict: ${verdict}`,
-                            sessionId: resultSessionId,
-                            output: capturedOutput,
-                            verdict, // Include parsed verdict
-                        };
-                    }
-                    // Pass case - include verdict for logging
-                    return {
-                        phase,
-                        success: true,
-                        durationSeconds,
-                        sessionId: resultSessionId,
-                        output: capturedOutput,
-                        verdict: verdict ?? undefined, // Include if found
-                    };
+    }
+    // Check if shutdown is in progress
+    if (shutdownManager?.shuttingDown) {
+        return {
+            phase,
+            success: false,
+            durationSeconds: 0,
+            error: "Shutdown in progress",
+        };
+    }
+    // Create abort controller for timeout
+    const abortController = new AbortController();
+    const timeoutId = setTimeout(() => {
+        abortController.abort();
+    }, config.phaseTimeout * 1000);
+    // Register abort controller with shutdown manager for graceful shutdown
+    // Uses add/remove to support concurrent phase execution (#404)
+    if (shutdownManager) {
+        shutdownManager.addAbortController(abortController);
+    }
+    // Build environment with worktree isolation variables
+    const env = {
+        ...process.env,
+        CLAUDE_HOOKS_SMART_TESTS: config.noSmartTests ? "false" : "true",
+    };
+    // Set worktree isolation environment variables
+    if (shouldUseWorktree) {
+        env.SEQUANT_WORKTREE = worktreePath;
+        env.SEQUANT_ISSUE = String(issueNumber);
+    }
+    // Set orchestration context for skills to detect they're part of a workflow
+    // Skills can check these to skip redundant pre-flight checks
+    env.SEQUANT_ORCHESTRATOR = "sequant-run";
+    env.SEQUANT_PHASE = phase;
+    // Propagate issue type for skills to adapt behavior (e.g., lighter QA for docs)
+    if (config.issueType) {
+        env.SEQUANT_ISSUE_TYPE = config.issueType;
+    }
+    // Track whether we're actively streaming verbose output
+    // Pausing spinner once per streaming session prevents truncation from rapid pause/resume cycles
+    // (Issue #283: ora's stop() clears the current line, which can truncate output when
+    // pause/resume is called for every chunk in rapid succession)
+    let verboseStreamingActive = false;
+    // Safety: never resume a session when worktree isolation is active.
+    // Even if THIS phase doesn't use the worktree, a previous phase may have
+    // created the session there. Resuming from a different cwd crashes the SDK
+    // (exit code 1). ISOLATED_PHASES prevents this by design, but this guard
+    // catches edge cases (e.g. a new phase added without updating ISOLATED_PHASES).
+    const canResume = sessionId && !worktreePath;
+    // Build AgentExecutionConfig for the driver
+    const agentConfig = {
+        cwd,
+        env,
+        abortSignal: abortController.signal,
+        phaseTimeout: config.phaseTimeout,
+        verbose: config.verbose,
+        mcp: config.mcp,
+        sessionId: canResume ? sessionId : undefined,
+        files,
+        onOutput: config.verbose
+            ? (text) => {
+                if (!verboseStreamingActive) {
+                    spinner?.pause();
+                    verboseStreamingActive = true;
                 }
-                return {
-                    phase,
-                    success: true,
-                    durationSeconds,
-                    sessionId: resultSessionId,
-                    output: capturedOutput,
-                };
+                process.stdout.write(chalk.gray(text));
             }
-            else {
-                // Handle error subtypes
-                const errorSubtype = resultMessage.subtype;
-                if (errorSubtype === "error_max_turns") {
-                    lastError = "Max turns reached";
-                }
-                else if (errorSubtype === "error_during_execution") {
-                    lastError =
-                        resultMessage.errors?.join(", ") || "Error during execution";
-                }
-                else if (errorSubtype === "error_max_budget_usd") {
-                    lastError = "Budget limit exceeded";
-                }
-                else {
-                    lastError = `Error: ${errorSubtype}`;
+            : undefined,
+        onStderr: config.verbose
+            ? (data) => {
+                if (!verboseStreamingActive) {
+                    spinner?.pause();
+                    verboseStreamingActive = true;
                 }
+                process.stderr.write(chalk.red(data));
+            }
+            : undefined,
+    };
+    // Resolve driver from config or default
+    const driver = getDriver(config.agent, {
+        aiderSettings: config.aiderSettings,
+    });
+    const agentResult = await driver.executePhase(prompt, agentConfig);
+    // Resume spinner after execution completes (if we paused it)
+    if (verboseStreamingActive) {
+        spinner?.resume();
+    }
+    clearTimeout(timeoutId);
+    // Remove this specific abort controller from shutdown manager
+    if (shutdownManager) {
+        shutdownManager.removeAbortController(abortController);
+    }
+    const durationSeconds = (Date.now() - startTime) / 1000;
+    // Map AgentPhaseResult to PhaseResult
+    const tails = {
+        stderrTail: agentResult.stderrTail,
+        stdoutTail: agentResult.stdoutTail,
+        exitCode: agentResult.exitCode,
+    };
+    if (agentResult.success) {
+        // For QA phase, check the verdict to determine actual success
+        // Agent "success" just means the execution completed — we need to parse the verdict
+        if (phase === "qa" && agentResult.output) {
+            const verdict = parseQaVerdict(agentResult.output);
+            const summary = parseQaSummary(agentResult.output) ?? undefined;
+            if (verdict &&
+                verdict !== "READY_FOR_MERGE" &&
+                verdict !== "NEEDS_VERIFICATION") {
                 return {
                     phase,
                     success: false,
                     durationSeconds,
-                    error: lastError,
-                    sessionId: resultSessionId,
+                    error: `QA verdict: ${verdict}`,
+                    sessionId: agentResult.sessionId,
+                    output: agentResult.output,
+                    verdict,
+                    summary,
+                    ...tails,
                 };
             }
-        }
-        // No result message received
-        return {
-            phase,
-            success: false,
-            durationSeconds: (Date.now() - startTime) / 1000,
-            error: "No result received from Claude",
-            sessionId: resultSessionId,
-        };
-    }
-    catch (err) {
-        const durationSeconds = (Date.now() - startTime) / 1000;
-        const error = err instanceof Error ? err.message : String(err);
-        // Check if it was an abort (timeout)
-        if (error.includes("abort") || error.includes("AbortError")) {
             return {
                 phase,
-                success: false,
+                success: true,
                 durationSeconds,
-                error: `Timeout after ${config.phaseTimeout}s`,
+                sessionId: agentResult.sessionId,
+                output: agentResult.output,
+                verdict: verdict ?? undefined,
+                summary,
+                ...tails,
             };
         }
-        // Include stderr in error message if available (helps diagnose early exit failures)
-        const stderrSuffix = capturedStderr
-            ? `\nStderr: ${capturedStderr.slice(0, 500)}`
-            : "";
         return {
             phase,
-            success: false,
+            success: true,
             durationSeconds,
-            error: error + stderrSuffix,
+            sessionId: agentResult.sessionId,
+            output: agentResult.output,
+            ...tails,
         };
     }
+    return {
+        phase,
+        success: false,
+        durationSeconds,
+        error: agentResult.error,
+        sessionId: agentResult.sessionId,
+        ...tails,
+    };
 }
 /**
  * Execute a phase with automatic retry for cold-start failures and MCP fallback.
@@ -359,7 +445,9 @@ async function executePhase(issueNumber, phase, config, sessionId, worktreePath,
  */
 export async function executePhaseWithRetry(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner,
 /** @internal Injected for testing — defaults to module-level executePhase */
-executePhaseFn = executePhase) {
+executePhaseFn = executePhase,
+/** @internal Injected for testing — defaults to setTimeout-based delay */
+delayFn = (ms) => new Promise((resolve) => setTimeout(resolve, ms))) {
     // Skip retry logic if explicitly disabled
     if (config.retry === false) {
         return executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
@@ -369,8 +457,17 @@ executePhaseFn = executePhase) {
     for (let attempt = 0; attempt <= COLD_START_MAX_RETRIES; attempt++) {
         lastResult = await executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
         const duration = lastResult.durationSeconds ?? 0;
-        // Success or genuine failure (took long enough to be real work)
-        if (lastResult.success || duration >= COLD_START_THRESHOLD_SECONDS) {
+        // Success → return immediately
+        if (lastResult.success) {
+            return lastResult;
+        }
+        // Genuine failure (took long enough to be real work) → skip cold-start retries.
+        // For spec phase, break to allow Phase 3 (spec-specific retry) to run.
+        // For other phases, return immediately — no further retries.
+        if (duration >= COLD_START_THRESHOLD_SECONDS) {
+            if (phase === "spec") {
+                break;
+            }
             return lastResult;
         }
         // Cold-start failure detected — retry
@@ -396,7 +493,30 @@ executePhaseFn = executePhase) {
             console.log(chalk.green(`    ✓ Phase succeeded without MCP (MCP cold-start issue detected)`));
             return retryResult;
         }
-        // Both attempts failed - return original error for better diagnostics
+        // Update lastResult for Phase 3 (spec retry)
+        lastResult = retryResult;
+        // Non-spec phases: return original error after MCP fallback exhausted
+        if (phase !== "spec") {
+            return {
+                ...lastResult,
+                error: originalError,
+            };
+        }
+    }
+    // Phase 3: Spec-specific retry — spec has a higher transient failure rate
+    // than other phases (~8.6%), so one extra retry with backoff recovers most cases.
+    if (phase === "spec" && !lastResult.success) {
+        for (let i = 0; i < SPEC_EXTRA_RETRIES; i++) {
+            console.log(chalk.yellow(`\n    ⟳ Spec phase failed, retrying with ${SPEC_RETRY_BACKOFF_MS}ms backoff... (spec retry ${i + 1}/${SPEC_EXTRA_RETRIES})`));
+            await delayFn(SPEC_RETRY_BACKOFF_MS);
+            const specRetryResult = await executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
+            if (specRetryResult.success) {
+                console.log(chalk.green(`    ✓ Spec phase succeeded on retry`));
+                return specRetryResult;
+            }
+            lastResult = specRetryResult;
+        }
+        // All spec retries exhausted — return with original error for diagnostics
         return {
             ...lastResult,
             error: originalError,