sequant 1.20.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. package/.claude-plugin/marketplace.json +2 -4
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +29 -9
  4. package/dist/bin/cli.js +25 -2
  5. package/dist/src/commands/doctor.js +42 -9
  6. package/dist/src/commands/init.d.ts +1 -0
  7. package/dist/src/commands/init.js +52 -0
  8. package/dist/src/commands/logs.d.ts +1 -0
  9. package/dist/src/commands/logs.js +18 -2
  10. package/dist/src/commands/run.d.ts +7 -0
  11. package/dist/src/commands/run.js +235 -68
  12. package/dist/src/commands/serve.d.ts +13 -0
  13. package/dist/src/commands/serve.js +131 -0
  14. package/dist/src/commands/stats.d.ts +1 -0
  15. package/dist/src/commands/stats.js +185 -26
  16. package/dist/src/commands/status.d.ts +2 -0
  17. package/dist/src/commands/status.js +99 -50
  18. package/dist/src/index.d.ts +2 -2
  19. package/dist/src/index.js +4 -1
  20. package/dist/src/lib/ac-parser.d.ts +2 -0
  21. package/dist/src/lib/ac-parser.js +12 -2
  22. package/dist/src/lib/assess-comment-parser.d.ts +137 -0
  23. package/dist/src/lib/assess-comment-parser.js +344 -0
  24. package/dist/src/lib/ci/config.d.ts +22 -0
  25. package/dist/src/lib/ci/config.js +134 -0
  26. package/dist/src/lib/ci/index.d.ts +12 -0
  27. package/dist/src/lib/ci/index.js +10 -0
  28. package/dist/src/lib/ci/inputs.d.ts +29 -0
  29. package/dist/src/lib/ci/inputs.js +103 -0
  30. package/dist/src/lib/ci/labels.d.ts +34 -0
  31. package/dist/src/lib/ci/labels.js +101 -0
  32. package/dist/src/lib/ci/outputs.d.ts +25 -0
  33. package/dist/src/lib/ci/outputs.js +84 -0
  34. package/dist/src/lib/ci/triggers.d.ts +9 -0
  35. package/dist/src/lib/ci/triggers.js +86 -0
  36. package/dist/src/lib/ci/types.d.ts +131 -0
  37. package/dist/src/lib/ci/types.js +47 -0
  38. package/dist/src/lib/mcp-config.d.ts +54 -0
  39. package/dist/src/lib/mcp-config.js +172 -0
  40. package/dist/src/lib/merge-check/index.js +6 -12
  41. package/dist/src/lib/merge-check/types.d.ts +20 -7
  42. package/dist/src/lib/merge-check/types.js +11 -0
  43. package/dist/src/lib/phase-signal.d.ts +3 -3
  44. package/dist/src/lib/phase-signal.js +5 -3
  45. package/dist/src/lib/settings.d.ts +52 -0
  46. package/dist/src/lib/settings.js +41 -0
  47. package/dist/src/lib/shutdown.d.ts +16 -5
  48. package/dist/src/lib/shutdown.js +32 -12
  49. package/dist/src/lib/solve-comment-parser.d.ts +9 -102
  50. package/dist/src/lib/solve-comment-parser.js +13 -248
  51. package/dist/src/lib/stacks.d.ts +8 -0
  52. package/dist/src/lib/stacks.js +34 -0
  53. package/dist/src/lib/system.js +3 -7
  54. package/dist/src/lib/test-tautology-detector.d.ts +10 -0
  55. package/dist/src/lib/test-tautology-detector.js +43 -4
  56. package/dist/src/lib/upstream/assessment.js +9 -59
  57. package/dist/src/lib/upstream/issues.js +12 -75
  58. package/dist/src/lib/version-check.d.ts +2 -2
  59. package/dist/src/lib/version-check.js +6 -3
  60. package/dist/src/lib/version.d.ts +4 -0
  61. package/dist/src/lib/version.js +25 -0
  62. package/dist/src/lib/workflow/batch-executor.d.ts +18 -86
  63. package/dist/src/lib/workflow/batch-executor.js +232 -55
  64. package/dist/src/lib/workflow/drivers/agent-driver.d.ts +56 -0
  65. package/dist/src/lib/workflow/drivers/agent-driver.js +8 -0
  66. package/dist/src/lib/workflow/drivers/aider.d.ts +18 -0
  67. package/dist/src/lib/workflow/drivers/aider.js +160 -0
  68. package/dist/src/lib/workflow/drivers/claude-code.d.ts +17 -0
  69. package/dist/src/lib/workflow/drivers/claude-code.js +165 -0
  70. package/dist/src/lib/workflow/drivers/index.d.ts +20 -0
  71. package/dist/src/lib/workflow/drivers/index.js +27 -0
  72. package/dist/src/lib/workflow/error-classifier.d.ts +16 -0
  73. package/dist/src/lib/workflow/error-classifier.js +90 -0
  74. package/dist/src/lib/workflow/log-writer.d.ts +6 -3
  75. package/dist/src/lib/workflow/log-writer.js +57 -27
  76. package/dist/src/lib/workflow/metrics-schema.d.ts +9 -9
  77. package/dist/src/lib/workflow/phase-detection.d.ts +23 -0
  78. package/dist/src/lib/workflow/phase-detection.js +45 -29
  79. package/dist/src/lib/workflow/phase-executor.d.ts +42 -3
  80. package/dist/src/lib/workflow/phase-executor.js +340 -220
  81. package/dist/src/lib/workflow/phase-mapper.d.ts +1 -1
  82. package/dist/src/lib/workflow/phase-mapper.js +7 -7
  83. package/dist/src/lib/workflow/platforms/github.d.ts +157 -0
  84. package/dist/src/lib/workflow/platforms/github.js +466 -0
  85. package/dist/src/lib/workflow/platforms/index.d.ts +17 -0
  86. package/dist/src/lib/workflow/platforms/index.js +25 -0
  87. package/dist/src/lib/workflow/platforms/platform-provider.d.ts +67 -0
  88. package/dist/src/lib/workflow/platforms/platform-provider.js +8 -0
  89. package/dist/src/lib/workflow/pr-status.d.ts +2 -4
  90. package/dist/src/lib/workflow/pr-status.js +3 -16
  91. package/dist/src/lib/workflow/qa-cache.d.ts +58 -0
  92. package/dist/src/lib/workflow/qa-cache.js +88 -0
  93. package/dist/src/lib/workflow/reconcile.d.ts +69 -0
  94. package/dist/src/lib/workflow/reconcile.js +290 -0
  95. package/dist/src/lib/workflow/ring-buffer.d.ts +17 -0
  96. package/dist/src/lib/workflow/ring-buffer.js +37 -0
  97. package/dist/src/lib/workflow/run-log-schema.d.ts +115 -24
  98. package/dist/src/lib/workflow/run-log-schema.js +47 -12
  99. package/dist/src/lib/workflow/run-reflect.js +1 -1
  100. package/dist/src/lib/workflow/state-cleanup.js +21 -0
  101. package/dist/src/lib/workflow/state-manager.d.ts +34 -3
  102. package/dist/src/lib/workflow/state-manager.js +278 -126
  103. package/dist/src/lib/workflow/state-schema.d.ts +34 -30
  104. package/dist/src/lib/workflow/state-schema.js +35 -25
  105. package/dist/src/lib/workflow/state-utils.d.ts +3 -1
  106. package/dist/src/lib/workflow/state-utils.js +1 -0
  107. package/dist/src/lib/workflow/types.d.ts +208 -6
  108. package/dist/src/lib/workflow/types.js +20 -1
  109. package/dist/src/lib/workflow/worktree-discovery.d.ts +1 -1
  110. package/dist/src/lib/workflow/worktree-discovery.js +6 -14
  111. package/dist/src/lib/workflow/worktree-manager.js +33 -51
  112. package/dist/src/mcp/index.d.ts +4 -0
  113. package/dist/src/mcp/index.js +4 -0
  114. package/dist/src/mcp/resources.d.ts +7 -0
  115. package/dist/src/mcp/resources.js +111 -0
  116. package/dist/src/mcp/run-registry.d.ts +34 -0
  117. package/dist/src/mcp/run-registry.js +42 -0
  118. package/dist/src/mcp/server.d.ts +12 -0
  119. package/dist/src/mcp/server.js +50 -0
  120. package/dist/src/mcp/tools/logs.d.ts +7 -0
  121. package/dist/src/mcp/tools/logs.js +149 -0
  122. package/dist/src/mcp/tools/run.d.ts +121 -0
  123. package/dist/src/mcp/tools/run.js +591 -0
  124. package/dist/src/mcp/tools/status.d.ts +7 -0
  125. package/dist/src/mcp/tools/status.js +127 -0
  126. package/package.json +10 -1
  127. package/templates/hooks/post-tool.sh +19 -8
  128. package/templates/hooks/pre-tool.sh +36 -49
  129. package/templates/mcp.json +6 -0
  130. package/templates/skills/assess/SKILL.md +354 -352
  131. package/templates/skills/exec/SKILL.md +64 -1
  132. package/templates/skills/fullsolve/SKILL.md +35 -4
  133. package/templates/skills/qa/SKILL.md +486 -9
  134. package/templates/skills/qa/scripts/quality-checks.sh +1 -1
  135. package/templates/skills/setup/SKILL.md +386 -0
  136. package/templates/skills/solve/SKILL.md +38 -664
  137. package/templates/skills/spec/SKILL.md +90 -31
@@ -1,16 +1,19 @@
1
1
  /**
2
2
  * Phase execution engine for workflow orchestration.
3
3
  *
4
- * Handles executing individual phases via the Claude Agent SDK,
4
+ * Handles executing individual phases via an AgentDriver interface,
5
5
  * including cold-start retry logic and MCP fallback strategies.
6
+ *
7
+ * The SDK import has been moved to ClaudeCodeDriver — this module
8
+ * is agent-agnostic.
6
9
  */
7
10
  import chalk from "chalk";
8
- import { query } from "@anthropic-ai/claude-agent-sdk";
9
- import { getMcpServersConfig } from "../system.js";
11
+ import { execSync } from "child_process";
10
12
  import { readAgentsMd } from "../agents-md.js";
13
+ import { getDriver } from "./drivers/index.js";
11
14
  /**
12
- * Natural language prompts for each phase
13
- * These prompts will invoke the corresponding skills via natural language
15
+ * Natural language prompts for each phase.
16
+ * Claude Code invokes the corresponding skills via natural language.
14
17
  */
15
18
  const PHASE_PROMPTS = {
16
19
  spec: "Review GitHub issue #{issue} and create an implementation plan with verification criteria. Run the /spec {issue} workflow.",
@@ -18,8 +21,51 @@ const PHASE_PROMPTS = {
18
21
  testgen: "Generate test stubs for GitHub issue #{issue} based on the specification. Run the /testgen {issue} workflow.",
19
22
  exec: "Implement the feature for GitHub issue #{issue} following the spec. Run the /exec {issue} workflow.",
20
23
  test: "Execute structured browser-based testing for GitHub issue #{issue}. Run the /test {issue} workflow.",
24
+ verify: "Verify the implementation for GitHub issue #{issue} by running commands and capturing output. Run the /verify {issue} workflow.",
21
25
  qa: "Review the implementation for GitHub issue #{issue} against acceptance criteria. Run the /qa {issue} workflow.",
22
26
  loop: "Parse test/QA findings for GitHub issue #{issue} and iterate until quality gates pass. Run the /loop {issue} workflow.",
27
+ merger: "Integrate and merge completed worktrees for GitHub issue #{issue}. Run the /merger {issue} workflow.",
28
+ };
29
+ /**
30
+ * Self-contained prompts for non-Claude agents (Aider, Codex, etc.).
31
+ * These agents don't have a skill system, so prompts must include
32
+ * full instructions rather than skill invocations.
33
+ */
34
+ const AIDER_PHASE_PROMPTS = {
35
+ spec: `Read GitHub issue #{issue} using 'gh issue view #{issue}'.
36
+ Create a spec comment on the issue with:
37
+ 1. Implementation plan
38
+ 2. Acceptance criteria as a checklist
39
+ 3. Risk assessment
40
+ Post the comment using 'gh issue comment #{issue} --body "<comment>"'.`,
41
+ "security-review": `Perform a security review for GitHub issue #{issue}.
42
+ Read the issue with 'gh issue view #{issue}'.
43
+ Check for auth, permissions, injection, and sensitive data issues.
44
+ Post findings as a comment on the issue.`,
45
+ testgen: `Generate test stubs for GitHub issue #{issue}.
46
+ Read the spec comments on the issue with 'gh issue view #{issue} --comments'.
47
+ Create test files with describe/it blocks covering the acceptance criteria.
48
+ Use the project's existing test framework.`,
49
+ exec: `Implement the feature described in GitHub issue #{issue}.
50
+ Read the issue and any spec comments with 'gh issue view #{issue} --comments'.
51
+ Follow the implementation plan from the spec.
52
+ Write tests for new functionality.
53
+ Ensure the build passes with 'npm test' and 'npm run build'.`,
54
+ test: `Test the implementation for GitHub issue #{issue}.
55
+ Run 'npm test' and verify all tests pass.
56
+ Check for edge cases and error handling.`,
57
+ verify: `Verify the implementation for GitHub issue #{issue}.
58
+ Run relevant commands and capture their output for review.`,
59
+ qa: `Review the changes for GitHub issue #{issue}.
60
+ Run 'npm test' and 'npm run build' to verify everything works.
61
+ Check each acceptance criterion from the issue comments.
62
+ Output a verdict: READY_FOR_MERGE, AC_MET_BUT_NOT_A_PLUS, or AC_NOT_MET
63
+ with format "### Verdict: <VERDICT>" followed by an explanation.`,
64
+ loop: `Review test and QA findings for GitHub issue #{issue}.
65
+ Fix any issues identified in the QA feedback.
66
+ Re-run 'npm test' and 'npm run build' until all quality gates pass.`,
67
+ merger: `Integrate and merge completed worktrees for GitHub issue #{issue}.
68
+ Ensure all branches are up to date and merge cleanly.`,
23
69
  };
24
70
  /**
25
71
  * Phases that require worktree isolation.
@@ -44,6 +90,16 @@ const ISOLATED_PHASES = [
44
90
  */
45
91
  const COLD_START_THRESHOLD_SECONDS = 60;
46
92
  const COLD_START_MAX_RETRIES = 2;
93
+ /**
94
+ * Spec-specific retry configuration.
95
+ * Spec failures have a higher failure rate (~8.6%) than other phases due to
96
+ * transient GitHub API issues and rate limits. One extra retry with backoff
97
+ * recovers most of these without user intervention.
98
+ */
99
+ /** @internal Exported for testing only */
100
+ export const SPEC_RETRY_BACKOFF_MS = 5000;
101
+ /** @internal Exported for testing only */
102
+ export const SPEC_EXTRA_RETRIES = 1;
47
103
  export function parseQaVerdict(output) {
48
104
  if (!output)
49
105
  return null;
@@ -60,6 +116,95 @@ export function parseQaVerdict(output) {
60
116
  const verdict = verdictMatch[1].toUpperCase().replace(/-/g, "_");
61
117
  return verdict;
62
118
  }
119
+ /**
120
+ * Parse condensed QA summary from QA phase output (#434).
121
+ *
122
+ * Handles multiple AC table formats produced by the QA skill:
123
+ * - 5-column: | AC-N | source | desc | STATUS | notes |
124
+ * - 4-column: | AC-N | desc | STATUS | notes |
125
+ * - 3-column: | AC-N | desc | STATUS |
126
+ *
127
+ * Status cells may contain emoji prefixes (✅ MET), shorthand
128
+ * (PARTIAL), or trailing text (MET — explanation).
129
+ *
130
+ * @internal Exported for testing only
131
+ */
132
+ export function parseQaSummary(output) {
133
+ if (!output)
134
+ return null;
135
+ // Anchored pattern: cell content starts with optional emoji, then status keyword
136
+ // Uses alternation (not character class) to avoid ESLint no-misleading-character-class
137
+ const STATUS_CELL = /^(?:\u2705|\u274C|\u26A0\uFE0F|\u2B50|\u2139\uFE0F|\u2753|\u2757)?\s*(MET|NOT_MET|PARTIALLY_MET|PARTIAL|PENDING|N\/A)\b/i;
138
+ const lines = output.split("\n");
139
+ const acRows = lines.filter((line) => /^\s*\|\s*\*?\*?AC-\d+/.test(line));
140
+ if (acRows.length === 0)
141
+ return null;
142
+ let acMet = 0;
143
+ let acTotal = 0;
144
+ for (const row of acRows) {
145
+ const cells = row
146
+ .split("|")
147
+ .map((c) => c.trim())
148
+ .filter(Boolean);
149
+ // Scan cells right-to-left to find the status cell
150
+ let found = false;
151
+ for (let i = cells.length - 1; i >= 1; i--) {
152
+ const match = cells[i].match(STATUS_CELL);
153
+ if (match) {
154
+ const status = match[1].toUpperCase();
155
+ acTotal++;
156
+ if (status === "MET")
157
+ acMet++;
158
+ found = true;
159
+ break;
160
+ }
161
+ }
162
+ // Row with AC-N but no parseable status is skipped
163
+ if (!found)
164
+ continue;
165
+ }
166
+ if (acTotal === 0)
167
+ return null;
168
+ const gaps = parseListSection(output, /\*\*(?:Issues|Gaps)/);
169
+ const suggestions = parseListSection(output, /\*\*Suggestions/);
170
+ return { acMet, acTotal, gaps, suggestions };
171
+ }
172
+ /**
173
+ * Parse a markdown bullet list section, filtering out "None" variants.
174
+ */
175
+ function parseListSection(output, headerPattern) {
176
+ const items = [];
177
+ const lines = output.split("\n");
178
+ let inSection = false;
179
+ for (const line of lines) {
180
+ if (headerPattern.test(line)) {
181
+ // Header matched: collect bullets from the following lines (the header line itself is skipped)
182
+ inSection = true;
183
+ continue;
184
+ }
185
+ if (inSection) {
186
+ // Section ends at next markdown header or bold label
187
+ if (/^#{1,4}\s/.test(line) || /^\*\*[^*]+\*\*:/.test(line)) {
188
+ break;
189
+ }
190
+ const bulletMatch = line.match(/^\s*[-*]\s+(.+)/);
191
+ if (bulletMatch) {
192
+ const trimmed = bulletMatch[1].trim();
193
+ // Filter "None", "None found", "None — text", etc.
194
+ if (trimmed && !/^None\b/i.test(trimmed)) {
195
+ items.push(trimmed);
196
+ }
197
+ }
198
+ else if (line.trim() === "") {
199
+ continue;
200
+ }
201
+ else {
202
+ break;
203
+ }
204
+ }
205
+ }
206
+ return items;
207
+ }
63
208
  /**
64
209
  * Format duration in human-readable format
65
210
  */
@@ -73,11 +218,15 @@ export function formatDuration(seconds) {
73
218
  }
74
219
  /**
75
220
  * Get the prompt for a phase with the issue number substituted.
221
+ * Selects self-contained prompts for non-Claude agents.
76
222
  * Includes AGENTS.md content as context so non-Claude agents
77
223
  * receive project conventions and workflow instructions.
224
+ *
225
+ * @internal Exported for testing only
78
226
  */
79
- async function getPhasePrompt(phase, issueNumber) {
80
- const basePrompt = PHASE_PROMPTS[phase].replace(/\{issue\}/g, String(issueNumber));
227
+ export async function getPhasePrompt(phase, issueNumber, agent) {
228
+ const prompts = agent && agent !== "claude-code" ? AIDER_PHASE_PROMPTS : PHASE_PROMPTS;
229
+ const basePrompt = prompts[phase].replace(/\{issue\}/g, String(issueNumber));
81
230
  // Include AGENTS.md content in the prompt context for non-Claude agent compatibility.
82
231
  // Claude reads CLAUDE.md natively, but other agents (Aider, Codex, Gemini CLI)
83
232
  // rely on AGENTS.md for project context.
@@ -88,22 +237,24 @@ async function getPhasePrompt(phase, issueNumber) {
88
237
  return basePrompt;
89
238
  }
90
239
  /**
91
- * Execute a single phase for an issue using Claude Agent SDK
240
+ * Execute a single phase for an issue using the configured AgentDriver.
92
241
  */
93
242
  async function executePhase(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner) {
94
243
  const startTime = Date.now();
244
+ const prompt = await getPhasePrompt(phase, issueNumber, config.agent);
95
245
  if (config.dryRun) {
96
- // Dry run - just simulate
246
+ // Dry run - show the prompt that would be sent, then return
97
247
  if (config.verbose) {
98
248
  console.log(chalk.gray(` Would execute: /${phase} ${issueNumber}`));
249
+ console.log(chalk.gray(` Prompt: ${prompt}`));
99
250
  }
100
251
  return {
101
252
  phase,
102
253
  success: true,
103
254
  durationSeconds: 0,
255
+ output: prompt,
104
256
  };
105
257
  }
106
- const prompt = await getPhasePrompt(phase, issueNumber);
107
258
  if (config.verbose) {
108
259
  console.log(chalk.gray(` Prompt: ${prompt}`));
109
260
  if (worktreePath && ISOLATED_PHASES.includes(phase)) {
@@ -113,235 +264,170 @@ async function executePhase(issueNumber, phase, config, sessionId, worktreePath,
113
264
  // Determine working directory and environment
114
265
  const shouldUseWorktree = worktreePath && ISOLATED_PHASES.includes(phase);
115
266
  const cwd = shouldUseWorktree ? worktreePath : process.cwd();
116
- // Track stderr for error diagnostics (declared outside try for catch access)
117
- let capturedStderr = "";
118
- try {
119
- // Check if shutdown is in progress
120
- if (shutdownManager?.shuttingDown) {
121
- return {
122
- phase,
123
- success: false,
124
- durationSeconds: 0,
125
- error: "Shutdown in progress",
126
- };
127
- }
128
- // Create abort controller for timeout
129
- const abortController = new AbortController();
130
- const timeoutId = setTimeout(() => {
131
- abortController.abort();
132
- }, config.phaseTimeout * 1000);
133
- // Register abort controller with shutdown manager for graceful shutdown
134
- if (shutdownManager) {
135
- shutdownManager.setAbortController(abortController);
136
- }
137
- let resultSessionId;
138
- let resultMessage;
139
- let lastError;
140
- let capturedOutput = "";
141
- // Build environment with worktree isolation variables
142
- const env = {
143
- ...process.env,
144
- CLAUDE_HOOKS_SMART_TESTS: config.noSmartTests ? "false" : "true",
145
- };
146
- // Set worktree isolation environment variables
147
- if (shouldUseWorktree) {
148
- env.SEQUANT_WORKTREE = worktreePath;
149
- env.SEQUANT_ISSUE = String(issueNumber);
150
- }
151
- // Set orchestration context for skills to detect they're part of a workflow
152
- // Skills can check these to skip redundant pre-flight checks
153
- env.SEQUANT_ORCHESTRATOR = "sequant-run";
154
- env.SEQUANT_PHASE = phase;
155
- // Execute using Claude Agent SDK
156
- // Safety: never resume a session when worktree isolation is active.
157
- // Even if THIS phase doesn't use the worktree, a previous phase may have
158
- // created the session there. Resuming from a different cwd crashes the SDK
159
- // (exit code 1). ISOLATED_PHASES prevents this by design, but this guard
160
- // catches edge cases (e.g. a new phase added without updating ISOLATED_PHASES).
161
- const canResume = sessionId && !worktreePath;
162
- // Get MCP servers config if enabled
163
- // Reads from Claude Desktop config and passes to SDK for headless MCP support
164
- const mcpServers = config.mcp ? getMcpServersConfig() : undefined;
165
- // Track whether we're actively streaming verbose output
166
- // Pausing spinner once per streaming session prevents truncation from rapid pause/resume cycles
167
- // (Issue #283: ora's stop() clears the current line, which can truncate output when
168
- // pause/resume is called for every chunk in rapid succession)
169
- let verboseStreamingActive = false;
170
- const queryInstance = query({
171
- prompt,
172
- options: {
173
- abortController,
267
+ // Resolve file context for file-oriented drivers (e.g., Aider --file)
268
+ let files;
269
+ if (config.agent && config.agent !== "claude-code") {
270
+ try {
271
+ const output = execSync("git diff --name-only main...HEAD", {
174
272
  cwd,
175
- // Load project settings including skills
176
- settingSources: ["project"],
177
- // Use Claude Code's system prompt and tools
178
- systemPrompt: { type: "preset", preset: "claude_code" },
179
- tools: { type: "preset", preset: "claude_code" },
180
- // Bypass permissions for headless execution
181
- permissionMode: "bypassPermissions",
182
- allowDangerouslySkipPermissions: true,
183
- // Resume from previous session if provided (but not when switching directories)
184
- ...(canResume ? { resume: sessionId } : {}),
185
- // Configure smart tests and worktree isolation via environment
186
- env,
187
- // Pass MCP servers for headless mode (AC-2)
188
- ...(mcpServers ? { mcpServers } : {}),
189
- // Capture stderr for debugging (helps diagnose early exit failures)
190
- stderr: (data) => {
191
- capturedStderr += data;
192
- // Write stderr in verbose mode
193
- if (config.verbose) {
194
- // Pause spinner once to avoid truncation (Issue #283)
195
- if (!verboseStreamingActive) {
196
- spinner?.pause();
197
- verboseStreamingActive = true;
198
- }
199
- process.stderr.write(chalk.red(data));
200
- }
201
- },
202
- },
203
- });
204
- // Stream and process messages
205
- for await (const message of queryInstance) {
206
- // Capture session ID from system init message
207
- if (message.type === "system" && message.subtype === "init") {
208
- resultSessionId = message.session_id;
273
+ encoding: "utf-8",
274
+ stdio: ["pipe", "pipe", "pipe"],
275
+ }).trim();
276
+ if (output) {
277
+ files = output.split("\n").filter(Boolean);
209
278
  }
210
- // Capture output from assistant messages
211
- if (message.type === "assistant") {
212
- // Extract text content from the message
213
- const content = message.message.content;
214
- const textContent = content
215
- .filter((c) => c.type === "text" && c.text)
216
- .map((c) => c.text)
217
- .join("");
218
- if (textContent) {
219
- capturedOutput += textContent;
220
- // Show streaming output in verbose mode
221
- if (config.verbose) {
222
- // Pause spinner once at start of streaming to avoid truncation
223
- // (Issue #283: repeated pause/resume causes ora to clear lines between chunks)
224
- if (!verboseStreamingActive) {
225
- spinner?.pause();
226
- verboseStreamingActive = true;
227
- }
228
- process.stdout.write(chalk.gray(textContent));
229
- }
230
- }
231
- }
232
- // Capture the final result
233
- if (message.type === "result") {
234
- resultMessage = message;
235
- }
236
- }
237
- // Resume spinner after streaming completes (if we paused it)
238
- if (verboseStreamingActive) {
239
- spinner?.resume();
240
- verboseStreamingActive = false;
241
279
  }
242
- clearTimeout(timeoutId);
243
- // Clear abort controller from shutdown manager
244
- if (shutdownManager) {
245
- shutdownManager.clearAbortController();
280
+ catch {
281
+ // No changed files or git error — proceed without file context
246
282
  }
247
- const durationSeconds = (Date.now() - startTime) / 1000;
248
- // Check result status
249
- if (resultMessage) {
250
- if (resultMessage.subtype === "success") {
251
- // For QA phase, check the verdict to determine actual success
252
- // SDK "success" just means the query completed - we need to parse the verdict
253
- if (phase === "qa" && capturedOutput) {
254
- const verdict = parseQaVerdict(capturedOutput);
255
- // Only READY_FOR_MERGE and NEEDS_VERIFICATION are considered passing
256
- // NEEDS_VERIFICATION is external verification, not a code quality issue
257
- if (verdict &&
258
- verdict !== "READY_FOR_MERGE" &&
259
- verdict !== "NEEDS_VERIFICATION") {
260
- return {
261
- phase,
262
- success: false,
263
- durationSeconds,
264
- error: `QA verdict: ${verdict}`,
265
- sessionId: resultSessionId,
266
- output: capturedOutput,
267
- verdict, // Include parsed verdict
268
- };
269
- }
270
- // Pass case - include verdict for logging
271
- return {
272
- phase,
273
- success: true,
274
- durationSeconds,
275
- sessionId: resultSessionId,
276
- output: capturedOutput,
277
- verdict: verdict ?? undefined, // Include if found
278
- };
283
+ }
284
+ // Check if shutdown is in progress
285
+ if (shutdownManager?.shuttingDown) {
286
+ return {
287
+ phase,
288
+ success: false,
289
+ durationSeconds: 0,
290
+ error: "Shutdown in progress",
291
+ };
292
+ }
293
+ // Create abort controller for timeout
294
+ const abortController = new AbortController();
295
+ const timeoutId = setTimeout(() => {
296
+ abortController.abort();
297
+ }, config.phaseTimeout * 1000);
298
+ // Register abort controller with shutdown manager for graceful shutdown
299
+ // Uses add/remove to support concurrent phase execution (#404)
300
+ if (shutdownManager) {
301
+ shutdownManager.addAbortController(abortController);
302
+ }
303
+ // Build environment with worktree isolation variables
304
+ const env = {
305
+ ...process.env,
306
+ CLAUDE_HOOKS_SMART_TESTS: config.noSmartTests ? "false" : "true",
307
+ };
308
+ // Set worktree isolation environment variables
309
+ if (shouldUseWorktree) {
310
+ env.SEQUANT_WORKTREE = worktreePath;
311
+ env.SEQUANT_ISSUE = String(issueNumber);
312
+ }
313
+ // Set orchestration context for skills to detect they're part of a workflow
314
+ // Skills can check these to skip redundant pre-flight checks
315
+ env.SEQUANT_ORCHESTRATOR = "sequant-run";
316
+ env.SEQUANT_PHASE = phase;
317
+ // Propagate issue type for skills to adapt behavior (e.g., lighter QA for docs)
318
+ if (config.issueType) {
319
+ env.SEQUANT_ISSUE_TYPE = config.issueType;
320
+ }
321
+ // Track whether we're actively streaming verbose output
322
+ // Pausing spinner once per streaming session prevents truncation from rapid pause/resume cycles
323
+ // (Issue #283: ora's stop() clears the current line, which can truncate output when
324
+ // pause/resume is called for every chunk in rapid succession)
325
+ let verboseStreamingActive = false;
326
+ // Safety: never resume a session when worktree isolation is active.
327
+ // Even if THIS phase doesn't use the worktree, a previous phase may have
328
+ // created the session there. Resuming from a different cwd crashes the SDK
329
+ // (exit code 1). ISOLATED_PHASES prevents this by design, but this guard
330
+ // catches edge cases (e.g. a new phase added without updating ISOLATED_PHASES).
331
+ const canResume = sessionId && !worktreePath;
332
+ // Build AgentExecutionConfig for the driver
333
+ const agentConfig = {
334
+ cwd,
335
+ env,
336
+ abortSignal: abortController.signal,
337
+ phaseTimeout: config.phaseTimeout,
338
+ verbose: config.verbose,
339
+ mcp: config.mcp,
340
+ sessionId: canResume ? sessionId : undefined,
341
+ files,
342
+ onOutput: config.verbose
343
+ ? (text) => {
344
+ if (!verboseStreamingActive) {
345
+ spinner?.pause();
346
+ verboseStreamingActive = true;
279
347
  }
280
- return {
281
- phase,
282
- success: true,
283
- durationSeconds,
284
- sessionId: resultSessionId,
285
- output: capturedOutput,
286
- };
348
+ process.stdout.write(chalk.gray(text));
287
349
  }
288
- else {
289
- // Handle error subtypes
290
- const errorSubtype = resultMessage.subtype;
291
- if (errorSubtype === "error_max_turns") {
292
- lastError = "Max turns reached";
293
- }
294
- else if (errorSubtype === "error_during_execution") {
295
- lastError =
296
- resultMessage.errors?.join(", ") || "Error during execution";
297
- }
298
- else if (errorSubtype === "error_max_budget_usd") {
299
- lastError = "Budget limit exceeded";
300
- }
301
- else {
302
- lastError = `Error: ${errorSubtype}`;
350
+ : undefined,
351
+ onStderr: config.verbose
352
+ ? (data) => {
353
+ if (!verboseStreamingActive) {
354
+ spinner?.pause();
355
+ verboseStreamingActive = true;
303
356
  }
357
+ process.stderr.write(chalk.red(data));
358
+ }
359
+ : undefined,
360
+ };
361
+ // Resolve driver from config or default
362
+ const driver = getDriver(config.agent, {
363
+ aiderSettings: config.aiderSettings,
364
+ });
365
+ const agentResult = await driver.executePhase(prompt, agentConfig);
366
+ // Resume spinner after execution completes (if we paused it)
367
+ if (verboseStreamingActive) {
368
+ spinner?.resume();
369
+ }
370
+ clearTimeout(timeoutId);
371
+ // Remove this specific abort controller from shutdown manager
372
+ if (shutdownManager) {
373
+ shutdownManager.removeAbortController(abortController);
374
+ }
375
+ const durationSeconds = (Date.now() - startTime) / 1000;
376
+ // Map AgentPhaseResult to PhaseResult
377
+ const tails = {
378
+ stderrTail: agentResult.stderrTail,
379
+ stdoutTail: agentResult.stdoutTail,
380
+ exitCode: agentResult.exitCode,
381
+ };
382
+ if (agentResult.success) {
383
+ // For QA phase, check the verdict to determine actual success
384
+ // Agent "success" just means the execution completed — we need to parse the verdict
385
+ if (phase === "qa" && agentResult.output) {
386
+ const verdict = parseQaVerdict(agentResult.output);
387
+ const summary = parseQaSummary(agentResult.output) ?? undefined;
388
+ if (verdict &&
389
+ verdict !== "READY_FOR_MERGE" &&
390
+ verdict !== "NEEDS_VERIFICATION") {
304
391
  return {
305
392
  phase,
306
393
  success: false,
307
394
  durationSeconds,
308
- error: lastError,
309
- sessionId: resultSessionId,
395
+ error: `QA verdict: ${verdict}`,
396
+ sessionId: agentResult.sessionId,
397
+ output: agentResult.output,
398
+ verdict,
399
+ summary,
400
+ ...tails,
310
401
  };
311
402
  }
312
- }
313
- // No result message received
314
- return {
315
- phase,
316
- success: false,
317
- durationSeconds: (Date.now() - startTime) / 1000,
318
- error: "No result received from Claude",
319
- sessionId: resultSessionId,
320
- };
321
- }
322
- catch (err) {
323
- const durationSeconds = (Date.now() - startTime) / 1000;
324
- const error = err instanceof Error ? err.message : String(err);
325
- // Check if it was an abort (timeout)
326
- if (error.includes("abort") || error.includes("AbortError")) {
327
403
  return {
328
404
  phase,
329
- success: false,
405
+ success: true,
330
406
  durationSeconds,
331
- error: `Timeout after ${config.phaseTimeout}s`,
407
+ sessionId: agentResult.sessionId,
408
+ output: agentResult.output,
409
+ verdict: verdict ?? undefined,
410
+ summary,
411
+ ...tails,
332
412
  };
333
413
  }
334
- // Include stderr in error message if available (helps diagnose early exit failures)
335
- const stderrSuffix = capturedStderr
336
- ? `\nStderr: ${capturedStderr.slice(0, 500)}`
337
- : "";
338
414
  return {
339
415
  phase,
340
- success: false,
416
+ success: true,
341
417
  durationSeconds,
342
- error: error + stderrSuffix,
418
+ sessionId: agentResult.sessionId,
419
+ output: agentResult.output,
420
+ ...tails,
343
421
  };
344
422
  }
423
+ return {
424
+ phase,
425
+ success: false,
426
+ durationSeconds,
427
+ error: agentResult.error,
428
+ sessionId: agentResult.sessionId,
429
+ ...tails,
430
+ };
345
431
  }
346
432
  /**
347
433
  * Execute a phase with automatic retry for cold-start failures and MCP fallback.
@@ -359,7 +445,9 @@ async function executePhase(issueNumber, phase, config, sessionId, worktreePath,
359
445
  */
360
446
  export async function executePhaseWithRetry(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner,
361
447
  /** @internal Injected for testing — defaults to module-level executePhase */
362
- executePhaseFn = executePhase) {
448
+ executePhaseFn = executePhase,
449
+ /** @internal Injected for testing — defaults to setTimeout-based delay */
450
+ delayFn = (ms) => new Promise((resolve) => setTimeout(resolve, ms))) {
363
451
  // Skip retry logic if explicitly disabled
364
452
  if (config.retry === false) {
365
453
  return executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
@@ -369,8 +457,17 @@ executePhaseFn = executePhase) {
369
457
  for (let attempt = 0; attempt <= COLD_START_MAX_RETRIES; attempt++) {
370
458
  lastResult = await executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
371
459
  const duration = lastResult.durationSeconds ?? 0;
372
- // Success or genuine failure (took long enough to be real work)
373
- if (lastResult.success || duration >= COLD_START_THRESHOLD_SECONDS) {
460
+ // Success → return immediately
461
+ if (lastResult.success) {
462
+ return lastResult;
463
+ }
464
+ // Genuine failure (took long enough to be real work) → skip cold-start retries.
465
+ // For spec phase, break to allow Phase 3 (spec-specific retry) to run.
466
+ // For other phases, return immediately — no further retries.
467
+ if (duration >= COLD_START_THRESHOLD_SECONDS) {
468
+ if (phase === "spec") {
469
+ break;
470
+ }
374
471
  return lastResult;
375
472
  }
376
473
  // Cold-start failure detected — retry
@@ -396,7 +493,30 @@ executePhaseFn = executePhase) {
396
493
  console.log(chalk.green(` ✓ Phase succeeded without MCP (MCP cold-start issue detected)`));
397
494
  return retryResult;
398
495
  }
399
- // Both attempts failed - return original error for better diagnostics
496
+ // Update lastResult for Phase 3 (spec retry)
497
+ lastResult = retryResult;
498
+ // Non-spec phases: return original error after MCP fallback exhausted
499
+ if (phase !== "spec") {
500
+ return {
501
+ ...lastResult,
502
+ error: originalError,
503
+ };
504
+ }
505
+ }
506
+ // Phase 3: Spec-specific retry — spec has a higher transient failure rate
507
+ // than other phases (~8.6%), so one extra retry with backoff recovers most cases.
508
+ if (phase === "spec" && !lastResult.success) {
509
+ for (let i = 0; i < SPEC_EXTRA_RETRIES; i++) {
510
+ console.log(chalk.yellow(`\n ⟳ Spec phase failed, retrying with ${SPEC_RETRY_BACKOFF_MS}ms backoff... (spec retry ${i + 1}/${SPEC_EXTRA_RETRIES})`));
511
+ await delayFn(SPEC_RETRY_BACKOFF_MS);
512
+ const specRetryResult = await executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
513
+ if (specRetryResult.success) {
514
+ console.log(chalk.green(` ✓ Spec phase succeeded on retry`));
515
+ return specRetryResult;
516
+ }
517
+ lastResult = specRetryResult;
518
+ }
519
+ // All spec retries exhausted — return with original error for diagnostics
400
520
  return {
401
521
  ...lastResult,
402
522
  error: originalError,