sequant 1.20.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/.claude-plugin/marketplace.json +2 -4
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +29 -9
  4. package/dist/bin/cli.js +25 -2
  5. package/dist/src/commands/doctor.js +42 -9
  6. package/dist/src/commands/init.d.ts +1 -0
  7. package/dist/src/commands/init.js +52 -0
  8. package/dist/src/commands/logs.d.ts +1 -0
  9. package/dist/src/commands/logs.js +18 -2
  10. package/dist/src/commands/run.d.ts +7 -0
  11. package/dist/src/commands/run.js +235 -68
  12. package/dist/src/commands/serve.d.ts +13 -0
  13. package/dist/src/commands/serve.js +131 -0
  14. package/dist/src/commands/stats.d.ts +1 -0
  15. package/dist/src/commands/stats.js +185 -26
  16. package/dist/src/commands/status.d.ts +2 -0
  17. package/dist/src/commands/status.js +99 -50
  18. package/dist/src/index.d.ts +2 -2
  19. package/dist/src/index.js +4 -1
  20. package/dist/src/lib/ac-parser.d.ts +2 -0
  21. package/dist/src/lib/ac-parser.js +12 -2
  22. package/dist/src/lib/assess-comment-parser.d.ts +137 -0
  23. package/dist/src/lib/assess-comment-parser.js +344 -0
  24. package/dist/src/lib/ci/config.d.ts +22 -0
  25. package/dist/src/lib/ci/config.js +134 -0
  26. package/dist/src/lib/ci/index.d.ts +12 -0
  27. package/dist/src/lib/ci/index.js +10 -0
  28. package/dist/src/lib/ci/inputs.d.ts +29 -0
  29. package/dist/src/lib/ci/inputs.js +103 -0
  30. package/dist/src/lib/ci/labels.d.ts +34 -0
  31. package/dist/src/lib/ci/labels.js +101 -0
  32. package/dist/src/lib/ci/outputs.d.ts +25 -0
  33. package/dist/src/lib/ci/outputs.js +84 -0
  34. package/dist/src/lib/ci/triggers.d.ts +9 -0
  35. package/dist/src/lib/ci/triggers.js +86 -0
  36. package/dist/src/lib/ci/types.d.ts +131 -0
  37. package/dist/src/lib/ci/types.js +47 -0
  38. package/dist/src/lib/mcp-config.d.ts +54 -0
  39. package/dist/src/lib/mcp-config.js +172 -0
  40. package/dist/src/lib/merge-check/index.js +6 -12
  41. package/dist/src/lib/merge-check/types.d.ts +20 -7
  42. package/dist/src/lib/merge-check/types.js +11 -0
  43. package/dist/src/lib/phase-signal.d.ts +3 -3
  44. package/dist/src/lib/phase-signal.js +5 -3
  45. package/dist/src/lib/settings.d.ts +52 -0
  46. package/dist/src/lib/settings.js +41 -0
  47. package/dist/src/lib/shutdown.d.ts +16 -5
  48. package/dist/src/lib/shutdown.js +32 -12
  49. package/dist/src/lib/solve-comment-parser.d.ts +9 -102
  50. package/dist/src/lib/solve-comment-parser.js +13 -248
  51. package/dist/src/lib/stacks.d.ts +8 -0
  52. package/dist/src/lib/stacks.js +34 -0
  53. package/dist/src/lib/system.js +3 -7
  54. package/dist/src/lib/test-tautology-detector.d.ts +10 -0
  55. package/dist/src/lib/test-tautology-detector.js +43 -4
  56. package/dist/src/lib/upstream/assessment.js +9 -59
  57. package/dist/src/lib/upstream/issues.js +12 -75
  58. package/dist/src/lib/version-check.d.ts +2 -2
  59. package/dist/src/lib/version-check.js +6 -3
  60. package/dist/src/lib/version.d.ts +4 -0
  61. package/dist/src/lib/version.js +25 -0
  62. package/dist/src/lib/workflow/batch-executor.d.ts +18 -86
  63. package/dist/src/lib/workflow/batch-executor.js +232 -55
  64. package/dist/src/lib/workflow/drivers/agent-driver.d.ts +56 -0
  65. package/dist/src/lib/workflow/drivers/agent-driver.js +8 -0
  66. package/dist/src/lib/workflow/drivers/aider.d.ts +18 -0
  67. package/dist/src/lib/workflow/drivers/aider.js +160 -0
  68. package/dist/src/lib/workflow/drivers/claude-code.d.ts +17 -0
  69. package/dist/src/lib/workflow/drivers/claude-code.js +165 -0
  70. package/dist/src/lib/workflow/drivers/index.d.ts +20 -0
  71. package/dist/src/lib/workflow/drivers/index.js +27 -0
  72. package/dist/src/lib/workflow/error-classifier.d.ts +16 -0
  73. package/dist/src/lib/workflow/error-classifier.js +90 -0
  74. package/dist/src/lib/workflow/log-writer.d.ts +6 -3
  75. package/dist/src/lib/workflow/log-writer.js +57 -27
  76. package/dist/src/lib/workflow/metrics-schema.d.ts +9 -9
  77. package/dist/src/lib/workflow/phase-detection.d.ts +23 -0
  78. package/dist/src/lib/workflow/phase-detection.js +45 -29
  79. package/dist/src/lib/workflow/phase-executor.d.ts +42 -3
  80. package/dist/src/lib/workflow/phase-executor.js +345 -220
  81. package/dist/src/lib/workflow/phase-mapper.d.ts +1 -1
  82. package/dist/src/lib/workflow/phase-mapper.js +7 -7
  83. package/dist/src/lib/workflow/platforms/github.d.ts +157 -0
  84. package/dist/src/lib/workflow/platforms/github.js +466 -0
  85. package/dist/src/lib/workflow/platforms/index.d.ts +17 -0
  86. package/dist/src/lib/workflow/platforms/index.js +25 -0
  87. package/dist/src/lib/workflow/platforms/platform-provider.d.ts +67 -0
  88. package/dist/src/lib/workflow/platforms/platform-provider.js +8 -0
  89. package/dist/src/lib/workflow/pr-status.d.ts +2 -4
  90. package/dist/src/lib/workflow/pr-status.js +3 -16
  91. package/dist/src/lib/workflow/qa-cache.d.ts +58 -0
  92. package/dist/src/lib/workflow/qa-cache.js +88 -0
  93. package/dist/src/lib/workflow/reconcile.d.ts +69 -0
  94. package/dist/src/lib/workflow/reconcile.js +290 -0
  95. package/dist/src/lib/workflow/ring-buffer.d.ts +17 -0
  96. package/dist/src/lib/workflow/ring-buffer.js +37 -0
  97. package/dist/src/lib/workflow/run-log-schema.d.ts +115 -24
  98. package/dist/src/lib/workflow/run-log-schema.js +47 -12
  99. package/dist/src/lib/workflow/run-reflect.js +1 -1
  100. package/dist/src/lib/workflow/state-cleanup.js +21 -0
  101. package/dist/src/lib/workflow/state-manager.d.ts +34 -3
  102. package/dist/src/lib/workflow/state-manager.js +278 -126
  103. package/dist/src/lib/workflow/state-schema.d.ts +34 -30
  104. package/dist/src/lib/workflow/state-schema.js +35 -25
  105. package/dist/src/lib/workflow/state-utils.d.ts +3 -1
  106. package/dist/src/lib/workflow/state-utils.js +1 -0
  107. package/dist/src/lib/workflow/types.d.ts +208 -6
  108. package/dist/src/lib/workflow/types.js +20 -1
  109. package/dist/src/lib/workflow/worktree-discovery.d.ts +1 -1
  110. package/dist/src/lib/workflow/worktree-discovery.js +6 -14
  111. package/dist/src/lib/workflow/worktree-manager.js +33 -51
  112. package/dist/src/mcp/index.d.ts +4 -0
  113. package/dist/src/mcp/index.js +4 -0
  114. package/dist/src/mcp/resources.d.ts +7 -0
  115. package/dist/src/mcp/resources.js +111 -0
  116. package/dist/src/mcp/run-registry.d.ts +34 -0
  117. package/dist/src/mcp/run-registry.js +42 -0
  118. package/dist/src/mcp/server.d.ts +12 -0
  119. package/dist/src/mcp/server.js +50 -0
  120. package/dist/src/mcp/tools/logs.d.ts +7 -0
  121. package/dist/src/mcp/tools/logs.js +149 -0
  122. package/dist/src/mcp/tools/run.d.ts +121 -0
  123. package/dist/src/mcp/tools/run.js +591 -0
  124. package/dist/src/mcp/tools/status.d.ts +7 -0
  125. package/dist/src/mcp/tools/status.js +127 -0
  126. package/package.json +10 -1
  127. package/templates/hooks/post-tool.sh +19 -8
  128. package/templates/hooks/pre-tool.sh +36 -49
  129. package/templates/mcp.json +6 -0
  130. package/templates/skills/assess/SKILL.md +354 -352
  131. package/templates/skills/exec/SKILL.md +64 -1
  132. package/templates/skills/fullsolve/SKILL.md +35 -4
  133. package/templates/skills/qa/SKILL.md +486 -9
  134. package/templates/skills/qa/scripts/quality-checks.sh +1 -1
  135. package/templates/skills/setup/SKILL.md +386 -0
  136. package/templates/skills/solve/SKILL.md +38 -664
  137. package/templates/skills/spec/SKILL.md +90 -31
@@ -1,16 +1,19 @@
1
1
  /**
2
2
  * Phase execution engine for workflow orchestration.
3
3
  *
4
- * Handles executing individual phases via the Claude Agent SDK,
4
+ * Handles executing individual phases via an AgentDriver interface,
5
5
  * including cold-start retry logic and MCP fallback strategies.
6
+ *
7
+ * The SDK import has been moved to ClaudeCodeDriver — this module
8
+ * is agent-agnostic.
6
9
  */
7
10
  import chalk from "chalk";
8
- import { query } from "@anthropic-ai/claude-agent-sdk";
9
- import { getMcpServersConfig } from "../system.js";
11
+ import { execSync } from "child_process";
10
12
  import { readAgentsMd } from "../agents-md.js";
13
+ import { getDriver } from "./drivers/index.js";
11
14
  /**
12
- * Natural language prompts for each phase
13
- * These prompts will invoke the corresponding skills via natural language
15
+ * Natural language prompts for each phase.
16
+ * Claude Code invokes the corresponding skills via natural language.
14
17
  */
15
18
  const PHASE_PROMPTS = {
16
19
  spec: "Review GitHub issue #{issue} and create an implementation plan with verification criteria. Run the /spec {issue} workflow.",
@@ -18,19 +21,63 @@ const PHASE_PROMPTS = {
18
21
  testgen: "Generate test stubs for GitHub issue #{issue} based on the specification. Run the /testgen {issue} workflow.",
19
22
  exec: "Implement the feature for GitHub issue #{issue} following the spec. Run the /exec {issue} workflow.",
20
23
  test: "Execute structured browser-based testing for GitHub issue #{issue}. Run the /test {issue} workflow.",
24
+ verify: "Verify the implementation for GitHub issue #{issue} by running commands and capturing output. Run the /verify {issue} workflow.",
21
25
  qa: "Review the implementation for GitHub issue #{issue} against acceptance criteria. Run the /qa {issue} workflow.",
22
26
  loop: "Parse test/QA findings for GitHub issue #{issue} and iterate until quality gates pass. Run the /loop {issue} workflow.",
27
+ merger: "Integrate and merge completed worktrees for GitHub issue #{issue}. Run the /merger {issue} workflow.",
28
+ };
29
+ /**
30
+ * Self-contained prompts for non-Claude agents (Aider, Codex, etc.).
31
+ * These agents don't have a skill system, so prompts must include
32
+ * full instructions rather than skill invocations.
33
+ */
34
+ const AIDER_PHASE_PROMPTS = {
35
+ spec: `Read GitHub issue #{issue} using 'gh issue view #{issue}'.
36
+ Create a spec comment on the issue with:
37
+ 1. Implementation plan
38
+ 2. Acceptance criteria as a checklist
39
+ 3. Risk assessment
40
+ Post the comment using 'gh issue comment #{issue} --body "<comment>"'.`,
41
+ "security-review": `Perform a security review for GitHub issue #{issue}.
42
+ Read the issue with 'gh issue view #{issue}'.
43
+ Check for auth, permissions, injection, and sensitive data issues.
44
+ Post findings as a comment on the issue.`,
45
+ testgen: `Generate test stubs for GitHub issue #{issue}.
46
+ Read the spec comments on the issue with 'gh issue view #{issue} --comments'.
47
+ Create test files with describe/it blocks covering the acceptance criteria.
48
+ Use the project's existing test framework.`,
49
+ exec: `Implement the feature described in GitHub issue #{issue}.
50
+ Read the issue and any spec comments with 'gh issue view #{issue} --comments'.
51
+ Follow the implementation plan from the spec.
52
+ Write tests for new functionality.
53
+ Ensure the build passes with 'npm test' and 'npm run build'.`,
54
+ test: `Test the implementation for GitHub issue #{issue}.
55
+ Run 'npm test' and verify all tests pass.
56
+ Check for edge cases and error handling.`,
57
+ verify: `Verify the implementation for GitHub issue #{issue}.
58
+ Run relevant commands and capture their output for review.`,
59
+ qa: `Review the changes for GitHub issue #{issue}.
60
+ Run 'npm test' and 'npm run build' to verify everything works.
61
+ Check each acceptance criterion from the issue comments.
62
+ Output a verdict: READY_FOR_MERGE, AC_MET_BUT_NOT_A_PLUS, or AC_NOT_MET
63
+ with format "### Verdict: <VERDICT>" followed by an explanation.`,
64
+ loop: `Review test and QA findings for GitHub issue #{issue}.
65
+ Fix any issues identified in the QA feedback.
66
+ Re-run 'npm test' and 'npm run build' until all quality gates pass.`,
67
+ merger: `Integrate and merge completed worktrees for GitHub issue #{issue}.
68
+ Ensure all branches are up to date and merge cleanly.`,
23
69
  };
24
70
  /**
25
71
  * Phases that require worktree isolation.
26
- * Spec runs in main repo since it's planning-only.
27
- * security-review and loop must be isolated because they need to read/modify
28
- * worktree code, and running them in main directory with a session created
29
- * in the worktree causes the SDK to crash (cwd mismatch on session resume).
72
+ * Only `spec` runs in the main repo (planning-only, no file changes).
73
+ * All other phases must run in the worktree because:
74
+ * 1. They need to read/modify the worktree code
75
+ * 2. Resuming a session created in a different cwd crashes the SDK
30
76
  */
31
77
  const ISOLATED_PHASES = [
32
78
  "exec",
33
79
  "security-review",
80
+ "testgen",
34
81
  "test",
35
82
  "qa",
36
83
  "loop",
@@ -43,6 +90,16 @@ const ISOLATED_PHASES = [
43
90
  */
44
91
  const COLD_START_THRESHOLD_SECONDS = 60;
45
92
  const COLD_START_MAX_RETRIES = 2;
93
+ /**
94
+ * Spec-specific retry configuration.
95
+ * The spec phase has a higher failure rate (~8.6%) than other phases due to
96
+ * transient GitHub API issues and rate limits. One extra retry with backoff
97
+ * recovers most of these without user intervention.
98
+ */
99
+ /** @internal Exported for testing only */
100
+ export const SPEC_RETRY_BACKOFF_MS = 5000;
101
+ /** @internal Exported for testing only */
102
+ export const SPEC_EXTRA_RETRIES = 1;
46
103
  export function parseQaVerdict(output) {
47
104
  if (!output)
48
105
  return null;
@@ -59,6 +116,95 @@ export function parseQaVerdict(output) {
59
116
  const verdict = verdictMatch[1].toUpperCase().replace(/-/g, "_");
60
117
  return verdict;
61
118
  }
119
+ /**
120
+ * Parse condensed QA summary from QA phase output (#434).
121
+ *
122
+ * Handles multiple AC table formats produced by the QA skill:
123
+ * - 5-column: | AC-N | source | desc | STATUS | notes |
124
+ * - 4-column: | AC-N | desc | STATUS | notes |
125
+ * - 3-column: | AC-N | desc | STATUS |
126
+ *
127
+ * Status cells may contain emoji prefixes (✅ MET), shorthand
128
+ * (PARTIAL), or trailing text (MET — explanation).
129
+ *
130
+ * @internal Exported for testing only
131
+ */
132
+ export function parseQaSummary(output) {
133
+ if (!output)
134
+ return null;
135
+ // Anchored pattern: cell content starts with optional emoji, then status keyword
136
+ // Uses alternation (not character class) to avoid ESLint no-misleading-character-class
137
+ const STATUS_CELL = /^(?:\u2705|\u274C|\u26A0\uFE0F|\u2B50|\u2139\uFE0F|\u2753|\u2757)?\s*(MET|NOT_MET|PARTIALLY_MET|PARTIAL|PENDING|N\/A)\b/i;
138
+ const lines = output.split("\n");
139
+ const acRows = lines.filter((line) => /^\s*\|\s*\*?\*?AC-\d+/.test(line));
140
+ if (acRows.length === 0)
141
+ return null;
142
+ let acMet = 0;
143
+ let acTotal = 0;
144
+ for (const row of acRows) {
145
+ const cells = row
146
+ .split("|")
147
+ .map((c) => c.trim())
148
+ .filter(Boolean);
149
+ // Scan cells right-to-left to find the status cell
150
+ let found = false;
151
+ for (let i = cells.length - 1; i >= 1; i--) {
152
+ const match = cells[i].match(STATUS_CELL);
153
+ if (match) {
154
+ const status = match[1].toUpperCase();
155
+ acTotal++;
156
+ if (status === "MET")
157
+ acMet++;
158
+ found = true;
159
+ break;
160
+ }
161
+ }
162
+ // Row with AC-N but no parseable status is skipped
163
+ if (!found)
164
+ continue;
165
+ }
166
+ if (acTotal === 0)
167
+ return null;
168
+ const gaps = parseListSection(output, /\*\*(?:Issues|Gaps)/);
169
+ const suggestions = parseListSection(output, /\*\*Suggestions/);
170
+ return { acMet, acTotal, gaps, suggestions };
171
+ }
172
+ /**
173
+ * Parse a markdown bullet list section, filtering out "None" variants.
174
+ */
175
+ function parseListSection(output, headerPattern) {
176
+ const items = [];
177
+ const lines = output.split("\n");
178
+ let inSection = false;
179
+ for (const line of lines) {
180
+ if (headerPattern.test(line)) {
181
+ // Header line matched: start collecting bullets from the lines that follow (content on the header line itself is not captured)
182
+ inSection = true;
183
+ continue;
184
+ }
185
+ if (inSection) {
186
+ // Section ends at next markdown header or bold label
187
+ if (/^#{1,4}\s/.test(line) || /^\*\*[^*]+\*\*:/.test(line)) {
188
+ break;
189
+ }
190
+ const bulletMatch = line.match(/^\s*[-*]\s+(.+)/);
191
+ if (bulletMatch) {
192
+ const trimmed = bulletMatch[1].trim();
193
+ // Filter "None", "None found", "None — text", etc.
194
+ if (trimmed && !/^None\b/i.test(trimmed)) {
195
+ items.push(trimmed);
196
+ }
197
+ }
198
+ else if (line.trim() === "") {
199
+ continue;
200
+ }
201
+ else {
202
+ break;
203
+ }
204
+ }
205
+ }
206
+ return items;
207
+ }
62
208
  /**
63
209
  * Format duration in human-readable format
64
210
  */
@@ -72,11 +218,15 @@ export function formatDuration(seconds) {
72
218
  }
73
219
  /**
74
220
  * Get the prompt for a phase with the issue number substituted.
221
+ * Selects self-contained prompts for non-Claude agents.
75
222
  * Includes AGENTS.md content as context so non-Claude agents
76
223
  * receive project conventions and workflow instructions.
224
+ *
225
+ * @internal Exported for testing only
77
226
  */
78
- async function getPhasePrompt(phase, issueNumber) {
79
- const basePrompt = PHASE_PROMPTS[phase].replace(/\{issue\}/g, String(issueNumber));
227
+ export async function getPhasePrompt(phase, issueNumber, agent) {
228
+ const prompts = agent && agent !== "claude-code" ? AIDER_PHASE_PROMPTS : PHASE_PROMPTS;
229
+ const basePrompt = prompts[phase].replace(/\{issue\}/g, String(issueNumber));
80
230
  // Include AGENTS.md content in the prompt context for non-Claude agent compatibility.
81
231
  // Claude reads CLAUDE.md natively, but other agents (Aider, Codex, Gemini CLI)
82
232
  // rely on AGENTS.md for project context.
@@ -87,22 +237,24 @@ async function getPhasePrompt(phase, issueNumber) {
87
237
  return basePrompt;
88
238
  }
89
239
  /**
90
- * Execute a single phase for an issue using Claude Agent SDK
240
+ * Execute a single phase for an issue using the configured AgentDriver.
91
241
  */
92
242
  async function executePhase(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner) {
93
243
  const startTime = Date.now();
244
+ const prompt = await getPhasePrompt(phase, issueNumber, config.agent);
94
245
  if (config.dryRun) {
95
- // Dry run - just simulate
246
+ // Dry run - show the prompt that would be sent, then return
96
247
  if (config.verbose) {
97
248
  console.log(chalk.gray(` Would execute: /${phase} ${issueNumber}`));
249
+ console.log(chalk.gray(` Prompt: ${prompt}`));
98
250
  }
99
251
  return {
100
252
  phase,
101
253
  success: true,
102
254
  durationSeconds: 0,
255
+ output: prompt,
103
256
  };
104
257
  }
105
- const prompt = await getPhasePrompt(phase, issueNumber);
106
258
  if (config.verbose) {
107
259
  console.log(chalk.gray(` Prompt: ${prompt}`));
108
260
  if (worktreePath && ISOLATED_PHASES.includes(phase)) {
@@ -112,231 +264,170 @@ async function executePhase(issueNumber, phase, config, sessionId, worktreePath,
112
264
  // Determine working directory and environment
113
265
  const shouldUseWorktree = worktreePath && ISOLATED_PHASES.includes(phase);
114
266
  const cwd = shouldUseWorktree ? worktreePath : process.cwd();
115
- // Track stderr for error diagnostics (declared outside try for catch access)
116
- let capturedStderr = "";
117
- try {
118
- // Check if shutdown is in progress
119
- if (shutdownManager?.shuttingDown) {
120
- return {
121
- phase,
122
- success: false,
123
- durationSeconds: 0,
124
- error: "Shutdown in progress",
125
- };
126
- }
127
- // Create abort controller for timeout
128
- const abortController = new AbortController();
129
- const timeoutId = setTimeout(() => {
130
- abortController.abort();
131
- }, config.phaseTimeout * 1000);
132
- // Register abort controller with shutdown manager for graceful shutdown
133
- if (shutdownManager) {
134
- shutdownManager.setAbortController(abortController);
135
- }
136
- let resultSessionId;
137
- let resultMessage;
138
- let lastError;
139
- let capturedOutput = "";
140
- // Build environment with worktree isolation variables
141
- const env = {
142
- ...process.env,
143
- CLAUDE_HOOKS_SMART_TESTS: config.noSmartTests ? "false" : "true",
144
- };
145
- // Set worktree isolation environment variables
146
- if (shouldUseWorktree) {
147
- env.SEQUANT_WORKTREE = worktreePath;
148
- env.SEQUANT_ISSUE = String(issueNumber);
149
- }
150
- // Set orchestration context for skills to detect they're part of a workflow
151
- // Skills can check these to skip redundant pre-flight checks
152
- env.SEQUANT_ORCHESTRATOR = "sequant-run";
153
- env.SEQUANT_PHASE = phase;
154
- // Execute using Claude Agent SDK
155
- // Note: Don't resume sessions when switching to worktree (different cwd breaks resume)
156
- const canResume = sessionId && !shouldUseWorktree;
157
- // Get MCP servers config if enabled
158
- // Reads from Claude Desktop config and passes to SDK for headless MCP support
159
- const mcpServers = config.mcp ? getMcpServersConfig() : undefined;
160
- // Track whether we're actively streaming verbose output
161
- // Pausing spinner once per streaming session prevents truncation from rapid pause/resume cycles
162
- // (Issue #283: ora's stop() clears the current line, which can truncate output when
163
- // pause/resume is called for every chunk in rapid succession)
164
- let verboseStreamingActive = false;
165
- const queryInstance = query({
166
- prompt,
167
- options: {
168
- abortController,
267
+ // Resolve file context for file-oriented drivers (e.g., Aider --file)
268
+ let files;
269
+ if (config.agent && config.agent !== "claude-code") {
270
+ try {
271
+ const output = execSync("git diff --name-only main...HEAD", {
169
272
  cwd,
170
- // Load project settings including skills
171
- settingSources: ["project"],
172
- // Use Claude Code's system prompt and tools
173
- systemPrompt: { type: "preset", preset: "claude_code" },
174
- tools: { type: "preset", preset: "claude_code" },
175
- // Bypass permissions for headless execution
176
- permissionMode: "bypassPermissions",
177
- allowDangerouslySkipPermissions: true,
178
- // Resume from previous session if provided (but not when switching directories)
179
- ...(canResume ? { resume: sessionId } : {}),
180
- // Configure smart tests and worktree isolation via environment
181
- env,
182
- // Pass MCP servers for headless mode (AC-2)
183
- ...(mcpServers ? { mcpServers } : {}),
184
- // Capture stderr for debugging (helps diagnose early exit failures)
185
- stderr: (data) => {
186
- capturedStderr += data;
187
- // Write stderr in verbose mode
188
- if (config.verbose) {
189
- // Pause spinner once to avoid truncation (Issue #283)
190
- if (!verboseStreamingActive) {
191
- spinner?.pause();
192
- verboseStreamingActive = true;
193
- }
194
- process.stderr.write(chalk.red(data));
195
- }
196
- },
197
- },
198
- });
199
- // Stream and process messages
200
- for await (const message of queryInstance) {
201
- // Capture session ID from system init message
202
- if (message.type === "system" && message.subtype === "init") {
203
- resultSessionId = message.session_id;
273
+ encoding: "utf-8",
274
+ stdio: ["pipe", "pipe", "pipe"],
275
+ }).trim();
276
+ if (output) {
277
+ files = output.split("\n").filter(Boolean);
204
278
  }
205
- // Capture output from assistant messages
206
- if (message.type === "assistant") {
207
- // Extract text content from the message
208
- const content = message.message.content;
209
- const textContent = content
210
- .filter((c) => c.type === "text" && c.text)
211
- .map((c) => c.text)
212
- .join("");
213
- if (textContent) {
214
- capturedOutput += textContent;
215
- // Show streaming output in verbose mode
216
- if (config.verbose) {
217
- // Pause spinner once at start of streaming to avoid truncation
218
- // (Issue #283: repeated pause/resume causes ora to clear lines between chunks)
219
- if (!verboseStreamingActive) {
220
- spinner?.pause();
221
- verboseStreamingActive = true;
222
- }
223
- process.stdout.write(chalk.gray(textContent));
224
- }
225
- }
226
- }
227
- // Capture the final result
228
- if (message.type === "result") {
229
- resultMessage = message;
230
- }
231
- }
232
- // Resume spinner after streaming completes (if we paused it)
233
- if (verboseStreamingActive) {
234
- spinner?.resume();
235
- verboseStreamingActive = false;
236
279
  }
237
- clearTimeout(timeoutId);
238
- // Clear abort controller from shutdown manager
239
- if (shutdownManager) {
240
- shutdownManager.clearAbortController();
280
+ catch {
281
+ // No changed files or git error — proceed without file context
241
282
  }
242
- const durationSeconds = (Date.now() - startTime) / 1000;
243
- // Check result status
244
- if (resultMessage) {
245
- if (resultMessage.subtype === "success") {
246
- // For QA phase, check the verdict to determine actual success
247
- // SDK "success" just means the query completed - we need to parse the verdict
248
- if (phase === "qa" && capturedOutput) {
249
- const verdict = parseQaVerdict(capturedOutput);
250
- // Only READY_FOR_MERGE and NEEDS_VERIFICATION are considered passing
251
- // NEEDS_VERIFICATION is external verification, not a code quality issue
252
- if (verdict &&
253
- verdict !== "READY_FOR_MERGE" &&
254
- verdict !== "NEEDS_VERIFICATION") {
255
- return {
256
- phase,
257
- success: false,
258
- durationSeconds,
259
- error: `QA verdict: ${verdict}`,
260
- sessionId: resultSessionId,
261
- output: capturedOutput,
262
- verdict, // Include parsed verdict
263
- };
264
- }
265
- // Pass case - include verdict for logging
266
- return {
267
- phase,
268
- success: true,
269
- durationSeconds,
270
- sessionId: resultSessionId,
271
- output: capturedOutput,
272
- verdict: verdict ?? undefined, // Include if found
273
- };
283
+ }
284
+ // Check if shutdown is in progress
285
+ if (shutdownManager?.shuttingDown) {
286
+ return {
287
+ phase,
288
+ success: false,
289
+ durationSeconds: 0,
290
+ error: "Shutdown in progress",
291
+ };
292
+ }
293
+ // Create abort controller for timeout
294
+ const abortController = new AbortController();
295
+ const timeoutId = setTimeout(() => {
296
+ abortController.abort();
297
+ }, config.phaseTimeout * 1000);
298
+ // Register abort controller with shutdown manager for graceful shutdown
299
+ // Uses add/remove to support concurrent phase execution (#404)
300
+ if (shutdownManager) {
301
+ shutdownManager.addAbortController(abortController);
302
+ }
303
+ // Build environment with worktree isolation variables
304
+ const env = {
305
+ ...process.env,
306
+ CLAUDE_HOOKS_SMART_TESTS: config.noSmartTests ? "false" : "true",
307
+ };
308
+ // Set worktree isolation environment variables
309
+ if (shouldUseWorktree) {
310
+ env.SEQUANT_WORKTREE = worktreePath;
311
+ env.SEQUANT_ISSUE = String(issueNumber);
312
+ }
313
+ // Set orchestration context for skills to detect they're part of a workflow
314
+ // Skills can check these to skip redundant pre-flight checks
315
+ env.SEQUANT_ORCHESTRATOR = "sequant-run";
316
+ env.SEQUANT_PHASE = phase;
317
+ // Propagate issue type for skills to adapt behavior (e.g., lighter QA for docs)
318
+ if (config.issueType) {
319
+ env.SEQUANT_ISSUE_TYPE = config.issueType;
320
+ }
321
+ // Track whether we're actively streaming verbose output
322
+ // Pausing spinner once per streaming session prevents truncation from rapid pause/resume cycles
323
+ // (Issue #283: ora's stop() clears the current line, which can truncate output when
324
+ // pause/resume is called for every chunk in rapid succession)
325
+ let verboseStreamingActive = false;
326
+ // Safety: never resume a session when worktree isolation is active.
327
+ // Even if THIS phase doesn't use the worktree, a previous phase may have
328
+ // created the session there. Resuming from a different cwd crashes the SDK
329
+ // (exit code 1). ISOLATED_PHASES prevents this by design, but this guard
330
+ // catches edge cases (e.g. a new phase added without updating ISOLATED_PHASES).
331
+ const canResume = sessionId && !worktreePath;
332
+ // Build AgentExecutionConfig for the driver
333
+ const agentConfig = {
334
+ cwd,
335
+ env,
336
+ abortSignal: abortController.signal,
337
+ phaseTimeout: config.phaseTimeout,
338
+ verbose: config.verbose,
339
+ mcp: config.mcp,
340
+ sessionId: canResume ? sessionId : undefined,
341
+ files,
342
+ onOutput: config.verbose
343
+ ? (text) => {
344
+ if (!verboseStreamingActive) {
345
+ spinner?.pause();
346
+ verboseStreamingActive = true;
274
347
  }
275
- return {
276
- phase,
277
- success: true,
278
- durationSeconds,
279
- sessionId: resultSessionId,
280
- output: capturedOutput,
281
- };
348
+ process.stdout.write(chalk.gray(text));
282
349
  }
283
- else {
284
- // Handle error subtypes
285
- const errorSubtype = resultMessage.subtype;
286
- if (errorSubtype === "error_max_turns") {
287
- lastError = "Max turns reached";
288
- }
289
- else if (errorSubtype === "error_during_execution") {
290
- lastError =
291
- resultMessage.errors?.join(", ") || "Error during execution";
292
- }
293
- else if (errorSubtype === "error_max_budget_usd") {
294
- lastError = "Budget limit exceeded";
295
- }
296
- else {
297
- lastError = `Error: ${errorSubtype}`;
350
+ : undefined,
351
+ onStderr: config.verbose
352
+ ? (data) => {
353
+ if (!verboseStreamingActive) {
354
+ spinner?.pause();
355
+ verboseStreamingActive = true;
298
356
  }
357
+ process.stderr.write(chalk.red(data));
358
+ }
359
+ : undefined,
360
+ };
361
+ // Resolve driver from config or default
362
+ const driver = getDriver(config.agent, {
363
+ aiderSettings: config.aiderSettings,
364
+ });
365
+ const agentResult = await driver.executePhase(prompt, agentConfig);
366
+ // Resume spinner after execution completes (if we paused it)
367
+ if (verboseStreamingActive) {
368
+ spinner?.resume();
369
+ }
370
+ clearTimeout(timeoutId);
371
+ // Remove this specific abort controller from shutdown manager
372
+ if (shutdownManager) {
373
+ shutdownManager.removeAbortController(abortController);
374
+ }
375
+ const durationSeconds = (Date.now() - startTime) / 1000;
376
+ // Map AgentPhaseResult to PhaseResult
377
+ const tails = {
378
+ stderrTail: agentResult.stderrTail,
379
+ stdoutTail: agentResult.stdoutTail,
380
+ exitCode: agentResult.exitCode,
381
+ };
382
+ if (agentResult.success) {
383
+ // For QA phase, check the verdict to determine actual success
384
+ // Agent "success" just means the execution completed — we need to parse the verdict
385
+ if (phase === "qa" && agentResult.output) {
386
+ const verdict = parseQaVerdict(agentResult.output);
387
+ const summary = parseQaSummary(agentResult.output) ?? undefined;
388
+ if (verdict &&
389
+ verdict !== "READY_FOR_MERGE" &&
390
+ verdict !== "NEEDS_VERIFICATION") {
299
391
  return {
300
392
  phase,
301
393
  success: false,
302
394
  durationSeconds,
303
- error: lastError,
304
- sessionId: resultSessionId,
395
+ error: `QA verdict: ${verdict}`,
396
+ sessionId: agentResult.sessionId,
397
+ output: agentResult.output,
398
+ verdict,
399
+ summary,
400
+ ...tails,
305
401
  };
306
402
  }
307
- }
308
- // No result message received
309
- return {
310
- phase,
311
- success: false,
312
- durationSeconds: (Date.now() - startTime) / 1000,
313
- error: "No result received from Claude",
314
- sessionId: resultSessionId,
315
- };
316
- }
317
- catch (err) {
318
- const durationSeconds = (Date.now() - startTime) / 1000;
319
- const error = err instanceof Error ? err.message : String(err);
320
- // Check if it was an abort (timeout)
321
- if (error.includes("abort") || error.includes("AbortError")) {
322
403
  return {
323
404
  phase,
324
- success: false,
405
+ success: true,
325
406
  durationSeconds,
326
- error: `Timeout after ${config.phaseTimeout}s`,
407
+ sessionId: agentResult.sessionId,
408
+ output: agentResult.output,
409
+ verdict: verdict ?? undefined,
410
+ summary,
411
+ ...tails,
327
412
  };
328
413
  }
329
- // Include stderr in error message if available (helps diagnose early exit failures)
330
- const stderrSuffix = capturedStderr
331
- ? `\nStderr: ${capturedStderr.slice(0, 500)}`
332
- : "";
333
414
  return {
334
415
  phase,
335
- success: false,
416
+ success: true,
336
417
  durationSeconds,
337
- error: error + stderrSuffix,
418
+ sessionId: agentResult.sessionId,
419
+ output: agentResult.output,
420
+ ...tails,
338
421
  };
339
422
  }
423
+ return {
424
+ phase,
425
+ success: false,
426
+ durationSeconds,
427
+ error: agentResult.error,
428
+ sessionId: agentResult.sessionId,
429
+ ...tails,
430
+ };
340
431
  }
341
432
  /**
342
433
  * Execute a phase with automatic retry for cold-start failures and MCP fallback.
@@ -354,7 +445,9 @@ async function executePhase(issueNumber, phase, config, sessionId, worktreePath,
354
445
  */
355
446
  export async function executePhaseWithRetry(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner,
356
447
  /** @internal Injected for testing — defaults to module-level executePhase */
357
- executePhaseFn = executePhase) {
448
+ executePhaseFn = executePhase,
449
+ /** @internal Injected for testing — defaults to setTimeout-based delay */
450
+ delayFn = (ms) => new Promise((resolve) => setTimeout(resolve, ms))) {
358
451
  // Skip retry logic if explicitly disabled
359
452
  if (config.retry === false) {
360
453
  return executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
@@ -364,8 +457,17 @@ executePhaseFn = executePhase) {
364
457
  for (let attempt = 0; attempt <= COLD_START_MAX_RETRIES; attempt++) {
365
458
  lastResult = await executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
366
459
  const duration = lastResult.durationSeconds ?? 0;
367
- // Success or genuine failure (took long enough to be real work)
368
- if (lastResult.success || duration >= COLD_START_THRESHOLD_SECONDS) {
460
+ // Success → return immediately
461
+ if (lastResult.success) {
462
+ return lastResult;
463
+ }
464
+ // Genuine failure (took long enough to be real work) → skip cold-start retries.
465
+ // For spec phase, break to allow Phase 3 (spec-specific retry) to run.
466
+ // For other phases, return immediately — no further retries.
467
+ if (duration >= COLD_START_THRESHOLD_SECONDS) {
468
+ if (phase === "spec") {
469
+ break;
470
+ }
369
471
  return lastResult;
370
472
  }
371
473
  // Cold-start failure detected — retry
@@ -391,7 +493,30 @@ executePhaseFn = executePhase) {
391
493
  console.log(chalk.green(` ✓ Phase succeeded without MCP (MCP cold-start issue detected)`));
392
494
  return retryResult;
393
495
  }
394
- // Both attempts failed - return original error for better diagnostics
496
+ // Update lastResult for Phase 3 (spec retry)
497
+ lastResult = retryResult;
498
+ // Non-spec phases: return original error after MCP fallback exhausted
499
+ if (phase !== "spec") {
500
+ return {
501
+ ...lastResult,
502
+ error: originalError,
503
+ };
504
+ }
505
+ }
506
+ // Phase 3: Spec-specific retry — spec has a higher transient failure rate
507
+ // than other phases (~8.6%), so one extra retry with backoff recovers most cases.
508
+ if (phase === "spec" && !lastResult.success) {
509
+ for (let i = 0; i < SPEC_EXTRA_RETRIES; i++) {
510
+ console.log(chalk.yellow(`\n ⟳ Spec phase failed, retrying with ${SPEC_RETRY_BACKOFF_MS}ms backoff... (spec retry ${i + 1}/${SPEC_EXTRA_RETRIES})`));
511
+ await delayFn(SPEC_RETRY_BACKOFF_MS);
512
+ const specRetryResult = await executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
513
+ if (specRetryResult.success) {
514
+ console.log(chalk.green(` ✓ Spec phase succeeded on retry`));
515
+ return specRetryResult;
516
+ }
517
+ lastResult = specRetryResult;
518
+ }
519
+ // All spec retries exhausted — return with original error for diagnostics
395
520
  return {
396
521
  ...lastResult,
397
522
  error: originalError,