sequant 1.20.3 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. package/.claude-plugin/marketplace.json +2 -4
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +36 -15
  4. package/dist/bin/cli.js +25 -2
  5. package/dist/src/commands/doctor.js +42 -9
  6. package/dist/src/commands/init.d.ts +1 -0
  7. package/dist/src/commands/init.js +52 -0
  8. package/dist/src/commands/logs.d.ts +1 -0
  9. package/dist/src/commands/logs.js +18 -2
  10. package/dist/src/commands/run.d.ts +7 -0
  11. package/dist/src/commands/run.js +235 -68
  12. package/dist/src/commands/serve.d.ts +13 -0
  13. package/dist/src/commands/serve.js +131 -0
  14. package/dist/src/commands/stats.d.ts +1 -0
  15. package/dist/src/commands/stats.js +185 -26
  16. package/dist/src/commands/status.d.ts +2 -0
  17. package/dist/src/commands/status.js +99 -50
  18. package/dist/src/index.d.ts +2 -2
  19. package/dist/src/index.js +4 -1
  20. package/dist/src/lib/ac-parser.d.ts +2 -0
  21. package/dist/src/lib/ac-parser.js +12 -2
  22. package/dist/src/lib/assess-comment-parser.d.ts +137 -0
  23. package/dist/src/lib/assess-comment-parser.js +344 -0
  24. package/dist/src/lib/ci/config.d.ts +22 -0
  25. package/dist/src/lib/ci/config.js +134 -0
  26. package/dist/src/lib/ci/index.d.ts +12 -0
  27. package/dist/src/lib/ci/index.js +10 -0
  28. package/dist/src/lib/ci/inputs.d.ts +29 -0
  29. package/dist/src/lib/ci/inputs.js +103 -0
  30. package/dist/src/lib/ci/labels.d.ts +34 -0
  31. package/dist/src/lib/ci/labels.js +101 -0
  32. package/dist/src/lib/ci/outputs.d.ts +25 -0
  33. package/dist/src/lib/ci/outputs.js +84 -0
  34. package/dist/src/lib/ci/triggers.d.ts +9 -0
  35. package/dist/src/lib/ci/triggers.js +86 -0
  36. package/dist/src/lib/ci/types.d.ts +131 -0
  37. package/dist/src/lib/ci/types.js +47 -0
  38. package/dist/src/lib/mcp-config.d.ts +54 -0
  39. package/dist/src/lib/mcp-config.js +172 -0
  40. package/dist/src/lib/merge-check/index.js +6 -12
  41. package/dist/src/lib/merge-check/types.d.ts +20 -7
  42. package/dist/src/lib/merge-check/types.js +11 -0
  43. package/dist/src/lib/phase-signal.d.ts +3 -3
  44. package/dist/src/lib/phase-signal.js +5 -3
  45. package/dist/src/lib/settings.d.ts +52 -0
  46. package/dist/src/lib/settings.js +41 -0
  47. package/dist/src/lib/shutdown.d.ts +16 -5
  48. package/dist/src/lib/shutdown.js +32 -12
  49. package/dist/src/lib/solve-comment-parser.d.ts +9 -102
  50. package/dist/src/lib/solve-comment-parser.js +13 -248
  51. package/dist/src/lib/stacks.d.ts +8 -0
  52. package/dist/src/lib/stacks.js +34 -0
  53. package/dist/src/lib/system.js +3 -7
  54. package/dist/src/lib/test-tautology-detector.d.ts +10 -0
  55. package/dist/src/lib/test-tautology-detector.js +43 -4
  56. package/dist/src/lib/upstream/assessment.js +9 -59
  57. package/dist/src/lib/upstream/issues.js +12 -75
  58. package/dist/src/lib/version-check.d.ts +2 -2
  59. package/dist/src/lib/version-check.js +6 -3
  60. package/dist/src/lib/version.d.ts +4 -0
  61. package/dist/src/lib/version.js +25 -0
  62. package/dist/src/lib/workflow/batch-executor.d.ts +26 -86
  63. package/dist/src/lib/workflow/batch-executor.js +269 -55
  64. package/dist/src/lib/workflow/drivers/agent-driver.d.ts +56 -0
  65. package/dist/src/lib/workflow/drivers/agent-driver.js +8 -0
  66. package/dist/src/lib/workflow/drivers/aider.d.ts +18 -0
  67. package/dist/src/lib/workflow/drivers/aider.js +160 -0
  68. package/dist/src/lib/workflow/drivers/claude-code.d.ts +17 -0
  69. package/dist/src/lib/workflow/drivers/claude-code.js +165 -0
  70. package/dist/src/lib/workflow/drivers/index.d.ts +20 -0
  71. package/dist/src/lib/workflow/drivers/index.js +27 -0
  72. package/dist/src/lib/workflow/error-classifier.d.ts +16 -0
  73. package/dist/src/lib/workflow/error-classifier.js +90 -0
  74. package/dist/src/lib/workflow/log-writer.d.ts +6 -3
  75. package/dist/src/lib/workflow/log-writer.js +57 -27
  76. package/dist/src/lib/workflow/metrics-schema.d.ts +9 -9
  77. package/dist/src/lib/workflow/phase-detection.d.ts +23 -0
  78. package/dist/src/lib/workflow/phase-detection.js +45 -29
  79. package/dist/src/lib/workflow/phase-executor.d.ts +42 -3
  80. package/dist/src/lib/workflow/phase-executor.js +375 -229
  81. package/dist/src/lib/workflow/phase-mapper.d.ts +1 -1
  82. package/dist/src/lib/workflow/phase-mapper.js +7 -7
  83. package/dist/src/lib/workflow/platforms/github.d.ts +157 -0
  84. package/dist/src/lib/workflow/platforms/github.js +466 -0
  85. package/dist/src/lib/workflow/platforms/index.d.ts +17 -0
  86. package/dist/src/lib/workflow/platforms/index.js +25 -0
  87. package/dist/src/lib/workflow/platforms/platform-provider.d.ts +67 -0
  88. package/dist/src/lib/workflow/platforms/platform-provider.js +8 -0
  89. package/dist/src/lib/workflow/pr-status.d.ts +2 -4
  90. package/dist/src/lib/workflow/pr-status.js +3 -16
  91. package/dist/src/lib/workflow/qa-cache.d.ts +58 -0
  92. package/dist/src/lib/workflow/qa-cache.js +88 -0
  93. package/dist/src/lib/workflow/reconcile.d.ts +69 -0
  94. package/dist/src/lib/workflow/reconcile.js +290 -0
  95. package/dist/src/lib/workflow/ring-buffer.d.ts +17 -0
  96. package/dist/src/lib/workflow/ring-buffer.js +37 -0
  97. package/dist/src/lib/workflow/run-log-schema.d.ts +115 -24
  98. package/dist/src/lib/workflow/run-log-schema.js +47 -12
  99. package/dist/src/lib/workflow/run-reflect.js +1 -1
  100. package/dist/src/lib/workflow/state-cleanup.js +21 -0
  101. package/dist/src/lib/workflow/state-manager.d.ts +34 -3
  102. package/dist/src/lib/workflow/state-manager.js +278 -126
  103. package/dist/src/lib/workflow/state-schema.d.ts +34 -30
  104. package/dist/src/lib/workflow/state-schema.js +35 -25
  105. package/dist/src/lib/workflow/state-utils.d.ts +3 -1
  106. package/dist/src/lib/workflow/state-utils.js +1 -0
  107. package/dist/src/lib/workflow/types.d.ts +224 -6
  108. package/dist/src/lib/workflow/types.js +20 -1
  109. package/dist/src/lib/workflow/worktree-discovery.d.ts +1 -1
  110. package/dist/src/lib/workflow/worktree-discovery.js +6 -14
  111. package/dist/src/lib/workflow/worktree-manager.js +33 -51
  112. package/dist/src/mcp/index.d.ts +4 -0
  113. package/dist/src/mcp/index.js +4 -0
  114. package/dist/src/mcp/resources.d.ts +7 -0
  115. package/dist/src/mcp/resources.js +111 -0
  116. package/dist/src/mcp/run-registry.d.ts +34 -0
  117. package/dist/src/mcp/run-registry.js +42 -0
  118. package/dist/src/mcp/server.d.ts +12 -0
  119. package/dist/src/mcp/server.js +50 -0
  120. package/dist/src/mcp/tools/logs.d.ts +7 -0
  121. package/dist/src/mcp/tools/logs.js +149 -0
  122. package/dist/src/mcp/tools/run.d.ts +121 -0
  123. package/dist/src/mcp/tools/run.js +591 -0
  124. package/dist/src/mcp/tools/status.d.ts +7 -0
  125. package/dist/src/mcp/tools/status.js +127 -0
  126. package/package.json +26 -7
  127. package/templates/hooks/post-tool.sh +19 -8
  128. package/templates/hooks/pre-tool.sh +36 -49
  129. package/templates/mcp.json +6 -0
  130. package/templates/skills/assess/SKILL.md +354 -352
  131. package/templates/skills/exec/SKILL.md +64 -1
  132. package/templates/skills/fullsolve/SKILL.md +35 -4
  133. package/templates/skills/qa/SKILL.md +486 -9
  134. package/templates/skills/qa/scripts/quality-checks.sh +1 -1
  135. package/templates/skills/setup/SKILL.md +386 -0
  136. package/templates/skills/solve/SKILL.md +38 -664
  137. package/templates/skills/spec/SKILL.md +90 -31
@@ -1,16 +1,19 @@
1
1
  /**
2
2
  * Phase execution engine for workflow orchestration.
3
3
  *
4
- * Handles executing individual phases via the Claude Agent SDK,
4
+ * Handles executing individual phases via an AgentDriver interface,
5
5
  * including cold-start retry logic and MCP fallback strategies.
6
+ *
7
+ * The SDK import has been moved to ClaudeCodeDriver — this module
8
+ * is agent-agnostic.
6
9
  */
7
10
  import chalk from "chalk";
8
- import { query } from "@anthropic-ai/claude-agent-sdk";
9
- import { getMcpServersConfig } from "../system.js";
11
+ import { execSync } from "child_process";
10
12
  import { readAgentsMd } from "../agents-md.js";
13
+ import { getDriver } from "./drivers/index.js";
11
14
  /**
12
- * Natural language prompts for each phase
13
- * These prompts will invoke the corresponding skills via natural language
15
+ * Natural language prompts for each phase.
16
+ * Claude Code invokes the corresponding skills via natural language.
14
17
  */
15
18
  const PHASE_PROMPTS = {
16
19
  spec: "Review GitHub issue #{issue} and create an implementation plan with verification criteria. Run the /spec {issue} workflow.",
@@ -18,8 +21,51 @@ const PHASE_PROMPTS = {
18
21
  testgen: "Generate test stubs for GitHub issue #{issue} based on the specification. Run the /testgen {issue} workflow.",
19
22
  exec: "Implement the feature for GitHub issue #{issue} following the spec. Run the /exec {issue} workflow.",
20
23
  test: "Execute structured browser-based testing for GitHub issue #{issue}. Run the /test {issue} workflow.",
24
+ verify: "Verify the implementation for GitHub issue #{issue} by running commands and capturing output. Run the /verify {issue} workflow.",
21
25
  qa: "Review the implementation for GitHub issue #{issue} against acceptance criteria. Run the /qa {issue} workflow.",
22
26
  loop: "Parse test/QA findings for GitHub issue #{issue} and iterate until quality gates pass. Run the /loop {issue} workflow.",
27
+ merger: "Integrate and merge completed worktrees for GitHub issue #{issue}. Run the /merger {issue} workflow.",
28
+ };
29
+ /**
30
+ * Self-contained prompts for non-Claude agents (Aider, Codex, etc.).
31
+ * These agents don't have a skill system, so prompts must include
32
+ * full instructions rather than skill invocations.
33
+ */
34
+ const AIDER_PHASE_PROMPTS = {
35
+ spec: `Read GitHub issue #{issue} using 'gh issue view #{issue}'.
36
+ Create a spec comment on the issue with:
37
+ 1. Implementation plan
38
+ 2. Acceptance criteria as a checklist
39
+ 3. Risk assessment
40
+ Post the comment using 'gh issue comment #{issue} --body "<comment>"'.`,
41
+ "security-review": `Perform a security review for GitHub issue #{issue}.
42
+ Read the issue with 'gh issue view #{issue}'.
43
+ Check for auth, permissions, injection, and sensitive data issues.
44
+ Post findings as a comment on the issue.`,
45
+ testgen: `Generate test stubs for GitHub issue #{issue}.
46
+ Read the spec comments on the issue with 'gh issue view #{issue} --comments'.
47
+ Create test files with describe/it blocks covering the acceptance criteria.
48
+ Use the project's existing test framework.`,
49
+ exec: `Implement the feature described in GitHub issue #{issue}.
50
+ Read the issue and any spec comments with 'gh issue view #{issue} --comments'.
51
+ Follow the implementation plan from the spec.
52
+ Write tests for new functionality.
53
+ Ensure the build passes with 'npm test' and 'npm run build'.`,
54
+ test: `Test the implementation for GitHub issue #{issue}.
55
+ Run 'npm test' and verify all tests pass.
56
+ Check for edge cases and error handling.`,
57
+ verify: `Verify the implementation for GitHub issue #{issue}.
58
+ Run relevant commands and capture their output for review.`,
59
+ qa: `Review the changes for GitHub issue #{issue}.
60
+ Run 'npm test' and 'npm run build' to verify everything works.
61
+ Check each acceptance criterion from the issue comments.
62
+ Output a verdict: READY_FOR_MERGE, AC_MET_BUT_NOT_A_PLUS, or AC_NOT_MET
63
+ with format "### Verdict: <VERDICT>" followed by an explanation.`,
64
+ loop: `Review test and QA findings for GitHub issue #{issue}.
65
+ Fix any issues identified in the QA feedback.
66
+ Re-run 'npm test' and 'npm run build' until all quality gates pass.`,
67
+ merger: `Integrate and merge completed worktrees for GitHub issue #{issue}.
68
+ Ensure all branches are up to date and merge cleanly.`,
23
69
  };
24
70
  /**
25
71
  * Phases that require worktree isolation.
@@ -44,6 +90,16 @@ const ISOLATED_PHASES = [
44
90
  */
45
91
  const COLD_START_THRESHOLD_SECONDS = 60;
46
92
  const COLD_START_MAX_RETRIES = 2;
93
+ /**
94
+ * Spec-specific retry configuration.
95
+ * Spec failures have a higher failure rate (~8.6%) than other phases due to
96
+ * transient GitHub API issues and rate limits. One extra retry with backoff
97
+ * recovers most of these without user intervention.
98
+ */
99
+ /** @internal Exported for testing only */
100
+ export const SPEC_RETRY_BACKOFF_MS = 5000;
101
+ /** @internal Exported for testing only */
102
+ export const SPEC_EXTRA_RETRIES = 1;
47
103
  export function parseQaVerdict(output) {
48
104
  if (!output)
49
105
  return null;
@@ -60,6 +116,95 @@ export function parseQaVerdict(output) {
60
116
  const verdict = verdictMatch[1].toUpperCase().replace(/-/g, "_");
61
117
  return verdict;
62
118
  }
119
+ /**
120
+ * Parse condensed QA summary from QA phase output (#434).
121
+ *
122
+ * Handles multiple AC table formats produced by the QA skill:
123
+ * - 5-column: | AC-N | source | desc | STATUS | notes |
124
+ * - 4-column: | AC-N | desc | STATUS | notes |
125
+ * - 3-column: | AC-N | desc | STATUS |
126
+ *
127
+ * Status cells may contain emoji prefixes (✅ MET), shorthand
128
+ * (PARTIAL), or trailing text (MET — explanation).
129
+ *
130
+ * @internal Exported for testing only
131
+ */
132
+ export function parseQaSummary(output) {
133
+ if (!output)
134
+ return null;
135
+ // Anchored pattern: cell content starts with optional emoji, then status keyword
136
+ // Uses alternation (not character class) to avoid ESLint no-misleading-character-class
137
+ const STATUS_CELL = /^(?:\u2705|\u274C|\u26A0\uFE0F|\u2B50|\u2139\uFE0F|\u2753|\u2757)?\s*(MET|NOT_MET|PARTIALLY_MET|PARTIAL|PENDING|N\/A)\b/i;
138
+ const lines = output.split("\n");
139
+ const acRows = lines.filter((line) => /^\s*\|\s*\*?\*?AC-\d+/.test(line));
140
+ if (acRows.length === 0)
141
+ return null;
142
+ let acMet = 0;
143
+ let acTotal = 0;
144
+ for (const row of acRows) {
145
+ const cells = row
146
+ .split("|")
147
+ .map((c) => c.trim())
148
+ .filter(Boolean);
149
+ // Scan cells right-to-left to find the status cell
150
+ let found = false;
151
+ for (let i = cells.length - 1; i >= 1; i--) {
152
+ const match = cells[i].match(STATUS_CELL);
153
+ if (match) {
154
+ const status = match[1].toUpperCase();
155
+ acTotal++;
156
+ if (status === "MET")
157
+ acMet++;
158
+ found = true;
159
+ break;
160
+ }
161
+ }
162
+ // Row with AC-N but no parseable status is skipped
163
+ if (!found)
164
+ continue;
165
+ }
166
+ if (acTotal === 0)
167
+ return null;
168
+ const gaps = parseListSection(output, /\*\*(?:Issues|Gaps)/);
169
+ const suggestions = parseListSection(output, /\*\*Suggestions/);
170
+ return { acMet, acTotal, gaps, suggestions };
171
+ }
172
+ /**
173
+ * Parse a markdown bullet list section, filtering out "None" variants.
174
+ */
175
+ function parseListSection(output, headerPattern) {
176
+ const items = [];
177
+ const lines = output.split("\n");
178
+ let inSection = false;
179
+ for (const line of lines) {
180
+ if (headerPattern.test(line)) {
181
+ // If the header line itself contains a bullet (inline), capture it
182
+ inSection = true;
183
+ continue;
184
+ }
185
+ if (inSection) {
186
+ // Section ends at next markdown header or bold label
187
+ if (/^#{1,4}\s/.test(line) || /^\*\*[^*]+\*\*:/.test(line)) {
188
+ break;
189
+ }
190
+ const bulletMatch = line.match(/^\s*[-*]\s+(.+)/);
191
+ if (bulletMatch) {
192
+ const trimmed = bulletMatch[1].trim();
193
+ // Filter "None", "None found", "None — text", etc.
194
+ if (trimmed && !/^None\b/i.test(trimmed)) {
195
+ items.push(trimmed);
196
+ }
197
+ }
198
+ else if (line.trim() === "") {
199
+ continue;
200
+ }
201
+ else {
202
+ break;
203
+ }
204
+ }
205
+ }
206
+ return items;
207
+ }
63
208
  /**
64
209
  * Format duration in human-readable format
65
210
  */
@@ -73,11 +218,19 @@ export function formatDuration(seconds) {
73
218
  }
74
219
  /**
75
220
  * Get the prompt for a phase with the issue number substituted.
221
+ * Selects self-contained prompts for non-Claude agents.
76
222
  * Includes AGENTS.md content as context so non-Claude agents
77
223
  * receive project conventions and workflow instructions.
224
+ *
225
+ * @internal Exported for testing only
78
226
  */
79
- async function getPhasePrompt(phase, issueNumber) {
80
- const basePrompt = PHASE_PROMPTS[phase].replace(/\{issue\}/g, String(issueNumber));
227
+ export async function getPhasePrompt(phase, issueNumber, agent, promptContext) {
228
+ const prompts = agent && agent !== "claude-code" ? AIDER_PHASE_PROMPTS : PHASE_PROMPTS;
229
+ let basePrompt = prompts[phase].replace(/\{issue\}/g, String(issueNumber));
230
+ // Append phase-specific context (e.g., QA findings for loop phase)
231
+ if (promptContext) {
232
+ basePrompt += `\n\n---\n\n${promptContext}`;
233
+ }
81
234
  // Include AGENTS.md content in the prompt context for non-Claude agent compatibility.
82
235
  // Claude reads CLAUDE.md natively, but other agents (Aider, Codex, Gemini CLI)
83
236
  // rely on AGENTS.md for project context.
@@ -88,22 +241,24 @@ async function getPhasePrompt(phase, issueNumber) {
88
241
  return basePrompt;
89
242
  }
90
243
  /**
91
- * Execute a single phase for an issue using Claude Agent SDK
244
+ * Execute a single phase for an issue using the configured AgentDriver.
92
245
  */
93
246
  async function executePhase(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner) {
94
247
  const startTime = Date.now();
248
+ const prompt = await getPhasePrompt(phase, issueNumber, config.agent, config.promptContext);
95
249
  if (config.dryRun) {
96
- // Dry run - just simulate
250
+ // Dry run - show the prompt that would be sent, then return
97
251
  if (config.verbose) {
98
252
  console.log(chalk.gray(` Would execute: /${phase} ${issueNumber}`));
253
+ console.log(chalk.gray(` Prompt: ${prompt}`));
99
254
  }
100
255
  return {
101
256
  phase,
102
257
  success: true,
103
258
  durationSeconds: 0,
259
+ output: prompt,
104
260
  };
105
261
  }
106
- const prompt = await getPhasePrompt(phase, issueNumber);
107
262
  if (config.verbose) {
108
263
  console.log(chalk.gray(` Prompt: ${prompt}`));
109
264
  if (worktreePath && ISOLATED_PHASES.includes(phase)) {
@@ -113,235 +268,177 @@ async function executePhase(issueNumber, phase, config, sessionId, worktreePath,
113
268
  // Determine working directory and environment
114
269
  const shouldUseWorktree = worktreePath && ISOLATED_PHASES.includes(phase);
115
270
  const cwd = shouldUseWorktree ? worktreePath : process.cwd();
116
- // Track stderr for error diagnostics (declared outside try for catch access)
117
- let capturedStderr = "";
118
- try {
119
- // Check if shutdown is in progress
120
- if (shutdownManager?.shuttingDown) {
121
- return {
122
- phase,
123
- success: false,
124
- durationSeconds: 0,
125
- error: "Shutdown in progress",
126
- };
127
- }
128
- // Create abort controller for timeout
129
- const abortController = new AbortController();
130
- const timeoutId = setTimeout(() => {
131
- abortController.abort();
132
- }, config.phaseTimeout * 1000);
133
- // Register abort controller with shutdown manager for graceful shutdown
134
- if (shutdownManager) {
135
- shutdownManager.setAbortController(abortController);
136
- }
137
- let resultSessionId;
138
- let resultMessage;
139
- let lastError;
140
- let capturedOutput = "";
141
- // Build environment with worktree isolation variables
142
- const env = {
143
- ...process.env,
144
- CLAUDE_HOOKS_SMART_TESTS: config.noSmartTests ? "false" : "true",
145
- };
146
- // Set worktree isolation environment variables
147
- if (shouldUseWorktree) {
148
- env.SEQUANT_WORKTREE = worktreePath;
149
- env.SEQUANT_ISSUE = String(issueNumber);
150
- }
151
- // Set orchestration context for skills to detect they're part of a workflow
152
- // Skills can check these to skip redundant pre-flight checks
153
- env.SEQUANT_ORCHESTRATOR = "sequant-run";
154
- env.SEQUANT_PHASE = phase;
155
- // Execute using Claude Agent SDK
156
- // Safety: never resume a session when worktree isolation is active.
157
- // Even if THIS phase doesn't use the worktree, a previous phase may have
158
- // created the session there. Resuming from a different cwd crashes the SDK
159
- // (exit code 1). ISOLATED_PHASES prevents this by design, but this guard
160
- // catches edge cases (e.g. a new phase added without updating ISOLATED_PHASES).
161
- const canResume = sessionId && !worktreePath;
162
- // Get MCP servers config if enabled
163
- // Reads from Claude Desktop config and passes to SDK for headless MCP support
164
- const mcpServers = config.mcp ? getMcpServersConfig() : undefined;
165
- // Track whether we're actively streaming verbose output
166
- // Pausing spinner once per streaming session prevents truncation from rapid pause/resume cycles
167
- // (Issue #283: ora's stop() clears the current line, which can truncate output when
168
- // pause/resume is called for every chunk in rapid succession)
169
- let verboseStreamingActive = false;
170
- const queryInstance = query({
171
- prompt,
172
- options: {
173
- abortController,
271
+ // Resolve file context for file-oriented drivers (e.g., Aider --file)
272
+ let files;
273
+ if (config.agent && config.agent !== "claude-code") {
274
+ try {
275
+ const output = execSync("git diff --name-only main...HEAD", {
174
276
  cwd,
175
- // Load project settings including skills
176
- settingSources: ["project"],
177
- // Use Claude Code's system prompt and tools
178
- systemPrompt: { type: "preset", preset: "claude_code" },
179
- tools: { type: "preset", preset: "claude_code" },
180
- // Bypass permissions for headless execution
181
- permissionMode: "bypassPermissions",
182
- allowDangerouslySkipPermissions: true,
183
- // Resume from previous session if provided (but not when switching directories)
184
- ...(canResume ? { resume: sessionId } : {}),
185
- // Configure smart tests and worktree isolation via environment
186
- env,
187
- // Pass MCP servers for headless mode (AC-2)
188
- ...(mcpServers ? { mcpServers } : {}),
189
- // Capture stderr for debugging (helps diagnose early exit failures)
190
- stderr: (data) => {
191
- capturedStderr += data;
192
- // Write stderr in verbose mode
193
- if (config.verbose) {
194
- // Pause spinner once to avoid truncation (Issue #283)
195
- if (!verboseStreamingActive) {
196
- spinner?.pause();
197
- verboseStreamingActive = true;
198
- }
199
- process.stderr.write(chalk.red(data));
200
- }
201
- },
202
- },
203
- });
204
- // Stream and process messages
205
- for await (const message of queryInstance) {
206
- // Capture session ID from system init message
207
- if (message.type === "system" && message.subtype === "init") {
208
- resultSessionId = message.session_id;
209
- }
210
- // Capture output from assistant messages
211
- if (message.type === "assistant") {
212
- // Extract text content from the message
213
- const content = message.message.content;
214
- const textContent = content
215
- .filter((c) => c.type === "text" && c.text)
216
- .map((c) => c.text)
217
- .join("");
218
- if (textContent) {
219
- capturedOutput += textContent;
220
- // Show streaming output in verbose mode
221
- if (config.verbose) {
222
- // Pause spinner once at start of streaming to avoid truncation
223
- // (Issue #283: repeated pause/resume causes ora to clear lines between chunks)
224
- if (!verboseStreamingActive) {
225
- spinner?.pause();
226
- verboseStreamingActive = true;
227
- }
228
- process.stdout.write(chalk.gray(textContent));
229
- }
230
- }
277
+ encoding: "utf-8",
278
+ stdio: ["pipe", "pipe", "pipe"],
279
+ }).trim();
280
+ if (output) {
281
+ files = output.split("\n").filter(Boolean);
231
282
  }
232
- // Capture the final result
233
- if (message.type === "result") {
234
- resultMessage = message;
235
- }
236
- }
237
- // Resume spinner after streaming completes (if we paused it)
238
- if (verboseStreamingActive) {
239
- spinner?.resume();
240
- verboseStreamingActive = false;
241
283
  }
242
- clearTimeout(timeoutId);
243
- // Clear abort controller from shutdown manager
244
- if (shutdownManager) {
245
- shutdownManager.clearAbortController();
284
+ catch {
285
+ // No changed files or git error — proceed without file context
246
286
  }
247
- const durationSeconds = (Date.now() - startTime) / 1000;
248
- // Check result status
249
- if (resultMessage) {
250
- if (resultMessage.subtype === "success") {
251
- // For QA phase, check the verdict to determine actual success
252
- // SDK "success" just means the query completed - we need to parse the verdict
253
- if (phase === "qa" && capturedOutput) {
254
- const verdict = parseQaVerdict(capturedOutput);
255
- // Only READY_FOR_MERGE and NEEDS_VERIFICATION are considered passing
256
- // NEEDS_VERIFICATION is external verification, not a code quality issue
257
- if (verdict &&
258
- verdict !== "READY_FOR_MERGE" &&
259
- verdict !== "NEEDS_VERIFICATION") {
260
- return {
261
- phase,
262
- success: false,
263
- durationSeconds,
264
- error: `QA verdict: ${verdict}`,
265
- sessionId: resultSessionId,
266
- output: capturedOutput,
267
- verdict, // Include parsed verdict
268
- };
269
- }
270
- // Pass case - include verdict for logging
271
- return {
272
- phase,
273
- success: true,
274
- durationSeconds,
275
- sessionId: resultSessionId,
276
- output: capturedOutput,
277
- verdict: verdict ?? undefined, // Include if found
278
- };
287
+ }
288
+ // Check if shutdown is in progress
289
+ if (shutdownManager?.shuttingDown) {
290
+ return {
291
+ phase,
292
+ success: false,
293
+ durationSeconds: 0,
294
+ error: "Shutdown in progress",
295
+ };
296
+ }
297
+ // Create abort controller for timeout
298
+ const abortController = new AbortController();
299
+ const timeoutId = setTimeout(() => {
300
+ abortController.abort();
301
+ }, config.phaseTimeout * 1000);
302
+ // Register abort controller with shutdown manager for graceful shutdown
303
+ // Uses add/remove to support concurrent phase execution (#404)
304
+ if (shutdownManager) {
305
+ shutdownManager.addAbortController(abortController);
306
+ }
307
+ // Build environment with worktree isolation variables
308
+ const env = {
309
+ ...process.env,
310
+ CLAUDE_HOOKS_SMART_TESTS: config.noSmartTests ? "false" : "true",
311
+ };
312
+ // Set worktree isolation environment variables
313
+ if (shouldUseWorktree) {
314
+ env.SEQUANT_WORKTREE = worktreePath;
315
+ env.SEQUANT_ISSUE = String(issueNumber);
316
+ }
317
+ // Set orchestration context for skills to detect they're part of a workflow
318
+ // Skills can check these to skip redundant pre-flight checks
319
+ env.SEQUANT_ORCHESTRATOR = "sequant-run";
320
+ env.SEQUANT_PHASE = phase;
321
+ // Propagate issue type for skills to adapt behavior (e.g., lighter QA for docs)
322
+ if (config.issueType) {
323
+ env.SEQUANT_ISSUE_TYPE = config.issueType;
324
+ }
325
+ // Pass QA context to loop phase so it doesn't need to reconstruct from GitHub (#488)
326
+ if (config.lastVerdict) {
327
+ env.SEQUANT_LAST_VERDICT = config.lastVerdict;
328
+ }
329
+ if (config.failedAcs) {
330
+ env.SEQUANT_FAILED_ACS = config.failedAcs;
331
+ }
332
+ // Track whether we're actively streaming verbose output
333
+ // Pausing spinner once per streaming session prevents truncation from rapid pause/resume cycles
334
+ // (Issue #283: ora's stop() clears the current line, which can truncate output when
335
+ // pause/resume is called for every chunk in rapid succession)
336
+ let verboseStreamingActive = false;
337
+ // Safety: never resume a session when worktree isolation is active.
338
+ // Even if THIS phase doesn't use the worktree, a previous phase may have
339
+ // created the session there. Resuming from a different cwd crashes the SDK
340
+ // (exit code 1). ISOLATED_PHASES prevents this by design, but this guard
341
+ // catches edge cases (e.g. a new phase added without updating ISOLATED_PHASES).
342
+ const canResume = sessionId && !worktreePath;
343
+ // Build AgentExecutionConfig for the driver
344
+ const agentConfig = {
345
+ cwd,
346
+ env,
347
+ abortSignal: abortController.signal,
348
+ phaseTimeout: config.phaseTimeout,
349
+ verbose: config.verbose,
350
+ mcp: config.mcp,
351
+ sessionId: canResume ? sessionId : undefined,
352
+ files,
353
+ onOutput: config.verbose
354
+ ? (text) => {
355
+ if (!verboseStreamingActive) {
356
+ spinner?.pause();
357
+ verboseStreamingActive = true;
279
358
  }
280
- return {
281
- phase,
282
- success: true,
283
- durationSeconds,
284
- sessionId: resultSessionId,
285
- output: capturedOutput,
286
- };
359
+ process.stdout.write(chalk.gray(text));
287
360
  }
288
- else {
289
- // Handle error subtypes
290
- const errorSubtype = resultMessage.subtype;
291
- if (errorSubtype === "error_max_turns") {
292
- lastError = "Max turns reached";
293
- }
294
- else if (errorSubtype === "error_during_execution") {
295
- lastError =
296
- resultMessage.errors?.join(", ") || "Error during execution";
297
- }
298
- else if (errorSubtype === "error_max_budget_usd") {
299
- lastError = "Budget limit exceeded";
300
- }
301
- else {
302
- lastError = `Error: ${errorSubtype}`;
361
+ : undefined,
362
+ onStderr: config.verbose
363
+ ? (data) => {
364
+ if (!verboseStreamingActive) {
365
+ spinner?.pause();
366
+ verboseStreamingActive = true;
303
367
  }
368
+ process.stderr.write(chalk.red(data));
369
+ }
370
+ : undefined,
371
+ };
372
+ // Resolve driver from config or default
373
+ const driver = getDriver(config.agent, {
374
+ aiderSettings: config.aiderSettings,
375
+ });
376
+ const agentResult = await driver.executePhase(prompt, agentConfig);
377
+ // Resume spinner after execution completes (if we paused it)
378
+ if (verboseStreamingActive) {
379
+ spinner?.resume();
380
+ }
381
+ clearTimeout(timeoutId);
382
+ // Remove this specific abort controller from shutdown manager
383
+ if (shutdownManager) {
384
+ shutdownManager.removeAbortController(abortController);
385
+ }
386
+ const durationSeconds = (Date.now() - startTime) / 1000;
387
+ // Map AgentPhaseResult to PhaseResult
388
+ const tails = {
389
+ stderrTail: agentResult.stderrTail,
390
+ stdoutTail: agentResult.stdoutTail,
391
+ exitCode: agentResult.exitCode,
392
+ };
393
+ if (agentResult.success) {
394
+ // For QA phase, check the verdict to determine actual success
395
+ // Agent "success" just means the execution completed — we need to parse the verdict
396
+ if (phase === "qa" && agentResult.output) {
397
+ const verdict = parseQaVerdict(agentResult.output);
398
+ const summary = parseQaSummary(agentResult.output) ?? undefined;
399
+ if (verdict &&
400
+ verdict !== "READY_FOR_MERGE" &&
401
+ verdict !== "NEEDS_VERIFICATION") {
304
402
  return {
305
403
  phase,
306
404
  success: false,
307
405
  durationSeconds,
308
- error: lastError,
309
- sessionId: resultSessionId,
406
+ error: `QA verdict: ${verdict}`,
407
+ sessionId: agentResult.sessionId,
408
+ output: agentResult.output,
409
+ verdict,
410
+ summary,
411
+ ...tails,
310
412
  };
311
413
  }
312
- }
313
- // No result message received
314
- return {
315
- phase,
316
- success: false,
317
- durationSeconds: (Date.now() - startTime) / 1000,
318
- error: "No result received from Claude",
319
- sessionId: resultSessionId,
320
- };
321
- }
322
- catch (err) {
323
- const durationSeconds = (Date.now() - startTime) / 1000;
324
- const error = err instanceof Error ? err.message : String(err);
325
- // Check if it was an abort (timeout)
326
- if (error.includes("abort") || error.includes("AbortError")) {
327
414
  return {
328
415
  phase,
329
- success: false,
416
+ success: true,
330
417
  durationSeconds,
331
- error: `Timeout after ${config.phaseTimeout}s`,
418
+ sessionId: agentResult.sessionId,
419
+ output: agentResult.output,
420
+ verdict: verdict ?? undefined,
421
+ summary,
422
+ ...tails,
332
423
  };
333
424
  }
334
- // Include stderr in error message if available (helps diagnose early exit failures)
335
- const stderrSuffix = capturedStderr
336
- ? `\nStderr: ${capturedStderr.slice(0, 500)}`
337
- : "";
338
425
  return {
339
426
  phase,
340
- success: false,
427
+ success: true,
341
428
  durationSeconds,
342
- error: error + stderrSuffix,
429
+ sessionId: agentResult.sessionId,
430
+ output: agentResult.output,
431
+ ...tails,
343
432
  };
344
433
  }
434
+ return {
435
+ phase,
436
+ success: false,
437
+ durationSeconds,
438
+ error: agentResult.error,
439
+ sessionId: agentResult.sessionId,
440
+ ...tails,
441
+ };
345
442
  }
346
443
  /**
347
444
  * Execute a phase with automatic retry for cold-start failures and MCP fallback.
@@ -359,32 +456,58 @@ async function executePhase(issueNumber, phase, config, sessionId, worktreePath,
359
456
  */
360
457
  export async function executePhaseWithRetry(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner,
361
458
  /** @internal Injected for testing — defaults to module-level executePhase */
362
- executePhaseFn = executePhase) {
459
+ executePhaseFn = executePhase,
460
+ /** @internal Injected for testing — defaults to setTimeout-based delay */
461
+ delayFn = (ms) => new Promise((resolve) => setTimeout(resolve, ms))) {
363
462
  // Skip retry logic if explicitly disabled
364
463
  if (config.retry === false) {
365
464
  return executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
366
465
  }
466
+ // Skip cold-start retries for `loop` phase (#488).
467
+ // Loop is always a re-run after a failed QA — never a first boot.
468
+ // Failures at 47-51s are genuine skill failures, not cold-start issues.
469
+ // Without this guard, 2 cold-start retries + 1 MCP fallback = 3 wasted spawns per loop.
470
+ const skipColdStartRetry = phase === "loop";
367
471
  let lastResult;
368
- // Phase 1: Cold-start retry attempts (with MCP enabled if configured)
369
- for (let attempt = 0; attempt <= COLD_START_MAX_RETRIES; attempt++) {
472
+ if (skipColdStartRetry) {
473
+ // Single attempt — no cold-start retry loop
370
474
  lastResult = await executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
371
- const duration = lastResult.durationSeconds ?? 0;
372
- // Success or genuine failure (took long enough to be real work)
373
- if (lastResult.success || duration >= COLD_START_THRESHOLD_SECONDS) {
475
+ if (lastResult.success) {
374
476
  return lastResult;
375
477
  }
376
- // Cold-start failure detected — retry
377
- if (attempt < COLD_START_MAX_RETRIES) {
378
- if (config.verbose) {
379
- console.log(chalk.yellow(`\n ⟳ Cold-start failure detected (${duration.toFixed(1)}s), retrying... (attempt ${attempt + 2}/${COLD_START_MAX_RETRIES + 1})`));
478
+ }
479
+ else {
480
+ // Phase 1: Cold-start retry attempts (with MCP enabled if configured)
481
+ for (let attempt = 0; attempt <= COLD_START_MAX_RETRIES; attempt++) {
482
+ lastResult = await executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
483
+ const duration = lastResult.durationSeconds ?? 0;
484
+ // Success → return immediately
485
+ if (lastResult.success) {
486
+ return lastResult;
487
+ }
488
+ // Genuine failure (took long enough to be real work) → skip cold-start retries.
489
+ // For spec phase, break to allow Phase 3 (spec-specific retry) to run.
490
+ // For other phases, return immediately — no further retries.
491
+ if (duration >= COLD_START_THRESHOLD_SECONDS) {
492
+ if (phase === "spec") {
493
+ break;
494
+ }
495
+ return lastResult;
496
+ }
497
+ // Cold-start failure detected — retry
498
+ if (attempt < COLD_START_MAX_RETRIES) {
499
+ if (config.verbose) {
500
+ console.log(chalk.yellow(`\n ⟳ Cold-start failure detected (${duration.toFixed(1)}s), retrying... (attempt ${attempt + 2}/${COLD_START_MAX_RETRIES + 1})`));
501
+ }
380
502
  }
381
503
  }
382
504
  }
383
505
  // Capture the original error for better diagnostics
384
506
  const originalError = lastResult.error;
385
507
  // Phase 2: MCP fallback - if MCP is enabled and we're still failing, try without MCP
386
- // This handles npx-based MCP servers that fail on first run due to cold-cache issues
387
- if (config.mcp && !lastResult.success) {
508
+ // This handles npx-based MCP servers that fail on first run due to cold-cache issues.
509
+ // Skip for `loop` phase — MCP is never the cause of loop failures (#488).
510
+ if (config.mcp && !lastResult.success && !skipColdStartRetry) {
388
511
  console.log(chalk.yellow(`\n ⚠️ Phase failed with MCP enabled, retrying without MCP...`));
389
512
  // Create config copy with MCP disabled
390
513
  const configWithoutMcp = {
@@ -396,7 +519,30 @@ executePhaseFn = executePhase) {
396
519
  console.log(chalk.green(` ✓ Phase succeeded without MCP (MCP cold-start issue detected)`));
397
520
  return retryResult;
398
521
  }
399
- // Both attempts failed - return original error for better diagnostics
522
+ // Update lastResult for Phase 3 (spec retry)
523
+ lastResult = retryResult;
524
+ // Non-spec phases: return original error after MCP fallback exhausted
525
+ if (phase !== "spec") {
526
+ return {
527
+ ...lastResult,
528
+ error: originalError,
529
+ };
530
+ }
531
+ }
532
+ // Phase 3: Spec-specific retry — spec has a higher transient failure rate
533
+ // than other phases (~8.6%), so one extra retry with backoff recovers most cases.
534
+ if (phase === "spec" && !lastResult.success) {
535
+ for (let i = 0; i < SPEC_EXTRA_RETRIES; i++) {
536
+ console.log(chalk.yellow(`\n ⟳ Spec phase failed, retrying with ${SPEC_RETRY_BACKOFF_MS}ms backoff... (spec retry ${i + 1}/${SPEC_EXTRA_RETRIES})`));
537
+ await delayFn(SPEC_RETRY_BACKOFF_MS);
538
+ const specRetryResult = await executePhaseFn(issueNumber, phase, config, sessionId, worktreePath, shutdownManager, spinner);
539
+ if (specRetryResult.success) {
540
+ console.log(chalk.green(` ✓ Spec phase succeeded on retry`));
541
+ return specRetryResult;
542
+ }
543
+ lastResult = specRetryResult;
544
+ }
545
+ // All spec retries exhausted — return with original error for diagnostics
400
546
  return {
401
547
  ...lastResult,
402
548
  error: originalError,