sequant 2.7.0 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/.claude-plugin/marketplace.json +1 -1
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +9 -1
  4. package/dist/bin/cli.d.ts +1 -1
  5. package/dist/bin/cli.js +10 -1
  6. package/dist/bin/preflight.d.ts +21 -0
  7. package/dist/bin/preflight.js +45 -0
  8. package/dist/marketplace/external_plugins/sequant/.claude-plugin/plugin.json +1 -1
  9. package/dist/marketplace/external_plugins/sequant/skills/_shared/references/force-push.md +34 -0
  10. package/dist/marketplace/external_plugins/sequant/skills/assess/SKILL.md +24 -7
  11. package/dist/marketplace/external_plugins/sequant/skills/exec/SKILL.md +29 -0
  12. package/dist/marketplace/external_plugins/sequant/skills/loop/SKILL.md +100 -2
  13. package/dist/marketplace/external_plugins/sequant/skills/qa/SKILL.md +24 -0
  14. package/dist/marketplace/external_plugins/sequant/skills/qa/references/anti-pattern-detection.md +285 -0
  15. package/dist/marketplace/external_plugins/sequant/skills/qa/references/call-site-review.md +202 -0
  16. package/dist/marketplace/external_plugins/sequant/skills/qa/references/quality-gates.md +287 -0
  17. package/dist/marketplace/external_plugins/sequant/skills/qa/references/test-quality-checklist.md +272 -0
  18. package/dist/marketplace/external_plugins/sequant/skills/qa/references/testing-requirements.md +40 -0
  19. package/dist/marketplace/external_plugins/sequant/skills/qa/scripts/quality-checks.sh +95 -11
  20. package/dist/marketplace/external_plugins/sequant/skills/references/shared/framework-gotchas.md +186 -0
  21. package/dist/marketplace/external_plugins/sequant/skills/release/SKILL.md +661 -0
  22. package/dist/marketplace/external_plugins/sequant/skills/test/references/browser-testing-patterns.md +423 -0
  23. package/dist/marketplace/external_plugins/sequant/skills/upstream/SKILL.md +419 -0
  24. package/dist/src/lib/errors.d.ts +85 -0
  25. package/dist/src/lib/errors.js +111 -0
  26. package/dist/src/lib/version-check.d.ts +19 -0
  27. package/dist/src/lib/version-check.js +44 -0
  28. package/dist/src/lib/workflow/batch-executor.js +61 -6
  29. package/dist/src/lib/workflow/drivers/agent-driver.d.ts +17 -0
  30. package/dist/src/lib/workflow/drivers/claude-code.d.ts +22 -0
  31. package/dist/src/lib/workflow/drivers/claude-code.js +111 -7
  32. package/dist/src/lib/workflow/log-writer.d.ts +1 -1
  33. package/dist/src/lib/workflow/phase-executor.d.ts +18 -0
  34. package/dist/src/lib/workflow/phase-executor.js +76 -14
  35. package/dist/src/lib/workflow/run-log-schema.d.ts +3 -0
  36. package/dist/src/lib/workflow/run-log-schema.js +7 -0
  37. package/dist/src/lib/workflow/state-manager.d.ts +1 -0
  38. package/dist/src/lib/workflow/state-manager.js +6 -0
  39. package/dist/src/lib/workflow/state-schema.d.ts +3 -0
  40. package/dist/src/lib/workflow/state-schema.js +7 -0
  41. package/dist/src/lib/workflow/types.d.ts +17 -0
  42. package/dist/src/ui/tui/theme.d.ts +18 -4
  43. package/dist/src/ui/tui/theme.js +18 -4
  44. package/package.json +4 -3
  45. package/templates/skills/_shared/references/force-push.md +34 -0
  46. package/templates/skills/assess/SKILL.md +24 -7
  47. package/templates/skills/exec/SKILL.md +29 -0
  48. package/templates/skills/loop/SKILL.md +100 -2
  49. package/templates/skills/qa/SKILL.md +24 -0
  50. package/templates/skills/qa/references/anti-pattern-detection.md +285 -0
  51. package/templates/skills/qa/references/call-site-review.md +202 -0
  52. package/templates/skills/qa/references/quality-gates.md +287 -0
  53. package/templates/skills/qa/references/test-quality-checklist.md +272 -0
  54. package/templates/skills/qa/references/testing-requirements.md +40 -0
  55. package/templates/skills/qa/scripts/quality-checks.sh +95 -11
  56. package/templates/skills/references/shared/framework-gotchas.md +186 -0
  57. package/templates/skills/release/SKILL.md +661 -0
  58. package/templates/skills/test/references/browser-testing-patterns.md +423 -0
  59. package/templates/skills/upstream/SKILL.md +419 -0
@@ -258,6 +258,50 @@ export function compareVersions(a, b) {
258
258
  }
259
259
  return 0;
260
260
  }
261
+ /**
262
+ * Pure preflight check for the running Node version against the engines floor.
263
+ *
264
+ * Returns an actionable, multi-line message when `current` is below `floor`,
265
+ * or `null` when it satisfies the floor (or when `floor` is missing/unparseable,
266
+ * in which case the guard is skipped rather than crashing the CLI).
267
+ *
268
+ * `floor` is the raw `engines.node` value (e.g. ">=22.12.0"); the leading range
269
+ * operator is stripped before comparison. Reuses {@link compareVersions} — no
270
+ * `semver` dependency.
271
+ */
272
+ export function getNodeVersionError(current, floor) {
273
+ // Strip any range operator (">=", "^", "~", etc.) from the floor.
274
+ const normalizedFloor = (floor ?? "").replace(/^[^\d]*/, "");
275
+ // No usable floor → skip the guard (metadata problem must not crash the CLI).
276
+ if (!/^\d/.test(normalizedFloor)) {
277
+ return null;
278
+ }
279
+ if (compareVersions(current, normalizedFloor) >= 0) {
280
+ return null;
281
+ }
282
+ const currentClean = current.replace(/^v/, "");
283
+ return [
284
+ `Sequant requires Node.js >=${normalizedFloor}, but you are running ${currentClean}.`,
285
+ "",
286
+ "Upgrade Node, then re-run:",
287
+ " • fnm: fnm install 22 && fnm use 22",
288
+ " • nvm: nvm install 22 && nvm use 22",
289
+ " • or download: https://nodejs.org/en/download",
290
+ ].join("\n");
291
+ }
292
+ /**
293
+ * Side-effecting wrapper around {@link getNodeVersionError}: prints the message
294
+ * and exits non-zero when the running Node is below the floor. Uses only
295
+ * built-in globals (`process.version`, `console`, `process.exit`) so it runs —
296
+ * rather than crashes — on the old Node it rejects.
297
+ */
298
+ export function assertNodeVersion(floor) {
299
+ const error = getNodeVersionError(process.version, floor);
300
+ if (error) {
301
+ console.error(error);
302
+ process.exit(1);
303
+ }
304
+ }
261
305
  /**
262
306
  * Check if the current version is outdated
263
307
  */
@@ -432,7 +432,17 @@ export async function runIssueWithLogging(ctx) {
432
432
  }
433
433
  }
434
434
  else {
435
- const extra = { error: specResult.error ?? "unknown" };
435
+ // Mirror the main phase loop (#739): a turn-capped spec phase surfaces the
436
+ // distinct "partial output preserved" signal rather than a generic failure
437
+ // reason, so the cap is recognizable on the spec path too (it has its own
438
+ // failure handling, separate from the main loop). The partial output is
439
+ // preserved in `phaseResults` (pushed above) and the run still halts via
440
+ // the early return below.
441
+ const extra = {
442
+ error: specResult.capped
443
+ ? "turn cap reached — partial output preserved (resume to continue)"
444
+ : (specResult.error ?? "unknown"),
445
+ };
436
446
  emitProgressLine(issueNumber, "spec", "failed", extra);
437
447
  try {
438
448
  onProgress?.(issueNumber, "spec", "failed", extra);
@@ -462,7 +472,13 @@ export async function runIssueWithLogging(ctx) {
462
472
  ? "success"
463
473
  : specResult.error?.includes("Timeout")
464
474
  ? "timeout"
465
- : "failure", { error: specResult.error, errorContext: specErrorContext });
475
+ : "failure", {
476
+ error: specResult.error,
477
+ // Mark a turn-capped spec phase distinctly in the log (#739), matching
478
+ // the main phase loop: status stays "failure" but `capped` flags it.
479
+ capped: specResult.capped,
480
+ errorContext: specErrorContext,
481
+ });
466
482
  logWriter.logPhase(phaseLog);
467
483
  }
468
484
  // Track spec phase completion in state
@@ -471,6 +487,9 @@ export async function runIssueWithLogging(ctx) {
471
487
  const phaseStatus = specResult.success ? "completed" : "failed";
472
488
  await stateManager.updatePhaseStatus(issueNumber, "spec", phaseStatus, {
473
489
  error: specResult.error,
490
+ // Mark a turn-capped spec halt distinctly in state (#739), matching
491
+ // the run-log marker — status stays "failed", `capped` flags it.
492
+ capped: specResult.capped,
474
493
  });
475
494
  }
476
495
  catch {
@@ -595,6 +614,10 @@ export async function runIssueWithLogging(ctx) {
595
614
  const useQualityLoop = config.qualityLoop || detectedQualityLoop;
596
615
  const maxIterations = useQualityLoop ? config.maxIterations : 1;
597
616
  let completedSuccessfully = false;
617
+ // Set when a phase hits its turn cap (#739): halt the outer quality-loop
618
+ // retry too, not just the inner /loop spawn — re-running a capped phase
619
+ // would only cap again, and "surface + halt" means the user resumes.
620
+ let haltedByCap = false;
598
621
  while (iteration < maxIterations) {
599
622
  iteration++;
600
623
  if (useQualityLoop && iteration > 1) {
@@ -655,7 +678,18 @@ export async function runIssueWithLogging(ctx) {
655
678
  }
656
679
  }
657
680
  else {
658
- const extra = { error: result.error ?? "unknown", iteration };
681
+ // A turn-capped phase is incomplete-but-not-hard-failed (#739): surface a
682
+ // distinct "partial output preserved" signal instead of a generic failure
683
+ // reason, so the user knows the run halted on a recoverable cap (and can
684
+ // resume) rather than on a genuine error. The partial `result.output` is
685
+ // already preserved in `phaseResults` (pushed above) and the phase log
686
+ // (`capped` flag below); the run still halts cleanly at the `break` below.
687
+ const extra = {
688
+ error: result.capped
689
+ ? "turn cap reached — partial output preserved (resume to continue)"
690
+ : (result.error ?? "unknown"),
691
+ iteration,
692
+ };
659
693
  emitProgressLine(issueNumber, phase, "failed", extra);
660
694
  try {
661
695
  onProgress?.(issueNumber, phase, "failed", extra);
@@ -696,6 +730,9 @@ export async function runIssueWithLogging(ctx) {
696
730
  ? "timeout"
697
731
  : "failure", {
698
732
  error: result.error,
733
+ // Mark a turn-capped phase distinctly in the log (#739): status stays
734
+ // "failure" (no new enum value) but `capped` flags it as recoverable.
735
+ capped: result.capped,
699
736
  verdict: result.verdict,
700
737
  summary: result.summary,
701
738
  // Observability fields (AC-1, AC-2, AC-3, AC-7)
@@ -715,7 +752,13 @@ export async function runIssueWithLogging(ctx) {
715
752
  : result.error?.includes("Timeout")
716
753
  ? "failed"
717
754
  : "failed";
718
- await stateManager.updatePhaseStatus(issueNumber, phase, phaseStatus, { error: result.error });
755
+ await stateManager.updatePhaseStatus(issueNumber, phase, phaseStatus, {
756
+ error: result.error,
757
+ // Mark a turn-capped phase halt distinctly in state (#739),
758
+ // matching the run-log marker — status stays "failed",
759
+ // `capped` flags it as recoverable for the resume path.
760
+ capped: result.capped,
761
+ });
719
762
  }
720
763
  catch {
721
764
  // State tracking errors shouldn't stop execution
@@ -726,8 +769,15 @@ export async function runIssueWithLogging(ctx) {
726
769
  }
727
770
  else {
728
771
  phasesFailed = true;
729
- // If quality loop enabled, run loop phase to fix issues
730
- if (useQualityLoop && iteration < maxIterations) {
772
+ if (result.capped) {
773
+ haltedByCap = true;
774
+ }
775
+ // If quality loop enabled, run loop phase to fix issues.
776
+ // A turn-capped phase (#739) is incomplete, not a genuine quality
777
+ // failure: skip the loop and halt cleanly ("surface + halt"). Spawning
778
+ // /loop on partial output would act on incomplete work — exactly the
779
+ // risk the capped path is meant to avoid. The user resumes instead.
780
+ if (useQualityLoop && iteration < maxIterations && !result.capped) {
731
781
  // #624 Item 3 (AC-3.3): the loop phase carries the current outer
732
782
  // iteration so the live-zone status cell can show `loop N/M`.
733
783
  const loopStartExtra = { iteration };
@@ -790,6 +840,11 @@ export async function runIssueWithLogging(ctx) {
790
840
  completedSuccessfully = true;
791
841
  break;
792
842
  }
843
+ // A turn-capped phase (#739) halts the outer quality-loop retry as well —
844
+ // re-running would only cap again; the partial work is already preserved.
845
+ if (haltedByCap) {
846
+ break;
847
+ }
793
848
  // If we're not in quality loop mode, don't retry
794
849
  if (!config.qualityLoop) {
795
850
  break;
@@ -5,6 +5,7 @@
5
5
  * Continue.dev, Copilot SDK, Cursor API) can be added by implementing this
6
6
  * interface without touching orchestration logic.
7
7
  */
8
+ import type { SequantError } from "../../errors.js";
8
9
  /**
9
10
  * Resume handle for a previous agent session.
10
11
  *
@@ -64,6 +65,22 @@ export interface AgentPhaseResult {
64
65
  /** Driver-tagged resume handle for cwd-safe cross-phase resume (#674). */
65
66
  resumeHandle?: ResumeHandle;
66
67
  error?: string;
68
+ /**
69
+ * Set when the agent hit its `maxTurns` ceiling (`error_max_turns`). The
70
+ * `output` is partial-but-usable rather than a hard failure, so consumers
71
+ * can treat it as inconclusive/incomplete instead of discarding the work.
72
+ * See #733.
73
+ */
74
+ capped?: boolean;
75
+ /**
76
+ * Typed error carrying structured cause data (#732). Set by drivers that can
77
+ * observe structured failure signals (e.g. ClaudeCodeDriver reading the SDK's
78
+ * `rate_limit_event` / assistant `error`). The executor prefers this over
79
+ * stderr-regex classification and uses its type to gate retry behavior (e.g.
80
+ * skipping the MCP fallback for non-retryable billing failures). Left
81
+ * undefined by drivers without structured signals (aider, subprocess paths).
82
+ */
83
+ structuredError?: SequantError;
67
84
  /** Last N lines of stderr captured via RingBuffer (#447) */
68
85
  stderrTail?: string[];
69
86
  /** Last N lines of stdout captured via RingBuffer (#447) */
@@ -28,6 +28,28 @@ export declare class ClaudeCodeDriver implements AgentDriver {
28
28
  */
29
29
  canResume(handle: ResumeHandle, targetCwd: string): boolean;
30
30
  executePhase(prompt: string, config: AgentExecutionConfig): Promise<AgentPhaseResult>;
31
+ /**
32
+ * Derive a typed {@link SequantError} from structured SDK failure signals
33
+ * (#732). Precedence: a captured `rate_limit_event` (richest signal) wins;
34
+ * otherwise the assistant-level `error`; otherwise the last `api_retry`
35
+ * error. Returns undefined when no rate-limit/billing signal was seen, so
36
+ * the executor falls back to stderr-regex classification.
37
+ *
38
+ * Exception: a non-retryable billing failure must never be downgraded to a
39
+ * retryable {@link RateLimitError}. If the `rate_limit_event` was only a
40
+ * transient throttle but the assistant separately reported `billing_error`,
41
+ * the billing cause wins — a retry cannot refill credits, and a
42
+ * RateLimitError would wrongly re-enable the retry / MCP-fallback path. When
43
+ * the `rate_limit_event` is itself a billing failure its richer metadata
44
+ * (`canUserPurchaseCredits`, etc.) is preserved.
45
+ */
46
+ private buildStructuredError;
47
+ /**
48
+ * Map the SDK's assistant/api-retry error enum to a typed error. Only
49
+ * rate-limit / billing variants are mapped; other variants (auth, etc.)
50
+ * return undefined and defer to the existing classification path.
51
+ */
52
+ private errorFromAssistantError;
31
53
  private buildResumeHandle;
32
54
  isAvailable(): Promise<boolean>;
33
55
  }
@@ -6,6 +6,7 @@
6
6
  */
7
7
  import { query } from "@anthropic-ai/claude-agent-sdk";
8
8
  import { getMcpServersConfig } from "../../system.js";
9
+ import { RateLimitError, BillingError, createRateLimitError, isRateLimitFailureInfo, } from "../../errors.js";
9
10
  import { RingBuffer } from "../ring-buffer.js";
10
11
  export class ClaudeCodeDriver {
11
12
  name = "claude-code";
@@ -47,6 +48,16 @@ export class ClaudeCodeDriver {
47
48
  let resultMessage;
48
49
  let capturedOutput = "";
49
50
  let capturedStderr = "";
51
+ // Structured rate-limit / billing signals captured from the SDK stream
52
+ // (#732). The SDK emits these but sequant previously dropped them on the
53
+ // floor, falling back to regex-on-stderr classification. We keep only the
54
+ // latest *failure-grade* rate-limit info (rejection or billing) so an
55
+ // informational `allowed_warning` event isn't mis-attributed to an
56
+ // unrelated phase failure.
57
+ let rateLimitInfo;
58
+ let assistantError;
59
+ // Last api_retry signal, captured opportunistically for diagnostics.
60
+ let apiRetryError;
50
61
  const stderrBuffer = new RingBuffer(50);
51
62
  const stdoutBuffer = new RingBuffer(50);
52
63
  // Resolve resume token with cwd-safety check.
@@ -99,7 +110,26 @@ export class ClaudeCodeDriver {
99
110
  if (message.type === "system" && message.subtype === "init") {
100
111
  resultSessionId = message.session_id;
101
112
  }
113
+ // Capture structured rate-limit info (#732). Only retain
114
+ // failure-grade events (rejection / billing) so a benign warning
115
+ // doesn't poison the failure path.
116
+ if (message.type === "rate_limit_event" &&
117
+ isRateLimitFailureInfo(message.rate_limit_info)) {
118
+ rateLimitInfo = message.rate_limit_info;
119
+ }
120
+ // Capture api_retry diagnostics (#732, optional). These are transient
121
+ // retries the SDK performs internally; recorded for the structured
122
+ // error fallback when no rate_limit_event/assistant error is present.
123
+ if (message.type === "system" && message.subtype === "api_retry") {
124
+ apiRetryError = message.error;
125
+ }
102
126
  if (message.type === "assistant") {
127
+ // Capture the assistant-level error field (#732) — `rate_limit`,
128
+ // `billing_error`, `overloaded`, etc. Previously discarded by the
129
+ // text-only content filter below.
130
+ if (message.error) {
131
+ assistantError = message.error;
132
+ }
103
133
  const content = message.message.content;
104
134
  const textContent = content
105
135
  .filter((c) => c.type === "text" && c.text)
@@ -124,6 +154,10 @@ export class ClaudeCodeDriver {
124
154
  // `config.cwd`. `sessionId` is mirrored for one release (#674) so
125
155
  // upgraded callers can still drive resume off the deprecated field.
126
156
  const resumeHandle = this.buildResumeHandle(resultSessionId, config.cwd);
157
+ // Build a typed error from structured SDK signals (#732). Present only
158
+ // when the stream surfaced a rate-limit/billing failure; otherwise
159
+ // undefined and the executor falls back to stderr-regex classification.
160
+ const structuredError = this.buildStructuredError(rateLimitInfo, assistantError, apiRetryError);
127
161
  if (resultMessage) {
128
162
  if (resultMessage.subtype === "success") {
129
163
  return {
@@ -135,13 +169,29 @@ export class ClaudeCodeDriver {
135
169
  stdoutTail: stdoutBuffer.getLines(),
136
170
  };
137
171
  }
172
+ // Turn-cap is a soft, recoverable outcome, not a hard failure: the
173
+ // agent produced partial work before hitting its `maxTurns` ceiling
174
+ // (turn caps are live on every agent since #484). Warn (not error)
175
+ // and return the partial output flagged `capped` so consumers — the
176
+ // /qa and /exec skills — can treat it as inconclusive/incomplete
177
+ // rather than discarding the work. See #733. Branched out of the
178
+ // error switch below so it never carries a hard `error` string.
179
+ if (resultMessage.subtype === "error_max_turns") {
180
+ config.onStderr?.("⚠️ Agent hit its turn cap (error_max_turns). Returning partial results.\n");
181
+ return {
182
+ success: false,
183
+ capped: true,
184
+ output: capturedOutput,
185
+ sessionId: resultSessionId,
186
+ resumeHandle,
187
+ stderrTail: stderrBuffer.getLines(),
188
+ stdoutTail: stdoutBuffer.getLines(),
189
+ };
190
+ }
138
191
  // Handle error subtypes
139
192
  let error;
140
193
  const errorSubtype = resultMessage.subtype;
141
- if (errorSubtype === "error_max_turns") {
142
- error = "Max turns reached";
143
- }
144
- else if (errorSubtype === "error_during_execution") {
194
+ if (errorSubtype === "error_during_execution") {
145
195
  error = resultMessage.errors?.join(", ") || "Error during execution";
146
196
  }
147
197
  else if (errorSubtype === "error_max_budget_usd") {
@@ -155,7 +205,10 @@ export class ClaudeCodeDriver {
155
205
  output: capturedOutput,
156
206
  sessionId: resultSessionId,
157
207
  resumeHandle,
158
- error,
208
+ // Prefer the structured cause (e.g. "Out of credits") over the
209
+ // generic subtype text when available (#732).
210
+ error: structuredError?.message ?? error,
211
+ structuredError,
159
212
  stderrTail: stderrBuffer.getLines(),
160
213
  stdoutTail: stdoutBuffer.getLines(),
161
214
  };
@@ -165,7 +218,8 @@ export class ClaudeCodeDriver {
165
218
  output: capturedOutput,
166
219
  sessionId: resultSessionId,
167
220
  resumeHandle,
168
- error: "No result received from Claude",
221
+ error: structuredError?.message ?? "No result received from Claude",
222
+ structuredError,
169
223
  stderrTail: stderrBuffer.getLines(),
170
224
  stdoutTail: stdoutBuffer.getLines(),
171
225
  };
@@ -182,6 +236,12 @@ export class ClaudeCodeDriver {
182
236
  stdoutTail: stdoutBuffer.getLines(),
183
237
  };
184
238
  }
239
+ // If the stream surfaced a failure-grade rate-limit/billing signal before
240
+ // throwing, prefer that typed cause (#732) over the raw thrown message — a
241
+ // mid-stream throw after a *rejected* rate_limit_event is very likely the
242
+ // proximate cause. Abort/timeout is handled above first, so a genuine
243
+ // timeout is never masked by a stale rate-limit signal.
244
+ const structuredError = this.buildStructuredError(rateLimitInfo, assistantError, apiRetryError);
185
245
  const stderrSuffix = capturedStderr
186
246
  ? `\nStderr: ${capturedStderr.slice(0, 500)}`
187
247
  : "";
@@ -190,12 +250,56 @@ export class ClaudeCodeDriver {
190
250
  output: capturedOutput,
191
251
  sessionId: resultSessionId,
192
252
  resumeHandle: this.buildResumeHandle(resultSessionId, config.cwd),
193
- error: error + stderrSuffix,
253
+ error: structuredError?.message ?? error + stderrSuffix,
254
+ structuredError,
194
255
  stderrTail: stderrBuffer.getLines(),
195
256
  stdoutTail: stdoutBuffer.getLines(),
196
257
  };
197
258
  }
198
259
  }
260
+ /**
261
+ * Derive a typed {@link SequantError} from structured SDK failure signals
262
+ * (#732). Precedence: a captured `rate_limit_event` (richest signal) wins;
263
+ * otherwise the assistant-level `error`; otherwise the last `api_retry`
264
+ * error. Returns undefined when no rate-limit/billing signal was seen, so
265
+ * the executor falls back to stderr-regex classification.
266
+ *
267
+ * Exception: a non-retryable billing failure must never be downgraded to a
268
+ * retryable {@link RateLimitError}. If the `rate_limit_event` was only a
269
+ * transient throttle but the assistant separately reported `billing_error`,
270
+ * the billing cause wins — a retry cannot refill credits, and a
271
+ * RateLimitError would wrongly re-enable the retry / MCP-fallback path. When
272
+ * the `rate_limit_event` is itself a billing failure its richer metadata
273
+ * (`canUserPurchaseCredits`, etc.) is preserved.
274
+ */
275
+ buildStructuredError(rateLimitInfo, assistantError, apiRetryError) {
276
+ if (rateLimitInfo) {
277
+ const err = createRateLimitError(rateLimitInfo);
278
+ if (err instanceof RateLimitError && assistantError === "billing_error") {
279
+ return new BillingError("Billing error");
280
+ }
281
+ return err;
282
+ }
283
+ return (this.errorFromAssistantError(assistantError) ??
284
+ this.errorFromAssistantError(apiRetryError));
285
+ }
286
+ /**
287
+ * Map the SDK's assistant/api-retry error enum to a typed error. Only
288
+ * rate-limit / billing variants are mapped; other variants (auth, etc.)
289
+ * return undefined and defer to the existing classification path.
290
+ */
291
+ errorFromAssistantError(error) {
292
+ switch (error) {
293
+ case "billing_error":
294
+ return new BillingError("Billing error");
295
+ case "rate_limit":
296
+ return new RateLimitError("Rate limited");
297
+ case "overloaded":
298
+ return new RateLimitError("API overloaded");
299
+ default:
300
+ return undefined;
301
+ }
302
+ }
199
303
  buildResumeHandle(token, originCwd) {
200
304
  if (!token)
201
305
  return undefined;
@@ -99,4 +99,4 @@ export declare class LogWriter {
99
99
  *
100
100
  * Utility function for creating phase logs when you have start/end times.
101
101
  */
102
- export declare function createPhaseLogFromTiming(phase: Phase, issueNumber: number, startTime: Date, endTime: Date, status: PhaseLog["status"], options?: Partial<Pick<PhaseLog, "error" | "iterations" | "filesModified" | "testsRun" | "testsPassed" | "verdict" | "summary" | "commitHash" | "fileDiffStats" | "cacheMetrics" | "errorContext">>): PhaseLog;
102
+ export declare function createPhaseLogFromTiming(phase: Phase, issueNumber: number, startTime: Date, endTime: Date, status: PhaseLog["status"], options?: Partial<Pick<PhaseLog, "error" | "capped" | "iterations" | "filesModified" | "testsRun" | "testsPassed" | "verdict" | "summary" | "commitHash" | "fileDiffStats" | "cacheMetrics" | "errorContext">>): PhaseLog;
@@ -111,6 +111,24 @@ export declare function mapAgentSuccessToPhaseResult(phase: Phase, agentResult:
111
111
  sessionId?: string;
112
112
  resumeHandle?: ResumeHandle;
113
113
  };
114
+ /**
115
+ * Map a failed driver result to a `PhaseResult`.
116
+ *
117
+ * Symmetric to {@link mapAgentSuccessToPhaseResult}; extracted so the
118
+ * failure-path mapping (notably the #739 capped/output gating) is unit-testable
119
+ * without spawning a driver.
120
+ *
121
+ * `output` is propagated **only** for a capped phase (#739): a capped result is
122
+ * incomplete-but-not-hard-failed, so its partial work must survive downstream.
123
+ * A genuine (non-capped) failure keeps the historical behaviour of dropping
124
+ * `output`, leaving the `/loop` fix-context (`formatFailureContext`) unchanged.
125
+ *
126
+ * @internal Exported for testing only
127
+ */
128
+ export declare function mapAgentFailureToPhaseResult(phase: Phase, agentResult: AgentPhaseResult, durationSeconds: number): PhaseResult & {
129
+ sessionId?: string;
130
+ resumeHandle?: ResumeHandle;
131
+ };
114
132
  /**
115
133
  * Get the prompt for a phase with the issue number substituted.
116
134
  * Selects self-contained prompts for non-Claude agents.
@@ -12,7 +12,7 @@ import { execSync, execFileSync } from "child_process";
12
12
  import { readAgentsMd } from "../agents-md.js";
13
13
  import { getDriver } from "./drivers/index.js";
14
14
  import { classifyError } from "./error-classifier.js";
15
- import { ApiError } from "../errors.js";
15
+ import { ApiError, BillingError } from "../errors.js";
16
16
  import { phaseRegistry } from "./phase-registry.js";
17
17
  import { bracketedConsoleLog } from "./notice.js";
18
18
  /**
@@ -408,6 +408,43 @@ export function mapAgentSuccessToPhaseResult(phase, agentResult, durationSeconds
408
408
  ...tails,
409
409
  };
410
410
  }
411
+ /**
412
+ * Map a failed driver result to a `PhaseResult`.
413
+ *
414
+ * Symmetric to {@link mapAgentSuccessToPhaseResult}; extracted so the
415
+ * failure-path mapping (notably the #739 capped/output gating) is unit-testable
416
+ * without spawning a driver.
417
+ *
418
+ * `output` is propagated **only** for a capped phase (#739): a capped result is
419
+ * incomplete-but-not-hard-failed, so its partial work must survive downstream.
420
+ * A genuine (non-capped) failure keeps the historical behaviour of dropping
421
+ * `output`, leaving the `/loop` fix-context (`formatFailureContext`) unchanged.
422
+ *
423
+ * @internal Exported for testing only
424
+ */
425
+ export function mapAgentFailureToPhaseResult(phase, agentResult, durationSeconds) {
426
+ return {
427
+ phase,
428
+ success: false,
429
+ durationSeconds,
430
+ error: agentResult.error,
431
+ // Propagate the driver's typed cause (#732) so the retry logic can prefer
432
+ // it over stderr-regex classification and gate the MCP fallback.
433
+ structuredError: agentResult.structuredError,
434
+ // Propagate the turn-cap flag and the partial output (#739). On the failure
435
+ // path `output` was previously dropped entirely — for a capped phase the
436
+ // partial work is usable and must be preserved, mirroring the driver/skill
437
+ // slice from #733. Gating `output` on `capped` keeps non-capped failures
438
+ // byte-for-byte identical to pre-#739 behaviour.
439
+ capped: agentResult.capped,
440
+ output: agentResult.capped ? agentResult.output : undefined,
441
+ sessionId: agentResult.sessionId,
442
+ resumeHandle: agentResult.resumeHandle,
443
+ stderrTail: agentResult.stderrTail,
444
+ stdoutTail: agentResult.stdoutTail,
445
+ exitCode: agentResult.exitCode,
446
+ };
447
+ }
411
448
  /**
412
449
  * Get the prompt for a phase with the issue number substituted.
413
450
  * Selects self-contained prompts for non-Claude agents.
@@ -642,17 +679,7 @@ async function executePhase(issueNumber, phase, config, resumeHandle, worktreePa
642
679
  if (agentResult.success) {
643
680
  return mapAgentSuccessToPhaseResult(phase, agentResult, durationSeconds, cwd);
644
681
  }
645
- return {
646
- phase,
647
- success: false,
648
- durationSeconds,
649
- error: agentResult.error,
650
- sessionId: agentResult.sessionId,
651
- resumeHandle: agentResult.resumeHandle,
652
- stderrTail: agentResult.stderrTail,
653
- stdoutTail: agentResult.stdoutTail,
654
- exitCode: agentResult.exitCode,
655
- };
682
+ return mapAgentFailureToPhaseResult(phase, agentResult, durationSeconds);
656
683
  }
657
684
  /**
658
685
  * Execute a phase with automatic retry for cold-start failures and MCP fallback.
@@ -693,6 +720,14 @@ delayFn = (ms) => new Promise((resolve) => setTimeout(resolve, ms))) {
693
720
  if (lastResult.success) {
694
721
  return lastResult;
695
722
  }
723
+ // Turn-capped phase (#739): incomplete-but-not-hard-failed. A retry cannot
724
+ // un-cap a turn limit, so short-circuit before any fallback — same rationale
725
+ // as the billing skip (#732), but capped must skip *all* retries (incl.
726
+ // cold-start), so an explicit early return is required, not just a guard
727
+ // flag at the MCP gate.
728
+ if (lastResult.capped) {
729
+ return lastResult;
730
+ }
696
731
  }
697
732
  else {
698
733
  // Phase 1: Cold-start retry attempts (with MCP enabled if configured)
@@ -703,11 +738,23 @@ delayFn = (ms) => new Promise((resolve) => setTimeout(resolve, ms))) {
703
738
  if (lastResult.success) {
704
739
  return lastResult;
705
740
  }
741
+ // Turn-capped phase (#739): short-circuit before cold-start retries, the
742
+ // MCP fallback, and the spec-extra retry — a retry cannot un-cap a turn
743
+ // limit. The early return here (rather than a guard at the MCP gate alone)
744
+ // is what skips the cold-start re-spawns, unlike the billing case which
745
+ // still cold-start-retries in the <60s window.
746
+ if (lastResult.capped) {
747
+ return lastResult;
748
+ }
706
749
  // Genuine failure (took long enough to be real work) → skip cold-start retries.
707
750
  // Use error classification (AC-9): if the error is retryable (e.g., API
708
751
  // rate limit, transient 503), allow one more attempt even for genuine failures.
709
752
  if (duration >= COLD_START_THRESHOLD_SECONDS) {
710
- const typedError = classifyError(lastResult.stderrTail ?? [], lastResult.exitCode);
753
+ // Prefer the driver's structured cause (#732) — it reflects the real
754
+ // SDK rate-limit/billing signal — over stderr-regex classification,
755
+ // which only sees text and never the structured data.
756
+ const typedError = lastResult.structuredError ??
757
+ classifyError(lastResult.stderrTail ?? [], lastResult.exitCode);
711
758
  if (typedError.isRetryable && attempt < COLD_START_MAX_RETRIES) {
712
759
  if (config.verbose) {
713
760
  const label = typedError instanceof ApiError
@@ -735,7 +782,22 @@ delayFn = (ms) => new Promise((resolve) => setTimeout(resolve, ms))) {
735
782
  // Phase 2: MCP fallback - if MCP is enabled and we're still failing, try without MCP
736
783
  // This handles npx-based MCP servers that fail on first run due to cold-cache issues.
737
784
  // Skip for `loop` phase — MCP is never the cause of loop failures (#488).
738
- if (config.mcp && !lastResult.success && !skipColdStartRetry) {
785
+ //
786
+ // Also skip when the failure is a billing/credits error (#732): a no-MCP
787
+ // retry cannot refill credits, so the misleading "retrying without MCP"
788
+ // noise (#592) would only mask the real cause. The accurate structured
789
+ // message (e.g. "Out of credits") is surfaced instead.
790
+ const failureIsBilling = lastResult.structuredError instanceof BillingError;
791
+ // Belt-and-suspenders (#739): the capped early-returns above already exit
792
+ // before reaching here, but gate the MCP fallback on `!failureIsCapped` too so
793
+ // intent is documented and future code paths can't accidentally re-spawn a
794
+ // capped phase without MCP.
795
+ const failureIsCapped = lastResult.capped === true;
796
+ if (config.mcp &&
797
+ !lastResult.success &&
798
+ !skipColdStartRetry &&
799
+ !failureIsBilling &&
800
+ !failureIsCapped) {
739
801
  bracketedConsoleLog(spinner, chalk.yellow(`\n ! Phase failed with MCP enabled, retrying without MCP...`));
740
802
  // Create config copy with MCP disabled
741
803
  const configWithoutMcp = {
@@ -125,6 +125,7 @@ export declare const PhaseLogSchema: z.ZodObject<{
125
125
  timeout: "timeout";
126
126
  }>;
127
127
  error: z.ZodOptional<z.ZodString>;
128
+ capped: z.ZodOptional<z.ZodBoolean>;
128
129
  iterations: z.ZodOptional<z.ZodNumber>;
129
130
  filesModified: z.ZodOptional<z.ZodArray<z.ZodString>>;
130
131
  testsRun: z.ZodOptional<z.ZodNumber>;
@@ -201,6 +202,7 @@ export declare const IssueLogSchema: z.ZodObject<{
201
202
  timeout: "timeout";
202
203
  }>;
203
204
  error: z.ZodOptional<z.ZodString>;
205
+ capped: z.ZodOptional<z.ZodBoolean>;
204
206
  iterations: z.ZodOptional<z.ZodNumber>;
205
207
  filesModified: z.ZodOptional<z.ZodArray<z.ZodString>>;
206
208
  testsRun: z.ZodOptional<z.ZodNumber>;
@@ -318,6 +320,7 @@ export declare const RunLogSchema: z.ZodObject<{
318
320
  timeout: "timeout";
319
321
  }>;
320
322
  error: z.ZodOptional<z.ZodString>;
323
+ capped: z.ZodOptional<z.ZodBoolean>;
321
324
  iterations: z.ZodOptional<z.ZodNumber>;
322
325
  filesModified: z.ZodOptional<z.ZodArray<z.ZodString>>;
323
326
  testsRun: z.ZodOptional<z.ZodNumber>;
@@ -130,6 +130,13 @@ export const PhaseLogSchema = z.object({
130
130
  status: PhaseStatusSchema,
131
131
  /** Error message if failed */
132
132
  error: z.string().optional(),
133
+ /**
134
+ * Set when the phase hit its turn cap (`error_max_turns`) (#739). Distinguishes
135
+ * an incomplete-but-not-hard-failed phase (partial output preserved) from a
136
+ * genuine failure. Reuses the `"failure"` status — additive boolean rather than
137
+ * a new `PhaseStatus` enum value, to keep the persisted-log schema stable.
138
+ */
139
+ capped: z.boolean().optional(),
133
140
  /** Number of iterations (for loop phase) */
134
141
  iterations: z.number().int().nonnegative().optional(),
135
142
  /** Files modified during this phase */
@@ -116,6 +116,7 @@ export declare class StateManager {
116
116
  updatePhaseStatus(issueNumber: number, phase: Phase, status: PhaseStatus, options?: {
117
117
  error?: string;
118
118
  iteration?: number;
119
+ capped?: boolean;
119
120
  }): Promise<void>;
120
121
  /**
121
122
  * Update the overall issue status
@@ -300,6 +300,12 @@ export class StateManager {
300
300
  if (options?.iteration !== undefined) {
301
301
  phaseState.iteration = options.iteration;
302
302
  }
303
+ // Persist the turn-cap marker (#739) so a halted-on-cap phase is
304
+ // distinguishable from a genuine failure in state, not just the run-log —
305
+ // this is what makes the "reversible later" resume path first-class.
306
+ if (options?.capped !== undefined) {
307
+ phaseState.capped = options.capped;
308
+ }
303
309
  // Preserve startedAt if already set
304
310
  const existingPhase = issueState.phases[phase];
305
311
  if (existingPhase?.startedAt && status !== "pending") {