@cat-factory/orchestration 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. package/LICENSE +21 -0
  2. package/dist/container.d.ts +460 -0
  3. package/dist/container.d.ts.map +1 -0
  4. package/dist/container.js +657 -0
  5. package/dist/container.js.map +1 -0
  6. package/dist/index.d.ts +29 -0
  7. package/dist/index.d.ts.map +1 -0
  8. package/dist/index.js +31 -0
  9. package/dist/index.js.map +1 -0
  10. package/dist/modules/board/BoardService.d.ts +125 -0
  11. package/dist/modules/board/BoardService.d.ts.map +1 -0
  12. package/dist/modules/board/BoardService.js +496 -0
  13. package/dist/modules/board/BoardService.js.map +1 -0
  14. package/dist/modules/board/board.logic.d.ts +17 -0
  15. package/dist/modules/board/board.logic.d.ts.map +1 -0
  16. package/dist/modules/board/board.logic.js +51 -0
  17. package/dist/modules/board/board.logic.js.map +1 -0
  18. package/dist/modules/boardScan/BoardScanService.d.ts +35 -0
  19. package/dist/modules/boardScan/BoardScanService.d.ts.map +1 -0
  20. package/dist/modules/boardScan/BoardScanService.js +91 -0
  21. package/dist/modules/boardScan/BoardScanService.js.map +1 -0
  22. package/dist/modules/boardScan/board-scan.logic.d.ts +10 -0
  23. package/dist/modules/boardScan/board-scan.logic.d.ts.map +1 -0
  24. package/dist/modules/boardScan/board-scan.logic.js +26 -0
  25. package/dist/modules/boardScan/board-scan.logic.js.map +1 -0
  26. package/dist/modules/bootstrap/BootstrapService.d.ts +114 -0
  27. package/dist/modules/bootstrap/BootstrapService.d.ts.map +1 -0
  28. package/dist/modules/bootstrap/BootstrapService.js +516 -0
  29. package/dist/modules/bootstrap/BootstrapService.js.map +1 -0
  30. package/dist/modules/clarity/ClarityReviewService.d.ts +48 -0
  31. package/dist/modules/clarity/ClarityReviewService.d.ts.map +1 -0
  32. package/dist/modules/clarity/ClarityReviewService.js +63 -0
  33. package/dist/modules/clarity/ClarityReviewService.js.map +1 -0
  34. package/dist/modules/clarity/clarity.logic.d.ts +36 -0
  35. package/dist/modules/clarity/clarity.logic.d.ts.map +1 -0
  36. package/dist/modules/clarity/clarity.logic.js +98 -0
  37. package/dist/modules/clarity/clarity.logic.js.map +1 -0
  38. package/dist/modules/estimation/estimate.logic.d.ts +11 -0
  39. package/dist/modules/estimation/estimate.logic.d.ts.map +1 -0
  40. package/dist/modules/estimation/estimate.logic.js +37 -0
  41. package/dist/modules/estimation/estimate.logic.js.map +1 -0
  42. package/dist/modules/execution/AgentContextBuilder.d.ts +114 -0
  43. package/dist/modules/execution/AgentContextBuilder.d.ts.map +1 -0
  44. package/dist/modules/execution/AgentContextBuilder.js +316 -0
  45. package/dist/modules/execution/AgentContextBuilder.js.map +1 -0
  46. package/dist/modules/execution/CompanionController.d.ts +60 -0
  47. package/dist/modules/execution/CompanionController.d.ts.map +1 -0
  48. package/dist/modules/execution/CompanionController.js +216 -0
  49. package/dist/modules/execution/CompanionController.js.map +1 -0
  50. package/dist/modules/execution/ExecutionService.d.ts +874 -0
  51. package/dist/modules/execution/ExecutionService.d.ts.map +1 -0
  52. package/dist/modules/execution/ExecutionService.js +2921 -0
  53. package/dist/modules/execution/ExecutionService.js.map +1 -0
  54. package/dist/modules/execution/MergeResolver.d.ts +34 -0
  55. package/dist/modules/execution/MergeResolver.d.ts.map +1 -0
  56. package/dist/modules/execution/MergeResolver.js +81 -0
  57. package/dist/modules/execution/MergeResolver.js.map +1 -0
  58. package/dist/modules/execution/ReviewGateController.d.ts +163 -0
  59. package/dist/modules/execution/ReviewGateController.d.ts.map +1 -0
  60. package/dist/modules/execution/ReviewGateController.js +251 -0
  61. package/dist/modules/execution/ReviewGateController.js.map +1 -0
  62. package/dist/modules/execution/TesterController.d.ts +61 -0
  63. package/dist/modules/execution/TesterController.d.ts.map +1 -0
  64. package/dist/modules/execution/TesterController.js +215 -0
  65. package/dist/modules/execution/TesterController.js.map +1 -0
  66. package/dist/modules/execution/advance.d.ts +84 -0
  67. package/dist/modules/execution/advance.d.ts.map +1 -0
  68. package/dist/modules/execution/advance.js +2 -0
  69. package/dist/modules/execution/advance.js.map +1 -0
  70. package/dist/modules/execution/artifact-review.logic.d.ts +25 -0
  71. package/dist/modules/execution/artifact-review.logic.d.ts.map +1 -0
  72. package/dist/modules/execution/artifact-review.logic.js +39 -0
  73. package/dist/modules/execution/artifact-review.logic.js.map +1 -0
  74. package/dist/modules/execution/ci.logic.d.ts +101 -0
  75. package/dist/modules/execution/ci.logic.d.ts.map +1 -0
  76. package/dist/modules/execution/ci.logic.js +117 -0
  77. package/dist/modules/execution/ci.logic.js.map +1 -0
  78. package/dist/modules/execution/drive.d.ts +47 -0
  79. package/dist/modules/execution/drive.d.ts.map +1 -0
  80. package/dist/modules/execution/drive.js +112 -0
  81. package/dist/modules/execution/drive.js.map +1 -0
  82. package/dist/modules/execution/gates.d.ts +97 -0
  83. package/dist/modules/execution/gates.d.ts.map +1 -0
  84. package/dist/modules/execution/gates.js +2 -0
  85. package/dist/modules/execution/gates.js.map +1 -0
  86. package/dist/modules/execution/individualVendors.logic.d.ts +22 -0
  87. package/dist/modules/execution/individualVendors.logic.d.ts.map +1 -0
  88. package/dist/modules/execution/individualVendors.logic.js +33 -0
  89. package/dist/modules/execution/individualVendors.logic.js.map +1 -0
  90. package/dist/modules/execution/job.logic.d.ts +52 -0
  91. package/dist/modules/execution/job.logic.d.ts.map +1 -0
  92. package/dist/modules/execution/job.logic.js +56 -0
  93. package/dist/modules/execution/job.logic.js.map +1 -0
  94. package/dist/modules/execution/release.logic.d.ts +43 -0
  95. package/dist/modules/execution/release.logic.d.ts.map +1 -0
  96. package/dist/modules/execution/release.logic.js +49 -0
  97. package/dist/modules/execution/release.logic.js.map +1 -0
  98. package/dist/modules/execution/retry.logic.d.ts +40 -0
  99. package/dist/modules/execution/retry.logic.d.ts.map +1 -0
  100. package/dist/modules/execution/retry.logic.js +83 -0
  101. package/dist/modules/execution/retry.logic.js.map +1 -0
  102. package/dist/modules/execution/stepGating.logic.d.ts +15 -0
  103. package/dist/modules/execution/stepGating.logic.d.ts.map +1 -0
  104. package/dist/modules/execution/stepGating.logic.js +29 -0
  105. package/dist/modules/execution/stepGating.logic.js.map +1 -0
  106. package/dist/modules/execution/stepResolvers.d.ts +41 -0
  107. package/dist/modules/execution/stepResolvers.d.ts.map +1 -0
  108. package/dist/modules/execution/stepResolvers.js +2 -0
  109. package/dist/modules/execution/stepResolvers.js.map +1 -0
  110. package/dist/modules/execution/tester-infra.logic.d.ts +42 -0
  111. package/dist/modules/execution/tester-infra.logic.d.ts.map +1 -0
  112. package/dist/modules/execution/tester-infra.logic.js +46 -0
  113. package/dist/modules/execution/tester-infra.logic.js.map +1 -0
  114. package/dist/modules/merge/MergePresetService.d.ts +32 -0
  115. package/dist/modules/merge/MergePresetService.d.ts.map +1 -0
  116. package/dist/modules/merge/MergePresetService.js +109 -0
  117. package/dist/modules/merge/MergePresetService.js.map +1 -0
  118. package/dist/modules/modelDefaults/ModelDefaultsService.d.ts +22 -0
  119. package/dist/modules/modelDefaults/ModelDefaultsService.d.ts.map +1 -0
  120. package/dist/modules/modelDefaults/ModelDefaultsService.js +28 -0
  121. package/dist/modules/modelDefaults/ModelDefaultsService.js.map +1 -0
  122. package/dist/modules/notifications/NotificationService.d.ts +74 -0
  123. package/dist/modules/notifications/NotificationService.d.ts.map +1 -0
  124. package/dist/modules/notifications/NotificationService.js +131 -0
  125. package/dist/modules/notifications/NotificationService.js.map +1 -0
  126. package/dist/modules/observability/LlmObservabilityService.d.ts +121 -0
  127. package/dist/modules/observability/LlmObservabilityService.d.ts.map +1 -0
  128. package/dist/modules/observability/LlmObservabilityService.js +140 -0
  129. package/dist/modules/observability/LlmObservabilityService.js.map +1 -0
  130. package/dist/modules/observability/observability.logic.d.ts +57 -0
  131. package/dist/modules/observability/observability.logic.d.ts.map +1 -0
  132. package/dist/modules/observability/observability.logic.js +186 -0
  133. package/dist/modules/observability/observability.logic.js.map +1 -0
  134. package/dist/modules/pipelines/PipelineService.d.ts +54 -0
  135. package/dist/modules/pipelines/PipelineService.d.ts.map +1 -0
  136. package/dist/modules/pipelines/PipelineService.js +226 -0
  137. package/dist/modules/pipelines/PipelineService.js.map +1 -0
  138. package/dist/modules/pipelines/pipelineShape.d.ts +53 -0
  139. package/dist/modules/pipelines/pipelineShape.d.ts.map +1 -0
  140. package/dist/modules/pipelines/pipelineShape.js +74 -0
  141. package/dist/modules/pipelines/pipelineShape.js.map +1 -0
  142. package/dist/modules/recurring/RecurringPipelineService.d.ts +76 -0
  143. package/dist/modules/recurring/RecurringPipelineService.d.ts.map +1 -0
  144. package/dist/modules/recurring/RecurringPipelineService.js +295 -0
  145. package/dist/modules/recurring/RecurringPipelineService.js.map +1 -0
  146. package/dist/modules/recurring/TrackerSettingsService.d.ts +16 -0
  147. package/dist/modules/recurring/TrackerSettingsService.d.ts.map +1 -0
  148. package/dist/modules/recurring/TrackerSettingsService.js +30 -0
  149. package/dist/modules/recurring/TrackerSettingsService.js.map +1 -0
  150. package/dist/modules/recurring/schedule.logic.d.ts +14 -0
  151. package/dist/modules/recurring/schedule.logic.d.ts.map +1 -0
  152. package/dist/modules/recurring/schedule.logic.js +85 -0
  153. package/dist/modules/recurring/schedule.logic.js.map +1 -0
  154. package/dist/modules/releaseHealth/ReleaseHealthService.d.ts +38 -0
  155. package/dist/modules/releaseHealth/ReleaseHealthService.d.ts.map +1 -0
  156. package/dist/modules/releaseHealth/ReleaseHealthService.js +96 -0
  157. package/dist/modules/releaseHealth/ReleaseHealthService.js.map +1 -0
  158. package/dist/modules/requirements/RequirementReviewService.d.ts +48 -0
  159. package/dist/modules/requirements/RequirementReviewService.d.ts.map +1 -0
  160. package/dist/modules/requirements/RequirementReviewService.js +83 -0
  161. package/dist/modules/requirements/RequirementReviewService.js.map +1 -0
  162. package/dist/modules/requirements/requirements.logic.d.ts +93 -0
  163. package/dist/modules/requirements/requirements.logic.d.ts.map +1 -0
  164. package/dist/modules/requirements/requirements.logic.js +203 -0
  165. package/dist/modules/requirements/requirements.logic.js.map +1 -0
  166. package/dist/modules/review/IterativeReviewService.d.ts +175 -0
  167. package/dist/modules/review/IterativeReviewService.d.ts.map +1 -0
  168. package/dist/modules/review/IterativeReviewService.js +327 -0
  169. package/dist/modules/review/IterativeReviewService.js.map +1 -0
  170. package/dist/modules/serviceFragmentDefaults/ServiceFragmentDefaultsService.d.ts +20 -0
  171. package/dist/modules/serviceFragmentDefaults/ServiceFragmentDefaultsService.d.ts.map +1 -0
  172. package/dist/modules/serviceFragmentDefaults/ServiceFragmentDefaultsService.js +26 -0
  173. package/dist/modules/serviceFragmentDefaults/ServiceFragmentDefaultsService.js.map +1 -0
  174. package/dist/modules/services/ServiceMountService.d.ts +48 -0
  175. package/dist/modules/services/ServiceMountService.d.ts.map +1 -0
  176. package/dist/modules/services/ServiceMountService.js +90 -0
  177. package/dist/modules/services/ServiceMountService.js.map +1 -0
  178. package/dist/modules/settings/WorkspaceSettingsService.d.ts +22 -0
  179. package/dist/modules/settings/WorkspaceSettingsService.d.ts.map +1 -0
  180. package/dist/modules/settings/WorkspaceSettingsService.js +50 -0
  181. package/dist/modules/settings/WorkspaceSettingsService.js.map +1 -0
  182. package/package.json +41 -0
@@ -0,0 +1,2921 @@
1
+ import { parseBlueprintService, parseOnCallAssessment, parseSpecDoc, DEFAULT_COMPANION_MAX_ATTEMPTS, } from '@cat-factory/contracts';
2
+ import { companionFor, companionTargets, isCompanionKind, TASK_ESTIMATOR_AGENT_KIND, } from '@cat-factory/agents';
3
+ import { coerceTaskEstimate, summarizeEstimate } from '../estimation/estimate.logic.js';
4
+ import { validatePipelineShape } from '../pipelines/pipelineShape.js';
5
+ import { shouldRunGatedStep } from './stepGating.logic.js';
6
+ import { reviewableArtifactOutput } from './artifact-review.logic.js';
7
+ import { resolveIndividualVendors, } from './individualVendors.logic.js';
8
+ import { assertFound, ConflictError, getErrorMessage, isModelUsable, NotFoundError, sameSubtasks, ValidationError, } from '@cat-factory/kernel';
9
+ import { DEFAULT_MERGE_PRESET } from '@cat-factory/kernel';
10
+ import { aggregateCi, CI_AGENT_KIND, CI_FIXER_AGENT_KIND, CONFLICTS_AGENT_KIND, CONFLICT_RESOLVER_AGENT_KIND, describeFailingChecks, listFailingChecks, isCiGreen, MERGER_AGENT_KIND, REQUIREMENTS_REVIEW_AGENT_KIND, CLARITY_REVIEW_AGENT_KIND, BUG_INVESTIGATOR_AGENT_KIND, TRACKER_AGENT_KIND, ANALYSIS_AGENT_KIND, TESTER_AGENT_KIND, } from './ci.logic.js';
11
+ import { POST_RELEASE_HEALTH_AGENT_KIND, ON_CALL_AGENT_KIND, classifyReleaseHealth, describeRegressedSignals, } from './release.logic.js';
12
+ import { AgentContextBuilder } from './AgentContextBuilder.js';
13
+ import { CompanionController } from './CompanionController.js';
14
+ import { MergeResolver } from './MergeResolver.js';
15
+ import { ReviewGateController } from './ReviewGateController.js';
16
+ import { TesterController } from './TesterController.js';
17
+ import { isAsyncAgentExecutor } from '@cat-factory/kernel';
18
+ import { isDeployStep } from '@cat-factory/integrations';
19
+ import { descendantIds, serviceOf } from '../board/board.logic.js';
20
+ import { requireWorkspace } from '@cat-factory/kernel';
21
+ import { planResumedSteps, planRestartFromStep } from './retry.logic.js';
22
+ import { isContainerEvictionError, isTransientEviction, MAX_EVICTION_RECOVERIES, MAX_TRANSIENT_EVICTION_RECOVERIES, } from './job.logic.js';
23
+ import { decideTesterInfra, TESTER_INFRA_MESSAGES } from './tester-infra.logic.js';
24
/**
 * Upper bound on `conflict-resolver` escalations before the conflicts gate stops trying.
 * Intentionally much lower than the CI budget (`ciMaxAttempts`, default 10): every
 * conflict retry re-merges the SAME base and gains no new signal, so further attempts
 * only spend containers on an identical conflict. Three attempts leave room for model
 * variance in the (conflict-aware) resolver before failing fast to a
 * manual-resolution notification.
 */
const CONFLICT_RESOLVER_MAX_ATTEMPTS = 3;

/**
 * Per-failure-kind "what to do next" copy for a failed pipeline run, displayed beneath
 * the failure banner on the board (the counterpart of bootstrap's FAILURE_HINTS).
 * Keys cover only the execution-relevant subset of {@link AgentFailureKind}.
 */
const EXECUTION_FAILURE_HINTS = {
    agent:
        'An agent step failed after its automatic retries. Review the run, then retry to re-run the pipeline.',
    job_failed:
        'The implementation container reported a failure. Inspect its logs (Cloudflare Workers Observability, filtered by the run id), then retry to spin a fresh container.',
    evicted:
        'The implementation container kept vanishing mid-run even after automatic fresh-container restarts. Most often this is transient: a deploy / new-version rollout draining the container, in which case simply retrying once the rollout has finished succeeds. If it persists, it points at a memory or crash issue on the run — inspect its logs (Cloudflare Workers Observability, filtered by the run id) and consider a heavier container instance type. Retry to try again.',
    timeout:
        'The run exceeded its time budget — a step or the implementation job did not finish in time. Retry to start it again.',
    rejected:
        'You rejected this step’s proposal, stopping the run. Retry to re-run the pipeline from the rejected step.',
    companion_rejected:
        'A companion agent could not return a usable quality assessment (its reply was truncated or malformed) even after a repair retry. Review the companion’s raw output on the run, then retry.',
    cancelled:
        'You stopped this run; its container was killed. Retry to start it again.',
    unknown:
        'The run failed for an unclassified reason. Review the run, then retry.',
};
47
/**
 * Turn a fractional score (expected in the 0..1 range) into a whole-number
 * percentage string such as `"42%"`, for use in notification copy.
 */
function pct(score) {
    const wholePercent = Math.round(score * 100);
    return wholePercent + '%';
}
51
/**
 * Extract the `owner` and `repo` path segments from a pull-request URL such as
 * `https://github.com/o/r/pull/42`.
 *
 * The host is deliberately not checked (so GitHub Enterprise hosts work too); only the
 * `/owner/repo/...` path shape is required.
 *
 * @param url The pull-request URL to inspect.
 * @returns `{ owner, repo }`, or `undefined` when the URL does not carry both segments.
 */
function parseRepoFromPullUrl(url) {
    const segments = /^https?:\/\/[^/]+\/([^/]+)\/([^/]+)\//.exec(url);
    if (segments === null) {
        return undefined;
    }
    const [, owner, repo] = segments;
    return { owner, repo };
}
62
/**
 * Render the Datadog evidence bundle into the prior-output text the on-call agent reads:
 * the regressed monitors/SLOs, recent error groups, any free-form notes, and the
 * investigation brief (correlate the diff with the signals, return a JSON assessment,
 * do NOT revert).
 *
 * A missing (`null`/`undefined`) `regressedSignals` or `errors` list is treated as empty,
 * so a partial bundle still yields the heading and the brief instead of throwing.
 *
 * @param evidence Bundle with optional `regressedSignals`, `errors` and `notes`.
 * @returns The rendered text, lines joined with `\n`.
 */
function renderReleaseEvidence(evidence) {
    const regressedSignals = evidence.regressedSignals ?? [];
    const errors = evidence.errors ?? [];
    const lines = ['## Post-release regression evidence', ''];
    if (regressedSignals.length > 0) {
        lines.push('Regressed signals:');
        for (const s of regressedSignals) {
            lines.push(`- ${s.kind} "${s.name}" (${s.id}): ${s.state}${s.detail ? ` — ${s.detail}` : ''}`);
        }
        lines.push('');
    }
    if (errors.length > 0) {
        lines.push('Recent errors:');
        for (const e of errors) {
            // `count` may legitimately be 0, so only null/undefined suppresses it.
            lines.push(`- ${e.title}${e.count != null ? ` ×${e.count}` : ''}${e.sampleMessage ? ` — ${e.sampleMessage}` : ''}`);
        }
        lines.push('');
    }
    if (evidence.notes) {
        lines.push(evidence.notes, '');
    }
    lines.push('Investigate whether THIS PR is the likely cause: correlate its diff with the regressed ' +
        'signals and errors above (and the service logs). Beware correlation ≠ causation. Return a ' +
        'JSON assessment: { "culpritConfidence": 0..1, "recommendation": "revert"|"hold"|"monitor", ' +
        '"rationale": "…", "evidence": ["…"] }. Do NOT make commits or revert anything — a human decides.');
    return lines.join('\n');
}
91
+ /**
92
+ * The execution engine. It orchestrates a pipeline of agent-performed steps and
93
+ * is fully deterministic: `advanceInstance` moves one run forward by exactly one
94
+ * step, delegating the actual work — and the choice of whether to pause for a
95
+ * human decision — to the injected {@link AgentExecutor}. The durable workflow
96
+ * driver calls it in a loop. All LLM behaviour lives behind that port, so the
97
+ * engine here can be tested with a
98
+ * deterministic fake and no timing/delays.
99
+ */
100
+ export class ExecutionService {
101
+ workspaceRepository;
102
+ blockRepository;
103
+ pipelineRepository;
104
+ executionRepository;
105
+ accountRepository;
106
+ idGenerator;
107
+ clock;
108
+ agentExecutor;
109
+ workRunner;
110
+ events;
111
+ board;
112
+ spend;
113
+ requirementReviewService;
114
+ clarityReviewService;
115
+ environmentProvisioning;
116
+ /** Assembles the per-step agent context (requirements, docs, env, service frame, fragments). */
117
+ contextBuilder;
118
+ /** Resolves a `merger` step's assessment into an auto-merge or a `merge_review` notification. */
119
+ mergeResolver;
120
+ /** Drives a companion (reviewer/spec/architect) step: grade → pass / loop producer / park. */
121
+ companionController;
122
+ /** Drives the Tester gate's fix loop: report → greenlight / dispatch fixer / fail. */
123
+ testerController;
124
+ /** Drives both iterative review gates (requirements + clarity); kind-parameterised. */
125
+ reviewGate;
126
+ /** The requirements subject for {@link reviewGate}. */
127
+ requirementsKind;
128
+ /** The clarity (bug-report triage) subject for {@link reviewGate}. */
129
+ clarityKind;
130
+ blueprintReconciler;
131
+ notificationService;
132
+ workspaceSettingsService;
133
+ llmObservability;
134
+ ciStatusProvider;
135
+ mergeabilityProvider;
136
+ releaseHealthProvider;
137
+ incidentEnrichment;
138
+ prMerger;
139
+ mergePresetRepository;
140
+ ticketTrackerProvider;
141
+ subscriptionActivations;
142
+ resolveProviderCapabilities;
143
+ resolveWorkspaceModelDefault;
144
+ /** Whether the runtime can run the Tester's local DinD infra (false = limited mode). */
145
+ localTestInfraSupported;
146
+ /** Lazily-built polling-gate registry, keyed by `agentKind`. See {@link gateFor}. */
147
+ gateRegistryCache;
148
+ /**
149
+ * Lazily-built post-completion resolver registry, keyed by `agentKind`. See
150
+ * {@link stepResolverFor} and {@link StepCompletionResolver}.
151
+ */
152
+ stepResolverCache;
153
+ constructor({ workspaceRepository, blockRepository, pipelineRepository, executionRepository, accountRepository, idGenerator, clock, agentExecutor, workRunner, executionEventPublisher, boardService, spendService, documentRepository, taskRepository, requirementReviewRepository, requirementReviewService, clarityReviewRepository, clarityReviewService, environmentProvisioning, blueprintReconciler, notificationService, workspaceSettingsService, llmObservability, ciStatusProvider, mergeabilityProvider, releaseHealthProvider, incidentEnrichment, pullRequestMerger, mergePresetRepository, ticketTrackerProvider, subscriptionActivationRepository, resolveWorkspaceModelDefault, resolveProviderCapabilities, localTestInfraSupported, }) {
154
+ this.workspaceRepository = workspaceRepository;
155
+ this.blockRepository = blockRepository;
156
+ this.pipelineRepository = pipelineRepository;
157
+ this.executionRepository = executionRepository;
158
+ this.accountRepository = accountRepository;
159
+ this.idGenerator = idGenerator;
160
+ this.clock = clock;
161
+ this.agentExecutor = agentExecutor;
162
+ this.workRunner = workRunner;
163
+ this.events = executionEventPublisher;
164
+ this.board = boardService;
165
+ this.spend = spendService;
166
+ this.requirementReviewService = requirementReviewService;
167
+ this.clarityReviewService = clarityReviewService;
168
+ this.environmentProvisioning = environmentProvisioning;
169
+ this.contextBuilder = new AgentContextBuilder({
170
+ workspaceRepository,
171
+ blockRepository,
172
+ accountRepository,
173
+ documents: documentRepository,
174
+ tasks: taskRepository,
175
+ requirementReviews: requirementReviewRepository,
176
+ clarityReviews: clarityReviewRepository,
177
+ environmentProvisioning,
178
+ });
179
+ this.mergeResolver = new MergeResolver({
180
+ blockRepository,
181
+ notificationService,
182
+ resolveMergePreset: (ws, block) => this.resolveMergePreset(ws, block),
183
+ finalizeMerge: (ws, blockId) => this.finalizeMerge(ws, blockId),
184
+ });
185
+ this.companionController = new CompanionController({
186
+ contextBuilder: this.contextBuilder,
187
+ spend: spendService,
188
+ idGenerator,
189
+ previewStepModel: (ctx) => this.previewStepModel(ctx),
190
+ runAgent: (ctx, opts) => this.runAgent(ctx, opts),
191
+ finishStep: (s) => this.finishStep(s),
192
+ startStep: (s) => this.startStep(s),
193
+ pauseStepForInput: (s) => this.pauseStepForInput(s),
194
+ updateBlockProgress: (ws, i, st) => this.updateBlockProgress(ws, i, st),
195
+ persistInstance: (ws, i) => this.executionRepository.upsert(ws, i),
196
+ emitInstance: (ws, i) => this.emitInstance(ws, i),
197
+ stopRunContainer: (ws, i) => this.stopRunContainer(ws, i),
198
+ finalizeBlock: (ws, i, c) => this.finalizeBlock(ws, i, c),
199
+ parkStepOnDecision: (ws, i, s, p) => this.parkStepOnDecision(ws, i, s, p),
200
+ raiseDecisionRequired: (ws, i) => this.raiseDecisionRequired(ws, i),
201
+ loopCompanionProducer: (i, ci, rw) => this.loopCompanionProducer(i, ci, rw),
202
+ });
203
+ this.testerController = new TesterController({
204
+ blockRepository,
205
+ notificationService,
206
+ agentExecutor,
207
+ contextBuilder: this.contextBuilder,
208
+ resolveMergePreset: (ws, block) => this.resolveMergePreset(ws, block),
209
+ stopRunContainer: (ws, i) => this.stopRunContainer(ws, i),
210
+ persistInstance: (ws, i) => this.executionRepository.upsert(ws, i),
211
+ emitInstance: (ws, i) => this.emitInstance(ws, i),
212
+ });
213
+ this.reviewGate = new ReviewGateController({
214
+ blockRepository,
215
+ executionRepository,
216
+ workRunner,
217
+ resolveMergePreset: (ws, block) => this.resolveMergePreset(ws, block),
218
+ parkStepOnDecision: (ws, i, s, p) => this.parkStepOnDecision(ws, i, s, p),
219
+ advancePastResolvedGate: (ws, i, idx) => this.advancePastResolvedGate(ws, i, idx),
220
+ dispatchIterationCap: (ws, blockId, choice, handlers) => this.dispatchIterationCap(ws, blockId, choice, handlers),
221
+ raiseDecisionRequired: (ws, i) => this.raiseDecisionRequired(ws, i),
222
+ finishStep: (s) => this.finishStep(s),
223
+ startStep: (s) => this.startStep(s),
224
+ updateBlockProgress: (ws, i, st) => this.updateBlockProgress(ws, i, st),
225
+ finalizeBlock: (ws, i, c) => this.finalizeBlock(ws, i, c),
226
+ stopRunContainer: (ws, i) => this.stopRunContainer(ws, i),
227
+ persistInstance: (ws, i) => this.executionRepository.upsert(ws, i),
228
+ emitInstance: (ws, i) => this.emitInstance(ws, i),
229
+ });
230
+ this.requirementsKind = this.buildRequirementsKind();
231
+ this.clarityKind = this.buildClarityKind();
232
+ this.blueprintReconciler = blueprintReconciler;
233
+ this.notificationService = notificationService;
234
+ this.workspaceSettingsService = workspaceSettingsService;
235
+ this.llmObservability = llmObservability;
236
+ this.ciStatusProvider = ciStatusProvider;
237
+ this.mergeabilityProvider = mergeabilityProvider;
238
+ this.releaseHealthProvider = releaseHealthProvider;
239
+ this.incidentEnrichment = incidentEnrichment;
240
+ this.prMerger = pullRequestMerger;
241
+ this.mergePresetRepository = mergePresetRepository;
242
+ this.ticketTrackerProvider = ticketTrackerProvider;
243
+ this.subscriptionActivations = subscriptionActivationRepository;
244
+ this.resolveWorkspaceModelDefault = resolveWorkspaceModelDefault;
245
+ this.resolveProviderCapabilities = resolveProviderCapabilities;
246
+ this.localTestInfraSupported = localTestInfraSupported ?? true;
247
+ }
248
+ requireWorkspace(workspaceId) {
249
+ return requireWorkspace(this.workspaceRepository, workspaceId);
250
+ }
251
+ async requireBlock(workspaceId, id) {
252
+ return assertFound(await this.blockRepository.get(workspaceId, id), 'Block', id);
253
+ }
254
+ /**
255
+ * The individual-usage subscription vendors a run STARTED against `blockId` with
256
+ * `pipelineId` will lease a personal credential for — so the controller can gate the
257
+ * run on the initiator's personal subscription(s) up-front. Mirrors the dispatch-time
258
+ * model precedence (block pin → workspace per-kind default) across every step, AND the
259
+ * per-user dispatch decision: `hasPersonalSubscription(vendor)` reports whether the
260
+ * initiator has their own subscription for a vendor, so a dual-mode model (GLM) only
261
+ * gates a subscriber (a non-subscriber runs it on the Cloudflare base, ungated).
262
+ * Defaults to "no personal subscription" for system/unauthenticated callers.
263
+ */
264
+ async individualVendorsForBlock(workspaceId, blockId, pipelineId, hasPersonalSubscription = () => false) {
265
+ const block = await this.requireBlock(workspaceId, blockId);
266
+ const pipeline = await this.pipelineRepository.get(workspaceId, pipelineId);
267
+ return this.resolveIndividualVendors(workspaceId, block.modelId, pipeline?.agentKinds ?? [], hasPersonalSubscription);
268
+ }
269
+ /** The individual-usage vendors a failed run's resumed steps use (for the retry gate). */
270
+ async individualVendorsForRun(workspaceId, executionId, hasPersonalSubscription = () => false) {
271
+ const run = await this.executionRepository.get(workspaceId, executionId);
272
+ if (!run)
273
+ return [];
274
+ const block = await this.blockRepository.get(workspaceId, run.blockId);
275
+ if (!block)
276
+ return [];
277
+ return this.resolveIndividualVendors(workspaceId, block.modelId, run.steps.map((s) => s.agentKind), hasPersonalSubscription);
278
+ }
279
+ /**
280
+ * The set of individual-usage vendors the given steps resolve to, used to gate a run
281
+ * on the initiator's personal subscription(s) up-front. Delegates to the pure
282
+ * {@link resolveIndividualVendors}, which mirrors the dispatch-time precedence: a
283
+ * resolvable block pin decides the set alone (NONE for a non-subscription model), and
284
+ * only an unpinned run falls to the workspace per-kind defaults.
285
+ */
286
+ resolveIndividualVendors(workspaceId, blockModelId, agentKinds, hasPersonalSubscription) {
287
+ const resolveDefault = this.resolveWorkspaceModelDefault;
288
+ return resolveIndividualVendors(blockModelId, agentKinds, resolveDefault ? (kind) => resolveDefault(workspaceId, kind) : undefined, hasPersonalSubscription);
289
+ }
290
+ /**
291
+ * Guard a Tester pipeline's start: local-mode testing must have its infra
292
+ * configured on the service frame — either a docker-compose path to stand the
293
+ * dependencies up, or the explicit "no infra dependencies" flag. Ephemeral-mode
294
+ * testing uses the provisioned environment, so it needs neither. Throws a
295
+ * {@link ConflictError} (surfaced as an actionable message) when neither is set.
296
+ */
297
+ async assertTesterInfraConfigured(workspaceId, block) {
298
+ const environment = block.agentConfig?.['tester.environment'] === 'local' ? 'local' : 'ephemeral';
299
+ // The service's infra config is only needed for a `local` run; an ephemeral run is
300
+ // decided by the runtime capability + whether a provider is wired.
301
+ const service = environment === 'local'
302
+ ? await this.contextBuilder.resolveServiceConfig(workspaceId, block)
303
+ : undefined;
304
+ const decision = decideTesterInfra({
305
+ localTestInfraSupported: this.localTestInfraSupported,
306
+ environment,
307
+ noInfraDependencies: service?.noInfraDependencies === true,
308
+ hasComposePath: !!service?.testComposePath,
309
+ hasEnvironmentProvider: this.environmentProvisioning !== undefined,
310
+ });
311
+ if (decision.ok)
312
+ return;
313
+ throw new ConflictError(TESTER_INFRA_MESSAGES[decision.reason]);
314
+ }
315
+ /**
316
+ * Guard a pipeline's start on having a usable provider for every step's canonical
317
+ * model. The model a step runs is resolved by the same precedence the dispatch path
318
+ * uses (block pin → workspace per-kind default); each canonical id must have a usable
319
+ * provider given what's configured — a direct API key for its provider, a connected
320
+ * subscription vendor, or the opt-in Cloudflare lib enabled. Env-routing defaults (the
321
+ * last fallback, with no catalog id) are operator-level and not gated, matching the
322
+ * personal-credential gate. A throw aborts the start cleanly before any side effects.
323
+ * Skipped when no capability resolver is wired (tests / unconfigured facades).
324
+ */
325
+ async assertProvidersConfiguredForPipeline(workspaceId, block, pipeline, initiatedBy) {
326
+ if (!this.resolveProviderCapabilities)
327
+ return;
328
+ const caps = await this.resolveProviderCapabilities(workspaceId, initiatedBy);
329
+ const unconfigured = new Set();
330
+ const check = (id) => {
331
+ if (id && !isModelUsable(id, caps))
332
+ unconfigured.add(id);
333
+ };
334
+ if (block.modelId) {
335
+ // A block-level pin applies to every step.
336
+ check(block.modelId);
337
+ }
338
+ else if (this.resolveWorkspaceModelDefault) {
339
+ for (const kind of pipeline.agentKinds) {
340
+ check(await this.resolveWorkspaceModelDefault(workspaceId, kind));
341
+ }
342
+ }
343
+ if (unconfigured.size > 0) {
344
+ throw new ConflictError(`This pipeline uses models with no configured provider: ${[...unconfigured].join(', ')}. ` +
345
+ 'Add an API key for the provider, connect a subscription, or enable Cloudflare AI ' +
346
+ 'before starting.');
347
+ }
348
+ }
349
+ /** Start a pipeline against a block, replacing any prior run on it. */
350
+ async start(workspaceId, blockId, pipelineId,
351
+ /**
352
+ * Internal user id of the initiator. Recorded on the run so an individual-usage
353
+ * model (Claude) uses this user's OWN personal subscription. Absent for
354
+ * system-initiated runs (recurring schedules) and auth-disabled dev.
355
+ */
356
+ initiatedBy,
357
+ /**
358
+ * Mint the per-run personal-credential activation for an individual-usage model.
359
+ * Invoked with the new run's id BEFORE it is persisted/dispatched, so the async
360
+ * steps can lease it; a throw (wrong/missing password) aborts the start cleanly
361
+ * with nothing persisted. The server layer supplies this (the personal store lives
362
+ * outside the domain Core); absent for non-individual runs.
363
+ */
364
+ activate) {
365
+ await this.requireWorkspace(workspaceId);
366
+ const block = await this.requireBlock(workspaceId, blockId);
367
+ const pipeline = assertFound(await this.pipelineRepository.get(workspaceId, pipelineId), 'Pipeline', pipelineId);
368
+ // Reject a structurally-invalid pipeline before any side effects — a misplaced
369
+ // companion or estimate-gating without a preceding task-estimator. The builder also
370
+ // rejects these at save, but a pipeline can become invalid out of band, so a run
371
+ // refuses to START as well (the same shared check).
372
+ validatePipelineShape(pipeline);
373
+ // A pipeline with a Tester that runs locally needs the service's test infra
374
+ // configured (a docker-compose path, or an explicit "no infra dependencies"
375
+ // flag). Block the start with a clear, actionable error otherwise — before any
376
+ // side effects (activation mint / prior-run teardown).
377
+ if (pipeline.agentKinds.includes(TESTER_AGENT_KIND)) {
378
+ await this.assertTesterInfraConfigured(workspaceId, block);
379
+ }
380
+ // Block the start when a step's canonical model has no usable provider (no direct
381
+ // key, no subscription, no Cloudflare) — before any side effects.
382
+ await this.assertProvidersConfiguredForPipeline(workspaceId, block, pipeline, initiatedBy);
383
+ // Enforce the workspace's per-service running-task limit (off by default) — a clear,
384
+ // actionable error before any side effects, so the human knows why the start was refused.
385
+ await this.assertWithinTaskLimit(workspaceId, block);
386
+ // Mint the activation next: if the credential can't be unlocked, fail before
387
+ // tearing down the block's prior run or creating a new one.
388
+ const executionId = this.idGenerator.next('exec');
389
+ await activate?.(executionId);
390
+ // Replacing the block's prior run: clear its per-run activation now (it never reaches
391
+ // the terminal cleanup in emitInstance when it's still running), so a replaced run's
392
+ // system-encrypted token copy doesn't linger to its TTL. Keyed by the OLD run id, so
393
+ // the activation just minted for the new run is untouched.
394
+ if (this.subscriptionActivations) {
395
+ const prior = await this.executionRepository.getByBlock(workspaceId, blockId);
396
+ if (prior && prior.id !== executionId) {
397
+ await this.subscriptionActivations.deleteByExecution(prior.id);
398
+ }
399
+ }
400
+ await this.executionRepository.deleteByBlock(workspaceId, blockId);
401
+ // Build the run only from the ENABLED steps. A step the pipeline marked
402
+ // `enabled[i] === false` is kept in the saved pipeline (so it can be toggled back
403
+ // on later) but skipped here entirely. Gates/thresholds are read by the kind's
404
+ // ORIGINAL index `i`, so they stay aligned to the kind even when earlier steps are
405
+ // skipped; the first SURVIVING step is the one that starts working.
406
+ const steps = pipeline.agentKinds
407
+ .map((kind, i) => ({ kind, i }))
408
+ .filter(({ i }) => pipeline.enabled?.[i] !== false)
409
+ .map(({ kind, i }, position) => {
410
+ const companionDef = companionFor(kind);
411
+ return {
412
+ agentKind: kind,
413
+ state: position === 0 ? 'working' : 'pending',
414
+ progress: 0,
415
+ decision: null,
416
+ // A gated step pauses for human approval once its proposal is ready (see
417
+ // recordStepResult). Copied from the pipeline definition at run start.
418
+ requiresApproval: pipeline.gates?.[i] ?? false,
419
+ approval: null,
420
+ // A consensus-enabled step runs through the multi-model mechanism (the consensus
421
+ // executor reads this off the context). Copied from the pipeline at run start.
422
+ ...(pipeline.consensus?.[i] ? { consensus: pipeline.consensus[i] } : {}),
423
+ // Estimate gating: when set+enabled the step is skipped at runtime unless the
424
+ // block estimate (written by an earlier task-estimator step) meets the threshold.
425
+ ...(pipeline.gating?.[i] ? { gating: pipeline.gating[i] } : {}),
426
+ // A companion step carries its quality bar + rework budget, seeded from the
427
+ // pipeline's per-step threshold (else the companion's default).
428
+ ...(companionDef
429
+ ? {
430
+ companion: {
431
+ threshold: pipeline.thresholds?.[i] ?? companionDef.defaultThreshold,
432
+ maxAttempts: DEFAULT_COMPANION_MAX_ATTEMPTS,
433
+ attempts: 0,
434
+ verdicts: [],
435
+ },
436
+ }
437
+ : {}),
438
+ };
439
+ });
440
+ if (steps.length === 0) {
441
+ throw new ValidationError('Pipeline has no enabled steps to run.');
442
+ }
443
+ const instance = {
444
+ id: executionId,
445
+ blockId,
446
+ pipelineId: pipeline.id,
447
+ pipelineName: pipeline.name,
448
+ steps,
449
+ currentStep: 0,
450
+ status: 'running',
451
+ initiatedBy: initiatedBy ?? null,
452
+ };
453
+ await this.executionRepository.upsert(workspaceId, instance);
454
+ await this.blockRepository.update(workspaceId, blockId, {
455
+ status: 'in_progress',
456
+ progress: 0,
457
+ executionId: instance.id,
458
+ });
459
+ // Hand the run off to the durable runner so it progresses server-side without
460
+ // a browser open. With the no-op runner (tests) this does nothing and the run
461
+ // is advanced directly via advanceInstance.
462
+ await this.workRunner.startRun(workspaceId, instance.id);
463
+ await this.emitInstance(workspaceId, instance);
464
+ return instance;
465
+ }
466
+ /**
467
+ * Enforce the workspace's per-service running-task limit before a task run starts.
468
+ * No-ops unless the settings module is wired, the block is a task, and a limit mode
469
+ * is active. Counts the tasks under the same service frame that already have a live
470
+ * run (running / blocked / paused) — bucketed by task type when the mode is
471
+ * `per_type`, else shared across all types — and throws a {@link ConflictError} (→ 409,
472
+ * shown as a toast) when the cap is reached. The starting block is excluded from the
473
+ * count (its prior run is about to be replaced).
474
+ */
475
+ async assertWithinTaskLimit(workspaceId, block) {
476
+ const settingsService = this.workspaceSettingsService;
477
+ if (!settingsService || block.level !== 'task')
478
+ return;
479
+ const settings = await settingsService.get(workspaceId);
480
+ if (settings.taskLimitMode === 'off')
481
+ return;
482
+ const all = await this.blockRepository.listByWorkspace(workspaceId);
483
+ const byId = new Map(all.map((b) => [b.id, b]));
484
+ // Walk up to the owning service frame.
485
+ let frame = block;
486
+ let guard = 0;
487
+ while (frame && frame.level !== 'frame' && guard++ < 1000) {
488
+ frame = frame.parentId ? byId.get(frame.parentId) : undefined;
489
+ }
490
+ if (!frame || frame.level !== 'frame')
491
+ return; // orphan task — nothing to scope a service limit to
492
+ const frameId = frame.id;
493
+ const underFrame = (b) => {
494
+ let cur = b;
495
+ let hops = 0;
496
+ while (cur && hops++ < 1000) {
497
+ if (cur.id === frameId)
498
+ return true;
499
+ cur = cur.parentId ? byId.get(cur.parentId) : undefined;
500
+ }
501
+ return false;
502
+ };
503
+ const executions = await this.executionRepository.listByWorkspace(workspaceId);
504
+ const liveBlockIds = new Set(executions
505
+ .filter((e) => e.status === 'running' || e.status === 'blocked' || e.status === 'paused')
506
+ .map((e) => e.blockId));
507
+ const siblingTasks = all.filter((b) => b.level === 'task' && b.id !== block.id && underFrame(b));
508
+ if (settings.taskLimitMode === 'shared') {
509
+ const limit = settings.taskLimitShared ?? 0;
510
+ const running = siblingTasks.filter((b) => liveBlockIds.has(b.id)).length;
511
+ if (running >= limit) {
512
+ throw new ConflictError(`"${frame.title}" is already running ${running} of ${limit} allowed task(s). ` +
513
+ `Wait for one to finish before starting another.`);
514
+ }
515
+ return;
516
+ }
517
+ // per_type: only the configured types are capped; an unconfigured type is unbounded.
518
+ const type = block.taskType ?? 'feature';
519
+ const perType = (settings.taskLimitPerType ?? {});
520
+ const limit = perType[type];
521
+ if (limit == null)
522
+ return;
523
+ const running = siblingTasks.filter((b) => liveBlockIds.has(b.id) && (b.taskType ?? 'feature') === type).length;
524
+ if (running >= limit) {
525
+ throw new ConflictError(`"${frame.title}" is already running ${running} of ${limit} allowed ${type} task(s). ` +
526
+ `Wait for one to finish before starting another ${type} task.`);
527
+ }
528
+ }
529
+ /**
530
+ * Advance a single run by exactly one step and report what happened. This is
531
+ * the durable driver's entry point: it reloads the run from storage (so it is
532
+ * safe under replay/retry), no-ops unless the run is actively running, and
533
+ * otherwise performs one agent step via the shared {@link stepInstance} logic.
534
+ */
535
+ async advanceInstance(workspaceId, executionId, options = {}) {
536
+ const instance = await this.executionRepository.get(workspaceId, executionId);
537
+ // A paused run is still drivable: the spend gate in stepInstance resumes it
538
+ // once the budget frees up (or re-pauses it otherwise).
539
+ if (!instance || (instance.status !== 'running' && instance.status !== 'paused')) {
540
+ return { kind: 'noop' };
541
+ }
542
+ const result = await this.stepInstance(workspaceId, instance, options);
543
+ // Whenever a run parks waiting for a human, make sure there is an open notification
544
+ // for it — runs no longer time out, so the (escalating) notification is the only
545
+ // signal a human is needed. Best-effort and non-clobbering (see the helper).
546
+ // Conversely, once the run advances past the decision (the human responded, or it
547
+ // auto-passed, or the run reached a terminal state) clear that waiting card so the
548
+ // escalation sweep can't later flip a settled decision red ("Overdue").
549
+ if (result.kind === 'awaiting_decision') {
550
+ await this.ensureWaitingNotification(workspaceId, instance);
551
+ }
552
+ else {
553
+ await this.clearWaitingNotification(workspaceId, instance);
554
+ }
555
+ return result;
556
+ }
557
+ /** Advance a single running instance by one step, persisting the result. */
558
+ async stepInstance(workspaceId, instance, options = {}) {
559
+ const step = instance.steps[instance.currentStep];
560
+ if (!step)
561
+ return { kind: 'noop' };
562
+ // Spend gate: don't incur monetary LLM cost once the budget is exhausted. Pause
563
+ // the run (so the frontend can flag it) and stop here. A previously-paused run
564
+ // that finds the budget has freed up resumes and proceeds. EXEMPTION: a step that
565
+ // runs on a flat-rate subscription (quota) model — Claude Code / Codex on a pooled
566
+ // token — incurs no metered monetary cost and never contributes to the budget, so
567
+ // it must not be held hostage by a budget other (metered) models exhausted.
568
+ if (await this.spend.isOverBudget()) {
569
+ if (!(await this.currentStepIsQuotaBased(workspaceId, instance, step))) {
570
+ if (instance.status !== 'paused') {
571
+ instance.status = 'paused';
572
+ await this.executionRepository.upsert(workspaceId, instance);
573
+ await this.emitInstance(workspaceId, instance);
574
+ }
575
+ return { kind: 'paused' };
576
+ }
577
+ }
578
+ if (instance.status === 'paused')
579
+ instance.status = 'running';
580
+ if (step.state === 'waiting_decision') {
581
+ // The requirements gate is re-entrant: when the human answers the findings and asks
582
+ // to incorporate, a `pendingIncorporation` marker is set on the parked step and the
583
+ // run is signalled to wake. Fall through so the gate re-evaluates — folding the
584
+ // answers and re-reviewing in the durable driver (the LLM work that used to block the
585
+ // HTTP request) — instead of immediately re-parking. Every other parked step (and a
586
+ // requirements gate with nothing pending) re-parks on its durable decision id.
587
+ const reentrantRequirements = (step.agentKind === REQUIREMENTS_REVIEW_AGENT_KIND ||
588
+ step.agentKind === CLARITY_REVIEW_AGENT_KIND) &&
589
+ !!step.pendingIncorporation;
590
+ if (!reentrantRequirements) {
591
+ // Parked on either an agent-raised decision or a human approval gate; both
592
+ // are addressed by the same durable event id.
593
+ const pendingId = step.decision?.id ?? step.approval?.id;
594
+ if (pendingId) {
595
+ instance.status = 'blocked';
596
+ await this.executionRepository.upsert(workspaceId, instance);
597
+ await this.emitInstance(workspaceId, instance);
598
+ return { kind: 'awaiting_decision', decisionId: pendingId };
599
+ }
600
+ }
601
+ }
602
+ this.startStep(step);
603
+ const block = await this.blockRepository.get(workspaceId, instance.blockId);
604
+ if (!block)
605
+ return { kind: 'noop' };
606
+ const isFinalStep = instance.currentStep === instance.steps.length - 1;
607
+ // Estimate gating: a step gated on the task estimate (today a conditional companion)
608
+ // is transparently SKIPPED when the estimate — written by an earlier task-estimator
609
+ // step in this same run — falls below the threshold. No agent is spun up; the step
610
+ // finishes as `skipped` and the run advances. Evaluated here (not at build time)
611
+ // because the estimate only exists once the estimator step has run.
612
+ if (step.gating?.enabled && !shouldRunGatedStep(block.estimate, step.gating)) {
613
+ return this.skipGatedStep(workspaceId, instance, step, isFinalStep);
614
+ }
615
+ // A `deployer` step provisions an ephemeral environment deterministically via
616
+ // the provider — no LLM, no token usage — when the integration is wired.
617
+ // Otherwise it falls through to the normal agent path.
618
+ if (this.environmentProvisioning && isDeployStep(step.agentKind)) {
619
+ const result = await this.runDeployer(workspaceId, instance, block, options);
620
+ return this.recordStepResult(workspaceId, instance, step, isFinalStep, result);
621
+ }
622
+ // A `tracker` step files a GitHub issue / Jira ticket from the preceding
623
+ // `analysis` output (the tech-debt pipeline) — no LLM of its own. It is a
624
+ // pass-through when no tracker provider is wired or none is configured for the
625
+ // workspace. See {@link runTracker}.
626
+ if (step.agentKind === TRACKER_AGENT_KIND) {
627
+ const result = await this.runTracker(workspaceId, instance, block);
628
+ return this.recordStepResult(workspaceId, instance, step, isFinalStep, result);
629
+ }
630
+ // A `requirements-review` step runs the inline reviewer and parks for the dedicated
631
+ // review window, driving the iterative answer → incorporate → re-review loop. NOT a
632
+ // container/prose agent. Pass-through when the reviewer isn't wired. The clarity gate
633
+ // shares the SAME flow (only the subject + persisted doc differ); both run through the
634
+ // {@link ReviewGateController}, parameterised by their {@link ReviewKind}.
635
+ if (step.agentKind === REQUIREMENTS_REVIEW_AGENT_KIND) {
636
+ return this.reviewGate.evaluate(this.requirementsKind, workspaceId, instance, step, block, isFinalStep);
637
+ }
638
+ // A `clarity-review` step triages the block's bug report (optionally enriched by an
639
+ // upstream `bug-investigator` step) and parks for the dedicated review window, driving
640
+ // the same iterative loop as the requirements gate. NOT a container/prose agent.
641
+ // Pass-through when the reviewer isn't wired.
642
+ if (step.agentKind === CLARITY_REVIEW_AGENT_KIND) {
643
+ return this.reviewGate.evaluate(this.clarityKind, workspaceId, instance, step, block, isFinalStep);
644
+ }
645
+ // A polling gate step (`ci` / `conflicts`) runs a programmatic precheck and only
646
+ // escalates to a helper container agent (`ci-fixer` / `conflict-resolver`) on a
647
+ // negative verdict — no LLM of its own. Pass-through when the gate's provider is
648
+ // not wired. One generic machine drives every gate; see {@link evaluateGate}.
649
+ const gate = this.gateFor(step.agentKind);
650
+ if (gate) {
651
+ return this.evaluateGate(workspaceId, instance, step, block, isFinalStep, gate);
652
+ }
653
+ // A companion step grades the nearest preceding producer of one of its target
654
+ // kinds, looping it back for automatic rework below the threshold (and failing
655
+ // the run once the budget is spent) before any human gate. See evaluateCompanion.
656
+ if (isCompanionKind(step.agentKind)) {
657
+ return this.companionController.evaluate(workspaceId, instance, step, block, isFinalStep, options);
658
+ }
659
+ // Async (container) steps don't block: dispatch the job and park. The durable
660
+ // driver polls `pollAgentJob` between sleeps so the run can span far longer
661
+ // than a single durable step's timeout, while each step stays short. A set
662
+ // `jobId` means a prior (possibly replayed) dispatch already started the job,
663
+ // so we re-attach instead of starting a duplicate.
664
+ const context = await this.contextBuilder.buildContext(workspaceId, instance, step, isFinalStep, block);
665
+ const executor = this.agentExecutor;
666
+ if (isAsyncAgentExecutor(executor) && executor.runsAsync(context)) {
667
+ if (!step.jobId) {
668
+ // The model is fixed the moment its ref resolves (block pin > workspace
669
+ // default > env routing) — long before the container is up — so name it on
670
+ // the very first "spinning up container" emit instead of waiting for the
671
+ // dispatch to return. startJob confirms the same value below.
672
+ const previewModel = await this.previewStepModel(context);
673
+ if (previewModel)
674
+ step.model = previewModel;
675
+ // Surface an explicit "spinning up container" phase for the cold-boot
676
+ // window: dispatch blocks until the per-run container is up and has
677
+ // accepted the job, so emitting before it lets the board show the boot
678
+ // instead of a blank "working" state.
679
+ step.startingContainer = true;
680
+ await this.executionRepository.upsert(workspaceId, instance);
681
+ await this.emitInstance(workspaceId, instance);
682
+ const handle = await executor.startJob(context);
683
+ step.jobId = handle.jobId;
684
+ // Record the model at dispatch — the poll site can't resolve it later.
685
+ if (handle.model)
686
+ step.model = handle.model;
687
+ // The dispatch returned, so the container is up and execution has begun.
688
+ step.startingContainer = false;
689
+ await this.executionRepository.upsert(workspaceId, instance);
690
+ await this.emitInstance(workspaceId, instance);
691
+ }
692
+ return { kind: 'awaiting_job', jobId: step.jobId, stepIndex: instance.currentStep };
693
+ }
694
+ // Inline path: the model is resolved before the (blocking) LLM call, so surface
695
+ // it now — the board names the model while the step is querying instead of only
696
+ // once the result lands. recordStepResult re-asserts it from the result.
697
+ const previewModel = await this.previewStepModel(context);
698
+ if (previewModel && previewModel !== step.model) {
699
+ step.model = previewModel;
700
+ await this.executionRepository.upsert(workspaceId, instance);
701
+ await this.emitInstance(workspaceId, instance);
702
+ }
703
+ const result = await this.runAgent(context, options);
704
+ return this.recordStepResult(workspaceId, instance, step, isFinalStep, result);
705
+ }
706
+ /**
707
+ * Preview the model a step will run (`provider:model`) ahead of the work, so the
708
+ * board can show it during the inline query / container cold-boot rather than only
709
+ * once the result or job handle lands. Best-effort: the executor may not implement
710
+ * a preview, and a resolution failure (e.g. an unwired container kind that fails at
711
+ * dispatch anyway) must never break the run — both yield undefined.
712
+ */
713
+ async previewStepModel(context) {
714
+ if (!this.agentExecutor.resolveModel)
715
+ return undefined;
716
+ try {
717
+ return await this.agentExecutor.resolveModel(context);
718
+ }
719
+ catch {
720
+ return undefined;
721
+ }
722
+ }
723
+ /**
724
+ * Whether the current step will run on a flat-rate subscription (quota) model, so
725
+ * the spend gate can let it proceed even when the monetary budget is exhausted.
726
+ * Resolved through the executor (the authority on the "subscriptions always win"
727
+ * routing) off a full step context. Best-effort and side-effect-free: an executor
728
+ * without the capability, a missing block, or any resolution error all report false
729
+ * (the step is treated as budget-metered, the prior behaviour). Only consulted on
730
+ * the over-budget path, so the extra context build never touches the happy path.
731
+ */
732
+ async currentStepIsQuotaBased(workspaceId, instance, step) {
733
+ if (!this.agentExecutor.isQuotaBased)
734
+ return false;
735
+ try {
736
+ const block = await this.blockRepository.get(workspaceId, instance.blockId);
737
+ if (!block)
738
+ return false;
739
+ const isFinalStep = instance.currentStep === instance.steps.length - 1;
740
+ const context = await this.contextBuilder.buildContext(workspaceId, instance, step, isFinalStep, block);
741
+ return await this.agentExecutor.isQuotaBased(context);
742
+ }
743
+ catch {
744
+ return false;
745
+ }
746
+ }
747
+ /**
748
+ * Poll the asynchronous job a parked step dispatched. Returns `awaiting_job`
749
+ * while it runs (the driver keeps polling), records the result and advances on
750
+ * success, or reports `job_failed` so the driver can fail the run. Reading run
751
+ * state from storage on every call keeps it safe under Workflows replay/retry:
752
+ * once a job's result is recorded the step's `jobId` is cleared, so a re-poll
753
+ * simply lets the driver advance the now-current step.
754
+ */
755
+ async pollAgentJob(workspaceId, executionId) {
756
+ const instance = await this.executionRepository.get(workspaceId, executionId);
757
+ if (!instance || (instance.status !== 'running' && instance.status !== 'paused')) {
758
+ return { kind: 'noop' };
759
+ }
760
+ const step = instance.steps[instance.currentStep];
761
+ if (!step)
762
+ return { kind: 'noop' };
763
+ // No job in flight: a prior poll already recorded it (and advanced). Let the
764
+ // driver loop and advance whatever step is now current.
765
+ if (!step.jobId)
766
+ return { kind: 'continue' };
767
+ const executor = this.agentExecutor;
768
+ if (!isAsyncAgentExecutor(executor))
769
+ return { kind: 'noop' };
770
+ // Re-supply the run id alongside the per-step job id so the executor can address
771
+ // the same per-run container at the poll site (it only stored the per-step jobId).
772
+ const update = await executor.pollJob({ jobId: step.jobId, runId: executionId, workspaceId });
773
+ if (update.state === 'running') {
774
+ // A successful poll proves the container is up, so the cold-boot phase is
775
+ // over (defensive: a replay may have left the flag set). Surface live subtask
776
+ // progress (e.g. 3/8 todos done) without advancing the step. Only persist +
777
+ // emit when something actually changed so an idle poll doesn't churn storage
778
+ // or the event stream.
779
+ let changed = false;
780
+ if (step.startingContainer) {
781
+ step.startingContainer = false;
782
+ changed = true;
783
+ }
784
+ if (update.subtasks && !sameSubtasks(step.subtasks, update.subtasks)) {
785
+ step.subtasks = update.subtasks;
786
+ step.progress =
787
+ update.subtasks.total > 0 ? update.subtasks.completed / update.subtasks.total : 0;
788
+ changed = true;
789
+ }
790
+ if (changed) {
791
+ await this.executionRepository.upsert(workspaceId, instance);
792
+ await this.emitInstance(workspaceId, instance);
793
+ }
794
+ return { kind: 'awaiting_job', jobId: step.jobId, stepIndex: instance.currentStep };
795
+ }
796
+ // The post-release-health gate's helper is the `on-call` agent, which INVESTIGATES
797
+ // (it makes no commits and doesn't change prod), so unlike ci-fixer/conflict-resolver
798
+ // its completion must NOT re-probe to green — re-probing would just regress again and
799
+ // burn the budget. When it finishes — OR fails — resolve it the same way: raise the
800
+ // `release_regression` notification (with the regressed signals stashed at escalation),
801
+ // enrich any open incident, then finish the gate step so the run completes (a human
802
+ // acts on the notification out-of-band). A FAILED investigation must NOT fall through
803
+ // to the generic gate path: that would re-probe → still regress → exhaust the budget,
804
+ // discarding the stashed signals and failing the run with a thinner notification.
805
+ if (step.agentKind === POST_RELEASE_HEALTH_AGENT_KIND &&
806
+ step.gate?.phase === 'working' &&
807
+ (update.state === 'done' || update.state === 'failed')) {
808
+ const block = await this.blockRepository.get(workspaceId, instance.blockId);
809
+ step.jobId = undefined;
810
+ step.subtasks = undefined;
811
+ if (!block)
812
+ return { kind: 'noop' };
813
+ const isFinalStep = instance.currentStep === instance.steps.length - 1;
814
+ const result = update.state === 'done'
815
+ ? update.result
816
+ : { output: `On-call investigation did not complete: ${update.error ?? 'unknown error'}` };
817
+ return this.resolveOnCallStep(workspaceId, instance, step, block, result, isFinalStep, update.state === 'failed');
818
+ }
819
+ // A polling gate step's in-flight job is its helper agent (ci-fixer /
820
+ // conflict-resolver), NOT the step's own work: when it finishes (or fails) we
821
+ // don't record a result or advance — we drop the handle, return the gate to
822
+ // `checking`, and re-run the precheck (the helper's push triggers a fresh CI run /
823
+ // updates mergeability). A helper that failed without pushing leaves the precheck
824
+ // negative, so the next check re-dispatches (until the attempt budget is spent).
825
+ if (this.gateFor(step.agentKind)) {
826
+ step.jobId = undefined;
827
+ step.subtasks = undefined;
828
+ if (step.gate)
829
+ step.gate.phase = 'checking';
830
+ await this.executionRepository.upsert(workspaceId, instance);
831
+ await this.emitInstance(workspaceId, instance);
832
+ return { kind: 'awaiting_gate', stepIndex: instance.currentStep };
833
+ }
834
+ // A `tester` step in its `fixing` phase has a Fixer job in flight, NOT the
835
+ // step's own work: when it finishes (or fails) we drop the handle, return to
836
+ // `testing`, and re-dispatch the Tester against the (now-fixed) branch — its
837
+ // fresh report then drives greenlight-or-loop again. Mirrors the CI gate.
838
+ if (step.agentKind === TESTER_AGENT_KIND && step.test?.phase === 'fixing') {
839
+ step.jobId = undefined;
840
+ step.subtasks = undefined;
841
+ step.test.phase = 'testing';
842
+ const block = await this.blockRepository.get(workspaceId, instance.blockId);
843
+ if (!block)
844
+ return { kind: 'noop' };
845
+ // Reclaim the finished Fixer container before re-dispatching the Tester so it
846
+ // boots fresh against the just-pushed fixes (rather than re-attaching to the
847
+ // completed job by run id).
848
+ await this.stopRunContainer(workspaceId, instance);
849
+ return this.testerController.dispatchTester(workspaceId, instance, step, block);
850
+ }
851
+ if (update.state === 'failed') {
852
+ // A container eviction (the per-run container vanished, its in-memory job is
853
+ // gone) is usually transient. Recover it by dropping the dead handle and
854
+ // returning `continue`: the driver loops back into `advanceInstance`, which
855
+ // re-dispatches the SAME step to a fresh container (a new instance boots under
856
+ // the same id). Two flavours, with separate budgets:
857
+ // - one the runtime facade flagged as transient infra churn (e.g. a deploy
858
+ // draining the sandbox) is not a sick run, and can recur several times in a
859
+ // short window, so it gets the larger MAX_TRANSIENT_EVICTION_RECOVERIES
860
+ // budget (recoveries are naturally spaced by the job poll interval, riding
861
+ // out the window);
862
+ // - any other eviction (crash/OOM) gets the tight MAX_EVICTION_RECOVERIES.
863
+ // Once a budget is spent the eviction is treated as deterministic and fails the
864
+ // run as `evicted`. A genuine agent/job failure is never recovered.
865
+ if (isContainerEvictionError(update.error)) {
866
+ const transient = isTransientEviction(update.error);
867
+ const limit = transient ? MAX_TRANSIENT_EVICTION_RECOVERIES : MAX_EVICTION_RECOVERIES;
868
+ const recoveries = transient
869
+ ? (step.transientEvictionRecoveries ?? 0)
870
+ : (step.evictionRecoveries ?? 0);
871
+ if (recoveries < limit) {
872
+ if (transient)
873
+ step.transientEvictionRecoveries = recoveries + 1;
874
+ else
875
+ step.evictionRecoveries = recoveries + 1;
876
+ step.jobId = undefined;
877
+ step.subtasks = undefined;
878
+ step.progress = 0;
879
+ await this.executionRepository.upsert(workspaceId, instance);
880
+ await this.emitInstance(workspaceId, instance);
881
+ return { kind: 'continue' };
882
+ }
883
+ return {
884
+ kind: 'job_evicted',
885
+ error: transient
886
+ ? `${update.error} (still evicting after ${recoveries} automatic restarts through the infrastructure churn — treating as deterministic)`
887
+ : `${update.error ?? 'Container evicted'} (still evicting after ${recoveries} automatic container restart${recoveries === 1 ? '' : 's'} — treating as deterministic)`,
888
+ };
889
+ }
890
+ return { kind: 'job_failed', error: update.error };
891
+ }
892
+ const block = await this.blockRepository.get(workspaceId, instance.blockId);
893
+ if (!block)
894
+ return { kind: 'noop' };
895
+ const isFinalStep = instance.currentStep === instance.steps.length - 1;
896
+ // Clear the handle before recording so a replay re-attaches to nothing.
897
+ step.jobId = undefined;
898
+ return this.recordStepResult(workspaceId, instance, step, isFinalStep, update.result);
899
+ }
900
+ /**
901
+ * Re-run a polling gate step's precheck from the durable driver's `awaiting_gate`
902
+ * loop: which gate (ci / conflicts) is resolved from the current step's `agentKind`,
903
+ * and it returns the same outcomes as the initial evaluation (precheck passes →
904
+ * advance, still computing → keep polling, fails → dispatch a helper or give up).
905
+ * Safe under replay: reads run state fresh each call. A no-op unless the current
906
+ * step is a gate actively in its `checking` phase.
907
+ */
908
+ async pollGate(workspaceId, executionId) {
909
+ const instance = await this.executionRepository.get(workspaceId, executionId);
910
+ if (!instance || (instance.status !== 'running' && instance.status !== 'paused')) {
911
+ return { kind: 'noop' };
912
+ }
913
+ const step = instance.steps[instance.currentStep];
914
+ const gate = step ? this.gateFor(step.agentKind) : undefined;
915
+ if (!step || !gate)
916
+ return { kind: 'continue' };
917
+ // A helper job is in flight — the driver should be polling it, not the gate; let
918
+ // the job-poll loop drive (defensive; a replay could route here).
919
+ if (step.jobId)
920
+ return { kind: 'awaiting_job', jobId: step.jobId, stepIndex: instance.currentStep };
921
+ const block = await this.blockRepository.get(workspaceId, instance.blockId);
922
+ if (!block)
923
+ return { kind: 'noop' };
924
+ const isFinalStep = instance.currentStep === instance.steps.length - 1;
925
+ return this.evaluateGate(workspaceId, instance, step, block, isFinalStep, gate);
926
+ }
927
+ /**
928
+ * Decide what happens when the durable driver's GATE poll budget (ciMaxPolls ×
929
+ * ciPollInterval) is spent while a gate is still `pending` — called by both runtime
930
+ * drivers (Cloudflare ExecutionWorkflow / Node `driveExecution`) instead of failing
931
+ * the run directly, so the per-gate policy lives in one place. Most gates `fail`
932
+ * (CI never went green / the PR never became mergeable). A time-windowed watch gate
933
+ * (post-release-health, `pollExhaustion: 'pass'`) instead PASSES: the watch window
934
+ * simply outlasted the poll budget with no regression observed, which is healthy — not
935
+ * a timeout. Returns the result the driver should act on (it never re-fails for a fail
936
+ * gate; it returns a `job_failed` the driver funnels through its single `failRun`).
937
+ */
938
+ async resolveGatePollExhaustion(workspaceId, executionId) {
939
+ const instance = await this.executionRepository.get(workspaceId, executionId);
940
+ if (!instance || (instance.status !== 'running' && instance.status !== 'paused')) {
941
+ return { kind: 'noop' };
942
+ }
943
+ const step = instance.steps[instance.currentStep];
944
+ const gate = step ? this.gateFor(step.agentKind) : undefined;
945
+ const timeoutError = 'Gate precheck did not settle within its polling budget';
946
+ if (!step || !gate || gate.pollExhaustion !== 'pass') {
947
+ return { kind: 'job_failed', error: timeoutError, failureKind: 'timeout' };
948
+ }
949
+ // A time-windowed watch gate (post-release-health) may be configured to watch LONGER
950
+ // than the driver's single gate-poll budget (ciMaxPolls × ciPollInterval). Running out
951
+ // of polls before the window has actually elapsed is NOT a healthy pass — the release
952
+ // could still regress later in the window. Re-arm another poll cycle (the driver loops
953
+ // back into the gate-poll loop on `awaiting_gate`) so the full configured window is
954
+ // honoured rather than silently truncated to the poll budget.
955
+ const watchSince = step.gate?.watchSince;
956
+ const windowMinutes = step.gate?.watchWindowMinutes;
957
+ if (watchSince != null && windowMinutes != null) {
958
+ const windowElapsed = this.clock.now() - watchSince >= windowMinutes * 60_000;
959
+ if (!windowElapsed) {
960
+ if (step.gate)
961
+ step.gate.phase = 'checking';
962
+ await this.executionRepository.upsert(workspaceId, instance);
963
+ return { kind: 'awaiting_gate', stepIndex: instance.currentStep };
964
+ }
965
+ }
966
+ // Window genuinely elapsed (or a non-windowed pass gate): finish as a healthy pass.
967
+ const isFinalStep = instance.currentStep === instance.steps.length - 1;
968
+ return this.recordStepResult(workspaceId, instance, step, isFinalStep, {
969
+ output: `${gate.kind} gate passed: watch window elapsed with no regression observed.`,
970
+ });
971
+ }
972
+ /**
973
+ * Transition a step into `working`, stamping its start time the first time it
974
+ * actually begins. Set-once so a Workflows replay (which re-runs `advance`)
975
+ * preserves the original start rather than resetting it on every replay. An
976
+ * explicit re-run clears `startedAt` first (see {@link requestStepChanges}) so
977
+ * the fresh attempt is timed from scratch.
978
+ */
979
+ startStep(step) {
980
+ step.state = 'working';
981
+ if (step.startedAt == null)
982
+ step.startedAt = this.clock.now();
983
+ // (Re)entering `working` means the step is no longer parked on a human: resume
984
+ // its duration clock (see {@link pauseStepForInput}).
985
+ step.pausedAt = null;
986
+ }
987
+ /**
988
+ * Transition a step into `done`, stamping its finish time once. Set-once so the
989
+ * approval-gate flow (which re-asserts `done` after a human approves, long after
990
+ * the agent actually finished) keeps the agent's true completion time, and so a
991
+ * replay doesn't move it. With {@link startStep}'s `startedAt` this yields the
992
+ * step's execution duration. A step finished directly out of a parked approval
993
+ * stopped *working* when it parked, so its duration is billed to the pause instant
994
+ * ({@link pauseStepForInput}), not the (later) moment the human decided.
995
+ */
996
+ finishStep(step) {
997
+ step.state = 'done';
998
+ if (step.finishedAt == null)
999
+ step.finishedAt = step.pausedAt ?? this.clock.now();
1000
+ step.pausedAt = null;
1001
+ }
1002
+ /**
1003
+ * Finish a gated step that was skipped (its estimate gate was not satisfied) and either
1004
+ * complete the run or advance to the next step — the deterministic finish/advance tail
1005
+ * of {@link recordStepResult}, minus all the agent-result handling (no LLM ran, so there
1006
+ * is no usage / decision / PR / artifact / approval / resolver to process). The step is
1007
+ * marked `skipped` with empty output so the UI renders "skipped (gated)".
1008
+ */
1009
+ async skipGatedStep(workspaceId, instance, step, isFinalStep) {
1010
+ step.skipped = true;
1011
+ step.output = '';
1012
+ step.progress = 1;
1013
+ step.subtasks = undefined;
1014
+ this.finishStep(step);
1015
+ if (isFinalStep) {
1016
+ instance.status = 'done';
1017
+ await this.finalizeBlock(workspaceId, instance, undefined);
1018
+ await this.executionRepository.upsert(workspaceId, instance);
1019
+ await this.emitInstance(workspaceId, instance);
1020
+ await this.stopRunContainer(workspaceId, instance);
1021
+ return { kind: 'done' };
1022
+ }
1023
+ instance.currentStep += 1;
1024
+ const next = instance.steps[instance.currentStep];
1025
+ if (next)
1026
+ this.startStep(next);
1027
+ await this.updateBlockProgress(workspaceId, instance, 'in_progress');
1028
+ await this.executionRepository.upsert(workspaceId, instance);
1029
+ await this.emitInstance(workspaceId, instance);
1030
+ return { kind: 'continue' };
1031
+ }
1032
+ /**
1033
+ * Park a step on a human decision and freeze its duration clock. Records when the
1034
+ * step stopped working (`pausedAt`) so elapsed time no longer accrues while it waits
1035
+ * for input — the symmetric counterpart of the terminal freeze on `finishedAt`.
1036
+ * Set-once (a Workflows replay re-parking keeps the original instant); cleared when
1037
+ * the step resumes ({@link startStep}) or finishes ({@link finishStep}).
1038
+ */
1039
+ pauseStepForInput(step) {
1040
+ step.state = 'waiting_decision';
1041
+ if (step.pausedAt == null)
1042
+ step.pausedAt = this.clock.now();
1043
+ }
1044
+ /**
1045
+ * Record a completed agent step's result and report what the driver should do
1046
+ * next: meter token usage, park on a raised decision, or persist the output
1047
+ * (and any opened PR) and either finish the run or advance to the next step.
1048
+ * Shared by the inline path and the async-job poll path.
1049
+ */
1050
+ async recordStepResult(workspaceId, instance, step, isFinalStep, result) {
1051
+ // Meter the LLM call against the spend budget. Recorded whether the step
1052
+ // completed or raised a decision — both consumed tokens.
1053
+ if (result.usage) {
1054
+ await this.spend.record({
1055
+ workspaceId,
1056
+ executionId: instance.id,
1057
+ agentKind: step.agentKind,
1058
+ model: result.model ?? 'unknown',
1059
+ usage: result.usage,
1060
+ });
1061
+ }
1062
+ // The agent asked for a human decision and this step hasn't resolved one yet.
1063
+ if (result.decision && !step.decision?.chosen) {
1064
+ step.decision = {
1065
+ id: this.idGenerator.next('dec'),
1066
+ question: result.decision.question,
1067
+ options: [...result.decision.options],
1068
+ chosen: null,
1069
+ };
1070
+ this.pauseStepForInput(step);
1071
+ instance.status = 'blocked';
1072
+ await this.updateBlockProgress(workspaceId, instance, 'blocked');
1073
+ await this.executionRepository.upsert(workspaceId, instance);
1074
+ await this.emitInstance(workspaceId, instance);
1075
+ return { kind: 'awaiting_decision', decisionId: step.decision.id };
1076
+ }
1077
+ // A `tester` step returned a structured report. On a withheld greenlight we do
1078
+ // NOT finish the step: we loop the `fixer` (within the attempt budget) and
1079
+ // re-test, mirroring the CI gate. A greenlight (or no provider) falls through to
1080
+ // the normal finish/advance below. Records the report on the step either way.
1081
+ if (step.agentKind === TESTER_AGENT_KIND && result.testReport !== undefined) {
1082
+ const looped = await this.testerController.resolveTesterResult(workspaceId, instance, step, result);
1083
+ if (looped)
1084
+ return looped;
1085
+ }
1086
+ // The step completed.
1087
+ step.output = result.output ?? '';
1088
+ if (result.model)
1089
+ step.model = result.model;
1090
+ step.progress = 1;
1091
+ this.finishStep(step);
1092
+ // Live subtask counts only describe an in-flight run; drop them now the step
1093
+ // is done so the board doesn't show a stale "3/8" against a finished step.
1094
+ step.subtasks = undefined;
1095
+ // A companion-driven rework was just consumed by this re-run; clear it so a later
1096
+ // unrelated re-run doesn't re-apply stale feedback (the companion sets fresh
1097
+ // feedback if it still rejects the new output).
1098
+ step.rework = undefined;
1099
+ // A repo-operating step (the container "implementer" agent) opened a PR for
1100
+ // its work. Record it on the block so the board can surface and link to it,
1101
+ // regardless of whether this is the final step.
1102
+ if (result.pullRequest) {
1103
+ await this.blockRepository.update(workspaceId, instance.blockId, {
1104
+ pullRequest: result.pullRequest,
1105
+ });
1106
+ }
1107
+ // A Blueprinter step produced a fresh service decomposition. Validate it with
1108
+ // the authoritative schema (a bad payload must never touch the board), then
1109
+ // reconcile it in place onto the run's service frame.
1110
+ if (result.blueprintService !== undefined) {
1111
+ await this.ingestBlueprint(workspaceId, instance.blockId, result.blueprintService);
1112
+ }
1113
+ // A spec-writer step produced the service's unified specification (`spec.json`)
1114
+ // and committed it to the implementation branch. Strict-validate it (a bad payload
1115
+ // must never be trusted), then nudge clients to refresh.
1116
+ if (result.spec !== undefined) {
1117
+ await this.ingestSpec(workspaceId, result.spec);
1118
+ }
1119
+ // A `task-estimator` step emits a JSON triage (complexity/risk/impact). Parse it
1120
+ // tolerantly, persist it on the block (used to gate consensus steps + surfaced in
1121
+ // the UI), and replace the raw JSON output with a readable summary. An unparseable
1122
+ // estimate leaves the block untouched and keeps the raw output (no run failure).
1123
+ // The estimate works the same whether the single-actor estimator or the consensus
1124
+ // ranked-scoring variant produced the JSON — both land here.
1125
+ if (step.agentKind === TASK_ESTIMATOR_AGENT_KIND) {
1126
+ const estimate = coerceTaskEstimate(step.output, result.model ?? step.model ?? null, this.clock.now());
1127
+ if (estimate) {
1128
+ await this.blockRepository.update(workspaceId, instance.blockId, { estimate });
1129
+ step.output = summarizeEstimate(estimate);
1130
+ }
1131
+ }
1132
+ // A producer that emits a STRUCTURED ARTIFACT (the spec doc, the blueprint tree, …)
1133
+ // returns its raw Pi transcript summary as `result.output` — useless for review.
1134
+ // Replace the step's reviewable output with a rendering of the artifact ITSELF, so
1135
+ // its companion grades the PRODUCT (and the SPA reader + downstream steps see it),
1136
+ // not the agent's chatter. Grading the transcript is what made the spec-companion
1137
+ // declare every pass "unreviewable" and loop the producer to its rework cap on every
1138
+ // spec task — a trap for ANY artifact-producing agent with a companion, now and
1139
+ // future, which is why this is keyed off the artifact, not a specific agentKind.
1140
+ const reviewable = reviewableArtifactOutput(result);
1141
+ if (reviewable !== undefined)
1142
+ step.output = reviewable;
1143
+ // Human approval gate: a step the pipeline marked `requiresApproval` pauses
1144
+ // here once its proposal is ready, so a human can review (and edit) it before
1145
+ // the next step runs. We reuse the durable decision wait — returning
1146
+ // `awaiting_decision` keyed by the approval id parks the run on the same named
1147
+ // event the workflow already listens for; `approveStep` / `requestStepChanges`
1148
+ // wake it. Never gates the final step (nothing downstream to feed) and is
1149
+ // idempotent: an already-approved step falls through to advance/finish.
1150
+ if (step.requiresApproval && !isFinalStep && step.approval?.status !== 'approved') {
1151
+ step.approval = {
1152
+ id: this.idGenerator.next('appr'),
1153
+ status: 'pending',
1154
+ proposal: step.output,
1155
+ };
1156
+ this.pauseStepForInput(step);
1157
+ instance.status = 'blocked';
1158
+ await this.updateBlockProgress(workspaceId, instance, 'blocked');
1159
+ await this.executionRepository.upsert(workspaceId, instance);
1160
+ await this.emitInstance(workspaceId, instance);
1161
+ return { kind: 'awaiting_decision', decisionId: step.approval.id };
1162
+ }
1163
+ // Persist the agent's reported confidence whenever a step reports it, for board
1164
+ // transparency. Position-independent: it must NOT be tied to the final step, since a
1165
+ // confidence-reporting producer (e.g. the merger) may now be followed by a gate.
1166
+ if (result.confidence !== undefined) {
1167
+ await this.blockRepository.update(workspaceId, instance.blockId, {
1168
+ confidence: result.confidence,
1169
+ });
1170
+ }
1171
+ // Run any DETERMINISTIC post-completion logic registered for this agent kind (e.g.
1172
+ // the merger performs the real GitHub merge with backend-held credentials). This is
1173
+ // POSITION-INDEPENDENT — it fires whenever the step finishes, not only when it's last
1174
+ // — so inserting a later step (post-release-health) can't silently disable it. A
1175
+ // resolver that owns the block's terminal status (the merger sets `done`/`pr_ready`)
1176
+ // tells `finalizeBlock` to leave it alone.
1177
+ const resolver = this.stepResolverFor(step.agentKind);
1178
+ let resolverOwnsTerminalStatus = false;
1179
+ if (resolver && (resolver.applies?.(result) ?? true)) {
1180
+ const resolution = await resolver.resolve({
1181
+ workspaceId,
1182
+ instance,
1183
+ step,
1184
+ result,
1185
+ isFinalStep,
1186
+ });
1187
+ if (resolution?.output !== undefined)
1188
+ step.output = resolution.output;
1189
+ if (resolution?.ownsTerminalStatus)
1190
+ resolverOwnsTerminalStatus = true;
1191
+ }
1192
+ if (isFinalStep) {
1193
+ instance.status = 'done';
1194
+ // Merge resolution (and confidence persistence) already happened above,
1195
+ // POSITION-INDEPENDENTLY: confidence at the top of recordStepResult and the merger's
1196
+ // real merge via the step-completion resolver registry (so a trailing
1197
+ // post-release-health gate doesn't disable auto-merge). Nothing merge-specific here.
1198
+ await this.finalizeBlock(workspaceId, instance, result.confidence);
1199
+ await this.executionRepository.upsert(workspaceId, instance);
1200
+ await this.emitInstance(workspaceId, instance);
1201
+ // The run is finished: reclaim its per-run container now instead of letting it
1202
+ // idle out its sleepAfter window (~10 min of billed-but-useless compute). All
1203
+ // pipeline steps share the one container keyed by the execution id, so this is
1204
+ // only safe on the FINAL step — never between steps. Best-effort/idempotent.
1205
+ await this.stopRunContainer(workspaceId, instance);
1206
+ return { kind: 'done' };
1207
+ }
1208
+ instance.currentStep += 1;
1209
+ const next = instance.steps[instance.currentStep];
1210
+ if (next)
1211
+ this.startStep(next);
1212
+ // A resolver that already set the block's TERMINAL status (the merger flips it to
1213
+ // `done`/`pr_ready` mid-pipeline) must not be clobbered back to `in_progress` as we
1214
+ // advance to a trailing step — refresh progress only, preserving that status. (The
1215
+ // final step's `finalizeBlock` then leaves a `done` block alone.)
1216
+ if (resolverOwnsTerminalStatus) {
1217
+ await this.refreshBlockProgress(workspaceId, instance);
1218
+ }
1219
+ else {
1220
+ await this.updateBlockProgress(workspaceId, instance, 'in_progress');
1221
+ }
1222
+ await this.executionRepository.upsert(workspaceId, instance);
1223
+ await this.emitInstance(workspaceId, instance);
1224
+ return { kind: 'continue' };
1225
+ }
1226
+ /**
1227
+ * Reset a step so the durable driver re-runs it from scratch: clear its live
1228
+ * container job handle (so it dispatches FRESH work rather than re-attaching to a
1229
+ * finished or evicted job), its timings, approval gate, live subtasks and last
1230
+ * output, and drop it back to `pending`. Preserves the step's identity
1231
+ * (`agentKind` / `requiresApproval`) and any companion budget/verdict history.
1232
+ */
1233
+ resetStepForRerun(step) {
1234
+ step.state = 'pending';
1235
+ step.startedAt = null;
1236
+ step.finishedAt = null;
1237
+ step.pausedAt = null;
1238
+ step.jobId = undefined;
1239
+ step.approval = null;
1240
+ step.subtasks = undefined;
1241
+ step.progress = 0;
1242
+ step.output = undefined;
1243
+ step.rework = undefined;
1244
+ }
1245
+ /**
1246
+ * Loop a producer step back for rework and re-run every step from it up to and
1247
+ * including the companion at `companionIndex`: each one is reset (crucially clearing
1248
+ * stale container job handles so an intermediate container step re-dispatches fresh
1249
+ * work instead of re-attaching to its evicted job), the producer is handed the
1250
+ * `rework` feedback + started, and the instance cursor is moved back to the producer.
1251
+ * Shared by the automatic companion loop and the human "request changes" path.
1252
+ */
1253
+ rerunProducerThrough(instance, producerIndex, companionIndex, rework) {
1254
+ for (let i = producerIndex; i <= companionIndex; i++) {
1255
+ this.resetStepForRerun(instance.steps[i]);
1256
+ }
1257
+ const producer = instance.steps[producerIndex];
1258
+ producer.rework = rework;
1259
+ this.startStep(producer);
1260
+ instance.currentStep = producerIndex;
1261
+ }
1262
+ /**
1263
+ * The index of the nearest preceding step a companion grades (one of its target
1264
+ * producer kinds), or -1 when none precedes it. The single producer-search used by the
1265
+ * automatic companion loop, the human "request changes" redirect, and the iteration-cap
1266
+ * extra-round resolution.
1267
+ */
1268
+ companionProducerIndex(instance, companionIndex) {
1269
+ const targets = companionTargets(instance.steps[companionIndex].agentKind);
1270
+ for (let i = companionIndex - 1; i >= 0; i--) {
1271
+ if (targets.includes(instance.steps[i].agentKind))
1272
+ return i;
1273
+ }
1274
+ return -1;
1275
+ }
1276
+ /**
1277
+ * Loop a companion's producer back for one more automatic rework cycle: charge one
1278
+ * attempt against the budget, then re-run the producer (and any intermediate steps) up
1279
+ * to and including the companion so it re-grades. Shared by the automatic
1280
+ * below-threshold loop ({@link evaluateCompanion}) and the human-granted extra round
1281
+ * ({@link resolveCompanionExceeded}), so both consume the budget identically.
1282
+ */
1283
+ loopCompanionProducer(instance, companionIndex, rework) {
1284
+ const companionStep = instance.steps[companionIndex];
1285
+ const producerIndex = this.companionProducerIndex(instance, companionIndex);
1286
+ companionStep.companion.attempts += 1;
1287
+ this.rerunProducerThrough(instance, producerIndex, companionIndex, rework);
1288
+ if (instance.status === 'blocked')
1289
+ instance.status = 'running';
1290
+ }
1291
+ /**
1292
+ * Deterministically provision an ephemeral environment for a deployer step.
1293
+ * Produces a human-readable summary as the step output and reports no token
1294
+ * usage (it incurs no LLM cost). Errors are swallowed into the output unless
1295
+ * the durable driver wants them surfaced for its per-step retry.
1296
+ */
1297
+ async runDeployer(workspaceId, instance, block, options = {}) {
1298
+ try {
1299
+ const handle = await this.environmentProvisioning.provision({
1300
+ workspaceId,
1301
+ blockId: block.id,
1302
+ executionId: instance.id,
1303
+ inputs: this.deployInputs(block),
1304
+ context: this.deployContext(block),
1305
+ });
1306
+ const lines = [
1307
+ `Provisioned ephemeral environment via '${handle.providerId}'.`,
1308
+ `Status: ${handle.status}`,
1309
+ `URL: ${handle.url ?? '(pending)'}`,
1310
+ ];
1311
+ if (handle.expiresAt)
1312
+ lines.push(`Expires: ${new Date(handle.expiresAt).toISOString()}`);
1313
+ return { output: lines.join('\n'), model: `environment:${handle.providerId}` };
1314
+ }
1315
+ catch (error) {
1316
+ if (options.rethrowAgentErrors)
1317
+ throw error;
1318
+ return {
1319
+ output: `Deployer error: ${getErrorMessage(error)}`,
1320
+ };
1321
+ }
1322
+ }
1323
+ /**
1324
+ * File a tracking issue/ticket for a `tracker` step from the preceding `analysis`
1325
+ * output. Non-LLM and best-effort: when no provider is wired or none is configured
1326
+ * for the workspace it simply notes the skip; a filing error is folded into the
1327
+ * step output rather than failing the run (the implementation still proceeds).
1328
+ */
1329
+ async runTracker(workspaceId, instance, block) {
1330
+ if (!this.ticketTrackerProvider) {
1331
+ return { output: 'No issue tracker configured; skipped ticket creation.' };
1332
+ }
1333
+ // The report to file is the closest preceding `analysis` output, falling back
1334
+ // to the block description when the pipeline has no analysis step.
1335
+ const analysis = instance.steps
1336
+ .slice(0, instance.currentStep)
1337
+ .filter((s) => s.agentKind === ANALYSIS_AGENT_KIND && s.output)
1338
+ .map((s) => s.output)
1339
+ .pop();
1340
+ const body = (analysis ?? block.description ?? '').trim() || 'Automated tech-debt remediation.';
1341
+ const frameId = (await this.contextBuilder.resolveServiceFrameId(workspaceId, block.id)) ?? block.id;
1342
+ try {
1343
+ const ticket = await this.ticketTrackerProvider.createTicket({
1344
+ workspaceId,
1345
+ frameId,
1346
+ title: `Tech debt: ${block.title}`,
1347
+ body,
1348
+ });
1349
+ if (!ticket) {
1350
+ return { output: 'No issue tracker configured; skipped ticket creation.' };
1351
+ }
1352
+ return { output: `Filed tracking ticket ${ticket.externalId}: ${ticket.url}` };
1353
+ }
1354
+ catch (error) {
1355
+ return { output: `Could not file a tracking ticket: ${getErrorMessage(error)}` };
1356
+ }
1357
+ }
1358
+ /**
1359
+ * The polling-gate registry, keyed by `agentKind`. A gate runs a programmatic
1360
+ * precheck against a provider and only escalates to a helper container agent on a
1361
+ * negative verdict. Built lazily (the closures capture `this`, so the providers /
1362
+ * merge preset / notification helpers resolve at call time). Returns undefined for a
1363
+ * non-gate kind. See {@link GateDefinition} and {@link evaluateGate}.
1364
+ */
1365
+ gateFor(agentKind) {
1366
+ if (!this.gateRegistryCache)
1367
+ this.gateRegistryCache = this.buildGateRegistry();
1368
+ return this.gateRegistryCache.get(agentKind);
1369
+ }
1370
+ /**
1371
+ * The post-completion resolver for an agent kind, or undefined when the kind has none.
1372
+ * A resolver runs DETERMINISTIC backend follow-up once the step's agent finishes — e.g.
1373
+ * the merger performs the real GitHub merge — independent of the step's position in the
1374
+ * pipeline. Built lazily (closures capture `this`). See {@link StepCompletionResolver}.
1375
+ */
1376
+ stepResolverFor(agentKind) {
1377
+ if (!this.stepResolverCache)
1378
+ this.stepResolverCache = this.buildStepResolverRegistry();
1379
+ return this.stepResolverCache.get(agentKind);
1380
+ }
1381
+ buildStepResolverRegistry() {
1382
+ const resolvers = [
1383
+ // The `merger` agent OWNS the merge decision, but the merge itself is mechanical
1384
+ // and uses backend-held GitHub credentials the sandboxed agent never sees — so the
1385
+ // engine performs it deterministically from the agent's assessment here, the moment
1386
+ // the merger step finishes (NOT only when it is the pipeline's last step, which is
1387
+ // why a trailing `post-release-health` step no longer disables auto-merge).
1388
+ {
1389
+ kind: MERGER_AGENT_KIND,
1390
+ applies: (result) => result.mergeAssessment !== undefined,
1391
+ resolve: async ({ workspaceId, instance, result }) => {
1392
+ await this.mergeResolver.resolveMergerStep(workspaceId, instance, result.mergeAssessment);
1393
+ return { ownsTerminalStatus: true };
1394
+ },
1395
+ },
1396
+ ];
1397
+ return new Map(resolvers.map((r) => [r.kind, r]));
1398
+ }
1399
+ buildGateRegistry() {
1400
+ const gates = [
1401
+ // CI gate: poll the PR head's check runs; escalate to a `ci-fixer` on red CI.
1402
+ {
1403
+ kind: CI_AGENT_KIND,
1404
+ helperKind: CI_FIXER_AGENT_KIND,
1405
+ wired: () => !!this.ciStatusProvider,
1406
+ unwiredOutput: 'CI gate skipped (no CI status provider configured).',
1407
+ probe: async (workspaceId, blockId) => {
1408
+ const report = await this.ciStatusProvider.getStatus(workspaceId, blockId);
1409
+ const verdict = aggregateCi(report.checks);
1410
+ if (isCiGreen(verdict)) {
1411
+ return {
1412
+ status: 'pass',
1413
+ headSha: report.headSha,
1414
+ passOutput: verdict === 'none'
1415
+ ? 'CI gate passed: no checks configured for the PR head.'
1416
+ : `CI gate passed: ${report.checks.length} check(s) green.`,
1417
+ };
1418
+ }
1419
+ if (verdict === 'pending')
1420
+ return { status: 'pending', headSha: report.headSha };
1421
+ return {
1422
+ status: 'fail',
1423
+ headSha: report.headSha,
1424
+ failureSummary: describeFailingChecks(report.checks),
1425
+ failingChecks: listFailingChecks(report.checks),
1426
+ };
1427
+ },
1428
+ // Surface the failing-check summary to the fixer as resolved context.
1429
+ helperPriorOutput: (summary) => ({ agentKind: CI_AGENT_KIND, output: summary }),
1430
+ onExhausted: async ({ workspaceId, instance, block, step, summary }) => {
1431
+ const attempts = step.gate?.attempts ?? 0;
1432
+ await this.raiseCiFailed(workspaceId, instance, block, summary ?? '', attempts);
1433
+ return {
1434
+ error: `CI did not pass after ${attempts} CI-fixer attempt(s). ${summary ?? ''}`.trim(),
1435
+ };
1436
+ },
1437
+ },
1438
+ // Conflicts gate: check PR mergeability; escalate to a `conflict-resolver` on conflict.
1439
+ {
1440
+ kind: CONFLICTS_AGENT_KIND,
1441
+ helperKind: CONFLICT_RESOLVER_AGENT_KIND,
1442
+ wired: () => !!this.mergeabilityProvider,
1443
+ unwiredOutput: 'Conflict gate skipped (no mergeability provider configured).',
1444
+ // Unlike CI (where each fixer round gets fresh red-check output to act on), a
1445
+ // conflict retry re-merges the SAME base and gets no new signal, so a large
1446
+ // budget just burns containers re-attempting the same conflict (observed in
1447
+ // prod: 10 attempts, head SHA never moved, run failed). Cap it low and fail
1448
+ // fast to a manual-resolution notification instead of churning to CI's default
1449
+ // of 10.
1450
+ attemptBudget: () => CONFLICT_RESOLVER_MAX_ATTEMPTS,
1451
+ probe: async (workspaceId, blockId) => {
1452
+ const report = await this.mergeabilityProvider.getMergeability(workspaceId, blockId);
1453
+ // No PR resolved, or it merges cleanly → nothing to do; advance.
1454
+ if (report.headSha === null || report.verdict === 'mergeable') {
1455
+ return {
1456
+ status: 'pass',
1457
+ headSha: report.headSha,
1458
+ passOutput: report.headSha === null
1459
+ ? 'Conflict gate passed: no open PR to gate.'
1460
+ : 'Conflict gate passed: the PR merges cleanly with its base.',
1461
+ };
1462
+ }
1463
+ // GitHub still computing mergeability → keep polling.
1464
+ if (report.verdict === 'unknown')
1465
+ return { status: 'pending', headSha: report.headSha };
1466
+ return { status: 'fail', headSha: report.headSha };
1467
+ },
1468
+ onExhausted: async ({ step }) => ({
1469
+ error: `The pull request still conflicts with its base after ` +
1470
+ `${step.gate?.attempts ?? 0} conflict-resolver attempt(s). Resolve the conflict ` +
1471
+ `manually, then retry the run.`,
1472
+ }),
1473
+ },
1474
+ // Post-release-health gate: after deploy, watch the release's Datadog monitors/SLOs
1475
+ // over a window; escalate to the `on-call` agent on a regression (it investigates,
1476
+ // it does NOT fix prod, so its completion is resolved specially — see
1477
+ // resolveOnCallStep — rather than re-probing to green).
1478
+ {
1479
+ kind: POST_RELEASE_HEALTH_AGENT_KIND,
1480
+ helperKind: ON_CALL_AGENT_KIND,
1481
+ wired: () => !!this.releaseHealthProvider,
1482
+ unwiredOutput: 'Post-release health gate skipped (no release-health provider configured).',
1483
+ attemptBudget: (preset) => preset.releaseMaxAttempts,
1484
+ // Running out of poll budget while still watching means the window outlasted the
1485
+ // driver's budget with NO regression observed — a healthy pass, not a timeout.
1486
+ pollExhaustion: 'pass',
1487
+ probe: async (workspaceId, blockId, gateState) => {
1488
+ // Only watch a release that actually SHIPPED. The merger sets the block `done`
1489
+ // when it merges for real, but leaves it `pr_ready` when it raises a review
1490
+ // (assessment outside thresholds) without merging — and a no-merger pipeline
1491
+ // also never auto-merges. There is nothing deployed to watch in those cases, so
1492
+ // pass through immediately instead of polling Datadog (and possibly escalating
1493
+ // an on-call investigation) for a change that was never released.
1494
+ const block = await this.blockRepository.get(workspaceId, blockId);
1495
+ if (!block || block.status !== 'done') {
1496
+ return {
1497
+ status: 'pass',
1498
+ headSha: null,
1499
+ passOutput: 'Post-release health gate skipped: the PR was not merged (nothing deployed to watch).',
1500
+ };
1501
+ }
1502
+ const since = gateState.watchSince ?? this.clock.now();
1503
+ const report = await this.releaseHealthProvider.probe(workspaceId, blockId, since);
1504
+ // No signals configured for this block → nothing to watch; advance immediately
1505
+ // (don't park for the whole window on an unmapped release).
1506
+ if (report.signals.length === 0) {
1507
+ return {
1508
+ status: 'pass',
1509
+ headSha: null,
1510
+ passOutput: 'Post-release health gate passed: no monitors/SLOs configured for this release.',
1511
+ };
1512
+ }
1513
+ // The watch window is resolved ONCE on first entry and stashed on the gate
1514
+ // state (see evaluateGate), so the probe doesn't re-load the block + re-resolve
1515
+ // the merge preset on every poll over the window.
1516
+ const windowMinutes = gateState.watchWindowMinutes ?? DEFAULT_MERGE_PRESET.releaseWatchWindowMinutes;
1517
+ const windowElapsed = this.clock.now() - since >= windowMinutes * 60_000;
1518
+ const verdict = classifyReleaseHealth({ report, windowElapsed });
1519
+ if (verdict === 'pass') {
1520
+ return {
1521
+ status: 'pass',
1522
+ headSha: null,
1523
+ passOutput: `Post-release health gate passed: ${report.signals.length} signal(s) healthy through the watch window.`,
1524
+ };
1525
+ }
1526
+ if (verdict === 'pending')
1527
+ return { status: 'pending', headSha: null };
1528
+ return {
1529
+ status: 'fail',
1530
+ headSha: null,
1531
+ failureSummary: describeRegressedSignals(report.signals),
1532
+ };
1533
+ },
1534
+ // The on-call agent gets the full evidence bundle (regressed signals + recent
1535
+ // error logs), gathered fresh at dispatch.
1536
+ gatherHelperPriorOutputs: async (workspaceId, blockId, gateState) => {
1537
+ const since = gateState.watchSince ?? this.clock.now();
1538
+ const evidence = await this.releaseHealthProvider.gatherEvidence(workspaceId, blockId, since);
1539
+ // Stash the regressed signals on the gate state so the on-call COMPLETION handler
1540
+ // (resolveOnCallStep) builds the notification + incident enrichment from the SAME
1541
+ // evidence the agent investigated — rather than re-reading Datadog a third time
1542
+ // (which also risks disagreeing with what the agent saw if the window moved).
1543
+ // The caller spreads `...step.gate` right after, so this mutation persists.
1544
+ gateState.regressedSignals = evidence.regressedSignals;
1545
+ return [
1546
+ { agentKind: POST_RELEASE_HEALTH_AGENT_KIND, output: renderReleaseEvidence(evidence) },
1547
+ ];
1548
+ },
1549
+ onExhausted: async ({ workspaceId, instance, block, step, summary }) => {
1550
+ // Reached when releaseMaxAttempts is 0 (operator disabled the on-call
1551
+ // investigation) or there is no async executor to escalate to — a FAILED
1552
+ // investigation is handled in pollAgentJob, not here. Alert a human via the
1553
+ // notification (with any signals already captured), then flag the run.
1554
+ await this.raiseReleaseRegression(workspaceId, instance, block, null, step.gate?.regressedSignals ?? [], summary ?? '');
1555
+ return {
1556
+ error: `Post-release health regressed and no on-call investigation was configured. ${summary ?? ''}`.trim(),
1557
+ };
1558
+ },
1559
+ },
1560
+ ];
1561
+ return new Map(gates.map((gate) => [gate.kind, gate]));
1562
+ }
1563
+ /**
1564
+ * Evaluate a polling gate step once and decide (shared by the initial advance and the
1565
+ * durable `awaiting_gate` re-poll):
1566
+ * - no provider wired → pass-through (advance; nothing to gate);
1567
+ * - precheck passes → advance to the next step (the helper agent is NEVER spun up);
1568
+ * - still computing → `awaiting_gate` (the driver sleeps then calls {@link pollGate});
1569
+ * - fails, budget left → dispatch the helper container agent (`awaiting_job`);
1570
+ * - fails, budget spent → the gate's exhaustion handler, then fail the run.
1571
+ */
1572
+ async evaluateGate(workspaceId, instance, step, block, isFinalStep, gate) {
1573
+ // Re-attach after a replay: a helper is already in flight for this gate.
1574
+ if (step.gate?.phase === 'working' && step.jobId) {
1575
+ return { kind: 'awaiting_job', jobId: step.jobId, stepIndex: instance.currentStep };
1576
+ }
1577
+ // Provider not wired: the gate is a pass-through so the engine works without it.
1578
+ if (!gate.wired()) {
1579
+ return this.recordStepResult(workspaceId, instance, step, isFinalStep, {
1580
+ output: gate.unwiredOutput,
1581
+ });
1582
+ }
1583
+ // Initialise the gate's state on first entry, resolving the attempt budget from the
1584
+ // task's merge preset (stable across polls once set).
1585
+ if (!step.gate) {
1586
+ const preset = await this.resolveMergePreset(workspaceId, block);
1587
+ step.gate = {
1588
+ phase: 'checking',
1589
+ attempts: 0,
1590
+ maxAttempts: gate.attemptBudget ? gate.attemptBudget(preset) : preset.ciMaxAttempts,
1591
+ headSha: null,
1592
+ // Stash the watch window once (read on every poll by a time-windowed gate's
1593
+ // probe; harmless/unused for the CI/conflicts gates).
1594
+ watchWindowMinutes: preset.releaseWatchWindowMinutes,
1595
+ };
1596
+ }
1597
+ // A time-windowed gate (post-release-health) marks when it began watching, on first
1598
+ // entry, so its probe knows whether the monitoring window has elapsed. Harmless for
1599
+ // the CI/conflicts gates, which ignore it.
1600
+ if (step.gate.watchSince == null)
1601
+ step.gate.watchSince = this.clock.now();
1602
+ const probe = await gate.probe(workspaceId, block.id, step.gate);
1603
+ step.gate.headSha = probe.headSha;
1604
+ // Persist the precheck outcome so the run-detail UI can surface why the gate is
1605
+ // looping (the failing checks / conflict reason) — detail that was previously fed
1606
+ // only to the helper agent and then discarded.
1607
+ step.gate.lastVerdict = probe.status;
1608
+ step.gate.lastFailureSummary = probe.failureSummary ?? null;
1609
+ step.gate.failingChecks = probe.failingChecks ?? null;
1610
+ if (probe.status === 'pass') {
1611
+ // Stop the moment the precheck passes — finish the step and advance.
1612
+ return this.recordStepResult(workspaceId, instance, step, isFinalStep, {
1613
+ output: probe.passOutput ?? `${gate.kind} gate passed.`,
1614
+ });
1615
+ }
1616
+ if (probe.status === 'pending') {
1617
+ // Keep polling. Persist the head sha + phase so the board can reflect it.
1618
+ step.gate.phase = 'checking';
1619
+ await this.executionRepository.upsert(workspaceId, instance);
1620
+ await this.emitInstance(workspaceId, instance);
1621
+ return { kind: 'awaiting_gate', stepIndex: instance.currentStep };
1622
+ }
1623
+ // probe.status === 'fail'.
1624
+ const canEscalate = isAsyncAgentExecutor(this.agentExecutor);
1625
+ if (canEscalate && step.gate.attempts < step.gate.maxAttempts) {
1626
+ return this.dispatchGateHelper(workspaceId, instance, step, block, isFinalStep, gate, probe.failureSummary);
1627
+ }
1628
+ // Budget spent (or no async executor to escalate to): give up.
1629
+ const { error } = await gate.onExhausted({
1630
+ workspaceId,
1631
+ instance,
1632
+ block,
1633
+ step,
1634
+ summary: probe.failureSummary,
1635
+ });
1636
+ return { kind: 'job_failed', error };
1637
+ }
1638
+ /**
1639
+ * Dispatch a gate's helper container agent on a failed precheck: build the agent
1640
+ * context with the kind overridden to the helper (it clones the PR head branch and
1641
+ * pushes — no new PR), park on the job, and flip the gate to `working`. Idempotent
1642
+ * under replay via the step's `jobId` (re-attach handled in {@link evaluateGate}).
1643
+ */
1644
+ async dispatchGateHelper(workspaceId, instance, step, block, isFinalStep, gate, failureSummary) {
1645
+ const executor = this.agentExecutor;
1646
+ if (!isAsyncAgentExecutor(executor)) {
1647
+ // Defensive: evaluateGate only calls this when async-capable.
1648
+ return { kind: 'job_failed', error: `No async executor available for the ${gate.kind} gate.` };
1649
+ }
1650
+ const base = await this.contextBuilder.buildContext(workspaceId, instance, step, isFinalStep, block);
1651
+ // A gate may build richer helper context asynchronously (the on-call agent gets the
1652
+ // full Datadog evidence bundle); otherwise fall back to the simple summary prior.
1653
+ const extras = gate.gatherHelperPriorOutputs
1654
+ ? await gate.gatherHelperPriorOutputs(workspaceId, block.id, step.gate ?? { phase: 'checking', attempts: 0, maxAttempts: 0 })
1655
+ : [gate.helperPriorOutput?.(failureSummary ?? '')].filter((o) => o != null);
1656
+ const context = {
1657
+ ...base,
1658
+ agentKind: gate.helperKind,
1659
+ priorOutputs: [...base.priorOutputs, ...extras],
1660
+ };
1661
+ const handle = await executor.startJob(context);
1662
+ step.jobId = handle.jobId;
1663
+ if (handle.model)
1664
+ step.model = handle.model;
1665
+ step.gate = {
1666
+ // Preserve the recorded verdict/failure detail (set in evaluateGate) so the UI
1667
+ // keeps showing what the helper is fixing while it works.
1668
+ ...step.gate,
1669
+ phase: 'working',
1670
+ attempts: (step.gate?.attempts ?? 0) + 1,
1671
+ maxAttempts: step.gate?.maxAttempts ?? DEFAULT_MERGE_PRESET.ciMaxAttempts,
1672
+ headSha: step.gate?.headSha ?? null,
1673
+ };
1674
+ await this.executionRepository.upsert(workspaceId, instance);
1675
+ await this.emitInstance(workspaceId, instance);
1676
+ return { kind: 'awaiting_job', jobId: step.jobId, stepIndex: instance.currentStep };
1677
+ }
1678
+ /**
1679
+ * Raise a `decision_required` notification when a run parks on an iteration-cap gate
1680
+ * after spending its automatic budget — a quality companion at its rework cap or an
1681
+ * iterative reviewer (requirements / clarity) at its iteration cap. Without it the
1682
+ * three-choice decision is reachable only by drilling into the parked step, so the run
1683
+ * looks silently stuck. Best-effort: a missing notification service (tests) or block is
1684
+ * a no-op.
1685
+ */
1686
+ async raiseDecisionRequired(workspaceId, instance) {
1687
+ if (!this.notificationService)
1688
+ return;
1689
+ const block = await this.blockRepository.get(workspaceId, instance.blockId);
1690
+ if (!block)
1691
+ return;
1692
+ await this.notificationService.raise(workspaceId, {
1693
+ type: 'decision_required',
1694
+ blockId: block.id,
1695
+ executionId: instance.id,
1696
+ title: `"${block.title}" ran out of automatic iterations and needs your decision`,
1697
+ body: 'An automatic review loop reached its iteration cap without converging. Open the ' +
1698
+ 'task to choose: one more round, proceed with the current result, or stop and reset.',
1699
+ payload: { pipelineName: instance.pipelineName },
1700
+ });
1701
+ }
1702
+ /**
1703
+ * Ensure an open notification exists for a run that has just parked waiting for a human
1704
+ * (an agent-raised decision, an approval gate, or an iterative review gate). Without
1705
+ * the old decision timeout the run waits indefinitely, so the inbox card — which the
1706
+ * periodic sweep escalates yellow → red — is the only signal a human is needed.
1707
+ *
1708
+ * Non-clobbering: if ANY open notification is already on the block (a more specific
1709
+ * `merge_review`, iteration-cap `decision_required`, etc.), it is left untouched and we
1710
+ * raise nothing — so the richer message wins. Best-effort: no notification service
1711
+ * (tests) or a missing block is a no-op.
1712
+ */
1713
+ async ensureWaitingNotification(workspaceId, instance) {
1714
+ const svc = this.notificationService;
1715
+ if (!svc)
1716
+ return;
1717
+ const open = await svc.listOpen(workspaceId);
1718
+ if (open.some((n) => n.blockId === instance.blockId))
1719
+ return;
1720
+ const block = await this.blockRepository.get(workspaceId, instance.blockId);
1721
+ if (!block)
1722
+ return;
1723
+ await svc.raise(workspaceId, {
1724
+ type: 'decision_required',
1725
+ blockId: block.id,
1726
+ executionId: instance.id,
1727
+ title: `"${block.title}" is waiting for your input`,
1728
+ body: 'A pipeline step is parked awaiting a human decision. Open the task to respond.',
1729
+ payload: { pipelineName: instance.pipelineName },
1730
+ });
1731
+ }
1732
+ /**
1733
+ * Clear the auto-raised "waiting for a human decision" card once a run advances past
1734
+ * the decision it was parked on (so the escalation sweep can't flip a settled decision
1735
+ * red). Scoped to the `decision_required` type, so the human-actionable cards a stopped
1736
+ * run leaves behind are untouched. Best-effort: no notification service (tests) is a no-op.
1737
+ */
1738
+ async clearWaitingNotification(workspaceId, instance) {
1739
+ const svc = this.notificationService;
1740
+ if (!svc)
1741
+ return;
1742
+ await svc.clearWaitingDecision(workspaceId, instance.blockId);
1743
+ }
1744
+ /** Raise a `ci_failed` notification when the CI gate exhausts its fixer budget. */
1745
+ async raiseCiFailed(workspaceId, instance, block, summary, attempts) {
1746
+ if (!this.notificationService)
1747
+ return;
1748
+ await this.notificationService.raise(workspaceId, {
1749
+ type: 'ci_failed',
1750
+ blockId: block.id,
1751
+ executionId: instance.id,
1752
+ title: `CI is still failing for "${block.title}"`,
1753
+ body: `The CI-fixer agent tried ${attempts} time(s) but CI is still red. ${summary} ` +
1754
+ `Take a look and retry the run once fixed.`,
1755
+ payload: {
1756
+ ...(block.pullRequest?.url ? { prUrl: block.pullRequest.url } : {}),
1757
+ pipelineName: instance.pipelineName,
1758
+ },
1759
+ });
1760
+ }
1761
+ /** Provision inputs (`{{input.*}}`) derived from the block under deployment. */
1762
+ deployInputs(block) {
1763
+ const inputs = {
1764
+ blockId: block.id,
1765
+ title: block.title,
1766
+ type: block.type,
1767
+ description: block.description,
1768
+ };
1769
+ return inputs;
1770
+ }
1771
+ /**
1772
+ * Typed git/PR/repo context for the deployer, derived from the block's PR ref. A
1773
+ * PR-environment provider (e.g. an in-house adapter) needs the branch/repo to target
1774
+ * the right environment; the same values are also flattened into `{{input.*}}` for
1775
+ * the manifest path. `owner`/`repo` are parsed from the PR url when present.
1776
+ */
1777
+ deployContext(block) {
1778
+ const context = { blockId: block.id };
1779
+ const pr = block.pullRequest;
1780
+ if (!pr)
1781
+ return context;
1782
+ if (pr.branch)
1783
+ context.branch = pr.branch;
1784
+ if (pr.number !== undefined)
1785
+ context.pullNumber = pr.number;
1786
+ if (pr.url) {
1787
+ context.pullUrl = pr.url;
1788
+ const repo = parseRepoFromPullUrl(pr.url);
1789
+ if (repo) {
1790
+ context.repoOwner = repo.owner;
1791
+ context.repoName = repo.repo;
1792
+ }
1793
+ }
1794
+ return context;
1795
+ }
1796
+ /**
1797
+ * Invoke the agent for an already-built context. Failures are swallowed into the
1798
+ * step output so a run never wedges — unless `rethrowAgentErrors` is set (the
1799
+ * durable path), in which case the error propagates so the driver's per-step
1800
+ * retry can take over.
1801
+ */
1802
+ async runAgent(context, options = {}) {
1803
+ try {
1804
+ return await this.agentExecutor.run(context);
1805
+ }
1806
+ catch (error) {
1807
+ // The durable driver wants real failures to surface so its per-step retry
1808
+ // can kick in (and the error gets persisted after retries are exhausted).
1809
+ if (options.rethrowAgentErrors)
1810
+ throw error;
1811
+ // Otherwise a failed agent must not wedge the run; record and complete.
1812
+ return {
1813
+ output: `Agent error: ${getErrorMessage(error)}`,
1814
+ };
1815
+ }
1816
+ }
1817
+ /**
1818
+ * Strictly parse a Blueprinter step's tree and reconcile it onto the board. The
1819
+ * blueprint maps the whole repository, so it is reconciled onto the run block's
1820
+ * **service frame** (walked up from the block), not the task the run targeted.
1821
+ * Best-effort and reconciler-gated: a parse/reconcile failure is logged-by-throw
1822
+ * upstream only when the reconciler is wired; with no reconciler it is a no-op so
1823
+ * the blueprint's in-repo files still land.
1824
+ */
1825
+ async ingestBlueprint(workspaceId, blockId, rawService) {
1826
+ if (!this.blueprintReconciler)
1827
+ return;
1828
+ let service;
1829
+ try {
1830
+ service = parseBlueprintService(rawService);
1831
+ }
1832
+ catch {
1833
+ // A malformed tree must not fail the step (the in-repo files are already
1834
+ // committed); skip the board reconcile.
1835
+ return;
1836
+ }
1837
+ const frameId = await this.contextBuilder.resolveServiceFrameId(workspaceId, blockId);
1838
+ await this.blueprintReconciler.reconcileBlueprint(workspaceId, frameId, service);
1839
+ // The reconcile may have created/updated module + task blocks that aren't
1840
+ // individually pushed; nudge clients to refresh the board so they appear. Name the service
1841
+ // frame so the refresh fans out to every board mounting this shared service.
1842
+ await this.events.boardChanged(workspaceId, 'blueprint-reconciled', frameId);
1843
+ }
1844
+ /**
1845
+ * Strictly validate a spec-writer step's unified specification. The canonical record
1846
+ * is the in-repo `spec/` files the harness already committed; this is the trust
1847
+ * boundary (a malformed payload is dropped, never trusted) plus a client refresh
1848
+ * nudge. A persisted board projection is a deliberate later phase.
1849
+ */
1850
+ async ingestSpec(workspaceId, rawDoc) {
1851
+ try {
1852
+ parseSpecDoc(rawDoc);
1853
+ }
1854
+ catch {
1855
+ // A malformed doc must not fail the step (the in-repo files are already
1856
+ // committed); skip the refresh.
1857
+ return;
1858
+ }
1859
+ // Nudge clients to refresh so they can re-read the service's spec files.
1860
+ await this.events.boardChanged(workspaceId, 'requirements-updated');
1861
+ }
1862
+ // ---- iterative review gates (requirements + clarity) --------------------
1863
+ // The two gate flows live in {@link ReviewGateController}, parameterised by a
1864
+ // {@link ReviewKind}. The public methods below are thin delegators (the HTTP controllers
1865
+ // call them) and the kind builders supply each subject's differentiators. Three shared
1866
+ // state-machine primitives stay here — they are reused by the generic approval path and
1867
+ // the companion iteration-cap gate, so they have a single home: {@link parkStepOnDecision},
1868
+ // {@link advancePastResolvedGate} and {@link dispatchIterationCap}.
1869
+ /**
1870
+ * Park a step on the durable decision-wait the approval gate uses, so a human (or the
1871
+ * dedicated review window) can drive an iterative loop and resume the run. Shared by the
1872
+ * requirements gate and the companion iteration-cap gate: both reuse the SAME parking
1873
+ * mechanism rather than each rolling its own. `proposal` seeds the gate's stored text
1874
+ * (the companion's latest feedback; empty for the requirements window, which renders its
1875
+ * own structured surface via the universal result-view registry).
1876
+ */
1877
+ async parkStepOnDecision(workspaceId, instance, step, proposal = '') {
1878
+ step.approval = { id: this.idGenerator.next('appr'), status: 'pending', proposal };
1879
+ this.pauseStepForInput(step);
1880
+ instance.status = 'blocked';
1881
+ await this.updateBlockProgress(workspaceId, instance, 'blocked');
1882
+ await this.executionRepository.upsert(workspaceId, instance);
1883
+ await this.emitInstance(workspaceId, instance);
1884
+ return { kind: 'awaiting_decision', decisionId: step.approval.id };
1885
+ }
1886
+ /**
1887
+ * Two gates park on a `step.approval` but are NOT generic prose approvals — they are
1888
+ * iterative gates driven by their own dedicated surface, never the generic
1889
+ * approve/request-changes/reject resolvers (which would advance the run bypassing the
1890
+ * loop). Guard those resolvers so a stray approve can't short-circuit either gate:
1891
+ * - the requirements-review gate (driven by re-review / proceed / resolve-exceeded);
1892
+ * - a companion gate that hit its rework cap (`companion.exceeded`), driven by
1893
+ * {@link resolveCompanionExceeded}'s one-more-round / proceed / stop-reset choices.
1894
+ */
1895
+ assertNotIterativeGate(step) {
1896
+ if (step.agentKind === REQUIREMENTS_REVIEW_AGENT_KIND) {
1897
+ throw new ConflictError('Resolve the requirements review through its review window, not the approval gate');
1898
+ }
1899
+ if (step.agentKind === CLARITY_REVIEW_AGENT_KIND) {
1900
+ throw new ConflictError('Resolve the clarity review through its review window, not the approval gate');
1901
+ }
1902
+ if (step.companion?.exceeded) {
1903
+ throw new ConflictError('Resolve this companion review through its iteration-cap prompt, not the approval gate');
1904
+ }
1905
+ }
1906
+ /**
1907
+ * The requirements subject for {@link reviewGate}: closures over the requirements reviewer
1908
+ * service. The service-not-configured guard preserves the exact 409 the inline reviewer
1909
+ * raised before this extraction.
1910
+ */
1911
+ buildRequirementsKind() {
1912
+ const require = () => {
1913
+ if (!this.requirementReviewService?.enabled) {
1914
+ throw new ConflictError('The requirements reviewer is not configured');
1915
+ }
1916
+ return this.requirementReviewService;
1917
+ };
1918
+ return {
1919
+ agentKind: REQUIREMENTS_REVIEW_AGENT_KIND,
1920
+ entityName: 'Requirement review',
1921
+ enabled: () => !!this.requirementReviewService?.enabled,
1922
+ getForBlock: (ws, blockId) => require().getForBlock(ws, blockId),
1923
+ review: (ws, block, preset) => require().review(ws, block.id, {
1924
+ maxIterations: preset.maxRequirementIterations,
1925
+ concernThreshold: preset.maxRequirementConcernAllowed,
1926
+ }),
1927
+ reReview: (ws, reviewId, preset) => require().reReview(ws, reviewId, { concernThreshold: preset.maxRequirementConcernAllowed }),
1928
+ incorporate: async (ws, _blockId, reviewId, feedback) => {
1929
+ await require().incorporate(ws, reviewId, { feedback });
1930
+ },
1931
+ markIncorporated: (ws, reviewId) => require().markIncorporated(ws, reviewId),
1932
+ markReReviewing: (ws, reviewId) => require().markReReviewing(ws, reviewId),
1933
+ markIncorporating: (ws, reviewId) => require().markIncorporating(ws, reviewId),
1934
+ grantExtraRound: (ws, reviewId) => require().grantExtraRound(ws, reviewId),
1935
+ emit: (ws, review) => this.events.requirementReviewChanged?.(ws, review) ?? Promise.resolve(),
1936
+ };
1937
+ }
1938
+ /**
1939
+ * The clarity (bug-report triage) subject for {@link reviewGate}: threads any upstream
1940
+ * `bug-investigator` output into the reviewer/incorporation context, otherwise identical to
1941
+ * the requirements kind.
1942
+ */
1943
+ buildClarityKind() {
1944
+ const require = () => {
1945
+ if (!this.clarityReviewService?.enabled) {
1946
+ throw new ConflictError('The clarity reviewer is not configured');
1947
+ }
1948
+ return this.clarityReviewService;
1949
+ };
1950
+ return {
1951
+ agentKind: CLARITY_REVIEW_AGENT_KIND,
1952
+ entityName: 'Clarity review',
1953
+ enabled: () => !!this.clarityReviewService?.enabled,
1954
+ getForBlock: (ws, blockId) => require().getForBlock(ws, blockId),
1955
+ review: async (ws, block, preset) => require().review(ws, block.id, {
1956
+ maxIterations: preset.maxRequirementIterations,
1957
+ concernThreshold: preset.maxRequirementConcernAllowed,
1958
+ investigation: await this.investigationForBlock(ws, block.id),
1959
+ }),
1960
+ reReview: (ws, reviewId, preset) => require().reReview(ws, reviewId, { concernThreshold: preset.maxRequirementConcernAllowed }),
1961
+ incorporate: async (ws, blockId, reviewId, feedback) => {
1962
+ const investigation = await this.investigationForBlock(ws, blockId);
1963
+ await require().incorporate(ws, reviewId, { feedback, investigation });
1964
+ },
1965
+ markIncorporated: (ws, reviewId) => require().markIncorporated(ws, reviewId),
1966
+ markReReviewing: (ws, reviewId) => require().markReReviewing(ws, reviewId),
1967
+ markIncorporating: (ws, reviewId) => require().markIncorporating(ws, reviewId),
1968
+ grantExtraRound: (ws, reviewId) => require().grantExtraRound(ws, reviewId),
1969
+ emit: (ws, review) => this.events.clarityReviewChanged?.(ws, review) ?? Promise.resolve(),
1970
+ };
1971
+ }
1972
+ /**
1973
+ * Run a fresh reviewer pass over a block's collected requirements, snapshotting the
1974
+ * task's merge-preset knobs (iteration budget + tolerated severity) onto the review.
1975
+ * Shared by the pipeline gate and the off-path inspector "Run review" surface, so both
1976
+ * honour the task's preset identically.
1977
+ */
1978
+ reviewRequirements(workspaceId, blockId) {
1979
+ return this.reviewGate.review(this.requirementsKind, workspaceId, blockId);
1980
+ }
1981
+ /**
1982
+ * Incorporate the human's settled answers ASYNCHRONOUSLY. Validates that every finding is
1983
+ * answered/dismissed, flags the review `incorporating`, records the intent on the parked
1984
+ * gate step, and signals the durable driver to wake — which folds the answers and
1985
+ * re-reviews in the background. Off-path (no parked run) the fold + re-review run inline.
1986
+ */
1987
+ incorporateRequirements(workspaceId, blockId, feedback) {
1988
+ return this.reviewGate.incorporate(this.requirementsKind, workspaceId, blockId, feedback);
1989
+ }
1990
+ /**
1991
+ * Re-review the incorporated document (one more reviewer pass). On convergence
1992
+ * (`incorporated`) the parked run advances; otherwise the window shows the next cycle
1993
+ * (`ready`) or the iteration-cap choices (`exceeded`).
1994
+ */
1995
+ reReviewRequirements(workspaceId, blockId) {
1996
+ return this.reviewGate.reReview(this.requirementsKind, workspaceId, blockId);
1997
+ }
1998
+ /**
1999
+ * Proceed: settle the requirements (the last incorporated doc, if any, becomes what
2000
+ * downstream agents consume) and advance the parked run.
2001
+ */
2002
+ proceedRequirements(workspaceId, blockId) {
2003
+ return this.reviewGate.proceed(this.requirementsKind, workspaceId, blockId);
2004
+ }
2005
+ /**
2006
+ * Route an iteration-cap resolution to its gate-specific handlers. `stop-reset` is
2007
+ * uniform across gates: cancel the run and return the block to phase zero (editable),
2008
+ * keeping whatever reference artifact each gate persists (the requirements doc on its
2009
+ * own table; a companion's producer output on its branch). Shared by the requirements
2010
+ * gate ({@link resolveRequirementsExceeded}) and the companion gate
2011
+ * ({@link resolveCompanionExceeded}) so the three-way choice lives in one place.
2012
+ */
2013
+ async dispatchIterationCap(workspaceId, blockId, choice, handlers) {
2014
+ if (choice === 'extra-round') {
2015
+ await handlers.extraRound();
2016
+ }
2017
+ else if (choice === 'proceed') {
2018
+ await handlers.proceed();
2019
+ }
2020
+ else {
2021
+ // stop-reset: tear down the run + reset the block to phase zero (editable).
2022
+ await this.cancel(workspaceId, blockId);
2023
+ }
2024
+ }
2025
+ /**
2026
+ * Resolve a requirements review that hit its iteration cap: grant one more round,
2027
+ * proceed with the last incorporated doc, or stop the task and reset it to phase zero.
2028
+ */
2029
+ resolveRequirementsExceeded(workspaceId, blockId, choice) {
2030
+ return this.reviewGate.resolveExceeded(this.requirementsKind, workspaceId, blockId, choice);
2031
+ }
2032
+ /**
2033
+ * Resolve a companion step parked at its automatic-rework cap (`companion.exceeded`):
2034
+ * grant one more round, proceed accepting the producer's current output, or stop the
2035
+ * task and reset it to phase zero. The companion mirror of
2036
+ * {@link resolveRequirementsExceeded}, sharing the iteration-cap dispatch + the
2037
+ * gate-resume plumbing. Idempotent — an already-resolved gate returns the instance
2038
+ * unchanged. Scoped by execution + approval id (the execution controller surface),
2039
+ * since a companion gate is not block-addressed like the requirements window.
2040
+ */
2041
+ async resolveCompanionExceeded(workspaceId, executionId, approvalId, choice) {
2042
+ await this.requireWorkspace(workspaceId);
2043
+ const instance = assertFound(await this.executionRepository.get(workspaceId, executionId), 'Execution', executionId);
2044
+ const stepIndex = instance.steps.findIndex((s) => s.approval?.id === approvalId);
2045
+ const step = instance.steps[stepIndex];
2046
+ if (!step || !step.approval)
2047
+ throw new NotFoundError('Approval', approvalId);
2048
+ if (!step.companion?.exceeded) {
2049
+ throw new ConflictError(`Approval '${approvalId}' is not a companion iteration-cap gate`);
2050
+ }
2051
+ if (step.approval.status === 'approved')
2052
+ return instance;
2053
+ await this.dispatchIterationCap(workspaceId, instance.blockId, choice, {
2054
+ // Grant one more automatic rework: raise the budget by one, clear the cap flag, then
2055
+ // loop the producer back through the companion to re-grade (`rerunProducerThrough`
2056
+ // un-parks the gate by resetting the companion step). The last verdict's feedback
2057
+ // drives the rework, the same way the automatic loop folds the live assessment in.
2058
+ extraRound: async () => {
2059
+ step.companion.maxAttempts += 1;
2060
+ step.companion.exceeded = undefined;
2061
+ const producer = instance.steps[this.companionProducerIndex(instance, stepIndex)];
2062
+ this.loopCompanionProducer(instance, stepIndex, {
2063
+ previousProposal: producer?.output ?? '',
2064
+ feedback: step.companion.verdicts.at(-1)?.feedback ?? '',
2065
+ });
2066
+ await this.updateBlockProgress(workspaceId, instance, 'in_progress');
2067
+ await this.executionRepository.upsert(workspaceId, instance);
2068
+ await this.workRunner.signalDecision(workspaceId, instance.id, approvalId, 'extra-round');
2069
+ await this.emitInstance(workspaceId, instance);
2070
+ },
2071
+ // Proceed: accept the producer's current output and advance past the gate.
2072
+ proceed: async () => {
2073
+ step.companion.exceeded = undefined;
2074
+ step.approval.status = 'approved';
2075
+ await this.advancePastResolvedGate(workspaceId, instance, stepIndex);
2076
+ },
2077
+ });
2078
+ return instance;
2079
+ }
2080
+ /**
2081
+ * Finish a gate step the human just resolved (its `approval` already marked `approved`),
2082
+ * then either finish the run (final step) or advance to the next step, persist, and wake
2083
+ * the parked durable driver. The single advance/finalize/signal path shared by every
2084
+ * gate-resume site — the generic approval ({@link approveStep}), the review gates (via
2085
+ * {@link ReviewGateController}) and the companion iteration-cap proceed
2086
+ * ({@link resolveCompanionExceeded}) — so the logic lives in exactly one place.
2087
+ */
2088
+ async advancePastResolvedGate(workspaceId, instance, stepIndex) {
2089
+ const step = instance.steps[stepIndex];
2090
+ const decisionId = step.approval.id;
2091
+ this.finishStep(step);
2092
+ step.progress = 1;
2093
+ const isFinalStep = stepIndex === instance.steps.length - 1;
2094
+ if (isFinalStep) {
2095
+ instance.status = 'done';
2096
+ await this.finalizeBlock(workspaceId, instance, undefined);
2097
+ await this.stopRunContainer(workspaceId, instance);
2098
+ }
2099
+ else {
2100
+ instance.currentStep = stepIndex + 1;
2101
+ const next = instance.steps[instance.currentStep];
2102
+ if (next)
2103
+ this.startStep(next);
2104
+ if (instance.status === 'blocked')
2105
+ instance.status = 'running';
2106
+ await this.updateBlockProgress(workspaceId, instance, 'in_progress');
2107
+ }
2108
+ await this.executionRepository.upsert(workspaceId, instance);
2109
+ await this.workRunner.signalDecision(workspaceId, instance.id, decisionId, 'approved');
2110
+ await this.emitInstance(workspaceId, instance);
2111
+ }
2112
+ // ---- clarity-review context helpers (bug-report triage) ------------------
2113
+ // The clarity gate triages a block's bug report — optionally enriched by an upstream
2114
+ // `bug-investigator` step's prose output — through the SAME {@link ReviewGateController}
2115
+ // flow as requirements; these two helpers resolve that investigator output as the triage
2116
+ // subject, threaded into the clarity {@link ReviewKind}.
2117
+ /** The latest `bug-investigator` step output on a run (the triage subject), or undefined. */
2118
+ investigationFor(instance) {
2119
+ for (let i = instance.steps.length - 1; i >= 0; i--) {
2120
+ const s = instance.steps[i];
2121
+ if (s.agentKind === BUG_INVESTIGATOR_AGENT_KIND && s.output)
2122
+ return s.output;
2123
+ }
2124
+ return undefined;
2125
+ }
2126
+ /** Resolve a block's investigator output via its current execution (off the gate path). */
2127
+ async investigationForBlock(workspaceId, blockId) {
2128
+ const block = await this.blockRepository.get(workspaceId, blockId);
2129
+ if (!block?.executionId)
2130
+ return undefined;
2131
+ const instance = await this.executionRepository.get(workspaceId, block.executionId);
2132
+ return instance ? this.investigationFor(instance) : undefined;
2133
+ }
2134
+ /**
2135
+ * Run a fresh clarity reviewer pass over a block's bug report, snapshotting the task's
2136
+ * merge-preset knobs (iteration budget + tolerated severity) and threading in any
2137
+ * `bug-investigator` output as the triage subject. Shared by the gate + the off-path
2138
+ * inspector "Run review" surface.
2139
+ */
2140
+ reviewClarity(workspaceId, blockId) {
2141
+ return this.reviewGate.review(this.clarityKind, workspaceId, blockId);
2142
+ }
2143
+ /** Incorporate the human's settled answers ASYNCHRONOUSLY (the clarity mirror of {@link incorporateRequirements}). */
2144
+ incorporateClarity(workspaceId, blockId, feedback) {
2145
+ return this.reviewGate.incorporate(this.clarityKind, workspaceId, blockId, feedback);
2146
+ }
2147
+ /** Re-review the clarified report (one more pass). On convergence the parked run advances. */
2148
+ reReviewClarity(workspaceId, blockId) {
2149
+ return this.reviewGate.reReview(this.clarityKind, workspaceId, blockId);
2150
+ }
2151
+ /** Proceed: settle the clarity review and advance the parked run. */
2152
+ proceedClarity(workspaceId, blockId) {
2153
+ return this.reviewGate.proceed(this.clarityKind, workspaceId, blockId);
2154
+ }
2155
+ /** Resolve a clarity review that hit its iteration cap (extra-round / proceed / stop-reset). */
2156
+ resolveClarityExceeded(workspaceId, blockId, choice) {
2157
+ return this.reviewGate.resolveExceeded(this.clarityKind, workspaceId, blockId, choice);
2158
+ }
2159
+ /**
2160
+ * Push the run's latest state to subscribed clients, alongside its rolled-up
2161
+ * block so the board updates without a refetch. Best-effort: the publisher
2162
+ * swallows its own errors, and the persisted run remains the source of truth.
2163
+ */
2164
+ async emitInstance(workspaceId, instance) {
2165
+ // Stamp each step with the run id so a lone step (in a pushed event, a log line, a
2166
+ // detail view) is self-describing for debugging; the value always equals the run id.
2167
+ for (const step of instance.steps)
2168
+ step.runId = instance.id;
2169
+ // The metrics rollup and the block fetch are independent, so run them concurrently
2170
+ // — the rollup adds no serial latency to the (frequent) emit path.
2171
+ const [, block] = await Promise.all([
2172
+ this.attachStepMetrics(workspaceId, instance),
2173
+ this.blockRepository.get(workspaceId, instance.blockId),
2174
+ ]);
2175
+ await this.events.executionChanged(workspaceId, instance, block);
2176
+ // When a run reaches a terminal state, delete its per-run personal-credential
2177
+ // activation immediately (individual-usage subscriptions) so the system-encrypted
2178
+ // token copy doesn't linger to its TTL. Best-effort + idempotent — a missing repo or
2179
+ // a re-emit of an already-cleared run is a no-op, and a failure here must never
2180
+ // derail the emit.
2181
+ if (this.subscriptionActivations &&
2182
+ (instance.status === 'done' || instance.status === 'failed')) {
2183
+ try {
2184
+ await this.subscriptionActivations.deleteByExecution(instance.id);
2185
+ }
2186
+ catch {
2187
+ // Swallow — a failure here must never derail the emit. This is not a silent
2188
+ // data-loss path: the TTL sweep reclaims the row as a backstop, and the sweep
2189
+ // (Worker cron / Node retention timer) logs its own errors, so a *systemic*
2190
+ // cleanup failure surfaces there rather than being lost here.
2191
+ }
2192
+ }
2193
+ }
2194
+ /**
2195
+ * Roll the run's recorded LLM calls into per-step `metrics` for the board, in
2196
+ * place on the emitted instance. The proxy keys calls by execution + agentKind
2197
+ * (not step index), so the aggregate is per-agent-kind within the run; steps
2198
+ * sharing a kind get the same rollup. Best-effort and a no-op when the sink is
2199
+ * not wired, so it never blocks an emit.
2200
+ */
2201
+ async attachStepMetrics(workspaceId, instance) {
2202
+ if (!this.llmObservability)
2203
+ return;
2204
+ try {
2205
+ const summaries = await this.llmObservability.summarizeByExecution(workspaceId, instance.id);
2206
+ if (summaries.length === 0)
2207
+ return;
2208
+ const byKind = new Map(summaries.map((s) => [s.agentKind, s]));
2209
+ for (const step of instance.steps) {
2210
+ const s = byKind.get(step.agentKind);
2211
+ if (!s)
2212
+ continue;
2213
+ step.metrics = {
2214
+ calls: s.calls,
2215
+ promptTokens: s.promptTokens,
2216
+ completionTokens: s.completionTokens,
2217
+ peakCompletionTokens: s.peakCompletionTokens,
2218
+ maxOutputTokens: s.maxOutputTokens,
2219
+ truncatedCalls: s.truncatedCalls,
2220
+ upstreamMs: s.upstreamMs,
2221
+ overheadMs: s.overheadMs,
2222
+ errors: s.errors,
2223
+ warnings: s.warnings,
2224
+ };
2225
+ }
2226
+ }
2227
+ catch (error) {
2228
+ // Observability is best-effort; never block an emit on a metrics read.
2229
+ void error;
2230
+ }
2231
+ }
2232
+ /** Set the block's in-progress/blocked status and step-completion progress. */
2233
+ async updateBlockProgress(workspaceId, instance, status) {
2234
+ const total = instance.steps.length || 1;
2235
+ const done = instance.steps.filter((s) => s.state === 'done').length;
2236
+ await this.blockRepository.update(workspaceId, instance.blockId, {
2237
+ status,
2238
+ progress: Math.min(1, done / total),
2239
+ });
2240
+ }
2241
+ /**
2242
+ * Advance the block's step PROGRESS without touching its status — used when a step
2243
+ * resolver already owns the block's terminal status (the merger set `done`/`pr_ready`)
2244
+ * and a trailing step still follows, so the bar moves on without downgrading that status.
2245
+ */
2246
+ async refreshBlockProgress(workspaceId, instance) {
2247
+ const total = instance.steps.length || 1;
2248
+ const done = instance.steps.filter((s) => s.state === 'done').length;
2249
+ await this.blockRepository.update(workspaceId, instance.blockId, {
2250
+ progress: Math.min(1, done / total),
2251
+ });
2252
+ }
2253
+ /**
2254
+ * A pipeline finished. A frame becomes `done` (a mapping-only run leaves it
2255
+ * `ready`). A *task* never auto-`done`s from a confidence score any more — that
2256
+ * looked merged when the PR was still open with red CI. Instead:
2257
+ * - if the pipeline has a `merger` step, it already owned the merge/notify
2258
+ * decision (see {@link resolveMergerStep}); we only backstop a missing one;
2259
+ * - otherwise the work is complete but unmerged: leave the PR open (`pr_ready`)
2260
+ * and raise a `pipeline_complete` notification for a human to confirm + merge.
2261
+ * `done` now strictly means the PR was merged (see {@link finalizeMerge}).
2262
+ */
2263
+ async finalizeBlock(workspaceId, instance, confidence) {
2264
+ const block = await this.blockRepository.get(workspaceId, instance.blockId);
2265
+ if (!block || block.status === 'done')
2266
+ return;
2267
+ if ((block.level ?? 'frame') !== 'task') {
2268
+ // A mapping-only run (just the `blueprints` step, e.g. kicked off after a
2269
+ // bootstrap) leaves the service frame `ready` and droppable rather than
2270
+ // marking the whole service "done".
2271
+ const mappingOnly = instance.steps.every((s) => s.agentKind === 'blueprints');
2272
+ await this.blockRepository.update(workspaceId, block.id, {
2273
+ status: mappingOnly ? 'ready' : 'done',
2274
+ progress: 1,
2275
+ });
2276
+ return;
2277
+ }
2278
+ // Confidence is recorded by the caller (recordStepResult) before any merge, so
2279
+ // it persists on both the merge and review paths; `confidence` is unused here.
2280
+ void confidence;
2281
+ const hasMerger = instance.steps.some((s) => s.agentKind === MERGER_AGENT_KIND);
2282
+ if (hasMerger) {
2283
+ // The `merger` step already merged (→ `done`) or raised a review (→ `pr_ready`).
2284
+ // Only backstop the case where it produced no decision at all.
2285
+ const fresh = await this.blockRepository.get(workspaceId, block.id);
2286
+ if (fresh && fresh.status !== 'done' && fresh.status !== 'pr_ready') {
2287
+ await this.blockRepository.update(workspaceId, block.id, {
2288
+ status: 'pr_ready',
2289
+ progress: 1,
2290
+ });
2291
+ }
2292
+ return;
2293
+ }
2294
+ // No merger in this pipeline: complete but unmerged — ask a human to confirm.
2295
+ await this.blockRepository.update(workspaceId, block.id, { status: 'pr_ready', progress: 1 });
2296
+ await this.raisePipelineComplete(workspaceId, instance, block);
2297
+ }
2298
+ /**
2299
+ * Merge a block's PR for real, then mark it `done`. The remote merge happens
2300
+ * FIRST (via the {@link PullRequestMerger} port) and only on its success does the
2301
+ * block flip to `done` — so `done` provably means "merged", not a board-only
2302
+ * status. When no merger is wired (tests) this degrades to the old board-only
2303
+ * flip. Throws if the remote merge fails so callers can fall back to a manual
2304
+ * merge / review notification.
2305
+ */
2306
+ async finalizeMerge(workspaceId, blockId) {
2307
+ const block = await this.blockRepository.get(workspaceId, blockId);
2308
+ if (!block)
2309
+ return;
2310
+ if (this.prMerger && block.pullRequest) {
2311
+ // Throws on a blocked/failed merge — the caller decides what to do next.
2312
+ await this.prMerger.mergeForBlock(workspaceId, blockId);
2313
+ }
2314
+ await this.blockRepository.update(workspaceId, blockId, { status: 'done', progress: 1 });
2315
+ if ((block.level ?? 'frame') === 'task') {
2316
+ await this.applyModuleAssignment(workspaceId, blockId);
2317
+ }
2318
+ }
2319
+ /**
2320
+ * Resolve the merge threshold preset that governs a task: its explicitly-picked
2321
+ * preset, else the workspace default, else the built-in {@link DEFAULT_MERGE_PRESET}.
2322
+ * Returns just the thresholds the engine compares against (+ the CI attempt budget).
2323
+ */
2324
+ async resolveMergePreset(workspaceId, block) {
2325
+ if (this.mergePresetRepository) {
2326
+ if (block.mergePresetId) {
2327
+ const picked = await this.mergePresetRepository.get(workspaceId, block.mergePresetId);
2328
+ if (picked)
2329
+ return picked;
2330
+ }
2331
+ const fallback = await this.mergePresetRepository.getDefault(workspaceId);
2332
+ if (fallback)
2333
+ return fallback;
2334
+ }
2335
+ return DEFAULT_MERGE_PRESET;
2336
+ }
2337
+ /**
2338
+ * Resolve a finished `on-call` investigation (the post-release-health gate's helper):
2339
+ * parse its assessment, raise a `release_regression` notification for a human, enrich
2340
+ * any incident PagerDuty/incident.io already opened, then finish the gate step so the
2341
+ * run completes (the human acts on the notification out-of-band — the engine never
2342
+ * auto-reverts). Best-effort on the side-effects; the step always finishes.
2343
+ */
2344
+ async resolveOnCallStep(workspaceId, instance, step, block, result, isFinalStep, investigationFailed = false) {
2345
+ let assessment = null;
2346
+ try {
2347
+ assessment = parseOnCallAssessment(result.onCallAssessment);
2348
+ }
2349
+ catch {
2350
+ assessment = null;
2351
+ }
2352
+ // Reuse the regressed signals captured when the gate escalated (see the gate's
2353
+ // gatherHelperPriorOutputs) so the notification + incident enrichment reflect exactly
2354
+ // what the on-call agent investigated and we don't re-read Datadog a third time. Only
2355
+ // fall back to a fresh gather if they weren't persisted (e.g. an older parked run).
2356
+ const since = step.gate?.watchSince ?? this.clock.now();
2357
+ let regressedSignals = step.gate?.regressedSignals ?? [];
2358
+ if (regressedSignals.length === 0 && this.releaseHealthProvider) {
2359
+ try {
2360
+ const evidence = await this.releaseHealthProvider.gatherEvidence(workspaceId, block.id, since);
2361
+ regressedSignals = evidence.regressedSignals;
2362
+ }
2363
+ catch {
2364
+ // best-effort: the assessment + summary still drive the notification
2365
+ }
2366
+ }
2367
+ const baseSummary = step.gate?.lastFailureSummary ?? '';
2368
+ const summary = investigationFailed
2369
+ ? `${baseSummary} The automated on-call investigation could not complete, so no culprit assessment is available — investigate manually.`.trim()
2370
+ : baseSummary;
2371
+ await this.raiseReleaseRegression(workspaceId, instance, block, assessment, regressedSignals, summary);
2372
+ await this.enrichIncident(workspaceId, block, assessment, regressedSignals, since);
2373
+ const output = assessment
2374
+ ? `On-call investigation: ${assessment.recommendation} (culprit confidence ${pct(assessment.culpritConfidence)}). ${assessment.rationale}`
2375
+ : investigationFailed
2376
+ ? 'On-call investigation did not complete; raised a release-regression notification for manual triage.'
2377
+ : 'On-call investigation completed; see the release-regression notification.';
2378
+ return this.recordStepResult(workspaceId, instance, step, isFinalStep, { ...result, output });
2379
+ }
2380
+ /** Raise a `release_regression` notification carrying the on-call assessment + signals. */
2381
+ async raiseReleaseRegression(workspaceId, instance, block, assessment, signals, summary) {
2382
+ if (!this.notificationService)
2383
+ return;
2384
+ const body = assessment
2385
+ ? `Post-release monitoring flagged a regression after this PR shipped. On-call recommends ` +
2386
+ `**${assessment.recommendation}** (culprit confidence ${pct(assessment.culpritConfidence)}). ` +
2387
+ `${assessment.rationale}`
2388
+ : `Post-release monitoring flagged a regression after this PR shipped. ${summary} ` +
2389
+ `Investigate before deciding whether to revert.`;
2390
+ await this.notificationService.raise(workspaceId, {
2391
+ type: 'release_regression',
2392
+ blockId: block.id,
2393
+ executionId: instance.id,
2394
+ title: `Release regression for "${block.title}"`,
2395
+ body,
2396
+ payload: {
2397
+ ...(assessment ? { onCallAssessment: assessment } : {}),
2398
+ ...(signals.length ? { releaseSignals: signals } : {}),
2399
+ ...(block.pullRequest?.url ? { prUrl: block.pullRequest.url } : {}),
2400
+ pipelineName: instance.pipelineName,
2401
+ },
2402
+ });
2403
+ }
2404
+ /**
2405
+ * Best-effort: annotate an incident PagerDuty / incident.io already opened (from the
2406
+ * same monitors/SLOs) with the on-call investigation. NOT alerting — those systems
2407
+ * already paged. A no-op when no provider is wired or no matching incident exists.
2408
+ */
2409
+ async enrichIncident(workspaceId, block, assessment, signals, since) {
2410
+ if (!this.incidentEnrichment)
2411
+ return;
2412
+ const update = {
2413
+ title: `Regression suspected from "${block.title}"`,
2414
+ body: assessment
2415
+ ? `${assessment.rationale} (recommendation: ${assessment.recommendation}, culprit confidence ${pct(assessment.culpritConfidence)})`
2416
+ : 'cat-factory on-call investigated a post-release regression suspected from this change.',
2417
+ ...(block.pullRequest?.url ? { prUrl: block.pullRequest.url } : {}),
2418
+ };
2419
+ try {
2420
+ await this.incidentEnrichment.enrich({ workspaceId, signalIds: signals.map((s) => s.id), since }, update);
2421
+ }
2422
+ catch {
2423
+ // best-effort: a failing enrichment must not block the run or the notification
2424
+ }
2425
+ }
2426
+ /** Raise a `pipeline_complete` notification for a no-merger run awaiting confirmation. */
2427
+ async raisePipelineComplete(workspaceId, instance, block) {
2428
+ if (!this.notificationService)
2429
+ return;
2430
+ await this.notificationService.raise(workspaceId, {
2431
+ type: 'pipeline_complete',
2432
+ blockId: block.id,
2433
+ executionId: instance.id,
2434
+ title: `Confirm "${block.title}" is complete`,
2435
+ body: `The "${instance.pipelineName}" pipeline finished and opened a PR, but it has no ` +
2436
+ `merger step. Review the work and confirm it as complete (this merges the PR).`,
2437
+ payload: {
2438
+ ...(block.pullRequest?.url ? { prUrl: block.pullRequest.url } : {}),
2439
+ pipelineName: instance.pipelineName,
2440
+ },
2441
+ });
2442
+ }
2443
+ /**
2444
+ * Implementing a task assigned to a module materialises that module: create it
2445
+ * in the service if missing, then move the task inside it.
2446
+ */
2447
+ async applyModuleAssignment(workspaceId, taskId) {
2448
+ const task = await this.blockRepository.get(workspaceId, taskId);
2449
+ if (!task || !task.moduleName)
2450
+ return;
2451
+ const blocks = await this.blockRepository.listByWorkspace(workspaceId);
2452
+ const service = serviceOf(blocks, task);
2453
+ if (!service)
2454
+ return;
2455
+ let module = blocks.find((b) => b.parentId === service.id && b.level === 'module' && b.title === task.moduleName);
2456
+ if (!module) {
2457
+ module = await this.board.addModule(workspaceId, service.id, {
2458
+ name: task.moduleName,
2459
+ });
2460
+ }
2461
+ if (module.id !== task.parentId) {
2462
+ const n = blocks.filter((b) => b.parentId === module.id && b.level === 'task').length;
2463
+ await this.board.reparent(workspaceId, taskId, {
2464
+ parentId: module.id,
2465
+ position: { x: 16 + (n % 2) * 190, y: 40 + Math.floor(n / 2) * 130 },
2466
+ });
2467
+ }
2468
+ // A module node appeared and/or a task changed parent — the per-block event
2469
+ // can't express that hierarchy change, so signal a coarse board refresh. Name the moved
2470
+ // task so the refresh fans out to every board mounting its shared service.
2471
+ await this.events.boardChanged(workspaceId, 'module', taskId);
2472
+ }
2473
+ /** Resolve a pending decision; the run's next step lets the agent finish it. */
2474
+ async resolveDecision(workspaceId, executionId, decisionId, choice) {
2475
+ await this.requireWorkspace(workspaceId);
2476
+ const instance = assertFound(await this.executionRepository.get(workspaceId, executionId), 'Execution', executionId);
2477
+ const step = instance.steps.find((s) => s.decision?.id === decisionId);
2478
+ if (!step || !step.decision)
2479
+ throw new NotFoundError('Decision', decisionId);
2480
+ step.decision.chosen = choice;
2481
+ this.startStep(step);
2482
+ if (instance.status === 'blocked')
2483
+ instance.status = 'running';
2484
+ await this.updateBlockProgress(workspaceId, instance, 'in_progress');
2485
+ await this.executionRepository.upsert(workspaceId, instance);
2486
+ // Wake the parked durable run, if any. The DB write above remains the source
2487
+ // of truth (so the backstop sweeper can still re-drive it); the signal is an
2488
+ // optimisation that lets the workflow continue immediately.
2489
+ await this.workRunner.signalDecision(workspaceId, instance.id, decisionId, choice);
2490
+ await this.emitInstance(workspaceId, instance);
2491
+ return instance;
2492
+ }
2493
+ /**
2494
+ * Approve a step's gated proposal: the run advances to the next step, carrying
2495
+ * the (optionally human-edited) proposal forward as context. Mirrors
2496
+ * {@link resolveDecision}'s durable-wake but *advances* the pipeline instead of
2497
+ * re-running the step (the step is already done). Idempotent — re-approving an
2498
+ * already-approved gate is a no-op.
2499
+ */
2500
+ async approveStep(workspaceId, executionId, approvalId, opts = {}) {
2501
+ await this.requireWorkspace(workspaceId);
2502
+ const instance = assertFound(await this.executionRepository.get(workspaceId, executionId), 'Execution', executionId);
2503
+ const stepIndex = instance.steps.findIndex((s) => s.approval?.id === approvalId);
2504
+ const step = instance.steps[stepIndex];
2505
+ if (!step || !step.approval)
2506
+ throw new NotFoundError('Approval', approvalId);
2507
+ this.assertNotIterativeGate(step);
2508
+ if (step.approval.status === 'approved')
2509
+ return instance;
2510
+ // A human edit to the proposal replaces the agent's text, so the revised
2511
+ // proposal is what downstream steps read (via priorOutputs).
2512
+ if (opts.proposal !== undefined) {
2513
+ step.output = opts.proposal;
2514
+ step.approval.proposal = opts.proposal;
2515
+ }
2516
+ step.approval.status = 'approved';
2517
+ // A gate is never raised on the final step, but the shared advance stays defensive.
2518
+ await this.advancePastResolvedGate(workspaceId, instance, stepIndex);
2519
+ return instance;
2520
+ }
2521
+ /**
2522
+ * Request changes on a step's gated proposal: the same step re-runs with the
2523
+ * human's freeform feedback and/or per-block comments (and its prior proposal)
2524
+ * folded into the agent's context (see {@link AgentContextBuilder}). The run is left
2525
+ * `running` on the same step; on the re-run's completion the gate is raised
2526
+ * afresh. At least one of `feedback`/`comments` is expected (the controller
2527
+ * validates this), but an empty review is harmless — the agent simply re-runs.
2528
+ */
2529
+ async requestStepChanges(workspaceId, executionId, approvalId, review) {
2530
+ await this.requireWorkspace(workspaceId);
2531
+ const instance = assertFound(await this.executionRepository.get(workspaceId, executionId), 'Execution', executionId);
2532
+ const step = instance.steps.find((s) => s.approval?.id === approvalId);
2533
+ if (!step || !step.approval)
2534
+ throw new NotFoundError('Approval', approvalId);
2535
+ this.assertNotIterativeGate(step);
2536
+ if (step.approval.status === 'approved') {
2537
+ throw new ConflictError(`Approval '${approvalId}' is already approved`);
2538
+ }
2539
+ if (step.approval.status === 'rejected') {
2540
+ throw new ConflictError(`Approval '${approvalId}' was rejected`);
2541
+ }
2542
+ // A re-run is already in flight (and will raise a fresh gate on completion);
2543
+ // acting on this now-stale gate id would dispatch duplicate work.
2544
+ if (step.approval.status === 'changes_requested') {
2545
+ throw new ConflictError(`Approval '${approvalId}' is already being re-run`);
2546
+ }
2547
+ const stepIndex = instance.steps.findIndex((s) => s.approval?.id === approvalId);
2548
+ step.approval.status = 'changes_requested';
2549
+ step.approval.feedback = review.feedback;
2550
+ step.approval.comments = review.comments?.length ? review.comments : undefined;
2551
+ // A companion's gate reviews the PRODUCER's output, not the companion's own work:
2552
+ // requesting changes here must re-run the producer (with the human's feedback
2553
+ // folded in) and re-grade, NOT re-run the companion. Redirect the rework to the
2554
+ // nearest preceding step of one of the companion's target kinds.
2555
+ if (isCompanionKind(step.agentKind)) {
2556
+ const targets = companionTargets(step.agentKind);
2557
+ let producerIndex = -1;
2558
+ for (let i = stepIndex - 1; i >= 0; i--) {
2559
+ if (targets.includes(instance.steps[i].agentKind)) {
2560
+ producerIndex = i;
2561
+ break;
2562
+ }
2563
+ }
2564
+ const producer = producerIndex >= 0 ? instance.steps[producerIndex] : undefined;
2565
+ if (producer) {
2566
+ // Re-run the producer (with the human's feedback) and every step up to and
2567
+ // including the companion, then the companion re-grades. Does NOT touch the
2568
+ // companion's automatic-rework budget — a human-driven iteration is unbounded.
2569
+ const previousProposal = producer.output ?? step.approval.proposal;
2570
+ this.rerunProducerThrough(instance, producerIndex, stepIndex, {
2571
+ previousProposal,
2572
+ feedback: review.feedback ?? '',
2573
+ ...(review.comments?.length ? { comments: review.comments } : {}),
2574
+ });
2575
+ if (instance.status === 'blocked')
2576
+ instance.status = 'running';
2577
+ await this.executionRepository.upsert(workspaceId, instance);
2578
+ await this.workRunner.signalDecision(workspaceId, instance.id, approvalId, 'changes_requested');
2579
+ await this.emitInstance(workspaceId, instance);
2580
+ return instance;
2581
+ }
2582
+ }
2583
+ // Drop the live job handle so the re-run dispatches fresh work rather than
2584
+ // re-attaching to the finished job (async steps); inline steps ignore this.
2585
+ step.jobId = undefined;
2586
+ // A requested re-run is a fresh execution: clear the prior timing so the next
2587
+ // start/finish times this attempt rather than spanning the human gate wait.
2588
+ step.startedAt = null;
2589
+ step.finishedAt = null;
2590
+ this.startStep(step);
2591
+ if (instance.status === 'blocked')
2592
+ instance.status = 'running';
2593
+ await this.executionRepository.upsert(workspaceId, instance);
2594
+ await this.workRunner.signalDecision(workspaceId, instance.id, approvalId, 'changes_requested');
2595
+ await this.emitInstance(workspaceId, instance);
2596
+ return instance;
2597
+ }
2598
+ /**
2599
+ * Reject a step's gated proposal: the run stops entirely. The gate is marked
2600
+ * `rejected` and the run is failed with a dedicated `rejected` failure kind, so
2601
+ * the board surfaces it via the shared failure banner (block → `blocked`) with a
2602
+ * Retry affordance. The parked durable run is woken so it observes the now-terminal
2603
+ * status and stops (the workflow's advance loop no-ops on a non-running run).
2604
+ * Idempotent — rejecting an already-terminal gate is a no-op.
2605
+ */
2606
+ async rejectStep(workspaceId, executionId, approvalId, reason) {
2607
+ await this.requireWorkspace(workspaceId);
2608
+ const instance = assertFound(await this.executionRepository.get(workspaceId, executionId), 'Execution', executionId);
2609
+ const step = instance.steps.find((s) => s.approval?.id === approvalId);
2610
+ if (!step || !step.approval)
2611
+ throw new NotFoundError('Approval', approvalId);
2612
+ this.assertNotIterativeGate(step);
2613
+ if (step.approval.status === 'approved') {
2614
+ throw new ConflictError(`Approval '${approvalId}' is already approved`);
2615
+ }
2616
+ // A re-run is in flight; this gate id is stale (a fresh one is raised on its
2617
+ // completion). Reject the current gate via that fresh id, not this one.
2618
+ if (step.approval.status === 'changes_requested') {
2619
+ throw new ConflictError(`Approval '${approvalId}' is being re-run`);
2620
+ }
2621
+ // Already rejected (and the run already failed): return as-is.
2622
+ if (step.approval.status === 'rejected') {
2623
+ return (await this.executionRepository.get(workspaceId, executionId)) ?? instance;
2624
+ }
2625
+ step.approval.status = 'rejected';
2626
+ if (reason)
2627
+ step.approval.feedback = reason;
2628
+ await this.executionRepository.upsert(workspaceId, instance);
2629
+ const message = reason
2630
+ ? `A reviewer rejected the proposal: ${reason}`
2631
+ : 'A reviewer rejected the proposal, stopping the run.';
2632
+ // failRun persists the terminal failure + flips the block to `blocked` and emits.
2633
+ await this.failRun(workspaceId, executionId, message, 'rejected');
2634
+ // Wake the parked durable run; it re-reads the now-terminal status and stops.
2635
+ await this.workRunner.signalDecision(workspaceId, instance.id, approvalId, 'rejected');
2636
+ return assertFound(await this.executionRepository.get(workspaceId, executionId), 'Execution', executionId);
2637
+ }
2638
+ /** Merge an open PR: a block moves from `pr_ready` to `done`. */
2639
+ async mergePr(workspaceId, blockId) {
2640
+ await this.requireWorkspace(workspaceId);
2641
+ const block = await this.requireBlock(workspaceId, blockId);
2642
+ if (block.status !== 'pr_ready') {
2643
+ throw new ConflictError(`Block '${blockId}' has no PR awaiting merge`);
2644
+ }
2645
+ await this.finalizeMerge(workspaceId, blockId);
2646
+ return this.requireBlock(workspaceId, blockId);
2647
+ }
2648
+ /**
2649
+ * Record a terminal agent failure: persist a structured {@link AgentFailure},
2650
+ * flip the run to `failed`, and mark the block `blocked` (needs attention) — NOT
2651
+ * `pr_ready`, which looked like success and hid the failure. The board then
2652
+ * renders the same failure banner + retry as a failed bootstrap. Called by the
2653
+ * durable driver once a step has exhausted its retries (or a job/decision
2654
+ * faulted); `kind` classifies the cause so the right hint is shown.
2655
+ */
2656
+ async failRun(workspaceId, executionId, message, kind = 'agent', detail = null) {
2657
+ const instance = await this.executionRepository.get(workspaceId, executionId);
2658
+ if (!instance)
2659
+ return;
2660
+ // Reclaim the per-run container on the failure path too: a failed run otherwise
2661
+ // leaves its container to idle out sleepAfter. This is the single funnel for
2662
+ // every failure kind (job_failed from the driver, the spend/decision timeouts,
2663
+ // and the user-facing stopRun, which already reclaimed — the call is idempotent).
2664
+ await this.stopRunContainer(workspaceId, instance);
2665
+ // The FIRST recorded failure wins: a run already in a terminal `failed` state keeps
2666
+ // its existing (richest) failure rather than being overwritten. An inline gate that
2667
+ // knows the precise kind/detail returns a `job_failed` result the driver funnels here,
2668
+ // so there should only ever be one write — but this guards against a future path that
2669
+ // both records a failure and returns `job_failed`, which would otherwise clobber the
2670
+ // good record with a generic one (the companion-rejected regression).
2671
+ if (instance.status === 'failed')
2672
+ return;
2673
+ const failure = {
2674
+ kind,
2675
+ message,
2676
+ detail,
2677
+ hint: EXECUTION_FAILURE_HINTS[kind] ?? null,
2678
+ occurredAt: this.clock.now(),
2679
+ lastSubtasks: instance.steps[instance.currentStep]?.subtasks ?? null,
2680
+ };
2681
+ await this.executionRepository.markFailed(workspaceId, executionId, failure);
2682
+ // Progress reflects how far the pipeline got before failing.
2683
+ const done = instance.steps.filter((s) => s.state === 'done').length;
2684
+ const progress = instance.steps.length > 0 ? done / instance.steps.length : 0;
2685
+ await this.blockRepository.update(workspaceId, instance.blockId, {
2686
+ status: 'blocked',
2687
+ progress,
2688
+ });
2689
+ const failed = await this.executionRepository.get(workspaceId, executionId);
2690
+ if (failed)
2691
+ await this.emitInstance(workspaceId, failed);
2692
+ }
2693
+ /**
2694
+ * Retry a failed run: re-drive the same pipeline on the same block, **resuming
2695
+ * from the step that actually failed** rather than restarting from step 0. The
2696
+ * steps that already completed are preserved (so a `coder` failure in `pl_full`
2697
+ * doesn't re-run the human-gated `requirements`/`architect` steps before it);
2698
+ * the failed step and everything after it are reset to a clean, re-runnable
2699
+ * state. Only a `failed` run can be retried.
2700
+ *
2701
+ * A fresh instance id is minted because the durable runner addresses one
2702
+ * Workflows instance per execution id and the failed one is terminal — the new
2703
+ * instance simply starts with `currentStep` pointed at the failed step, so the
2704
+ * driver advances forward from there and never re-issues the completed steps'
2705
+ * work. Mirrors {@link BootstrapService.retry}; both are reached via the unified
2706
+ * `POST /agent-runs/:id/retry` endpoint.
2707
+ */
2708
+ async retry(workspaceId, executionId,
2709
+ /** The retrying user (their personal subscription is used for individual-usage
2710
+ * models). Falls back to the original initiator when omitted. */
2711
+ initiatedBy,
2712
+ /** Mint the per-run personal-credential activation (see {@link start}). */
2713
+ activate) {
2714
+ await this.requireWorkspace(workspaceId);
2715
+ const previous = assertFound(await this.executionRepository.get(workspaceId, executionId), 'Execution', executionId);
2716
+ if (previous.status !== 'failed') {
2717
+ throw new ConflictError(`Only a failed run can be retried (run is '${previous.status}').`);
2718
+ }
2719
+ await this.requireBlock(workspaceId, previous.blockId);
2720
+ const { steps, currentStep } = planResumedSteps(previous);
2721
+ // Mint the activation before replacing the failed run, so a bad password aborts
2722
+ // the retry without losing the retryable terminal run.
2723
+ const newId = this.idGenerator.next('exec');
2724
+ await activate?.(newId);
2725
+ // Replace the terminal failed run for this block with the resumed one (single
2726
+ // run per block, matching the board's by-block projection).
2727
+ await this.executionRepository.deleteByBlock(workspaceId, previous.blockId);
2728
+ const instance = {
2729
+ id: newId,
2730
+ blockId: previous.blockId,
2731
+ pipelineId: previous.pipelineId,
2732
+ pipelineName: previous.pipelineName,
2733
+ steps,
2734
+ currentStep,
2735
+ status: 'running',
2736
+ initiatedBy: initiatedBy ?? previous.initiatedBy ?? null,
2737
+ };
2738
+ await this.executionRepository.upsert(workspaceId, instance);
2739
+ const done = steps.filter((s) => s.state === 'done').length;
2740
+ await this.blockRepository.update(workspaceId, previous.blockId, {
2741
+ status: 'in_progress',
2742
+ progress: steps.length > 0 ? done / steps.length : 0,
2743
+ executionId: instance.id,
2744
+ });
2745
+ await this.workRunner.startRun(workspaceId, instance.id);
2746
+ await this.emitInstance(workspaceId, instance);
2747
+ return instance;
2748
+ }
2749
+ /**
2750
+ * Restart a run from a human-chosen step: re-run from `fromStepIndex` onward,
2751
+ * regardless of how far the run had progressed (a `done`, `failed`, `blocked`,
2752
+ * `paused` or still-`running` run are all valid sources). Unlike {@link retry}
2753
+ * (which resumes at the first FAILURE) this rewinds to an arbitrary step the user
2754
+ * picked — so it can re-run steps that already completed.
2755
+ *
2756
+ * What is preserved vs reset:
2757
+ * - Steps BEFORE `fromStepIndex` keep their `output`/approval/timing untouched, so
2758
+ * the engine still hands the restarted step its predecessors' work as
2759
+ * `priorOutputs` (and their resolved `decisions`) — a useful handoff.
2760
+ * - The chosen step and every later one are reset to a clean, re-runnable state,
2761
+ * dropping each step's iteration counters (companion attempts, gate/test attempts,
2762
+ * eviction recoveries) so the restart starts those loops from zero.
2763
+ * - A block's incorporated requirements are NOT touched: they live on the
2764
+ * requirement-review record, so a restarted spec-writer/coder still receives the
2765
+ * incorporated document (or the base description when none was generated). When the
2766
+ * chosen step is the `requirements-review` gate ITSELF, re-running it mints a fresh
2767
+ * iteration-1 review (the reviewer's `review()` replaces the prior one), which is
2768
+ * exactly the "reset the iterations counter from this step" semantics.
2769
+ *
2770
+ * Like {@link retry} a fresh instance id is minted (the durable runner addresses one
2771
+ * driver per execution id). Any still-live driver/container for the run being
2772
+ * replaced is torn down first, so restarting a RUNNING run never orphans a container
2773
+ * or a parked Workflows instance.
2774
+ */
2775
+ async restartFromStep(workspaceId, executionId, fromStepIndex,
2776
+ /** The restarting user (their personal subscription is used for individual-usage
2777
+ * models). Falls back to the original initiator when omitted. */
2778
+ initiatedBy,
2779
+ /** Mint the per-run personal-credential activation (see {@link start}). */
2780
+ activate) {
2781
+ await this.requireWorkspace(workspaceId);
2782
+ const previous = assertFound(await this.executionRepository.get(workspaceId, executionId), 'Execution', executionId);
2783
+ await this.requireBlock(workspaceId, previous.blockId);
2784
+ if (!Number.isInteger(fromStepIndex) ||
2785
+ fromStepIndex < 0 ||
2786
+ fromStepIndex >= previous.steps.length) {
2787
+ throw new ValidationError(`Step ${fromStepIndex} is out of range for this run (it has ${previous.steps.length} step(s)).`);
2788
+ }
2789
+ // Tear down whatever was driving the run we're about to replace — its per-run
2790
+ // container AND its durable driver — before minting the restart. A `done`/`failed`
2791
+ // run is already terminal (a no-op teardown), but a still-`running` run would
2792
+ // otherwise leak a container and a live Workflows/pg-boss driver.
2793
+ await this.stopRunContainer(workspaceId, previous);
2794
+ await this.workRunner.cancelRun(workspaceId, executionId);
2795
+ const { steps, currentStep } = planRestartFromStep(previous, fromStepIndex);
2796
+ // Mint the activation before replacing the prior run, so a bad password aborts the
2797
+ // restart without losing the source run.
2798
+ const newId = this.idGenerator.next('exec');
2799
+ await activate?.(newId);
2800
+ await this.executionRepository.deleteByBlock(workspaceId, previous.blockId);
2801
+ const instance = {
2802
+ id: newId,
2803
+ blockId: previous.blockId,
2804
+ pipelineId: previous.pipelineId,
2805
+ pipelineName: previous.pipelineName,
2806
+ steps,
2807
+ currentStep,
2808
+ status: 'running',
2809
+ initiatedBy: initiatedBy ?? previous.initiatedBy ?? null,
2810
+ };
2811
+ await this.executionRepository.upsert(workspaceId, instance);
2812
+ const done = steps.filter((s) => s.state === 'done').length;
2813
+ await this.blockRepository.update(workspaceId, previous.blockId, {
2814
+ status: 'in_progress',
2815
+ progress: steps.length > 0 ? done / steps.length : 0,
2816
+ executionId: instance.id,
2817
+ });
2818
+ await this.workRunner.startRun(workspaceId, instance.id);
2819
+ await this.emitInstance(workspaceId, instance);
2820
+ return instance;
2821
+ }
2822
+ /**
2823
+ * Resume every run paused by the spend safeguard in this workspace. Flips them
2824
+ * back to `running` and re-drives the durable runner. If the budget is still
2825
+ * exhausted the spend gate will simply pause them again on their next step.
2826
+ */
2827
+ async resumePaused(workspaceId) {
2828
+ await this.requireWorkspace(workspaceId);
2829
+ const instances = await this.executionRepository.listByWorkspace(workspaceId);
2830
+ const paused = instances.filter((e) => e.status === 'paused');
2831
+ for (const instance of paused) {
2832
+ instance.status = 'running';
2833
+ await this.executionRepository.upsert(workspaceId, instance);
2834
+ await this.workRunner.startRun(workspaceId, instance.id);
2835
+ await this.emitInstance(workspaceId, instance);
2836
+ }
2837
+ return this.executionRepository.listByWorkspace(workspaceId);
2838
+ }
2839
+ /** Cancel the run on a block, returning it to `planned`. */
2840
+ async cancel(workspaceId, blockId) {
2841
+ await this.requireWorkspace(workspaceId);
2842
+ await this.requireBlock(workspaceId, blockId);
2843
+ // Tear down the durable run (if any) AND its per-run container before removing
2844
+ // the record, so a cancel never leaves a container running until its watchdog.
2845
+ const existing = await this.executionRepository.getByBlock(workspaceId, blockId);
2846
+ if (existing) {
2847
+ await this.stopRunContainer(workspaceId, existing);
2848
+ await this.workRunner.cancelRun(workspaceId, existing.id);
2849
+ }
2850
+ await this.executionRepository.deleteByBlock(workspaceId, blockId);
2851
+ await this.blockRepository.update(workspaceId, blockId, {
2852
+ status: 'planned',
2853
+ progress: 0,
2854
+ executionId: null,
2855
+ });
2856
+ // The run record is gone and the block is back to planned; the client can't
2857
+ // reconstruct that from a per-instance event, so signal a coarse refresh. Name the block
2858
+ // so the refresh fans out to every board mounting its shared service.
2859
+ await this.events.boardChanged(workspaceId, 'cancel', blockId);
2860
+ return this.requireBlock(workspaceId, blockId);
2861
+ }
2862
+ /**
2863
+ * Explicitly stop a *running* run by id (the unified `POST /agent-runs/:id/stop`
2864
+ * surface): kill its per-run container, tear down the durable driver, then record
2865
+ * a terminal `cancelled` failure so the board shows the run stopped (with retry)
2866
+ * rather than spinning forever. Idempotent — a run already terminal is returned
2867
+ * as-is. `opts.reason`/`opts.kind` let the orphan sweep reuse this with its own
2868
+ * wording instead of the user-facing default.
2869
+ */
2870
+ async stopRun(workspaceId, executionId, opts = {}) {
2871
+ await this.requireWorkspace(workspaceId);
2872
+ const instance = assertFound(await this.executionRepository.get(workspaceId, executionId), 'Execution', executionId);
2873
+ if (instance.status === 'failed' || instance.status === 'done')
2874
+ return instance;
2875
+ await this.stopRunContainer(workspaceId, instance);
2876
+ await this.workRunner.cancelRun(workspaceId, executionId);
2877
+ await this.failRun(workspaceId, executionId, opts.reason ?? 'Stopped by the user.', opts.kind ?? 'cancelled');
2878
+ return assertFound(await this.executionRepository.get(workspaceId, executionId), 'Execution', executionId);
2879
+ }
2880
+ /**
2881
+ * Tear down every run under a block subtree — kill each container, terminate each
2882
+ * durable driver, and delete the run record — so deleting a service/module never
2883
+ * orphans a container or a Workflows instance. Best-effort and silent: the board
2884
+ * delete that follows emits the coarse refresh, so no per-run event is needed.
2885
+ */
2886
+ async teardownForBlockTree(workspaceId, rootId) {
2887
+ const blocks = await this.blockRepository.listByWorkspace(workspaceId);
2888
+ for (const blockId of descendantIds(blocks, rootId)) {
2889
+ const run = await this.executionRepository.getByBlock(workspaceId, blockId);
2890
+ if (!run)
2891
+ continue;
2892
+ await this.stopRunContainer(workspaceId, run);
2893
+ await this.workRunner.cancelRun(workspaceId, run.id);
2894
+ await this.executionRepository.deleteByBlock(workspaceId, blockId);
2895
+ }
2896
+ }
2897
+ /**
2898
+ * Best-effort: reclaim the per-run container backing an execution. The container is
2899
+ * addressed by the run (execution) id, so a backend that shares one across the run
2900
+ * (Cloudflare, local Docker) tears the whole thing down. A per-job backend (a
2901
+ * self-hosted pool) has no run container, so it cancels the run's IN-FLIGHT step job
2902
+ * instead — hence we pass the current step's job id alongside the run id. A no-op for
2903
+ * inline executors (no `stopJob`) and for an already-gone container/job; never
2904
+ * throws, so it can't derail the teardown that calls it.
2905
+ */
2906
+ async stopRunContainer(workspaceId, instance) {
2907
+ const executor = this.agentExecutor;
2908
+ if (!isAsyncAgentExecutor(executor) || !executor.stopJob)
2909
+ return;
2910
+ // The in-flight step's job id (when a job is parked), so a per-job backend can
2911
+ // cancel exactly it; the run-container backends ignore it and use the run id.
2912
+ const jobId = instance.steps[instance.currentStep]?.jobId ?? instance.id;
2913
+ try {
2914
+ await executor.stopJob({ jobId, runId: instance.id, workspaceId });
2915
+ }
2916
+ catch {
2917
+ // The container may already be gone (eviction/completion) — nothing to reclaim.
2918
+ }
2919
+ }
2920
+ }
2921
+ //# sourceMappingURL=ExecutionService.js.map