@cat-factory/orchestration 0.13.0 → 0.15.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -7,13 +7,14 @@ import { reviewableArtifactOutput } from './artifact-review.logic.js';
7
7
  import { resolveIndividualVendors, } from './individualVendors.logic.js';
8
8
  import { assertFound, ConflictError, getErrorMessage, isModelUsable, NotFoundError, sameSubtasks, ValidationError, } from '@cat-factory/kernel';
9
9
  import { DEFAULT_MERGE_PRESET } from '@cat-factory/kernel';
10
- import { aggregateCi, CI_AGENT_KIND, CI_FIXER_AGENT_KIND, CONFLICTS_AGENT_KIND, CONFLICT_RESOLVER_AGENT_KIND, describeFailingChecks, listFailingChecks, isCiGreen, MERGER_AGENT_KIND, REQUIREMENTS_REVIEW_AGENT_KIND, CLARITY_REVIEW_AGENT_KIND, BUG_INVESTIGATOR_AGENT_KIND, TRACKER_AGENT_KIND, ANALYSIS_AGENT_KIND, TESTER_AGENT_KIND, BLUEPRINTS_AGENT_KIND, SPEC_WRITER_AGENT_KIND, } from './ci.logic.js';
10
+ import { aggregateCi, CI_AGENT_KIND, CI_FIXER_AGENT_KIND, CONFLICTS_AGENT_KIND, CONFLICT_RESOLVER_AGENT_KIND, describeFailingChecks, listFailingChecks, isCiGreen, MERGER_AGENT_KIND, REQUIREMENTS_REVIEW_AGENT_KIND, CLARITY_REVIEW_AGENT_KIND, BUG_INVESTIGATOR_AGENT_KIND, TRACKER_AGENT_KIND, ANALYSIS_AGENT_KIND, TESTER_AGENT_KIND, HUMAN_TEST_AGENT_KIND, BLUEPRINTS_AGENT_KIND, SPEC_WRITER_AGENT_KIND, } from './ci.logic.js';
11
11
  import { POST_RELEASE_HEALTH_AGENT_KIND, ON_CALL_AGENT_KIND, classifyReleaseHealth, describeRegressedSignals, } from './release.logic.js';
12
12
  import { AgentContextBuilder } from './AgentContextBuilder.js';
13
13
  import { CompanionController } from './CompanionController.js';
14
14
  import { MergeResolver } from './MergeResolver.js';
15
15
  import { ReviewGateController } from './ReviewGateController.js';
16
16
  import { TesterController } from './TesterController.js';
17
+ import { HumanTestController } from './HumanTestController.js';
17
18
  import { recordGateAttempt } from './gates.js';
18
19
  import { isAsyncAgentExecutor } from '@cat-factory/kernel';
19
20
  import { isDeployStep } from '@cat-factory/integrations';
@@ -115,6 +116,8 @@ export class ExecutionService {
115
116
  requirementReviewService;
116
117
  clarityReviewService;
117
118
  environmentProvisioning;
119
+ environmentTeardown;
120
+ branchUpdater;
118
121
  /** Assembles the per-step agent context (requirements, docs, env, service frame, fragments). */
119
122
  contextBuilder;
120
123
  /** Resolves a `merger` step's assessment into an auto-merge or a `merge_review` notification. */
@@ -123,6 +126,8 @@ export class ExecutionService {
123
126
  companionController;
124
127
  /** Drives the Tester gate's fix loop: report → greenlight / dispatch fixer / fail. */
125
128
  testerController;
129
+ /** Drives the human-testing gate: provision env → park → confirm / fix / pull-main / recreate. */
130
+ humanTestController;
126
131
  /** Drives both iterative review gates (requirements + clarity); kind-parameterised. */
127
132
  reviewGate;
128
133
  /** The requirements subject for {@link reviewGate}. */
@@ -159,7 +164,7 @@ export class ExecutionService {
159
164
  * {@link stepResolverFor} and {@link StepCompletionResolver}.
160
165
  */
161
166
  stepResolverCache;
162
- constructor({ workspaceRepository, blockRepository, pipelineRepository, executionRepository, accountRepository, idGenerator, clock, agentExecutor, workRunner, executionEventPublisher, boardService, spendService, documentRepository, taskRepository, requirementReviewRepository, requirementReviewService, clarityReviewRepository, clarityReviewService, environmentProvisioning, blueprintReconciler, notificationService, workspaceSettingsService, llmObservability, ciStatusProvider, mergeabilityProvider, releaseHealthProvider, incidentEnrichment, pullRequestMerger, mergePresetRepository, ticketTrackerProvider, issueWriteback, subscriptionActivationRepository, resolveWorkspaceModelDefault, resolveProviderCapabilities, localTestInfraSupported, resolveRunRepoContext, runInitiatorScope, }) {
167
+ constructor({ workspaceRepository, blockRepository, pipelineRepository, executionRepository, accountRepository, idGenerator, clock, agentExecutor, workRunner, executionEventPublisher, boardService, spendService, documentRepository, taskRepository, requirementReviewRepository, requirementReviewService, clarityReviewRepository, clarityReviewService, environmentProvisioning, environmentTeardown, branchUpdater, blueprintReconciler, notificationService, workspaceSettingsService, llmObservability, ciStatusProvider, mergeabilityProvider, releaseHealthProvider, incidentEnrichment, pullRequestMerger, mergePresetRepository, ticketTrackerProvider, issueWriteback, subscriptionActivationRepository, resolveWorkspaceModelDefault, resolveProviderCapabilities, localTestInfraSupported, resolveRunRepoContext, runInitiatorScope, }) {
163
168
  this.runInitiatorScope = runInitiatorScope ?? ((_initiatedBy, fn) => fn());
164
169
  this.workspaceRepository = workspaceRepository;
165
170
  this.blockRepository = blockRepository;
@@ -176,6 +181,8 @@ export class ExecutionService {
176
181
  this.requirementReviewService = requirementReviewService;
177
182
  this.clarityReviewService = clarityReviewService;
178
183
  this.environmentProvisioning = environmentProvisioning;
184
+ this.environmentTeardown = environmentTeardown;
185
+ this.branchUpdater = branchUpdater;
179
186
  this.contextBuilder = new AgentContextBuilder({
180
187
  workspaceRepository,
181
188
  blockRepository,
@@ -220,6 +227,47 @@ export class ExecutionService {
220
227
  persistInstance: (ws, i) => this.executionRepository.upsert(ws, i),
221
228
  emitInstance: (ws, i) => this.emitInstance(ws, i),
222
229
  });
230
+ this.humanTestController = new HumanTestController({
231
+ blockRepository,
232
+ executionRepository,
233
+ workRunner,
234
+ agentExecutor,
235
+ contextBuilder: this.contextBuilder,
236
+ notificationService,
237
+ // Wrap the env services with the deployer's input/context derivation so the gate's
238
+ // provisioning matches a `deployer` step's. Left undefined when no provider is wired
239
+ // (the gate degrades to manual mode).
240
+ ...(environmentProvisioning
241
+ ? {
242
+ provisionEnvironment: (ws, block, executionId) => environmentProvisioning.provision({
243
+ workspaceId: ws,
244
+ blockId: block.id,
245
+ executionId,
246
+ inputs: this.deployInputs(block),
247
+ context: this.deployContext(block),
248
+ }),
249
+ refreshEnvironment: (ws, id) => environmentProvisioning.refreshStatus(ws, id),
250
+ }
251
+ : {}),
252
+ ...(environmentTeardown
253
+ ? {
254
+ teardownEnvironment: async (ws, id) => {
255
+ await environmentTeardown.teardown(ws, id);
256
+ },
257
+ }
258
+ : {}),
259
+ ...(branchUpdater ? { branchUpdater } : {}),
260
+ resolveMergePreset: (ws, block) => this.resolveMergePreset(ws, block),
261
+ parkStepOnDecision: (ws, i, s, p) => this.parkStepOnDecision(ws, i, s, p),
262
+ finishStep: (s) => this.finishStep(s),
263
+ startStep: (s) => this.startStep(s),
264
+ updateBlockProgress: (ws, i, st) => this.updateBlockProgress(ws, i, st),
265
+ finalizeBlock: (ws, i, c) => this.finalizeBlock(ws, i, c),
266
+ stopRunContainer: (ws, i) => this.stopRunContainer(ws, i),
267
+ persistInstance: (ws, i) => this.executionRepository.upsert(ws, i),
268
+ emitInstance: (ws, i) => this.emitInstance(ws, i),
269
+ clockNow: () => this.clock.now(),
270
+ });
223
271
  this.reviewGate = new ReviewGateController({
224
272
  blockRepository,
225
273
  executionRepository,
@@ -639,7 +687,12 @@ export class ExecutionService {
639
687
  const reentrantRequirements = (step.agentKind === REQUIREMENTS_REVIEW_AGENT_KIND ||
640
688
  step.agentKind === CLARITY_REVIEW_AGENT_KIND) &&
641
689
  !!step.pendingIncorporation;
642
- if (!reentrantRequirements) {
690
+ // The human-testing gate is likewise re-entrant: a human action (confirm / request a
691
+ // fix / pull main / recreate) records a `pendingAction` on the parked step and wakes
692
+ // the driver. Fall through so the gate re-evaluates and acts on it (dispatch a helper,
693
+ // rebuild the env, or advance) instead of immediately re-parking.
694
+ const reentrantHumanTest = step.agentKind === HUMAN_TEST_AGENT_KIND && !!step.humanTest?.pendingAction;
695
+ if (!reentrantRequirements && !reentrantHumanTest) {
643
696
  // Parked on either an agent-raised decision or a human approval gate; both
644
697
  // are addressed by the same durable event id.
645
698
  const pendingId = step.decision?.id ?? step.approval?.id;
@@ -694,6 +747,15 @@ export class ExecutionService {
694
747
  if (step.agentKind === CLARITY_REVIEW_AGENT_KIND) {
695
748
  return this.reviewGate.evaluate(this.clarityKind, workspaceId, instance, step, block, isFinalStep);
696
749
  }
750
+ // A `human-test` gate spins up an ephemeral environment and PARKS for a human to
751
+ // validate the change in a live URL before the run continues — NOT a container/prose
752
+ // agent and NOT a programmatic polling gate (the human is the verdict). It also drives
753
+ // the same helpers the other gates use on demand: the Tester's `fixer` (from findings)
754
+ // and the `conflict-resolver` (after a conflicting pull-main). Degrades to a manual
755
+ // (no-env) mode when no ephemeral-environment provider is wired. See {@link HumanTestController}.
756
+ if (step.agentKind === HUMAN_TEST_AGENT_KIND) {
757
+ return this.humanTestController.evaluate(workspaceId, instance, step, block, isFinalStep);
758
+ }
697
759
  // A polling gate step (`ci` / `conflicts`) runs a programmatic precheck and only
698
760
  // escalates to a helper container agent (`ci-fixer` / `conflict-resolver`) on a
699
761
  // negative verdict — no LLM of its own. Pass-through when the gate's provider is
@@ -934,6 +996,17 @@ export class ExecutionService {
934
996
  await this.stopRunContainer(workspaceId, instance);
935
997
  return this.testerController.dispatchTester(workspaceId, instance, step, block);
936
998
  }
999
+ // A `human-test` gate in its `fixing` / `resolving_conflicts` phase has a helper job
1000
+ // (fixer / conflict-resolver) in flight, NOT the step's own work: when it settles —
1001
+ // done OR failed — record the round's outcome, rebuild the environment against the
1002
+ // (now-updated) branch and re-park the human. We never fail the run here; the human is
1003
+ // in control. Mirrors the Tester→Fixer loop.
1004
+ if (step.agentKind === HUMAN_TEST_AGENT_KIND &&
1005
+ (step.humanTest?.phase === 'fixing' || step.humanTest?.phase === 'resolving_conflicts')) {
1006
+ return this.humanTestController.onHelperComplete(workspaceId, instance, step, {
1007
+ state: update.state === 'failed' ? 'failed' : 'done',
1008
+ });
1009
+ }
937
1010
  if (update.state === 'failed') {
938
1011
  // A container eviction (the per-run container vanished, its in-memory job is
939
1012
  // gone) is usually transient. Recover it by dropping the dead handle and
@@ -997,6 +1070,12 @@ export class ExecutionService {
997
1070
  return { kind: 'noop' };
998
1071
  }
999
1072
  const step = instance.steps[instance.currentStep];
1073
+ // The human-testing gate rides the same `awaiting_gate` poll loop while its ephemeral
1074
+ // environment provisions — re-poll the env status (ready → park the human; still
1075
+ // provisioning → keep polling; failed → degrade to manual mode).
1076
+ if (step?.agentKind === HUMAN_TEST_AGENT_KIND) {
1077
+ return this.humanTestController.pollEnvironment(workspaceId, instance);
1078
+ }
1000
1079
  const gate = step ? this.gateFor(step.agentKind) : undefined;
1001
1080
  if (!step || !gate)
1002
1081
  return { kind: 'continue' };
@@ -1027,6 +1106,11 @@ export class ExecutionService {
1027
1106
  return { kind: 'noop' };
1028
1107
  }
1029
1108
  const step = instance.steps[instance.currentStep];
1109
+ // The human-testing gate never times the RUN out while provisioning: instead of failing,
1110
+ // park the human in degraded mode so they can wait, recreate, or test by hand.
1111
+ if (step?.agentKind === HUMAN_TEST_AGENT_KIND) {
1112
+ return this.humanTestController.onProvisionTimeout(workspaceId, instance);
1113
+ }
1030
1114
  const gate = step ? this.gateFor(step.agentKind) : undefined;
1031
1115
  const timeoutError = 'Gate precheck did not settle within its polling budget';
1032
1116
  if (!step || !gate || gate.pollExhaustion !== 'pass') {
@@ -2178,6 +2262,9 @@ export class ExecutionService {
2178
2262
  if (step.agentKind === CLARITY_REVIEW_AGENT_KIND) {
2179
2263
  throw new ConflictError('Resolve the clarity review through its review window, not the approval gate');
2180
2264
  }
2265
+ if (step.agentKind === HUMAN_TEST_AGENT_KIND) {
2266
+ throw new ConflictError('Resolve the human-testing gate through its window (confirm / request a fix), not the approval gate');
2267
+ }
2181
2268
  if (step.companion?.exceeded) {
2182
2269
  throw new ConflictError('Resolve this companion review through its iteration-cap prompt, not the approval gate');
2183
2270
  }
@@ -2435,6 +2522,29 @@ export class ExecutionService {
2435
2522
  resolveClarityExceeded(workspaceId, blockId, choice) {
2436
2523
  return this.reviewGate.resolveExceeded(this.clarityKind, workspaceId, blockId, choice);
2437
2524
  }
2525
+ // ---- human-testing gate actions (driven from the dedicated window) -------
2526
+ // Each mutates the parked gate step and wakes the durable driver, which re-enters the gate
2527
+ // and performs the (env / helper) work; see {@link HumanTestController}.
2528
+ /** Confirm the change works: tear the ephemeral env down and advance the run. */
2529
+ confirmHumanTest(workspaceId, blockId) {
2530
+ return this.humanTestController.confirm(workspaceId, blockId);
2531
+ }
2532
+ /** Submit findings and request a fix: dispatch the Tester's `fixer`, then rebuild the env. */
2533
+ requestHumanTestFix(workspaceId, blockId, findings) {
2534
+ return this.humanTestController.requestFix(workspaceId, blockId, findings);
2535
+ }
2536
+ /** Pull the repo default branch into the PR branch + redeploy (conflict → conflict-resolver). */
2537
+ pullMainHumanTest(workspaceId, blockId) {
2538
+ return this.humanTestController.pullMain(workspaceId, blockId);
2539
+ }
2540
+ /** Rebuild the ephemeral environment on demand. */
2541
+ recreateHumanTestEnv(workspaceId, blockId) {
2542
+ return this.humanTestController.recreateEnvironment(workspaceId, blockId);
2543
+ }
2544
+ /** Destroy the ephemeral environment on demand (the run stays parked). */
2545
+ destroyHumanTestEnv(workspaceId, blockId) {
2546
+ return this.humanTestController.destroyEnvironment(workspaceId, blockId);
2547
+ }
2438
2548
  /**
2439
2549
  * Push the run's latest state to subscribed clients, alongside its rolled-up
2440
2550
  * block so the board updates without a refetch. Best-effort: the publisher