@cat-factory/orchestration 0.30.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31):
  1. package/dist/container.d.ts +6 -1
  2. package/dist/container.d.ts.map +1 -1
  3. package/dist/container.js.map +1 -1
  4. package/dist/index.d.ts +2 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +2 -1
  7. package/dist/index.js.map +1 -1
  8. package/dist/modules/artifacts/artifactRetention.d.ts +17 -0
  9. package/dist/modules/artifacts/artifactRetention.d.ts.map +1 -0
  10. package/dist/modules/artifacts/artifactRetention.js +25 -0
  11. package/dist/modules/artifacts/artifactRetention.js.map +1 -0
  12. package/dist/modules/execution/ExecutionService.d.ts +16 -2
  13. package/dist/modules/execution/ExecutionService.d.ts.map +1 -1
  14. package/dist/modules/execution/ExecutionService.js +69 -7
  15. package/dist/modules/execution/ExecutionService.js.map +1 -1
  16. package/dist/modules/execution/VisualConfirmationController.d.ts +102 -0
  17. package/dist/modules/execution/VisualConfirmationController.d.ts.map +1 -0
  18. package/dist/modules/execution/VisualConfirmationController.js +352 -0
  19. package/dist/modules/execution/VisualConfirmationController.js.map +1 -0
  20. package/dist/modules/execution/ci.logic.d.ts +28 -5
  21. package/dist/modules/execution/ci.logic.d.ts.map +1 -1
  22. package/dist/modules/execution/ci.logic.js +30 -5
  23. package/dist/modules/execution/ci.logic.js.map +1 -1
  24. package/dist/modules/observability/observability.logic.d.ts +6 -0
  25. package/dist/modules/observability/observability.logic.d.ts.map +1 -1
  26. package/dist/modules/observability/observability.logic.js +15 -0
  27. package/dist/modules/observability/observability.logic.js.map +1 -1
  28. package/dist/modules/settings/WorkspaceSettingsService.d.ts.map +1 -1
  29. package/dist/modules/settings/WorkspaceSettingsService.js +1 -0
  30. package/dist/modules/settings/WorkspaceSettingsService.js.map +1 -1
  31. package/package.json +9 -9
@@ -7,7 +7,7 @@ import { reviewableArtifactOutput } from './artifact-review.logic.js';
7
7
  import { resolveIndividualVendors, } from './individualVendors.logic.js';
8
8
  import { assertFound, ConflictError, getErrorMessage, isModelUsable, NotFoundError, parseLocalModelId, resolveModelRef, sameSubtasks, subscriptionOptionFor, ValidationError, } from '@cat-factory/kernel';
9
9
  import { DEFAULT_MERGE_PRESET } from '@cat-factory/kernel';
10
- import { CONFLICTS_AGENT_KIND, MERGER_AGENT_KIND, REQUIREMENTS_REVIEW_AGENT_KIND, CLARITY_REVIEW_AGENT_KIND, REQUIREMENTS_BRAINSTORM_AGENT_KIND, ARCHITECTURE_BRAINSTORM_AGENT_KIND, BUG_INVESTIGATOR_AGENT_KIND, TRACKER_AGENT_KIND, ANALYSIS_AGENT_KIND, TESTER_AGENT_KIND, HUMAN_TEST_AGENT_KIND, HUMAN_REVIEW_AGENT_KIND, BLUEPRINTS_AGENT_KIND, SPEC_WRITER_AGENT_KIND, } from './ci.logic.js';
10
+ import { CONFLICTS_AGENT_KIND, MERGER_AGENT_KIND, REQUIREMENTS_REVIEW_AGENT_KIND, CLARITY_REVIEW_AGENT_KIND, REQUIREMENTS_BRAINSTORM_AGENT_KIND, ARCHITECTURE_BRAINSTORM_AGENT_KIND, BUG_INVESTIGATOR_AGENT_KIND, TRACKER_AGENT_KIND, ANALYSIS_AGENT_KIND, TESTER_AGENT_KIND, UI_TESTER_AGENT_KIND, isTesterKind, HUMAN_TEST_AGENT_KIND, VISUAL_CONFIRM_AGENT_KIND, HUMAN_REVIEW_AGENT_KIND, BLUEPRINTS_AGENT_KIND, SPEC_WRITER_AGENT_KIND, } from './ci.logic.js';
11
11
  import { DEFAULT_FOLLOW_UP_MAX_LOOPS, FOLLOW_UP_PRODUCER_KIND, followUpsToSendBack, hasPendingFollowUps, renderFollowUpRework, shouldLoopCoder, } from './followUp.logic.js';
12
12
  import { AgentContextBuilder } from './AgentContextBuilder.js';
13
13
  import { CompanionController } from './CompanionController.js';
@@ -16,6 +16,7 @@ import { MergeResolver } from './MergeResolver.js';
16
16
  import { ReviewGateController } from './ReviewGateController.js';
17
17
  import { TesterController } from './TesterController.js';
18
18
  import { HumanTestController } from './HumanTestController.js';
19
+ import { VisualConfirmationController } from './VisualConfirmationController.js';
19
20
  import { getProvider, recordGateAttempt, registeredGateFactories, registeredStepResolverFactories, requireProvider, } from '@cat-factory/kernel';
20
21
  import { isAsyncAgentExecutor } from '@cat-factory/kernel';
21
22
  import { isDeployStep } from '@cat-factory/integrations';
@@ -46,7 +47,12 @@ const EXECUTION_FAILURE_HINTS = {
46
47
  * the per-poll env projection so the `getByBlock` read never hits the hot path for
47
48
  * the many container steps that have no env to show (see attachEnvironmentProjection).
48
49
  */
49
- const ENV_PROJECTION_KINDS = new Set(['deployer', 'tester', 'playwright']);
50
+ const ENV_PROJECTION_KINDS = new Set([
51
+ 'deployer',
52
+ TESTER_AGENT_KIND,
53
+ UI_TESTER_AGENT_KIND,
54
+ 'playwright',
55
+ ]);
50
56
  /**
51
57
  * Parse `owner`/`repo` from a GitHub pull-request URL (`https://github.com/o/r/pull/42`).
52
58
  * Returns undefined for any URL that doesn't carry both segments. Host-agnostic on
@@ -98,6 +104,8 @@ export class ExecutionService {
98
104
  testerController;
99
105
  /** Drives the human-testing gate: provision env → park → confirm / fix / pull-main / recreate. */
100
106
  humanTestController;
107
+ /** Drives the visual-confirmation gate: gather screenshots → park → approve / fix / recapture. */
108
+ visualConfirmationController;
101
109
  /** Drives both iterative review gates (requirements + clarity); kind-parameterised. */
102
110
  reviewGate;
103
111
  /** The requirements subject for {@link reviewGate}. */
@@ -139,7 +147,7 @@ export class ExecutionService {
139
147
  * {@link stepResolverFor} and {@link StepCompletionResolver}.
140
148
  */
141
149
  stepResolverCache;
142
- constructor({ workspaceRepository, blockRepository, pipelineRepository, executionRepository, accountRepository, idGenerator, clock, agentExecutor, workRunner, executionEventPublisher, boardService, spendService, documentRepository, taskRepository, requirementReviewRepository, requirementReviewService, kaizenScheduler, clarityReviewRepository, clarityReviewService, brainstormServices, brainstormSessionRepository, fragmentResolver, environmentProvisioning, environmentTeardown, branchUpdater, blueprintReconciler, notificationService, workspaceSettingsService, llmObservability, pullRequestMerger, mergePresetRepository, ticketTrackerProvider, issueWriteback, subscriptionActivationRepository, resolveWorkspaceModelDefault, resolveProviderCapabilities, localTestInfraSupported, resolveRunRepoContext, resolveTesterFallbackDefault, resolveRequireEnvironmentProvider, assertAgentBackendConfigured, runInitiatorScope, }) {
150
+ constructor({ workspaceRepository, blockRepository, pipelineRepository, executionRepository, accountRepository, idGenerator, clock, agentExecutor, workRunner, executionEventPublisher, boardService, spendService, documentRepository, taskRepository, requirementReviewRepository, requirementReviewService, kaizenScheduler, clarityReviewRepository, clarityReviewService, brainstormServices, brainstormSessionRepository, fragmentResolver, environmentProvisioning, environmentTeardown, branchUpdater, blueprintReconciler, notificationService, binaryArtifactStore, workspaceSettingsService, llmObservability, pullRequestMerger, mergePresetRepository, ticketTrackerProvider, issueWriteback, subscriptionActivationRepository, resolveWorkspaceModelDefault, resolveProviderCapabilities, localTestInfraSupported, resolveRunRepoContext, resolveTesterFallbackDefault, resolveRequireEnvironmentProvider, assertAgentBackendConfigured, runInitiatorScope, }) {
143
151
  this.runInitiatorScope = runInitiatorScope ?? ((_initiatedBy, fn) => fn());
144
152
  this.workspaceRepository = workspaceRepository;
145
153
  this.blockRepository = blockRepository;
@@ -249,6 +257,25 @@ export class ExecutionService {
249
257
  emitInstance: (ws, i) => this.emitInstance(ws, i),
250
258
  clockNow: () => this.clock.now(),
251
259
  });
260
+ this.visualConfirmationController = new VisualConfirmationController({
261
+ blockRepository,
262
+ executionRepository,
263
+ workRunner,
264
+ agentExecutor,
265
+ contextBuilder: this.contextBuilder,
266
+ notificationService,
267
+ ...(binaryArtifactStore ? { binaryArtifactStore } : {}),
268
+ resolveMergePreset: (ws, block) => this.resolveMergePreset(ws, block),
269
+ parkStepOnDecision: (ws, i, s, p) => this.parkStepOnDecision(ws, i, s, p),
270
+ finishStep: (s) => this.finishStep(s),
271
+ startStep: (s) => this.startStep(s),
272
+ updateBlockProgress: (ws, i, st) => this.updateBlockProgress(ws, i, st),
273
+ finalizeBlock: (ws, i, c) => this.finalizeBlock(ws, i, c),
274
+ stopRunContainer: (ws, i) => this.stopRunContainer(ws, i),
275
+ persistInstance: (ws, i) => this.executionRepository.upsert(ws, i),
276
+ emitInstance: (ws, i) => this.emitInstance(ws, i),
277
+ clockNow: () => this.clock.now(),
278
+ });
252
279
  this.reviewGate = new ReviewGateController({
253
280
  blockRepository,
254
281
  executionRepository,
@@ -472,7 +499,7 @@ export class ExecutionService {
472
499
  // configured (a docker-compose path, or an explicit "no infra dependencies"
473
500
  // flag). Block the start with a clear, actionable error otherwise — before any
474
501
  // side effects (activation mint / prior-run teardown).
475
- if (pipeline.agentKinds.includes(TESTER_AGENT_KIND)) {
502
+ if (pipeline.agentKinds.some(isTesterKind)) {
476
503
  await this.assertTesterInfraConfigured(workspaceId, block);
477
504
  }
478
505
  // Block the start when the workspace delegates container agents to a runner pool that
@@ -756,7 +783,9 @@ export class ExecutionService {
756
783
  // the driver. Fall through so the gate re-evaluates and acts on it (dispatch a helper,
757
784
  // rebuild the env, or advance) instead of immediately re-parking.
758
785
  const reentrantHumanTest = step.agentKind === HUMAN_TEST_AGENT_KIND && !!step.humanTest?.pendingAction;
759
- if (!reentrantRequirements && !reentrantHumanTest) {
786
+ // The visual-confirmation gate is likewise re-entrant on a human action.
787
+ const reentrantVisualConfirm = step.agentKind === VISUAL_CONFIRM_AGENT_KIND && !!step.visualConfirm?.pendingAction;
788
+ if (!reentrantRequirements && !reentrantHumanTest && !reentrantVisualConfirm) {
760
789
  // Parked on either an agent-raised decision or a human approval gate; both
761
790
  // are addressed by the same durable event id.
762
791
  const pendingId = step.decision?.id ?? step.approval?.id;
@@ -829,6 +858,13 @@ export class ExecutionService {
829
858
  if (step.agentKind === HUMAN_TEST_AGENT_KIND) {
830
859
  return this.humanTestController.evaluate(workspaceId, instance, step, block, isFinalStep);
831
860
  }
861
+ // A `visual-confirmation` gate gathers the UI tester's screenshots + the uploaded
862
+ // reference designs and PARKS for a human to review actual-vs-reference, then on demand
863
+ // dispatches the Tester's `fixer`. Passes through (auto-advances) when no binary-artifact
864
+ // store is wired. See {@link VisualConfirmationController}.
865
+ if (step.agentKind === VISUAL_CONFIRM_AGENT_KIND) {
866
+ return this.visualConfirmationController.evaluate(workspaceId, instance, step, block, isFinalStep);
867
+ }
832
868
  // A polling gate step (`ci` / `conflicts`) runs a programmatic precheck and only
833
869
  // escalates to a helper container agent (`ci-fixer` / `conflict-resolver`) on a
834
870
  // negative verdict — no LLM of its own. Pass-through when the gate's provider is
@@ -1149,7 +1185,7 @@ export class ExecutionService {
1149
1185
  // step's own work: when it finishes (or fails) we drop the handle, return to
1150
1186
  // `testing`, and re-dispatch the Tester against the (now-fixed) branch — its
1151
1187
  // fresh report then drives greenlight-or-loop again. Mirrors the CI gate.
1152
- if (step.agentKind === TESTER_AGENT_KIND && step.test?.phase === 'fixing') {
1188
+ if (isTesterKind(step.agentKind) && step.test?.phase === 'fixing') {
1153
1189
  step.jobId = undefined;
1154
1190
  step.subtasks = undefined;
1155
1191
  step.test.phase = 'testing';
@@ -1173,6 +1209,13 @@ export class ExecutionService {
1173
1209
  state: update.state === 'failed' ? 'failed' : 'done',
1174
1210
  });
1175
1211
  }
1212
+ // A `visual-confirmation` gate in its `fixing` phase has a `fixer` job in flight: when it
1213
+ // settles, record the round, refresh the screenshot pairs, and re-park the human.
1214
+ if (step.agentKind === VISUAL_CONFIRM_AGENT_KIND && step.visualConfirm?.phase === 'fixing') {
1215
+ return this.visualConfirmationController.onHelperComplete(workspaceId, instance, step, {
1216
+ state: update.state === 'failed' ? 'failed' : 'done',
1217
+ });
1218
+ }
1176
1219
  if (update.state === 'failed') {
1177
1220
  // A container eviction (the per-run container vanished, its in-memory job is
1178
1221
  // gone) is usually transient. Recover it by dropping the dead handle and
@@ -1488,7 +1531,7 @@ export class ExecutionService {
1488
1531
  // NOT finish the step: we loop the `fixer` (within the attempt budget) and
1489
1532
  // re-test, mirroring the CI gate. A greenlight (or no provider) falls through to
1490
1533
  // the normal finish/advance below. Records the report on the step either way.
1491
- if (step.agentKind === TESTER_AGENT_KIND && result.testReport !== undefined) {
1534
+ if (isTesterKind(step.agentKind) && result.testReport !== undefined) {
1492
1535
  const looped = await this.testerController.resolveTesterResult(workspaceId, instance, step, result);
1493
1536
  if (looped)
1494
1537
  return looped;
@@ -2655,6 +2698,9 @@ export class ExecutionService {
2655
2698
  if (step.agentKind === HUMAN_TEST_AGENT_KIND) {
2656
2699
  throw new ConflictError('Resolve the human-testing gate through its window (confirm / request a fix), not the approval gate');
2657
2700
  }
2701
+ if (step.agentKind === VISUAL_CONFIRM_AGENT_KIND) {
2702
+ throw new ConflictError('Resolve the visual-confirmation gate through its window (approve / request a fix), not the approval gate');
2703
+ }
2658
2704
  if (step.companion?.exceeded) {
2659
2705
  throw new ConflictError('Resolve this companion review through its iteration-cap prompt, not the approval gate');
2660
2706
  }
@@ -3034,6 +3080,21 @@ export class ExecutionService {
3034
3080
  destroyHumanTestEnv(workspaceId, blockId) {
3035
3081
  return this.humanTestController.destroyEnvironment(workspaceId, blockId);
3036
3082
  }
3083
+ // ---- visual-confirmation gate actions (driven from the dedicated window) --
3084
+ // Each mutates the parked gate step and wakes the durable driver; see
3085
+ // {@link VisualConfirmationController}.
3086
+ /** Approve the reviewed screenshots: advance the run. */
3087
+ approveVisualConfirm(workspaceId, blockId) {
3088
+ return this.visualConfirmationController.approve(workspaceId, blockId);
3089
+ }
3090
+ /** Submit findings and request a fix: dispatch the Tester's `fixer`, then re-park. */
3091
+ requestVisualConfirmFix(workspaceId, blockId, findings) {
3092
+ return this.visualConfirmationController.requestFix(workspaceId, blockId, findings);
3093
+ }
3094
+ /** Refresh the screenshot pairs from the latest UI-tester report. */
3095
+ recaptureVisualConfirm(workspaceId, blockId) {
3096
+ return this.visualConfirmationController.recapture(workspaceId, blockId);
3097
+ }
3037
3098
  /**
3038
3099
  * Dispatch the `fixer` against the human-review gate's PR branch from a human's freeform
3039
3100
  * instructions — bypassing the precheck + grace window. Parks a `pendingFix` on the gate step,
@@ -3148,6 +3209,7 @@ export class ExecutionService {
3148
3209
  step.metrics = {
3149
3210
  calls: s.calls,
3150
3211
  promptTokens: s.promptTokens,
3212
+ cachedPromptTokens: s.cachedPromptTokens,
3151
3213
  completionTokens: s.completionTokens,
3152
3214
  peakCompletionTokens: s.peakCompletionTokens,
3153
3215
  maxOutputTokens: s.maxOutputTokens,