npm - @cat-factory/orchestration - Versions diffs - 0.30.0 → 0.32.0 - Mend

@cat-factory/orchestration 0.30.0 → 0.32.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/dist/modules/execution/ExecutionService.js CHANGED Viewed

@@ -7,7 +7,7 @@ import { reviewableArtifactOutput } from './artifact-review.logic.js';
 import { resolveIndividualVendors, } from './individualVendors.logic.js';
 import { assertFound, ConflictError, getErrorMessage, isModelUsable, NotFoundError, parseLocalModelId, resolveModelRef, sameSubtasks, subscriptionOptionFor, ValidationError, } from '@cat-factory/kernel';
 import { DEFAULT_MERGE_PRESET } from '@cat-factory/kernel';
-import { CONFLICTS_AGENT_KIND, MERGER_AGENT_KIND, REQUIREMENTS_REVIEW_AGENT_KIND, CLARITY_REVIEW_AGENT_KIND, REQUIREMENTS_BRAINSTORM_AGENT_KIND, ARCHITECTURE_BRAINSTORM_AGENT_KIND, BUG_INVESTIGATOR_AGENT_KIND, TRACKER_AGENT_KIND, ANALYSIS_AGENT_KIND, TESTER_AGENT_KIND, HUMAN_TEST_AGENT_KIND, HUMAN_REVIEW_AGENT_KIND, BLUEPRINTS_AGENT_KIND, SPEC_WRITER_AGENT_KIND, } from './ci.logic.js';
+import { CONFLICTS_AGENT_KIND, MERGER_AGENT_KIND, REQUIREMENTS_REVIEW_AGENT_KIND, CLARITY_REVIEW_AGENT_KIND, REQUIREMENTS_BRAINSTORM_AGENT_KIND, ARCHITECTURE_BRAINSTORM_AGENT_KIND, BUG_INVESTIGATOR_AGENT_KIND, TRACKER_AGENT_KIND, ANALYSIS_AGENT_KIND, TESTER_AGENT_KIND, UI_TESTER_AGENT_KIND, isTesterKind, HUMAN_TEST_AGENT_KIND, VISUAL_CONFIRM_AGENT_KIND, HUMAN_REVIEW_AGENT_KIND, BLUEPRINTS_AGENT_KIND, SPEC_WRITER_AGENT_KIND, } from './ci.logic.js';
 import { DEFAULT_FOLLOW_UP_MAX_LOOPS, FOLLOW_UP_PRODUCER_KIND, followUpsToSendBack, hasPendingFollowUps, renderFollowUpRework, shouldLoopCoder, } from './followUp.logic.js';
 import { AgentContextBuilder } from './AgentContextBuilder.js';
 import { CompanionController } from './CompanionController.js';
@@ -16,6 +16,7 @@ import { MergeResolver } from './MergeResolver.js';
 import { ReviewGateController } from './ReviewGateController.js';
 import { TesterController } from './TesterController.js';
 import { HumanTestController } from './HumanTestController.js';
+import { VisualConfirmationController } from './VisualConfirmationController.js';
 import { getProvider, recordGateAttempt, registeredGateFactories, registeredStepResolverFactories, requireProvider, } from '@cat-factory/kernel';
 import { isAsyncAgentExecutor } from '@cat-factory/kernel';
 import { isDeployStep } from '@cat-factory/integrations';
@@ -46,7 +47,12 @@ const EXECUTION_FAILURE_HINTS = {
  * the per-poll env projection so the `getByBlock` read never hits the hot path for
  * the many container steps that have no env to show (see attachEnvironmentProjection).
  */
-const ENV_PROJECTION_KINDS = new Set(['deployer', 'tester', 'playwright']);
+const ENV_PROJECTION_KINDS = new Set([
+    'deployer',
+    TESTER_AGENT_KIND,
+    UI_TESTER_AGENT_KIND,
+    'playwright',
+]);
 /**
  * Parse `owner`/`repo` from a GitHub pull-request URL (`https://github.com/o/r/pull/42`).
  * Returns undefined for any URL that doesn't carry both segments. Host-agnostic on
@@ -98,6 +104,8 @@ export class ExecutionService {
     testerController;
     /** Drives the human-testing gate: provision env → park → confirm / fix / pull-main / recreate. */
     humanTestController;
+    /** Drives the visual-confirmation gate: gather screenshots → park → approve / fix / recapture. */
+    visualConfirmationController;
     /** Drives both iterative review gates (requirements + clarity); kind-parameterised. */
     reviewGate;
     /** The requirements subject for {@link reviewGate}. */
@@ -139,7 +147,7 @@ export class ExecutionService {
      * {@link stepResolverFor} and {@link StepCompletionResolver}.
      */
     stepResolverCache;
-    constructor({ workspaceRepository, blockRepository, pipelineRepository, executionRepository, accountRepository, idGenerator, clock, agentExecutor, workRunner, executionEventPublisher, boardService, spendService, documentRepository, taskRepository, requirementReviewRepository, requirementReviewService, kaizenScheduler, clarityReviewRepository, clarityReviewService, brainstormServices, brainstormSessionRepository, fragmentResolver, environmentProvisioning, environmentTeardown, branchUpdater, blueprintReconciler, notificationService, workspaceSettingsService, llmObservability, pullRequestMerger, mergePresetRepository, ticketTrackerProvider, issueWriteback, subscriptionActivationRepository, resolveWorkspaceModelDefault, resolveProviderCapabilities, localTestInfraSupported, resolveRunRepoContext, resolveTesterFallbackDefault, resolveRequireEnvironmentProvider, assertAgentBackendConfigured, runInitiatorScope, }) {
+    constructor({ workspaceRepository, blockRepository, pipelineRepository, executionRepository, accountRepository, idGenerator, clock, agentExecutor, workRunner, executionEventPublisher, boardService, spendService, documentRepository, taskRepository, requirementReviewRepository, requirementReviewService, kaizenScheduler, clarityReviewRepository, clarityReviewService, brainstormServices, brainstormSessionRepository, fragmentResolver, environmentProvisioning, environmentTeardown, branchUpdater, blueprintReconciler, notificationService, binaryArtifactStore, workspaceSettingsService, llmObservability, pullRequestMerger, mergePresetRepository, ticketTrackerProvider, issueWriteback, subscriptionActivationRepository, resolveWorkspaceModelDefault, resolveProviderCapabilities, localTestInfraSupported, resolveRunRepoContext, resolveTesterFallbackDefault, resolveRequireEnvironmentProvider, assertAgentBackendConfigured, runInitiatorScope, }) {
         this.runInitiatorScope = runInitiatorScope ?? ((_initiatedBy, fn) => fn());
         this.workspaceRepository = workspaceRepository;
         this.blockRepository = blockRepository;
@@ -249,6 +257,25 @@ export class ExecutionService {
             emitInstance: (ws, i) => this.emitInstance(ws, i),
             clockNow: () => this.clock.now(),
         });
+        this.visualConfirmationController = new VisualConfirmationController({
+            blockRepository,
+            executionRepository,
+            workRunner,
+            agentExecutor,
+            contextBuilder: this.contextBuilder,
+            notificationService,
+            ...(binaryArtifactStore ? { binaryArtifactStore } : {}),
+            resolveMergePreset: (ws, block) => this.resolveMergePreset(ws, block),
+            parkStepOnDecision: (ws, i, s, p) => this.parkStepOnDecision(ws, i, s, p),
+            finishStep: (s) => this.finishStep(s),
+            startStep: (s) => this.startStep(s),
+            updateBlockProgress: (ws, i, st) => this.updateBlockProgress(ws, i, st),
+            finalizeBlock: (ws, i, c) => this.finalizeBlock(ws, i, c),
+            stopRunContainer: (ws, i) => this.stopRunContainer(ws, i),
+            persistInstance: (ws, i) => this.executionRepository.upsert(ws, i),
+            emitInstance: (ws, i) => this.emitInstance(ws, i),
+            clockNow: () => this.clock.now(),
+        });
         this.reviewGate = new ReviewGateController({
             blockRepository,
             executionRepository,
@@ -472,7 +499,7 @@ export class ExecutionService {
         // configured (a docker-compose path, or an explicit "no infra dependencies"
         // flag). Block the start with a clear, actionable error otherwise — before any
         // side effects (activation mint / prior-run teardown).
-        if (pipeline.agentKinds.includes(TESTER_AGENT_KIND)) {
+        if (pipeline.agentKinds.some(isTesterKind)) {
             await this.assertTesterInfraConfigured(workspaceId, block);
         }
         // Block the start when the workspace delegates container agents to a runner pool that
@@ -756,7 +783,9 @@ export class ExecutionService {
             // the driver. Fall through so the gate re-evaluates and acts on it (dispatch a helper,
             // rebuild the env, or advance) instead of immediately re-parking.
             const reentrantHumanTest = step.agentKind === HUMAN_TEST_AGENT_KIND && !!step.humanTest?.pendingAction;
-            if (!reentrantRequirements && !reentrantHumanTest) {
+            // The visual-confirmation gate is likewise re-entrant on a human action.
+            const reentrantVisualConfirm = step.agentKind === VISUAL_CONFIRM_AGENT_KIND && !!step.visualConfirm?.pendingAction;
+            if (!reentrantRequirements && !reentrantHumanTest && !reentrantVisualConfirm) {
                 // Parked on either an agent-raised decision or a human approval gate; both
                 // are addressed by the same durable event id.
                 const pendingId = step.decision?.id ?? step.approval?.id;
@@ -829,6 +858,13 @@ export class ExecutionService {
         if (step.agentKind === HUMAN_TEST_AGENT_KIND) {
             return this.humanTestController.evaluate(workspaceId, instance, step, block, isFinalStep);
         }
+        // A `visual-confirmation` gate gathers the UI tester's screenshots + the uploaded
+        // reference designs and PARKS for a human to review actual-vs-reference, then on demand
+        // dispatches the Tester's `fixer`. Passes through (auto-advances) when no binary-artifact
+        // store is wired. See {@link VisualConfirmationController}.
+        if (step.agentKind === VISUAL_CONFIRM_AGENT_KIND) {
+            return this.visualConfirmationController.evaluate(workspaceId, instance, step, block, isFinalStep);
+        }
         // A polling gate step (`ci` / `conflicts`) runs a programmatic precheck and only
         // escalates to a helper container agent (`ci-fixer` / `conflict-resolver`) on a
         // negative verdict — no LLM of its own. Pass-through when the gate's provider is
@@ -1149,7 +1185,7 @@ export class ExecutionService {
         // step's own work: when it finishes (or fails) we drop the handle, return to
         // `testing`, and re-dispatch the Tester against the (now-fixed) branch — its
         // fresh report then drives greenlight-or-loop again. Mirrors the CI gate.
-        if (step.agentKind === TESTER_AGENT_KIND && step.test?.phase === 'fixing') {
+        if (isTesterKind(step.agentKind) && step.test?.phase === 'fixing') {
             step.jobId = undefined;
             step.subtasks = undefined;
             step.test.phase = 'testing';
@@ -1173,6 +1209,13 @@ export class ExecutionService {
                 state: update.state === 'failed' ? 'failed' : 'done',
             });
         }
+        // A `visual-confirmation` gate in its `fixing` phase has a `fixer` job in flight: when it
+        // settles, record the round, refresh the screenshot pairs, and re-park the human.
+        if (step.agentKind === VISUAL_CONFIRM_AGENT_KIND && step.visualConfirm?.phase === 'fixing') {
+            return this.visualConfirmationController.onHelperComplete(workspaceId, instance, step, {
+                state: update.state === 'failed' ? 'failed' : 'done',
+            });
+        }
         if (update.state === 'failed') {
             // A container eviction (the per-run container vanished, its in-memory job is
             // gone) is usually transient. Recover it by dropping the dead handle and
@@ -1488,7 +1531,7 @@ export class ExecutionService {
         // NOT finish the step: we loop the `fixer` (within the attempt budget) and
         // re-test, mirroring the CI gate. A greenlight (or no provider) falls through to
         // the normal finish/advance below. Records the report on the step either way.
-        if (step.agentKind === TESTER_AGENT_KIND && result.testReport !== undefined) {
+        if (isTesterKind(step.agentKind) && result.testReport !== undefined) {
             const looped = await this.testerController.resolveTesterResult(workspaceId, instance, step, result);
             if (looped)
                 return looped;
@@ -2655,6 +2698,9 @@ export class ExecutionService {
         if (step.agentKind === HUMAN_TEST_AGENT_KIND) {
             throw new ConflictError('Resolve the human-testing gate through its window (confirm / request a fix), not the approval gate');
         }
+        if (step.agentKind === VISUAL_CONFIRM_AGENT_KIND) {
+            throw new ConflictError('Resolve the visual-confirmation gate through its window (approve / request a fix), not the approval gate');
+        }
         if (step.companion?.exceeded) {
             throw new ConflictError('Resolve this companion review through its iteration-cap prompt, not the approval gate');
         }
@@ -3034,6 +3080,21 @@ export class ExecutionService {
     destroyHumanTestEnv(workspaceId, blockId) {
         return this.humanTestController.destroyEnvironment(workspaceId, blockId);
     }
+    // ---- visual-confirmation gate actions (driven from the dedicated window) --
+    // Each mutates the parked gate step and wakes the durable driver; see
+    // {@link VisualConfirmationController}.
+    /** Approve the reviewed screenshots: advance the run. */
+    approveVisualConfirm(workspaceId, blockId) {
+        return this.visualConfirmationController.approve(workspaceId, blockId);
+    }
+    /** Submit findings and request a fix: dispatch the Tester's `fixer`, then re-park. */
+    requestVisualConfirmFix(workspaceId, blockId, findings) {
+        return this.visualConfirmationController.requestFix(workspaceId, blockId, findings);
+    }
+    /** Refresh the screenshot pairs from the latest UI-tester report. */
+    recaptureVisualConfirm(workspaceId, blockId) {
+        return this.visualConfirmationController.recapture(workspaceId, blockId);
+    }
     /**
      * Dispatch the `fixer` against the human-review gate's PR branch from a human's freeform
      * instructions — bypassing the precheck + grace window. Parks a `pendingFix` on the gate step,
@@ -3148,6 +3209,7 @@ export class ExecutionService {
                 step.metrics = {
                     calls: s.calls,
                     promptTokens: s.promptTokens,
+                    cachedPromptTokens: s.cachedPromptTokens,
                     completionTokens: s.completionTokens,
                     peakCompletionTokens: s.peakCompletionTokens,
                     maxOutputTokens: s.maxOutputTokens,