npm - donobu - Versions diffs - 5.53.0 → 5.55.0 - Mend

donobu 5.53.0 → 5.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51)

package/dist/apis/FlowsApi.d.ts +95 -7
package/dist/apis/FlowsApi.js +139 -11
package/dist/apis/TestsApi.js +4 -3
package/dist/codegen/CodeGenerator.js +4 -2
package/dist/esm/apis/FlowsApi.d.ts +95 -7
package/dist/esm/apis/FlowsApi.js +139 -11
package/dist/esm/apis/TestsApi.js +4 -3
package/dist/esm/codegen/CodeGenerator.js +4 -2
package/dist/esm/lib/test/testExtension.js +34 -29
package/dist/esm/managers/AdminApiController.js +4 -0
package/dist/esm/managers/DonobuFlow.d.ts +98 -1
package/dist/esm/managers/DonobuFlow.js +345 -21
package/dist/esm/managers/DonobuFlowsManager.d.ts +14 -1
package/dist/esm/managers/DonobuFlowsManager.js +20 -2
package/dist/esm/models/ControlPanel.d.ts +22 -0
package/dist/esm/models/CreateDonobuFlow.d.ts +1 -0
package/dist/esm/models/CreateTest.d.ts +1 -0
package/dist/esm/models/FlowMetadata.d.ts +6 -0
package/dist/esm/models/FlowMetadata.js +3 -1
package/dist/esm/models/RunMode.d.ts +1 -0
package/dist/esm/models/RunMode.js +7 -1
package/dist/esm/models/TestMetadata.d.ts +9 -0
package/dist/esm/persistence/DonobuSqliteDb.js +3 -2
package/dist/esm/tools/ReplayableInteraction.d.ts +20 -0
package/dist/esm/tools/ReplayableInteraction.js +63 -0
package/dist/esm/tools/SetRunModeTool.d.ts +2 -0
package/dist/esm/tools/Tool.d.ts +16 -0
package/dist/esm/tools/Tool.js +16 -0
package/dist/esm/tools/TriggerDonobuFlowTool.d.ts +2 -0
package/dist/lib/test/testExtension.js +34 -29
package/dist/managers/AdminApiController.js +4 -0
package/dist/managers/DonobuFlow.d.ts +98 -1
package/dist/managers/DonobuFlow.js +345 -21
package/dist/managers/DonobuFlowsManager.d.ts +14 -1
package/dist/managers/DonobuFlowsManager.js +20 -2
package/dist/models/ControlPanel.d.ts +22 -0
package/dist/models/CreateDonobuFlow.d.ts +1 -0
package/dist/models/CreateTest.d.ts +1 -0
package/dist/models/FlowMetadata.d.ts +6 -0
package/dist/models/FlowMetadata.js +3 -1
package/dist/models/RunMode.d.ts +1 -0
package/dist/models/RunMode.js +7 -1
package/dist/models/TestMetadata.d.ts +9 -0
package/dist/persistence/DonobuSqliteDb.js +3 -2
package/dist/tools/ReplayableInteraction.d.ts +20 -0
package/dist/tools/ReplayableInteraction.js +63 -0
package/dist/tools/SetRunModeTool.d.ts +2 -0
package/dist/tools/Tool.d.ts +16 -0
package/dist/tools/Tool.js +16 -0
package/dist/tools/TriggerDonobuFlowTool.d.ts +2 -0
package/package.json +1 -1

package/dist/esm/models/FlowMetadata.js CHANGED Viewed

@@ -12,6 +12,7 @@ exports.StateSchema = v4_1.z
     'INITIALIZING',
     'QUERYING_LLM_FOR_NEXT_ACTION',
     'WAITING_ON_USER_FOR_NEXT_ACTION',
+    'WAITING_FOR_APPROVAL',
     'PAUSED',
     'RESUMING',
     'RUNNING_ACTION',
@@ -22,8 +23,9 @@ exports.StateSchema = v4_1.z
  - UNSTARTED: Flow created but not yet initialized.
  - INITIALIZING: Setting up browser context and initial state.
  - RUNNING_ACTION: Executing a tool call.
- - QUERYING_LLM_FOR_NEXT_ACTION: AI determining next action (AUTONOMOUS mode).
+ - QUERYING_LLM_FOR_NEXT_ACTION: AI determining next action (AUTONOMOUS/SUPERVISED mode).
  - WAITING_ON_USER_FOR_NEXT_ACTION: Waiting for user input (INSTRUCT mode).
+ - WAITING_FOR_APPROVAL: An AI-proposed action is waiting for the user to approve or reject it (SUPERVISED mode).
  - PAUSED: Flow execution temporarily suspended.
  - RESUMING: Transitioning from paused to active state.
  - FAILED: Flow terminated unsuccessfully.

package/dist/esm/models/RunMode.d.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { z } from 'zod/v4';
 export declare const RunModeSchema: z.ZodEnum<{
     AUTONOMOUS: "AUTONOMOUS";
+    SUPERVISED: "SUPERVISED";
     INSTRUCT: "INSTRUCT";
     DETERMINISTIC: "DETERMINISTIC";
 }>;

package/dist/esm/models/RunMode.js CHANGED Viewed

@@ -3,7 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.RunModeSchema = void 0;
 const v4_1 = require("zod/v4");
 exports.RunModeSchema = v4_1.z
-    .enum(['AUTONOMOUS', 'INSTRUCT', 'DETERMINISTIC'])
+    .enum(['AUTONOMOUS', 'SUPERVISED', 'INSTRUCT', 'DETERMINISTIC'])
     .describe(`The execution mode that determines how a flow operates and makes decisions:
 - AUTONOMOUS: The flow is driven by an AI agent (GPT) that autonomously decides what actions to
@@ -11,6 +11,12 @@ exports.RunModeSchema = v4_1.z
               determines the next appropriate action, and executes it without human intervention.
               This mode requires a valid GPT client configuration.
+- SUPERVISED: The flow is driven by an AI agent (GPT) that proposes the next action, but a human
+              supervises each proposal: every AI-proposed action waits for the user to approve it,
+              or reject it with optional feedback, before it executes. Rejecting discards the
+              proposal and lets the AI try again with the feedback in mind. Like AUTONOMOUS, this
+              mode pursues an overall objective and requires a valid GPT client configuration.
 - INSTRUCT: The flow waits for explicit user instructions for each action. The user manually
             directs what tools to call and when, making this mode suitable for guided walkthroughs
             or when human oversight is required for each step. No GPT client is required.

package/dist/esm/models/TestMetadata.d.ts CHANGED Viewed

@@ -141,6 +141,7 @@ export declare const TestMetadataSchema: z.ZodObject<{
     suiteId: z.ZodNullable<z.ZodString>;
     nextRunMode: z.ZodEnum<{
         AUTONOMOUS: "AUTONOMOUS";
+        SUPERVISED: "SUPERVISED";
         INSTRUCT: "INSTRUCT";
         DETERMINISTIC: "DETERMINISTIC";
     }>;
@@ -324,6 +325,7 @@ export declare const TestListItemSchema: z.ZodObject<{
     suiteId: z.ZodNullable<z.ZodString>;
     nextRunMode: z.ZodEnum<{
         AUTONOMOUS: "AUTONOMOUS";
+        SUPERVISED: "SUPERVISED";
         INSTRUCT: "INSTRUCT";
         DETERMINISTIC: "DETERMINISTIC";
     }>;
@@ -486,6 +488,7 @@ export declare const TestListItemSchema: z.ZodObject<{
         defaultMessageDuration: z.ZodNullable<z.ZodNumber>;
         runMode: z.ZodEnum<{
             AUTONOMOUS: "AUTONOMOUS";
+            SUPERVISED: "SUPERVISED";
             INSTRUCT: "INSTRUCT";
             DETERMINISTIC: "DETERMINISTIC";
         }>;
@@ -500,6 +503,7 @@ export declare const TestListItemSchema: z.ZodObject<{
             INITIALIZING: "INITIALIZING";
             QUERYING_LLM_FOR_NEXT_ACTION: "QUERYING_LLM_FOR_NEXT_ACTION";
             WAITING_ON_USER_FOR_NEXT_ACTION: "WAITING_ON_USER_FOR_NEXT_ACTION";
+            WAITING_FOR_APPROVAL: "WAITING_FOR_APPROVAL";
             PAUSED: "PAUSED";
             RESUMING: "RESUMING";
             RUNNING_ACTION: "RUNNING_ACTION";
@@ -511,6 +515,7 @@ export declare const TestListItemSchema: z.ZodObject<{
             INITIALIZING: "INITIALIZING";
             QUERYING_LLM_FOR_NEXT_ACTION: "QUERYING_LLM_FOR_NEXT_ACTION";
             WAITING_ON_USER_FOR_NEXT_ACTION: "WAITING_ON_USER_FOR_NEXT_ACTION";
+            WAITING_FOR_APPROVAL: "WAITING_FOR_APPROVAL";
             PAUSED: "PAUSED";
             RESUMING: "RESUMING";
             RUNNING_ACTION: "RUNNING_ACTION";
@@ -676,6 +681,7 @@ export declare const TestListItemPaginatedResultSchema: z.ZodObject<{
         suiteId: z.ZodNullable<z.ZodString>;
         nextRunMode: z.ZodEnum<{
             AUTONOMOUS: "AUTONOMOUS";
+            SUPERVISED: "SUPERVISED";
             INSTRUCT: "INSTRUCT";
             DETERMINISTIC: "DETERMINISTIC";
         }>;
@@ -838,6 +844,7 @@ export declare const TestListItemPaginatedResultSchema: z.ZodObject<{
             defaultMessageDuration: z.ZodNullable<z.ZodNumber>;
             runMode: z.ZodEnum<{
                 AUTONOMOUS: "AUTONOMOUS";
+                SUPERVISED: "SUPERVISED";
                 INSTRUCT: "INSTRUCT";
                 DETERMINISTIC: "DETERMINISTIC";
             }>;
@@ -852,6 +859,7 @@ export declare const TestListItemPaginatedResultSchema: z.ZodObject<{
                 INITIALIZING: "INITIALIZING";
                 QUERYING_LLM_FOR_NEXT_ACTION: "QUERYING_LLM_FOR_NEXT_ACTION";
                 WAITING_ON_USER_FOR_NEXT_ACTION: "WAITING_ON_USER_FOR_NEXT_ACTION";
+                WAITING_FOR_APPROVAL: "WAITING_FOR_APPROVAL";
                 PAUSED: "PAUSED";
                 RESUMING: "RESUMING";
                 RUNNING_ACTION: "RUNNING_ACTION";
@@ -863,6 +871,7 @@ export declare const TestListItemPaginatedResultSchema: z.ZodObject<{
                 INITIALIZING: "INITIALIZING";
                 QUERYING_LLM_FOR_NEXT_ACTION: "QUERYING_LLM_FOR_NEXT_ACTION";
                 WAITING_ON_USER_FOR_NEXT_ACTION: "WAITING_ON_USER_FOR_NEXT_ACTION";
+                WAITING_FOR_APPROVAL: "WAITING_FOR_APPROVAL";
                 PAUSED: "PAUSED";
                 RESUMING: "RESUMING";
                 RUNNING_ACTION: "RUNNING_ACTION";

package/dist/esm/persistence/DonobuSqliteDb.js CHANGED Viewed

@@ -471,7 +471,8 @@ CREATE INDEX IF NOT EXISTS idx_ai_queries_flow_id_started_at ON ai_queries(flow_
                     // value (likely null) if no autonomous flow exists in the group.
                     const newestAutonomous = [...group.flows]
                         .reverse()
-                        .find((f) => f.metadata.runMode === 'AUTONOMOUS')?.metadata;
+                        .find((f) => f.metadata.runMode === 'AUTONOMOUS' ||
+                        f.metadata.runMode === 'SUPERVISED')?.metadata;
                     const maxToolCalls = newestAutonomous?.maxToolCalls ?? newest.maxToolCalls;
                     const testName = (0, displayName_1.getDisplayName)(newest, 'Untitled Test');
                     const testMetadata = {
@@ -649,7 +650,7 @@ CREATE INDEX IF NOT EXISTS idx_ai_queries_flow_id_started_at ON ai_queries(flow_
                     // value (likely null) if no autonomous flow exists in the group.
                     const newestAutonomous = [...flows]
                         .reverse()
-                        .find((f) => f.runMode === 'AUTONOMOUS');
+                        .find((f) => f.runMode === 'AUTONOMOUS' || f.runMode === 'SUPERVISED');
                     const maxToolCalls = newestAutonomous?.maxToolCalls ?? newestFlow.maxToolCalls;
                     const testName = (0, displayName_1.getDisplayName)(newestFlow, 'Untitled Test');
                     const testMetadata = {

package/dist/esm/tools/ReplayableInteraction.d.ts CHANGED Viewed

@@ -84,8 +84,28 @@ export declare abstract class ReplayableInteraction<CoreSchema extends z.ZodObje
     readonly coreSchema: CoreSchema;
     static readonly MAX_SELECTOR_FAILOVERS = 3;
     static readonly MAX_LOCATOR_MATCH_COUNT = 3;
+    /**
+     * How long the preview cursor takes to glide to (and dwell on) the proposed
+     * target, in milliseconds. {@link InteractionVisualizer.pointAt} animates the
+     * move over half this duration.
+     */
+    static readonly PREVIEW_CURSOR_DURATION_MILLIS = 900;
     constructor(name: string, description: string, coreSchema: CoreSchema, inputSchema: NonGptSchema, inputSchemaForGpt: GptSchema, requiresGpt?: boolean);
     call(context: ToolCallContext, parameters: z.infer<NonGptSchema>): Promise<ToolCallResult>;
+    /**
+     * SUPERVISED-mode preview: resolve the element this interaction *would*
+     * target (from either an annotation- or selector-based proposal) and move the
+     * on-screen cursor to it, without performing the interaction. Best-effort —
+     * an unresolvable element simply leaves the cursor where it is.
+     */
+    previewInteraction(context: ToolCallContext, parameters: Record<string, unknown>): Promise<void>;
+    /**
+     * Resolve a proposed interaction's parameters to the {@link Locator} it would
+     * act on, mirroring the resolution in {@link call} (selector-based) and
+     * {@link callFromGpt} (annotation-based) but returning only the locator and
+     * never invoking anything. Returns `null` if the element can't be found.
+     */
+    private resolvePreviewLocator;
     callFromGpt(context: ToolCallContext, parameters: z.infer<GptSchema>): Promise<ToolCallResult>;
     /**
      * **Concrete subclasses implement the actual user action here.**

package/dist/esm/tools/ReplayableInteraction.js CHANGED Viewed

@@ -96,6 +96,63 @@ class ReplayableInteraction extends Tool_1.Tool {
         }
         return this.callCore(context, parameters, locators, parameters.selector);
     }
+    /**
+     * SUPERVISED-mode preview: resolve the element this interaction *would*
+     * target (from either an annotation- or selector-based proposal) and move the
+     * on-screen cursor to it, without performing the interaction. Best-effort —
+     * an unresolvable element simply leaves the cursor where it is.
+     */
+    async previewInteraction(context, parameters) {
+        const page = (0, TargetUtils_1.webPage)(context);
+        const locator = await this.resolvePreviewLocator(context, page, parameters);
+        if (!locator) {
+            return;
+        }
+        // Point at the same visible target (or its label) the real interaction
+        // would, so the preview matches what approval will touch.
+        const pointTarget = await ReplayableInteraction.getLocatorOrItsLabel(locator.first());
+        // Only reveal the cursor now that we have a real target to point at, so a
+        // non-interactive proposal never pops a stationary cursor.
+        await context.targetInspector.showInteractionCursor();
+        await context.interactionVisualizer.pointAt(page, pointTarget.first(), undefined, ReplayableInteraction.PREVIEW_CURSOR_DURATION_MILLIS);
+    }
+    /**
+     * Resolve a proposed interaction's parameters to the {@link Locator} it would
+     * act on, mirroring the resolution in {@link call} (selector-based) and
+     * {@link callFromGpt} (annotation-based) but returning only the locator and
+     * never invoking anything. Returns `null` if the element can't be found.
+     */
+    async resolvePreviewLocator(context, page, parameters) {
+        // Selector-based (deterministic) proposal.
+        if (parameters.selector) {
+            const parsed = ElementSelector_1.ElementSelectorSchema.safeParse(parameters.selector);
+            if (!parsed.success) {
+                return null;
+            }
+            const locators = await ReplayableInteraction.getLocatorsOrderedByMatchCount(page, parsed.data, ReplayableInteraction.MAX_LOCATOR_MATCH_COUNT, ReplayableInteraction.MAX_SELECTOR_FAILOVERS);
+            return locators[0]?.locator ?? null;
+        }
+        // Annotation-based (LLM-driven) proposal.
+        if (typeof parameters.annotation === 'string' ||
+            typeof parameters.annotation === 'number') {
+            const elementSelector = `[${context.targetInspector.interactableElementAttribute}="${parameters.annotation}"]`;
+            for (const frame of page.frames()) {
+                if (frame.isDetached()) {
+                    continue;
+                }
+                try {
+                    const candidate = frame.locator(elementSelector);
+                    if ((await candidate.count()) > 0) {
+                        return candidate;
+                    }
+                }
+                catch {
+                    // Detached or cross-origin frame — skip it.
+                }
+            }
+        }
+        return null;
+    }
     async callFromGpt(context, parameters) {
         const page = (0, TargetUtils_1.webPage)(context);
         const elementSelector = `[${context.targetInspector.interactableElementAttribute}="${parameters.annotation}"]`;
@@ -655,4 +712,10 @@ class ReplayableInteraction extends Tool_1.Tool {
 exports.ReplayableInteraction = ReplayableInteraction;
 ReplayableInteraction.MAX_SELECTOR_FAILOVERS = 3;
 ReplayableInteraction.MAX_LOCATOR_MATCH_COUNT = 3;
+/**
+ * How long the preview cursor takes to glide to (and dwell on) the proposed
+ * target, in milliseconds. {@link InteractionVisualizer.pointAt} animates the
+ * move over half this duration.
+ */
+ReplayableInteraction.PREVIEW_CURSOR_DURATION_MILLIS = 900;
 //# sourceMappingURL=ReplayableInteraction.js.map

package/dist/esm/tools/SetRunModeTool.d.ts CHANGED Viewed

@@ -5,6 +5,7 @@ import { Tool } from './Tool';
 export declare const SetRunModeCoreSchema: z.ZodObject<{
     runMode: z.ZodEnum<{
         AUTONOMOUS: "AUTONOMOUS";
+        SUPERVISED: "SUPERVISED";
         INSTRUCT: "INSTRUCT";
         DETERMINISTIC: "DETERMINISTIC";
     }>;
@@ -12,6 +13,7 @@ export declare const SetRunModeCoreSchema: z.ZodObject<{
 export declare const SetRunModeGptSchema: z.ZodObject<{
     runMode: z.ZodEnum<{
         AUTONOMOUS: "AUTONOMOUS";
+        SUPERVISED: "SUPERVISED";
         INSTRUCT: "INSTRUCT";
         DETERMINISTIC: "DETERMINISTIC";
     }>;

package/dist/esm/tools/Tool.d.ts CHANGED Viewed

@@ -36,6 +36,22 @@ export declare abstract class Tool<CallSchema extends z.ZodObject, CallFromGptSc
      * Invoke the tool as made from a GPT with the given context and parameters.
      */
     abstract callFromGpt(context: ToolCallContext, parameters: z.infer<CallFromGptSchema>): Promise<ToolCallResult>;
+    /**
+     * Move the on-screen cursor to where this tool *would* interact, WITHOUT
+     * performing the action. Used by SUPERVISED mode to show the user what an
+     * AI-proposed action would touch while it awaits their approval.
+     *
+     * The default is a no-op — only tools with a visible interaction target
+     * (e.g. {@link ReplayableInteraction}-derived click/type/hover tools)
+     * override it. Implementations must be best-effort and side-effect-free with
+     * respect to the page: they may move the visual cursor but must never click,
+     * type, navigate, or otherwise mutate page state.
+     *
+     * @param context - The active {@link ToolCallContext}.
+     * @param parameters - The proposed tool call's raw parameters (as proposed by
+     *   the LLM or a user); implementations validate/resolve these themselves.
+     */
+    previewInteraction(_context: ToolCallContext, _parameters: Record<string, unknown>): Promise<void>;
     /**
      * Transform a completed tool call into a {@link ProposedToolCall} suitable
      * for deterministic replay / code generation.

package/dist/esm/tools/Tool.js CHANGED Viewed

@@ -27,6 +27,22 @@ class Tool {
         this.controlPanelMessage = controlPanelMessage;
         this.supportedTargets = supportedTargets;
     }
+    /**
+     * Move the on-screen cursor to where this tool *would* interact, WITHOUT
+     * performing the action. Used by SUPERVISED mode to show the user what an
+     * AI-proposed action would touch while it awaits their approval.
+     *
+     * The default is a no-op — only tools with a visible interaction target
+     * (e.g. {@link ReplayableInteraction}-derived click/type/hover tools)
+     * override it. Implementations must be best-effort and side-effect-free with
+     * respect to the page: they may move the visual cursor but must never click,
+     * type, navigate, or otherwise mutate page state.
+     *
+     * @param context - The active {@link ToolCallContext}.
+     * @param parameters - The proposed tool call's raw parameters (as proposed by
+     *   the LLM or a user); implementations validate/resolve these themselves.
+     */
+    async previewInteraction(_context, _parameters) { }
     /**
      * Transform a completed tool call into a {@link ProposedToolCall} suitable
      * for deterministic replay / code generation.

package/dist/esm/tools/TriggerDonobuFlowTool.d.ts CHANGED Viewed

@@ -138,6 +138,7 @@ export declare const TriggerDonobuFlowCoreSchema: z.ZodObject<{
         gptConfigNameOverride: z.ZodOptional<z.ZodNullable<z.ZodString>>;
         initialRunMode: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
             AUTONOMOUS: "AUTONOMOUS";
+            SUPERVISED: "SUPERVISED";
             INSTRUCT: "INSTRUCT";
             DETERMINISTIC: "DETERMINISTIC";
         }>>>;
@@ -286,6 +287,7 @@ export declare const TriggerDonobuFlowGptSchema: z.ZodObject<{
         gptConfigNameOverride: z.ZodOptional<z.ZodNullable<z.ZodString>>;
         initialRunMode: z.ZodOptional<z.ZodNullable<z.ZodEnum<{
             AUTONOMOUS: "AUTONOMOUS";
+            SUPERVISED: "SUPERVISED";
             INSTRUCT: "INSTRUCT";
             DETERMINISTIC: "DETERMINISTIC";
         }>>>;

package/dist/lib/test/testExtension.js CHANGED Viewed

@@ -908,14 +908,14 @@ async function persistFlowJson(persistence, flowId, fileId, value) {
  * See `fetchBaselineScreenshot` / `gatherTestFailureEvidence` in
  * triageTestFailure.ts.
  *
- * Runs for any meaningful end state; skipped only for `skipped` tests (no real
- * page state), when triage is disabled, or for V1 (legacy self-heal) tests.
- * Best-effort and fails open.
+ * Runs for any meaningful end state, including V1 (objective-annotated) tests;
+ * skipped only for `skipped` tests (no real page state) or when triage is
+ * disabled. Triage reads this screenshot as the current run's failure shot and
+ * as the baseline for a later failing run. Best-effort and fails open.
  */
 async function captureAndPersistFinalState(page, testInfo) {
     if (testInfo.status === 'skipped' ||
-        process.env.DONOBU_TRIAGE_DISABLED === '1' ||
-        isV1Test(testInfo)) {
+        process.env.DONOBU_TRIAGE_DISABLED === '1') {
         return;
     }
     const flowId = page._dnb?.donobuFlowMetadata?.id;
@@ -1015,35 +1015,40 @@ async function finalizeTest(page, testInfo, logBuffer, videoOption) {
     // future failing run reads a successful run's copy as its baseline.
     await captureAndPersistFinalState(page, testInfo);
     if (testInfo.status === 'failed') {
-        if (isV1Test(testInfo)) {
-            if (isV1SelfHealingEnabled(testInfo) &&
-                !MiscUtils_1.MiscUtils.yn(envVars_1.env.data.DONOBU_AUTO_HEAL_ACTIVE)) {
-                if (!sharedState.gptClient) {
-                    Logger_1.appLogger.warn('Will not self-heal due to no GPT client being set up.');
-                }
-                else {
-                    try {
-                        await (0, selfHealing_1.selfHeal)(sharedState.gptClient, testInfo, page);
-                    }
-                    catch (error) {
-                        Logger_1.appLogger.error('Error when attempting to self-heal', error);
-                    }
-                }
+        // Gather failure-triage evidence for every failed test, regardless of its
+        // V1 (objective-annotated) classification or self-heal setting. Triage is a
+        // standalone diagnostic: it writes the failure evidence that populates the
+        // triage run directory, feeds the reports, and supplies the treatment plans
+        // auto-heal consumes. Legacy V1 self-heal runs separately, below.
+        try {
+            const evidenceResult = await (0, triageTestFailure_1.gatherTestFailureEvidence)(testInfo, page);
+            if (evidenceResult?.filePath) {
+                Logger_1.appLogger.info(`Persisted Donobu triage evidence for "${testInfo.title}" to ${evidenceResult.filePath}.`);
+            }
+            else if (evidenceResult?.evidence) {
+                Logger_1.appLogger.info(`Captured Donobu triage evidence for "${testInfo.title}" (schema v${evidenceResult.evidence.schemaVersion}).`);
             }
         }
-        else {
-            try {
-                const evidenceResult = await (0, triageTestFailure_1.gatherTestFailureEvidence)(testInfo, page);
-                if (evidenceResult?.filePath) {
-                    Logger_1.appLogger.info(`Persisted Donobu triage evidence for "${testInfo.title}" to ${evidenceResult.filePath}.`);
+        catch (error) {
+            Logger_1.appLogger.error(`Failed to gather test failure evidence for "${testInfo.title}".`, error);
+        }
+        // Legacy V1 self-heal: only for objective-annotated tests that opt in via
+        // SELF_HEAL_TESTS_ENABLED, and never during an auto-heal rerun (which owns
+        // its own remediation path).
+        if (isV1Test(testInfo) &&
+            isV1SelfHealingEnabled(testInfo) &&
+            !MiscUtils_1.MiscUtils.yn(envVars_1.env.data.DONOBU_AUTO_HEAL_ACTIVE)) {
+            if (!sharedState.gptClient) {
+                Logger_1.appLogger.warn('Will not self-heal due to no GPT client being set up.');
+            }
+            else {
+                try {
+                    await (0, selfHealing_1.selfHeal)(sharedState.gptClient, testInfo, page);
                 }
-                else if (evidenceResult?.evidence) {
-                    Logger_1.appLogger.info(`Captured Donobu triage evidence for "${testInfo.title}" (schema v${evidenceResult.evidence.schemaVersion}).`);
+                catch (error) {
+                    Logger_1.appLogger.error('Error when attempting to self-heal', error);
                 }
             }
-            catch (error) {
-                Logger_1.appLogger.error(`Failed to gather test failure evidence for "${testInfo.title}".`, error);
-            }
         }
     }
     else if (testInfo.status === 'passed' &&

package/dist/managers/AdminApiController.js CHANGED Viewed

@@ -287,6 +287,10 @@ class AdminApiController {
         app.post('/api/flows/:flowId/cancel', this.asyncHandler(apis.flowsApi.cancelFlow.bind(apis.flowsApi)));
         app.post('/api/flows/:flowId/pause', this.asyncHandler(apis.flowsApi.pauseFlow.bind(apis.flowsApi)));
         app.post('/api/flows/:flowId/resume', this.asyncHandler(apis.flowsApi.resumeFlow.bind(apis.flowsApi)));
+        app.post('/api/flows/:flowId/approve', this.asyncHandler(apis.flowsApi.approveProposal.bind(apis.flowsApi)));
+        app.post('/api/flows/:flowId/reject', this.asyncHandler(apis.flowsApi.rejectProposal.bind(apis.flowsApi)));
+        app.get('/api/flows/:flowId/pending-tool-calls', this.asyncHandler(apis.flowsApi.getPendingToolCalls.bind(apis.flowsApi)));
+        app.post('/api/flows/:flowId/run-mode', this.asyncHandler(apis.flowsApi.setRunMode.bind(apis.flowsApi)));
         app.post('/api/flows/:flowId/tool-calls', this.asyncHandler(apis.flowsToolCallsApi.postToolCalls.bind(apis.flowsToolCallsApi)));
     }
     /**

package/dist/managers/DonobuFlow.d.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import type { z } from 'zod/v4';
 import type { GptClient } from '../clients/GptClient';
 import type { AiQuery } from '../models/AiQuery';
-import type { ControlPanel } from '../models/ControlPanel';
+import type { ControlPanel, UserAction } from '../models/ControlPanel';
 import type { FlowMetadata } from '../models/FlowMetadata';
 import type { GptMessage, StructuredOutputMessage, TextItem } from '../models/GptMessage';
 import type { SystemMessage } from '../models/GptMessage';
@@ -40,8 +40,23 @@ export declare class DonobuFlow {
     readonly controlPanel: ControlPanel;
     private static readonly MAIN_MESSAGE_ELEMENT_LIST_MARKER;
     static readonly USER_INTERRUPT_MARKER = "[User interruption while flow was paused, this MUST be acknowledged]";
+    static readonly REJECTION_MARKER = "[The user rejected your previously proposed action(s). Do NOT repeat them. Propose a different next action, taking the following feedback into account]";
     inProgressToolCall: ToolCall | null;
     readonly aiQueries: AiQuery[];
+    /**
+     * In SUPERVISED mode, the set of `toolCallId`s the user has explicitly
+     * approved. A proposed tool call only executes once its id is in this set;
+     * AI-proposed calls whose id is absent park the flow in
+     * `WAITING_FOR_APPROVAL`. Ids are removed as their calls run, so the set only
+     * ever holds currently-pending approvals.
+     */
+    private readonly approvedToolCallIds;
+    /**
+     * User actions submitted out-of-band (e.g. via REST endpoints rather than the
+     * desktop control panel). Drained by the run loop alongside the control
+     * panel, so both surfaces drive the flow through the same code path.
+     */
+    private readonly userActionInbox;
     constructor(flowsManager: DonobuFlowsManager, envData: Record<string, string>, persistence: FlowsPersistence, gptClient: GptClient | null, toolManager: ToolManager, interactionVisualizer: InteractionVisualizer, proposedToolCalls: ProposedToolCall[], invokedToolCalls: ToolCall[], gptMessages: GptMessage[], targetInspector: TargetInspector, metadata: FlowMetadata, controlPanel: ControlPanel);
     /**
      * Drives the entire Donobu flow state-machine until it reaches a
@@ -78,6 +93,25 @@ export declare class DonobuFlow {
      *          explicit result.
      */
     run(): Promise<FlowMetadata['result']>;
+    /**
+     * The single entry point for external user imperatives. Every cooperative
+     * control interrupt — pause, resume, end, approve, reject, run-mode change —
+     * arrives here as a {@link UserAction}, whether it came from a REST endpoint
+     * (web frontend / SDK) or the desktop control panel. The action is queued and
+     * drained by the run loop ({@link popUserAction}) and handled uniformly by
+     * {@link onUserInterruption}, so all transports drive the flow identically.
+     *
+     * (The forceful `cancelFlow` and the queue-injecting `proposeToolCall` on
+     * {@link DonobuFlowsManager} intentionally do NOT use this path — see their
+     * docs.)
+     */
+    submitUserAction(action: UserAction): void;
+    /**
+     * Returns and clears the next pending user action, preferring out-of-band
+     * actions (REST) over the control panel. Both sources feed the same
+     * intervention path so the desktop and web surfaces behave identically.
+     */
+    private popUserAction;
     /**
      * Delegates to the inspector to attempt recovery after the target is
      * closed. If recovery fails, the flow is marked as failed.
@@ -100,6 +134,40 @@ export declare class DonobuFlow {
      * Note that this *bypasses* the normal state transition logic!
      */
     private onUserInterruption;
+    /**
+     * Closes out the currently-proposed AI tool call(s) without executing them:
+     * emits a `tool_call_result` for each (so the LLM message history stays
+     * well-formed — every tool call needs a matching result) and clears the
+     * proposal queue and any pending approvals. Shared by REJECT and manual
+     * takeover.
+     */
+    private closeOutPendingProposals;
+    /**
+     * Records a synthetic {@link AcknowledgeUserInstructionTool} tool call so a
+     * user-driven event (rejection, mode change) shows up in the flow timeline.
+     * Mirrors how RESUME records a user instruction.
+     */
+    private recordAdHocToolCall;
+    /**
+     * Moves the flow along the autonomy axis at runtime — the primitive behind
+     * "start asking me" (→ SUPERVISED), "go fully autonomous" (→ AUTONOMOUS),
+     * and "I'll take over" (→ INSTRUCT). After adjusting `runMode` and the
+     * pending proposal as appropriate, it routes through RESUMING so the next
+     * {@link transitionState} recomputes the correct state under the new mode.
+     *
+     * @param runMode - The target live mode. DETERMINISTIC is not a live mode and
+     *   is ignored. AI modes (AUTONOMOUS/SUPERVISED) require a GPT client.
+     * @param approvePending - When switching to AUTONOMOUS with an AI proposal
+     *   awaiting approval, approve and run it as part of the switch.
+     */
+    private applyRunModeChange;
+    /**
+     * Whether the flow can hand control to the AI: it needs both a GPT client and
+     * an overall objective for the agent to pursue. Surfaced to the UI (as
+     * `canUseAi`) so the autonomy selector can disable the AI modes when they
+     * wouldn't work — e.g. a Playwright-imported test with no objective.
+     */
+    private canHandOffToAi;
     /**
      * This method is called if there is an unhandled unexpected exception. This
      * method will mark the flow as a failure.
@@ -171,9 +239,38 @@ export declare class DonobuFlow {
      * initializes the GPT message history.
      */
     private onInitializing;
+    /**
+     * Assembles the {@link ToolCallContext} handed to a tool. Shared by actual
+     * execution ({@link onRunningAction}) and the SUPERVISED-mode cursor preview
+     * ({@link previewProposedInteraction}) so both see an identical environment.
+     */
+    private buildToolCallContext;
+    /**
+     * SUPERVISED mode: move the on-screen cursor to where the head proposed
+     * action *would* interact, so the user can see the target while deciding
+     * whether to approve it. This never executes the action — it only previews
+     * the interaction point. Best-effort: tools without a visible target (and any
+     * resolution failure) are simply skipped.
+     */
+    private previewProposedInteraction;
     private onRunningAction;
     private onQueryingLlmForNextAction;
     private onWaitingForUserForNextAction;
+    /**
+     * SUPERVISED mode: an AI-proposed action is parked awaiting the user's
+     * decision. We idle here until an APPROVE/REJECT (or other intervention)
+     * arrives via the control panel or a REST endpoint, which the run loop picks
+     * up as a {@link UserInterruptException}. Mirrors
+     * {@link onWaitingForUserForNextAction}.
+     *
+     * Unlike {@link onPaused}, we must NOT pin `nextState` here: the proposal
+     * still sits in `proposedToolCalls`, so the approval gate in
+     * {@link transitionState} re-parks us each poll on its own. Pinning it would
+     * also leave a stale `nextState` that survives an APPROVE interrupt (which
+     * sets `state` directly), causing the next transition to skip querying the
+     * LLM and park forever with an empty proposal queue.
+     */
+    private onWaitingForApproval;
     private onPaused;
     private onResuming;
     private onFailed;