npm - donobu - Versions diffs - 5.55.0 → 5.56.0 - Mend

donobu 5.55.0 → 5.56.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/esm/managers/DonobuFlow.d.ts +16 -3
package/dist/esm/managers/DonobuFlow.js +108 -13
package/dist/esm/managers/DonobuFlowsManager.js +11 -7
package/dist/esm/models/ControlPanel.d.ts +18 -13
package/dist/esm/tools/AcknowledgeUserInstruction.d.ts +6 -0
package/dist/esm/tools/AcknowledgeUserInstruction.js +7 -0
package/dist/esm/tools/Tool.d.ts +6 -3
package/dist/esm/tools/Tool.js +5 -2
package/dist/managers/DonobuFlow.d.ts +16 -3
package/dist/managers/DonobuFlow.js +108 -13
package/dist/managers/DonobuFlowsManager.js +11 -7
package/dist/models/ControlPanel.d.ts +18 -13
package/dist/tools/AcknowledgeUserInstruction.d.ts +6 -0
package/dist/tools/AcknowledgeUserInstruction.js +7 -0
package/dist/tools/Tool.d.ts +6 -3
package/dist/tools/Tool.js +5 -2
package/package.json +1 -1

package/dist/esm/managers/DonobuFlow.d.ts CHANGED Viewed

@@ -134,6 +134,14 @@ export declare class DonobuFlow {
      * Note that this *bypasses* the normal state transition logic!
      */
     private onUserInterruption;
+    /**
+     * Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
+     * standing goal yet, the text becomes the `overallObjective`; otherwise it's
+     * added as extra guidance. Either way it's injected into the LLM history (the
+     * system prompt was built at init, possibly before any objective existed) and
+     * recorded in the timeline. No-op for empty text.
+     */
+    private applyComposeInstruction;
     /**
      * Closes out the currently-proposed AI tool call(s) without executing them:
      * emits a `tool_call_result` for each (so the LLM message history stays
@@ -163,11 +171,16 @@ export declare class DonobuFlow {
     private applyRunModeChange;
     /**
      * Whether the flow can hand control to the AI: it needs both a GPT client and
-     * an overall objective for the agent to pursue. Surfaced to the UI (as
-     * `canUseAi`) so the autonomy selector can disable the AI modes when they
-     * wouldn't work — e.g. a Playwright-imported test with no objective.
+     * a goal to pursue.
      */
     private canHandOffToAi;
+    /**
+     * Whether there is a standing goal for the AI to pursue (a non-empty
+     * `overallObjective`). Surfaced to the UI as `hasGoal` to drive the
+     * transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
+     * and ▶ Play needs either a goal or a typed instruction.
+     */
+    private hasGoal;
     /**
      * This method is called if there is an unhandled unexpected exception. This
      * method will mark the flow as a failure.

package/dist/esm/managers/DonobuFlow.js CHANGED Viewed

@@ -161,12 +161,13 @@ class DonobuFlow {
             try {
                 this.controlPanel.update({
                     state: this.metadata.state,
-                    availableToolNames: this.toolManager.tools.map((t) => t.name),
+                    runMode: this.metadata.runMode,
+                    overallObjective: this.metadata.overallObjective,
+                    allowedTools: this.metadata.allowedTools,
                     pendingToolCalls: this.metadata.state === 'WAITING_FOR_APPROVAL'
                         ? [...this.proposedToolCalls]
                         : undefined,
-                    runMode: this.metadata.runMode,
-                    canUseAi: this.canHandOffToAi(),
+                    hasGptClient: this.gptClient !== null,
                 });
                 switch (this.metadata.state) {
                     case 'UNSTARTED':
@@ -324,6 +325,11 @@ class DonobuFlow {
         // Set the next state based on user action
         switch (userAction.type) {
             case 'PAUSE':
+                // Pausing while an AI proposal awaits approval abandons that proposal so
+                // the user returns to a clean compose state rather than a stale prompt.
+                if (this.metadata.state === 'WAITING_FOR_APPROVAL') {
+                    this.closeOutPendingProposals('Superseded because the user paused before approving; not executed.');
+                }
                 this.metadata.state = 'PAUSED';
                 await this.targetInspector.hideInteractionCursor();
                 break;
@@ -441,9 +447,73 @@ class DonobuFlow {
                 await this.applyRunModeChange(userAction.runMode, userAction.approvePending ?? false);
                 break;
             }
+            case 'STEP': {
+                // ▶ Play: start supervised running toward the goal — the AI proposes
+                // each action and the user approves it before it runs, continuing until
+                // the objective is met or the user pauses. Needs a GPT client and a goal
+                // (the typed instruction can supply the goal).
+                if (!this.gptClient) {
+                    break;
+                }
+                // The user is directing the next move, which supersedes anything still
+                // queued (e.g. unreplayed recorded steps of a paused DETERMINISTIC run).
+                this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
+                await this.applyComposeInstruction(userAction.instruction);
+                if (!this.hasGoal()) {
+                    break;
+                }
+                this.metadata.runMode = 'SUPERVISED';
+                await this.targetInspector.showInteractionCursor();
+                this.metadata.state = 'RESUMING';
+                break;
+            }
+            case 'RUN': {
+                // ⏩ Fast-forward: run autonomously toward the goal until done/paused.
+                if (!this.gptClient) {
+                    break;
+                }
+                this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
+                await this.applyComposeInstruction(userAction.instruction);
+                if (!this.hasGoal()) {
+                    break;
+                }
+                this.metadata.runMode = 'AUTONOMOUS';
+                await this.targetInspector.showInteractionCursor();
+                this.metadata.state = 'RESUMING';
+                break;
+            }
         }
         await this.persistence.setFlowMetadata(this.metadata);
     }
+    /**
+     * Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
+     * standing goal yet, the text becomes the `overallObjective`; otherwise it's
+     * added as extra guidance. Either way it's injected into the LLM history (the
+     * system prompt was built at init, possibly before any objective existed) and
+     * recorded in the timeline. No-op for empty text.
+     */
+    async applyComposeInstruction(instruction) {
+        const text = instruction?.trim();
+        if (!text) {
+            return;
+        }
+        const settingObjective = !this.hasGoal();
+        if (settingObjective) {
+            this.metadata.overallObjective = text;
+        }
+        this.gptMessages.push({
+            type: 'user',
+            items: [
+                {
+                    type: 'text',
+                    text: settingObjective
+                        ? `Your overall objective: ${text}`
+                        : `${DonobuFlow.USER_INTERRUPT_MARKER}: ${text}`,
+                },
+            ],
+        });
+        await this.recordAdHocToolCall(text, text);
+    }
     /**
      * Closes out the currently-proposed AI tool call(s) without executing them:
      * emits a `tool_call_result` for each (so the LLM message history stays
@@ -515,10 +585,15 @@ class DonobuFlow {
             !this.canHandOffToAi()) {
             return;
         }
+        // A deliberate pause should survive a mode change: update the run mode but
+        // keep the flow parked, so it only continues when the user hits play
+        // (RESUME). Other rest points (awaiting approval, waiting on the user) are
+        // active decision points, so a switch there takes effect immediately.
+        const wasPaused = this.metadata.state === 'PAUSED';
         if (runMode === this.metadata.runMode &&
             this.proposedToolCalls.length === 0) {
             // Nothing to change.
-            this.metadata.state = 'RESUMING';
+            this.metadata.state = wasPaused ? 'PAUSED' : 'RESUMING';
             return;
         }
         const previousRunMode = this.metadata.runMode;
@@ -571,6 +646,14 @@ class DonobuFlow {
                 : 'User handed off to Donobu.';
             await this.recordAdHocToolCall(note, note);
         }
+        if (wasPaused) {
+            // Stay paused after the mode change; the user resumes deliberately with
+            // play. Leave the cursor as-is — the RESUME handler shows/hides it when
+            // the flow actually continues.
+            this.metadata.state = 'PAUSED';
+            this.metadata.nextState = 'PAUSED';
+            return;
+        }
         // The interaction cursor belongs to the AI; show it for AI modes, hide it
         // when the human takes over.
         if (runMode === 'INSTRUCT') {
@@ -584,13 +667,19 @@ class DonobuFlow {
     }
     /**
      * Whether the flow can hand control to the AI: it needs both a GPT client and
-     * an overall objective for the agent to pursue. Surfaced to the UI (as
-     * `canUseAi`) so the autonomy selector can disable the AI modes when they
-     * wouldn't work — e.g. a Playwright-imported test with no objective.
+     * a goal to pursue.
      */
     canHandOffToAi() {
-        return (this.gptClient !== null &&
-            (this.metadata.overallObjective?.trim().length ?? 0) > 0);
+        return this.gptClient !== null && this.hasGoal();
+    }
+    /**
+     * Whether there is a standing goal for the AI to pursue (a non-empty
+     * `overallObjective`). Surfaced to the UI as `hasGoal` to drive the
+     * transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
+     * and ▶ Play needs either a goal or a typed instruction.
+     */
+    hasGoal() {
+        return (this.metadata.overallObjective?.trim().length ?? 0) > 0;
     }
     /**
      * This method is called if there is an unhandled unexpected exception. This
@@ -897,9 +986,15 @@ Message: ${dialog.message()}`;
                 switch (this.metadata.runMode) {
                     case 'AUTONOMOUS':
                     case 'SUPERVISED':
-                        // The LLM is driving the flow, so ask the LLM what to do next.
-                        // (In SUPERVISED mode the proposal will then wait for approval.)
-                        nextState = 'QUERYING_LLM_FOR_NEXT_ACTION';
+                        // The LLM drives continuously toward a goal — but only if there is
+                        // one. Without a goal, rest in the compose state until the user
+                        // supplies it (via a ▶/⏩ action). SUPERVISED differs only in that
+                        // each proposed action is gated for the user's approval (see the
+                        // approval check above); it keeps proposing the next step after each
+                        // approval until the objective is met or the user pauses.
+                        nextState = this.hasGoal()
+                            ? 'QUERYING_LLM_FOR_NEXT_ACTION'
+                            : 'WAITING_ON_USER_FOR_NEXT_ACTION';
                         break;
                     case 'INSTRUCT':
                         // A user is driving the flow, so wait for them to tell us what to
@@ -1058,7 +1153,7 @@ Message: ${dialog.message()}`;
         if (!proposedToolCall) {
             return;
         }
-        // This proposal is now being executed, so its approval (if any) is spent.
+        // This proposal is being executed, so its approval (if any) is spent.
         if (proposedToolCall.toolCallId) {
             this.approvedToolCallIds.delete(proposedToolCall.toolCallId);
         }

package/dist/esm/managers/DonobuFlowsManager.js CHANGED Viewed

@@ -828,15 +828,14 @@ async function validateFlowParams(flowParams, gptClient, initialRunMode, toolReg
     switch (initialRunMode) {
         case 'AUTONOMOUS':
         case 'SUPERVISED':
-            // Both modes pursue an overall objective via an AI agent, so both need an
-            // objective and a GPT client. SUPERVISED additionally gates each
-            // AI-proposed action on user approval at runtime.
-            if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
-                throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, `'initialRunMode' has a value of '${initialRunMode}'`);
-            }
+            // Both modes are AI-driven, so both need a GPT client and an objective
+            // to pursue.
             if (!gptClient) {
                 throw new InvalidParamValueException_1.InvalidParamValueException('initialRunMode', initialRunMode, `no GPT client is available`);
             }
+            if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
+                throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, 'an AI-driven flow needs an objective to pursue');
+            }
             break;
         case 'INSTRUCT':
             break;
@@ -984,7 +983,12 @@ async function prepareToolCallsForRerun(toolCalls, options, toolRegistry) {
             continue;
         }
         try {
-            proposedToolCalls.push(tool.prepareForRerun(toolCall, options));
+            // A tool returns null to exclude itself from replay (e.g. a recorded
+            // user instruction, which is a live artifact, not a replayable action).
+            const prepared = tool.prepareForRerun(toolCall, options);
+            if (prepared) {
+                proposedToolCalls.push(prepared);
+            }
         }
         catch (e) {
             Logger_1.appLogger.warn(`Failed to prepare tool call for rerun: ${JSON.stringify(toolCall)}`, e);

package/dist/esm/models/ControlPanel.d.ts CHANGED Viewed

@@ -21,24 +21,29 @@ export type UserAction = {
     type: 'SET_RUN_MODE';
     runMode: RunMode;
     approvePending?: boolean;
+} | {
+    type: 'STEP';
+    instruction?: string;
+} | {
+    type: 'RUN';
+    instruction?: string;
 };
 export type ControlPanelDataUpdate = {
     state: State;
+    runMode?: RunMode;
+    /** The flow's overall objective; the panel treats a non-empty value (or a
+     * typed instruction) as a "goal", which gates ⏩ Fast-forward and ▶ Play. */
+    overallObjective?: string | null;
+    /** The tools the flow can actually run (resolved from the ToolManager). */
+    allowedTools?: string[] | null;
     headline?: string;
-    /** Names of tools loaded in the flow's ToolManager. Surfaced to the UI so
-     * the control panel can offer only tools the flow can actually run. */
-    availableToolNames?: string[];
-    /** In SUPERVISED mode, the AI-proposed tool call(s) currently awaiting the
-     * user's approval. Surfaced to the UI so the user can see what they are
-     * approving or rejecting. Empty/undefined when nothing is pending. */
+    /** AI-proposed tool call(s) awaiting approval (SUPERVISED). */
     pendingToolCalls?: ProposedToolCall[];
-    /** The flow's current run mode, so the UI can render and drive the autonomy
-     * selector (Manual/Supervised/Autonomous). */
-    runMode?: RunMode;
-    /** Whether AI-driven modes (Autonomous/Supervised) are available — i.e. the
-     * flow has a GPT client. False for purely manual flows, so the UI can disable
-     * those options on the autonomy selector. */
-    canUseAi?: boolean;
+    /** Whether the flow has a GPT client (AI available at all). Not cleanly a
+     * FlowMetadata field — a client can come from env/default config, not just a
+     * named gptConfig — so it's surfaced explicitly. Drives whether the compose
+     * surface and the ▶/⏩ transport are offered at all. */
+    hasGptClient?: boolean;
 };
 export interface ControlPanel {
     /** Cheap, idempotent render update. */

package/dist/esm/tools/AcknowledgeUserInstruction.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { z } from 'zod/v4';
+import type { ProposedToolCall } from '../models/ProposedToolCall';
 import type { ToolCallContext } from '../models/ToolCallContext';
 import type { ToolCallResult } from '../models/ToolCallResult';
 import { Tool } from './Tool';
@@ -20,5 +21,10 @@ export declare class AcknowledgeUserInstructionTool extends Tool<typeof Acknowle
     constructor();
     call(_context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionCoreSchema>): Promise<ToolCallResult>;
     callFromGpt(context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionGptSchema>): Promise<ToolCallResult>;
+    /**
+     * A user instruction is a live-interaction artifact, not a replayable action,
+     * so it is excluded from reruns and generated scripts.
+     */
+    prepareForRerun(): ProposedToolCall | null;
 }
 //# sourceMappingURL=AcknowledgeUserInstruction.d.ts.map

package/dist/esm/tools/AcknowledgeUserInstruction.js CHANGED Viewed

@@ -33,6 +33,13 @@ class AcknowledgeUserInstructionTool extends Tool_1.Tool {
     async callFromGpt(context, parameters) {
         return this.call(context, parameters);
     }
+    /**
+     * A user instruction is a live-interaction artifact, not a replayable action,
+     * so it is excluded from reruns and generated scripts.
+     */
+    prepareForRerun() {
+        return null;
+    }
 }
 exports.AcknowledgeUserInstructionTool = AcknowledgeUserInstructionTool;
 AcknowledgeUserInstructionTool.NAME = 'acknowledgeUserInstruction';

package/dist/esm/tools/Tool.d.ts CHANGED Viewed

@@ -54,14 +54,17 @@ export declare abstract class Tool<CallSchema extends z.ZodObject, CallFromGptSc
     previewInteraction(_context: ToolCallContext, _parameters: Record<string, unknown>): Promise<void>;
     /**
      * Transform a completed tool call into a {@link ProposedToolCall} suitable
-     * for deterministic replay / code generation.
+     * for deterministic replay / code generation, or `null` to exclude the call
+     * from replay entirely.
      *
      * The default implementation is a passthrough — `{ name, parameters }` —
      * which is correct for tools that have no replay-specific logic
      * (waits, assertions, markers, etc.). Tools that need to hoist
      * selector metadata out of their outcome, strip LLM-only fields, or
-     * otherwise rewrite themselves override this method.
+     * otherwise rewrite themselves override this method. Tools that record
+     * live-interaction artifacts rather than replayable actions (e.g. a user
+     * instruction) return `null` so they don't reappear on reruns.
      */
-    prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall;
+    prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall | null;
 }
 //# sourceMappingURL=Tool.d.ts.map

package/dist/esm/tools/Tool.js CHANGED Viewed

@@ -45,13 +45,16 @@ class Tool {
     async previewInteraction(_context, _parameters) { }
     /**
      * Transform a completed tool call into a {@link ProposedToolCall} suitable
-     * for deterministic replay / code generation.
+     * for deterministic replay / code generation, or `null` to exclude the call
+     * from replay entirely.
      *
      * The default implementation is a passthrough — `{ name, parameters }` —
      * which is correct for tools that have no replay-specific logic
      * (waits, assertions, markers, etc.). Tools that need to hoist
      * selector metadata out of their outcome, strip LLM-only fields, or
-     * otherwise rewrite themselves override this method.
+     * otherwise rewrite themselves override this method. Tools that record
+     * live-interaction artifacts rather than replayable actions (e.g. a user
+     * instruction) return `null` so they don't reappear on reruns.
      */
     prepareForRerun(toolCall, _options) {
         return {

package/dist/managers/DonobuFlow.d.ts CHANGED Viewed

@@ -134,6 +134,14 @@ export declare class DonobuFlow {
      * Note that this *bypasses* the normal state transition logic!
      */
     private onUserInterruption;
+    /**
+     * Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
+     * standing goal yet, the text becomes the `overallObjective`; otherwise it's
+     * added as extra guidance. Either way it's injected into the LLM history (the
+     * system prompt was built at init, possibly before any objective existed) and
+     * recorded in the timeline. No-op for empty text.
+     */
+    private applyComposeInstruction;
     /**
      * Closes out the currently-proposed AI tool call(s) without executing them:
      * emits a `tool_call_result` for each (so the LLM message history stays
@@ -163,11 +171,16 @@ export declare class DonobuFlow {
     private applyRunModeChange;
     /**
      * Whether the flow can hand control to the AI: it needs both a GPT client and
-     * an overall objective for the agent to pursue. Surfaced to the UI (as
-     * `canUseAi`) so the autonomy selector can disable the AI modes when they
-     * wouldn't work — e.g. a Playwright-imported test with no objective.
+     * a goal to pursue.
      */
     private canHandOffToAi;
+    /**
+     * Whether there is a standing goal for the AI to pursue (a non-empty
+     * `overallObjective`). Surfaced to the UI as `hasGoal` to drive the
+     * transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
+     * and ▶ Play needs either a goal or a typed instruction.
+     */
+    private hasGoal;
     /**
      * This method is called if there is an unhandled unexpected exception. This
      * method will mark the flow as a failure.

package/dist/managers/DonobuFlow.js CHANGED Viewed

@@ -161,12 +161,13 @@ class DonobuFlow {
             try {
                 this.controlPanel.update({
                     state: this.metadata.state,
-                    availableToolNames: this.toolManager.tools.map((t) => t.name),
+                    runMode: this.metadata.runMode,
+                    overallObjective: this.metadata.overallObjective,
+                    allowedTools: this.metadata.allowedTools,
                     pendingToolCalls: this.metadata.state === 'WAITING_FOR_APPROVAL'
                         ? [...this.proposedToolCalls]
                         : undefined,
-                    runMode: this.metadata.runMode,
-                    canUseAi: this.canHandOffToAi(),
+                    hasGptClient: this.gptClient !== null,
                 });
                 switch (this.metadata.state) {
                     case 'UNSTARTED':
@@ -324,6 +325,11 @@ class DonobuFlow {
         // Set the next state based on user action
         switch (userAction.type) {
             case 'PAUSE':
+                // Pausing while an AI proposal awaits approval abandons that proposal so
+                // the user returns to a clean compose state rather than a stale prompt.
+                if (this.metadata.state === 'WAITING_FOR_APPROVAL') {
+                    this.closeOutPendingProposals('Superseded because the user paused before approving; not executed.');
+                }
                 this.metadata.state = 'PAUSED';
                 await this.targetInspector.hideInteractionCursor();
                 break;
@@ -441,9 +447,73 @@ class DonobuFlow {
                 await this.applyRunModeChange(userAction.runMode, userAction.approvePending ?? false);
                 break;
             }
+            case 'STEP': {
+                // ▶ Play: start supervised running toward the goal — the AI proposes
+                // each action and the user approves it before it runs, continuing until
+                // the objective is met or the user pauses. Needs a GPT client and a goal
+                // (the typed instruction can supply the goal).
+                if (!this.gptClient) {
+                    break;
+                }
+                // The user is directing the next move, which supersedes anything still
+                // queued (e.g. unreplayed recorded steps of a paused DETERMINISTIC run).
+                this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
+                await this.applyComposeInstruction(userAction.instruction);
+                if (!this.hasGoal()) {
+                    break;
+                }
+                this.metadata.runMode = 'SUPERVISED';
+                await this.targetInspector.showInteractionCursor();
+                this.metadata.state = 'RESUMING';
+                break;
+            }
+            case 'RUN': {
+                // ⏩ Fast-forward: run autonomously toward the goal until done/paused.
+                if (!this.gptClient) {
+                    break;
+                }
+                this.closeOutPendingProposals('Superseded by the user directing the next action; not executed.');
+                await this.applyComposeInstruction(userAction.instruction);
+                if (!this.hasGoal()) {
+                    break;
+                }
+                this.metadata.runMode = 'AUTONOMOUS';
+                await this.targetInspector.showInteractionCursor();
+                this.metadata.state = 'RESUMING';
+                break;
+            }
         }
         await this.persistence.setFlowMetadata(this.metadata);
     }
+    /**
+     * Incorporates the compose-field text from a ▶/⏩ action: if the flow has no
+     * standing goal yet, the text becomes the `overallObjective`; otherwise it's
+     * added as extra guidance. Either way it's injected into the LLM history (the
+     * system prompt was built at init, possibly before any objective existed) and
+     * recorded in the timeline. No-op for empty text.
+     */
+    async applyComposeInstruction(instruction) {
+        const text = instruction?.trim();
+        if (!text) {
+            return;
+        }
+        const settingObjective = !this.hasGoal();
+        if (settingObjective) {
+            this.metadata.overallObjective = text;
+        }
+        this.gptMessages.push({
+            type: 'user',
+            items: [
+                {
+                    type: 'text',
+                    text: settingObjective
+                        ? `Your overall objective: ${text}`
+                        : `${DonobuFlow.USER_INTERRUPT_MARKER}: ${text}`,
+                },
+            ],
+        });
+        await this.recordAdHocToolCall(text, text);
+    }
     /**
      * Closes out the currently-proposed AI tool call(s) without executing them:
      * emits a `tool_call_result` for each (so the LLM message history stays
@@ -515,10 +585,15 @@ class DonobuFlow {
             !this.canHandOffToAi()) {
             return;
         }
+        // A deliberate pause should survive a mode change: update the run mode but
+        // keep the flow parked, so it only continues when the user hits play
+        // (RESUME). Other rest points (awaiting approval, waiting on the user) are
+        // active decision points, so a switch there takes effect immediately.
+        const wasPaused = this.metadata.state === 'PAUSED';
         if (runMode === this.metadata.runMode &&
             this.proposedToolCalls.length === 0) {
             // Nothing to change.
-            this.metadata.state = 'RESUMING';
+            this.metadata.state = wasPaused ? 'PAUSED' : 'RESUMING';
             return;
         }
         const previousRunMode = this.metadata.runMode;
@@ -571,6 +646,14 @@ class DonobuFlow {
                 : 'User handed off to Donobu.';
             await this.recordAdHocToolCall(note, note);
         }
+        if (wasPaused) {
+            // Stay paused after the mode change; the user resumes deliberately with
+            // play. Leave the cursor as-is — the RESUME handler shows/hides it when
+            // the flow actually continues.
+            this.metadata.state = 'PAUSED';
+            this.metadata.nextState = 'PAUSED';
+            return;
+        }
         // The interaction cursor belongs to the AI; show it for AI modes, hide it
         // when the human takes over.
         if (runMode === 'INSTRUCT') {
@@ -584,13 +667,19 @@ class DonobuFlow {
     }
     /**
      * Whether the flow can hand control to the AI: it needs both a GPT client and
-     * an overall objective for the agent to pursue. Surfaced to the UI (as
-     * `canUseAi`) so the autonomy selector can disable the AI modes when they
-     * wouldn't work — e.g. a Playwright-imported test with no objective.
+     * a goal to pursue.
      */
     canHandOffToAi() {
-        return (this.gptClient !== null &&
-            (this.metadata.overallObjective?.trim().length ?? 0) > 0);
+        return this.gptClient !== null && this.hasGoal();
+    }
+    /**
+     * Whether there is a standing goal for the AI to pursue (a non-empty
+     * `overallObjective`). Surfaced to the UI as `hasGoal` to drive the
+     * transport: ⏩ Fast-forward (autonomous run) is only offered with a goal,
+     * and ▶ Play needs either a goal or a typed instruction.
+     */
+    hasGoal() {
+        return (this.metadata.overallObjective?.trim().length ?? 0) > 0;
     }
     /**
      * This method is called if there is an unhandled unexpected exception. This
@@ -897,9 +986,15 @@ Message: ${dialog.message()}`;
                 switch (this.metadata.runMode) {
                     case 'AUTONOMOUS':
                     case 'SUPERVISED':
-                        // The LLM is driving the flow, so ask the LLM what to do next.
-                        // (In SUPERVISED mode the proposal will then wait for approval.)
-                        nextState = 'QUERYING_LLM_FOR_NEXT_ACTION';
+                        // The LLM drives continuously toward a goal — but only if there is
+                        // one. Without a goal, rest in the compose state until the user
+                        // supplies it (via a ▶/⏩ action). SUPERVISED differs only in that
+                        // each proposed action is gated for the user's approval (see the
+                        // approval check above); it keeps proposing the next step after each
+                        // approval until the objective is met or the user pauses.
+                        nextState = this.hasGoal()
+                            ? 'QUERYING_LLM_FOR_NEXT_ACTION'
+                            : 'WAITING_ON_USER_FOR_NEXT_ACTION';
                         break;
                     case 'INSTRUCT':
                         // A user is driving the flow, so wait for them to tell us what to
@@ -1058,7 +1153,7 @@ Message: ${dialog.message()}`;
         if (!proposedToolCall) {
             return;
         }
-        // This proposal is now being executed, so its approval (if any) is spent.
+        // This proposal is being executed, so its approval (if any) is spent.
         if (proposedToolCall.toolCallId) {
             this.approvedToolCallIds.delete(proposedToolCall.toolCallId);
         }

package/dist/managers/DonobuFlowsManager.js CHANGED Viewed

@@ -828,15 +828,14 @@ async function validateFlowParams(flowParams, gptClient, initialRunMode, toolReg
     switch (initialRunMode) {
         case 'AUTONOMOUS':
         case 'SUPERVISED':
-            // Both modes pursue an overall objective via an AI agent, so both need an
-            // objective and a GPT client. SUPERVISED additionally gates each
-            // AI-proposed action on user approval at runtime.
-            if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
-                throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, `'initialRunMode' has a value of '${initialRunMode}'`);
-            }
+            // Both modes are AI-driven, so both need a GPT client and an objective
+            // to pursue.
             if (!gptClient) {
                 throw new InvalidParamValueException_1.InvalidParamValueException('initialRunMode', initialRunMode, `no GPT client is available`);
             }
+            if ((flowParams.overallObjective?.trim().length ?? 0) === 0) {
+                throw new InvalidParamValueException_1.InvalidParamValueException('overallObjective', flowParams.overallObjective, 'an AI-driven flow needs an objective to pursue');
+            }
             break;
         case 'INSTRUCT':
             break;
@@ -984,7 +983,12 @@ async function prepareToolCallsForRerun(toolCalls, options, toolRegistry) {
             continue;
         }
         try {
-            proposedToolCalls.push(tool.prepareForRerun(toolCall, options));
+            // A tool returns null to exclude itself from replay (e.g. a recorded
+            // user instruction, which is a live artifact, not a replayable action).
+            const prepared = tool.prepareForRerun(toolCall, options);
+            if (prepared) {
+                proposedToolCalls.push(prepared);
+            }
         }
         catch (e) {
             Logger_1.appLogger.warn(`Failed to prepare tool call for rerun: ${JSON.stringify(toolCall)}`, e);

package/dist/models/ControlPanel.d.ts CHANGED Viewed

@@ -21,24 +21,29 @@ export type UserAction = {
     type: 'SET_RUN_MODE';
     runMode: RunMode;
     approvePending?: boolean;
+} | {
+    type: 'STEP';
+    instruction?: string;
+} | {
+    type: 'RUN';
+    instruction?: string;
 };
 export type ControlPanelDataUpdate = {
     state: State;
+    runMode?: RunMode;
+    /** The flow's overall objective; the panel treats a non-empty value (or a
+     * typed instruction) as a "goal", which gates ⏩ Fast-forward and ▶ Play. */
+    overallObjective?: string | null;
+    /** The tools the flow can actually run (resolved from the ToolManager). */
+    allowedTools?: string[] | null;
     headline?: string;
-    /** Names of tools loaded in the flow's ToolManager. Surfaced to the UI so
-     * the control panel can offer only tools the flow can actually run. */
-    availableToolNames?: string[];
-    /** In SUPERVISED mode, the AI-proposed tool call(s) currently awaiting the
-     * user's approval. Surfaced to the UI so the user can see what they are
-     * approving or rejecting. Empty/undefined when nothing is pending. */
+    /** AI-proposed tool call(s) awaiting approval (SUPERVISED). */
     pendingToolCalls?: ProposedToolCall[];
-    /** The flow's current run mode, so the UI can render and drive the autonomy
-     * selector (Manual/Supervised/Autonomous). */
-    runMode?: RunMode;
-    /** Whether AI-driven modes (Autonomous/Supervised) are available — i.e. the
-     * flow has a GPT client. False for purely manual flows, so the UI can disable
-     * those options on the autonomy selector. */
-    canUseAi?: boolean;
+    /** Whether the flow has a GPT client (AI available at all). Not cleanly a
+     * FlowMetadata field — a client can come from env/default config, not just a
+     * named gptConfig — so it's surfaced explicitly. Drives whether the compose
+     * surface and the ▶/⏩ transport are offered at all. */
+    hasGptClient?: boolean;
 };
 export interface ControlPanel {
     /** Cheap, idempotent render update. */

package/dist/tools/AcknowledgeUserInstruction.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { z } from 'zod/v4';
+import type { ProposedToolCall } from '../models/ProposedToolCall';
 import type { ToolCallContext } from '../models/ToolCallContext';
 import type { ToolCallResult } from '../models/ToolCallResult';
 import { Tool } from './Tool';
@@ -20,5 +21,10 @@ export declare class AcknowledgeUserInstructionTool extends Tool<typeof Acknowle
     constructor();
     call(_context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionCoreSchema>): Promise<ToolCallResult>;
     callFromGpt(context: ToolCallContext, parameters: z.infer<typeof AcknowledgeUserInstructionGptSchema>): Promise<ToolCallResult>;
+    /**
+     * A user instruction is a live-interaction artifact, not a replayable action,
+     * so it is excluded from reruns and generated scripts.
+     */
+    prepareForRerun(): ProposedToolCall | null;
 }
 //# sourceMappingURL=AcknowledgeUserInstruction.d.ts.map

package/dist/tools/AcknowledgeUserInstruction.js CHANGED Viewed

@@ -33,6 +33,13 @@ class AcknowledgeUserInstructionTool extends Tool_1.Tool {
     async callFromGpt(context, parameters) {
         return this.call(context, parameters);
     }
+    /**
+     * A user instruction is a live-interaction artifact, not a replayable action,
+     * so it is excluded from reruns and generated scripts.
+     */
+    prepareForRerun() {
+        return null;
+    }
 }
 exports.AcknowledgeUserInstructionTool = AcknowledgeUserInstructionTool;
 AcknowledgeUserInstructionTool.NAME = 'acknowledgeUserInstruction';

package/dist/tools/Tool.d.ts CHANGED Viewed

@@ -54,14 +54,17 @@ export declare abstract class Tool<CallSchema extends z.ZodObject, CallFromGptSc
     previewInteraction(_context: ToolCallContext, _parameters: Record<string, unknown>): Promise<void>;
     /**
      * Transform a completed tool call into a {@link ProposedToolCall} suitable
-     * for deterministic replay / code generation.
+     * for deterministic replay / code generation, or `null` to exclude the call
+     * from replay entirely.
      *
      * The default implementation is a passthrough — `{ name, parameters }` —
      * which is correct for tools that have no replay-specific logic
      * (waits, assertions, markers, etc.). Tools that need to hoist
      * selector metadata out of their outcome, strip LLM-only fields, or
-     * otherwise rewrite themselves override this method.
+     * otherwise rewrite themselves override this method. Tools that record
+     * live-interaction artifacts rather than replayable actions (e.g. a user
+     * instruction) return `null` so they don't reappear on reruns.
      */
-    prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall;
+    prepareForRerun(toolCall: ToolCall, _options: CodeGenerationOptions): ProposedToolCall | null;
 }
 //# sourceMappingURL=Tool.d.ts.map

package/dist/tools/Tool.js CHANGED Viewed

@@ -45,13 +45,16 @@ class Tool {
     async previewInteraction(_context, _parameters) { }
     /**
      * Transform a completed tool call into a {@link ProposedToolCall} suitable
-     * for deterministic replay / code generation.
+     * for deterministic replay / code generation, or `null` to exclude the call
+     * from replay entirely.
      *
      * The default implementation is a passthrough — `{ name, parameters }` —
      * which is correct for tools that have no replay-specific logic
      * (waits, assertions, markers, etc.). Tools that need to hoist
      * selector metadata out of their outcome, strip LLM-only fields, or
-     * otherwise rewrite themselves override this method.
+     * otherwise rewrite themselves override this method. Tools that record
+     * live-interaction artifacts rather than replayable actions (e.g. a user
+     * instruction) return `null` so they don't reappear on reruns.
      */
     prepareForRerun(toolCall, _options) {
         return {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "donobu",
-  "version": "5.55.0",
+  "version": "5.56.0",
   "description": "Create browser automations with an LLM agent and replay them as Playwright scripts.",
   "main": "dist/main.js",
   "module": "dist/esm/main.js",