npm - @loadmill/droid-cua - Versions diffs - 2.3.0 → 2.5.0 - Mend

@loadmill/droid-cua 2.3.0 → 2.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (26)

package/README.md +13 -0
package/build/index.js +10 -2
package/build/src/cli/headless-execution-config.js +33 -1
package/build/src/commands/help.js +4 -0
package/build/src/commands/run.js +1 -1
package/build/src/core/execution-engine.js +85 -6
package/build/src/core/prompts.js +3 -279
package/build/src/device/android/actions.js +11 -7
package/build/src/device/assertions.js +1 -21
package/build/src/device/cloud/actions.js +13 -8
package/build/src/device/ios/actions.js +13 -9
package/build/src/device/openai.js +8 -113
package/build/src/device/screenshot-resolution.js +33 -0
package/build/src/device/scroll-gesture.js +20 -0
package/build/src/integrations/loadmill/interpreter.js +3 -56
package/build/src/modes/design-mode-ink.js +12 -17
package/build/src/modes/design-mode.js +12 -17
package/build/src/modes/execution-mode.js +20 -17
package/build/src/prompts/base.js +139 -0
package/build/src/prompts/design.js +115 -0
package/build/src/prompts/editor.js +19 -0
package/build/src/prompts/execution.js +182 -0
package/build/src/prompts/loadmill.js +60 -0
package/build/src/test-store/test-manager.js +3 -5
package/build/src/test-store/test-script.js +50 -0
package/package.json +1 -1

package/build/src/device/assertions.js CHANGED Viewed

@@ -2,6 +2,7 @@
  * Assertion handling for script validation
  */
 import { printCliOutput } from "../utils/console-output.js";
+export { buildAssertionSystemPrompt } from "../prompts/execution.js";
 export function isAssertion(userInput) {
     const trimmed = userInput.trim();
     const lower = trimmed.toLowerCase();
@@ -20,27 +21,6 @@ export function extractAssertionPrompt(userInput) {
     }
     return trimmed;
 }
-export function buildAssertionSystemPrompt(baseSystemPrompt, assertionPrompt) {
-    return `${baseSystemPrompt}
-ASSERTION MODE:
-You are now validating an assertion. The user has provided an assertion statement that you must verify.
-Your task:
-1. Take screenshots and perform LIMITED actions if needed to validate the assertion.
-2. Determine if the assertion is TRUE or FALSE based on the current state.
-3. You MUST respond with a clear verdict in this exact format:
-   - If the assertion is true, include the text: "ASSERTION RESULT: PASS"
-   - If the assertion is false or cannot be confidently validated, include: "ASSERTION RESULT: FAIL"
-4. After the verdict, provide a brief explanation (1-2 sentences) of why it passed or failed.
-The assertion to validate is: "${assertionPrompt}"
-Remember:
-- If you cannot confidently validate the assertion, treat it as FAIL.
-- You must include either "ASSERTION RESULT: PASS" or "ASSERTION RESULT: FAIL" in your response.
-- Be thorough but efficient. Only take the actions necessary to validate the assertion.`;
-}
 export function checkAssertionResult(transcript) {
     const transcriptText = transcript.join("\n");
     const hasPassed = transcriptText.includes("ASSERTION RESULT: PASS");

package/build/src/device/cloud/actions.js CHANGED Viewed

@@ -2,6 +2,7 @@ import { logger } from "../../utils/logger.js";
 import { emitDesktopDebug, truncateForDebug } from "../../utils/desktop-debug.js";
 import { getConfiguredStepDelayMs } from "../../utils/step-delay.js";
 import { getActiveSession, getDevicePixelRatio } from "./connection.js";
+import { resolveScrollGesture } from "../scroll-gesture.js";
 function normalizeMobileKeypress(platform, keys = []) {
     if (!Array.isArray(keys) || keys.length === 0) {
         throw new Error("Keypress action is missing keys");
@@ -93,14 +94,18 @@ export async function handleModelAction(deviceId, action, scale = 1.0, context =
                 break;
             }
             case "scroll": {
-                const scrollX = Math.round((action.scroll_x / scale) / dpr);
-                const scrollY = Math.round((action.scroll_y / scale) / dpr);
-                const centerX = 200;
-                const centerY = 400;
-                const endX = centerX + scrollX;
-                const endY = centerY - scrollY;
-                addOutput({ type: "action", text: `Scrolling by (${scrollX}, ${scrollY})`, ...meta({ scrollX, scrollY }) });
-                await session.client.scroll(session.sessionId, centerX, centerY, endX, endY);
+                const { scrollX, scrollY, startX, startY, endX, endY, hasAnchor } = resolveScrollGesture(action, {
+                    scale,
+                    dpr,
+                    fallbackStartX: 200,
+                    fallbackStartY: 400
+                });
+                addOutput({
+                    type: "action",
+                    text: `Scrolling from (${startX}, ${startY}) to (${endX}, ${endY}) by (${scrollX}, ${scrollY})`,
+                    ...meta({ scrollX, scrollY, startX, startY, endX, endY, anchorSource: hasAnchor ? "action" : "fallback" })
+                });
+                await session.client.scroll(session.sessionId, startX, startY, endX, endY);
                 break;
             }
             case "drag": {

package/build/src/device/ios/actions.js CHANGED Viewed

@@ -8,6 +8,7 @@ import { getActiveSession, getDevicePixelRatio } from "./connection.js";
 import { logger } from "../../utils/logger.js";
 import { emitDesktopDebug, truncateForDebug } from "../../utils/desktop-debug.js";
 import { getConfiguredStepDelayMs } from "../../utils/step-delay.js";
+import { resolveScrollGesture } from "../scroll-gesture.js";
 function normalizeMobileKeypress(keys = []) {
     if (!Array.isArray(keys) || keys.length === 0) {
         throw new Error("Keypress action is missing keys");
@@ -92,15 +93,18 @@ export async function handleModelAction(simulatorId, action, scale = 1.0, contex
             }
             case "scroll": {
                 const dpr = getDevicePixelRatio();
-                const scrollX = Math.round((action.scroll_x / scale) / dpr);
-                const scrollY = Math.round((action.scroll_y / scale) / dpr);
-                addOutput({ type: "action", text: `Scrolling by (${scrollX}, ${scrollY}) points`, ...meta({ scrollX, scrollY, unit: "points" }) });
-                // Start from center of screen (in logical points)
-                const centerX = 197; // Center of iPhone 16 (393/2)
-                const centerY = 426; // Center of iPhone 16 (852/2)
-                const endX = centerX + scrollX;
-                const endY = centerY - scrollY; // Invert Y for natural scrolling
-                await appium.scroll(session.sessionId, centerX, centerY, endX, endY);
+                const { scrollX, scrollY, startX, startY, endX, endY, hasAnchor } = resolveScrollGesture(action, {
+                    scale,
+                    dpr,
+                    fallbackStartX: 197,
+                    fallbackStartY: 426
+                });
+                addOutput({
+                    type: "action",
+                    text: `Scrolling from (${startX}, ${startY}) to (${endX}, ${endY}) by (${scrollX}, ${scrollY}) points`,
+                    ...meta({ scrollX, scrollY, startX, startY, endX, endY, anchorSource: hasAnchor ? "action" : "fallback", unit: "points" })
+                });
+                await appium.scroll(session.sessionId, startX, startY, endX, endY);
                 break;
             }
             case "drag": {

package/build/src/device/openai.js CHANGED Viewed

@@ -1,4 +1,6 @@
 import OpenAI from "openai";
+import { buildTestRevisionSystemPrompt } from "../prompts/editor.js";
+import { buildAppContextCompactionInput } from "../prompts/execution.js";
 import { logger } from "../utils/logger.js";
 import { CuaDebugTracer } from "../utils/cua-debug-tracer.js";
 let openai = null;
@@ -129,23 +131,7 @@ export async function reviseTestScript(originalScript, revisionRequest) {
         model: "gpt-4o",
         messages: [{
                 role: "system",
-                content: `You are editing a test script based on user feedback.
-Current test script:
-${originalScript}
-User's revision request:
-${revisionRequest}
-Apply the user's changes and output the revised test script.
-FORMAT RULES:
-- One simple instruction per line (NO numbers, NO bullets)
-- Use imperative commands: "Open X", "Click Y", "Type Z"
-- Include "assert: <condition>" lines to validate expected behavior
-- End with "exit"
-Output only the revised test script, nothing else.`
+                content: buildTestRevisionSystemPrompt(originalScript, revisionRequest)
             }]
     });
     return response.choices[0].message.content.trim();
@@ -154,102 +140,11 @@ export async function compactAppContext({ contextDocument, taskDescription, toke
     const response = await getOpenAI().responses.create({
         model: "gpt-5.4",
         temperature: 0,
-        input: [
-            {
-                role: "system",
-                content: [{
-                        type: "input_text",
-                        text: `You are compressing an app context document for a mobile testing agent.
-You will receive:
-1. A context document
-2. A test task
-Your job is to SELECT only the facts from the context document that are useful for the given task.
-The output will be injected into a system prompt with a strict token budget.
-CRITICAL:
-- Use only facts explicitly supported by the context document
-- Never invent, infer, normalize, substitute, or improve credentials, labels, screen names, button names, or numeric values
-- Preserve exact values verbatim when present in the source
-- Prefer facts that help the agent act correctly when they are not obvious from the task alone
-- Do not restate, paraphrase, summarize, or reorganize the test task
-- The output must not read like instructions or a test plan
-- Do not describe what the agent should do
-- Output only reference knowledge about the app
-- If a line could be copied from the task with minor wording changes, omit it
-- Prefer copying source facts verbatim or near-verbatim over rewriting them
-- Do not collapse multiple specific source facts into one generic summary if that removes useful distinctions
-Selection priority:
-1. Facts the agent would NOT know from the test script alone
-2. Facts that are hard to infer from screenshots
-3. Non-obvious navigation or interaction details
-4. Exact visible labels needed to act correctly
-5. Credentials and other exact values
-High-value facts:
-- exact UI labels
-- how state, mode, or account selection is performed
-- where logout is located
-- hidden or non-obvious navigation
-- which menu items are decorative or non-functional
-- screen titles and section labels used to confirm location
-- exact credentials and role labels
-Low-value facts:
-- restating the test steps
-- repeating literal values already present in the task
-- generic summaries like "approve the transaction"
-When the task involves authentication, switching state or mode, opening menus, or moving between major areas of the app, strongly prefer including:
-- how account, state, or mode selection is performed
-- exact visible labels for the relevant controls
-- where exit or sign-out actions are located
-- the screen or section labels that confirm the agent is in the right place
-Rules:
-- Output plain text only
-- No markdown, no bullet symbols, no numbering, no headers
-- Use terse, factual language: one fact per line, no filler words
-- Blank lines only to separate logical groups
-- Prefer exact visible UI labels over summaries
-- Do not describe step-by-step procedures
-- Do not restate the test workflow
-- State only facts about screens, elements, hidden interactions, entities, credentials, and navigation
-- If a useful fact is not explicitly stated in the context document, omit it
-- Include only information relevant to this task
-- Do not waste space repeating the task itself
-- If the task already states a value or action, include it only when the context adds non-obvious execution details
-- Return a short result or an empty string if little is relevant
-- Target: under ${tokenBudget} tokens
-Bad output patterns to avoid:
-- generic summaries that remove actionable details
-- lines that restate the task in generic prose
-- lines that describe obvious workflow steps instead of app knowledge
-- lines that replace exact source labels or mechanisms with broad summaries
-Good output characteristics:
-- preserves the exact label or mechanism from the source when it matters
-- keeps distinctions like dropdown vs tabs, drawer vs visible button, exact section titles, exact button text
-- includes hidden or non-obvious navigation details when relevant
-Return only the briefing text.`
-                    }]
-            },
-            {
-                role: "user",
-                content: [{
-                        type: "input_text",
-                        text: `APP CONTEXT DOCUMENT:
-${contextDocument}
-TASK:
-${taskDescription}`
-                    }]
-            }
-        ]
+        input: buildAppContextCompactionInput({
+            contextDocument,
+            taskDescription,
+            tokenBudget,
+        })
     });
     return {
         briefing: typeof response.output_text === "string" ? response.output_text.trim() : "",

package/build/src/device/screenshot-resolution.js ADDED Viewed

@@ -0,0 +1,33 @@
+export const TARGET_SCALED_WIDTH = 400;
+export const SCREENSHOT_RESOLUTION_MODE_DOWNSCALED = "downscaled";
+export const SCREENSHOT_RESOLUTION_MODE_NATIVE = "native";
+export function normalizeScreenshotResolutionMode(value) {
+    return value === SCREENSHOT_RESOLUTION_MODE_NATIVE
+        ? SCREENSHOT_RESOLUTION_MODE_NATIVE
+        : SCREENSHOT_RESOLUTION_MODE_DOWNSCALED;
+}
+export function validateScreenshotResolutionMode(value, label) {
+    if (typeof value !== "string") {
+        throw new Error(`${label} must be one of: downscaled, native.`);
+    }
+    const normalized = normalizeScreenshotResolutionMode(value);
+    if (normalized !== value) {
+        throw new Error(`${label} must be one of: downscaled, native.`);
+    }
+    return normalized;
+}
+export function buildResolutionAwareDeviceInfo({ width, height, screenshotResolutionMode, }) {
+    const normalizedMode = normalizeScreenshotResolutionMode(screenshotResolutionMode);
+    const scale = normalizedMode === SCREENSHOT_RESOLUTION_MODE_NATIVE || width <= TARGET_SCALED_WIDTH
+        ? 1.0
+        : TARGET_SCALED_WIDTH / width;
+    return {
+        scaled_width: Math.round(width * scale),
+        scaled_height: Math.round(height * scale),
+        scale,
+        screenshot_resolution_mode: normalizedMode,
+    };
+}
+export function readScreenshotResolutionModeFromEnv() {
+    return normalizeScreenshotResolutionMode(process.env.DROID_CUA_SCREENSHOT_RESOLUTION_MODE);
+}

package/build/src/device/scroll-gesture.js ADDED Viewed

@@ -0,0 +1,20 @@
+// The model returns scroll actions with `scroll_x` / `scroll_y` plus optional
+// anchor coordinates `x` / `y`. This helper converts those model coordinates
+// into backend gesture coordinates: startX/startY and endX/endY.
+export function resolveScrollGesture(action, { scale = 1.0, dpr = 1.0, fallbackStartX = 0, fallbackStartY = 0 } = {}) {
+    const divisor = scale * dpr;
+    const scrollX = Math.round((action?.scroll_x ?? 0) / divisor);
+    const scrollY = Math.round((action?.scroll_y ?? 0) / divisor);
+    const hasAnchor = Number.isFinite(action?.x) && Number.isFinite(action?.y);
+    const startX = hasAnchor ? Math.round(action.x / divisor) : Math.round(fallbackStartX);
+    const startY = hasAnchor ? Math.round(action.y / divisor) : Math.round(fallbackStartY);
+    return {
+        scrollX,
+        scrollY,
+        startX,
+        startY,
+        endX: startX + scrollX,
+        endY: startY - scrollY,
+        hasAnchor
+    };
+}

package/build/src/integrations/loadmill/interpreter.js CHANGED Viewed

@@ -2,6 +2,7 @@
  * AI-powered text interpretation for Loadmill commands
  */
 import OpenAI from "openai";
+import { buildLoadmillCommandInterpretationMessages, buildLoadmillFlowSelectionMessages, } from "../../prompts/loadmill.js";
 let openai = null;
 function getOpenAI() {
     if (!openai) {
@@ -19,39 +20,7 @@ function getOpenAI() {
 export async function interpretLoadmillCommand(userInput) {
     const response = await getOpenAI().chat.completions.create({
         model: "gpt-4o-mini",
-        messages: [
-            {
-                role: "system",
-                content: `You are a parser that extracts structured data from natural language Loadmill commands.
-Extract the following from the user's input:
-1. searchQuery: The flow name or description to search for (required). FIX any obvious typos or misspellings.
-2. parameters: Any key=value pairs mentioned (as an object)
-3. action: Either "run" (if user wants to execute) or "search" (if user just wants to find flows)
-Output JSON only, no markdown or explanation.
-Examples:
-Input: "run the checkout flow with user=test123"
-Output: {"searchQuery": "checkout flow", "parameters": {"user": "test123"}, "action": "run"}
-Input: "search for login test"
-Output: {"searchQuery": "login test", "parameters": {}, "action": "search"}
-Input: "run user authentication with email=test@example.com password=secret123"
-Output: {"searchQuery": "user authentication", "parameters": {"email": "test@example.com", "password": "secret123"}, "action": "run"}
-Input: "execute payment flow"
-Output: {"searchQuery": "payment flow", "parameters": {}, "action": "run"}
-Input: "create a transction with amount=200"
-Output: {"searchQuery": "transaction", "parameters": {"amount": "200"}, "action": "run"}`
-            },
-            {
-                role: "user",
-                content: userInput
-            }
-        ],
+        messages: buildLoadmillCommandInterpretationMessages(userInput),
         response_format: { type: "json_object" }
     });
     const content = response.choices[0].message.content;
@@ -84,29 +53,7 @@ export async function selectBestFlow(flows, originalQuery) {
     }).join("\n");
     const response = await getOpenAI().chat.completions.create({
         model: "gpt-4o-mini",
-        messages: [
-            {
-                role: "system",
-                content: `You are selecting the best matching test flow based on a user query.
-Given the user's query and a list of available flows, select the best match.
-Output JSON with:
-- index: 1-based index of the best matching flow
-- confidence: number between 0 and 1 indicating how confident you are
-If no flow seems to match well, set confidence to a low value (< 0.5).
-Output JSON only, no markdown.`
-            },
-            {
-                role: "user",
-                content: `Query: "${originalQuery}"
-Available flows:
-${flowList}`
-            }
-        ],
+        messages: buildLoadmillFlowSelectionMessages(originalQuery, flowList),
         response_format: { type: "json_object" }
     });
     const content = response.choices[0].message.content;

package/build/src/modes/design-mode-ink.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { getScreenshotAsBase64 } from "../device/connection.js";
 import { sendCUARequest, reviseTestScript } from "../device/openai.js";
-import { buildDesignModePrompt } from "../core/prompts.js";
+import { buildDesignModePrompt, buildDesignRecoveryPrompt } from "../core/prompts.js";
 import { saveTest } from "../test-store/test-manager.js";
 import { logger } from "../utils/logger.js";
 /**
@@ -20,6 +20,7 @@ export class DesignModeInk {
         this.waitingForInput = false; // Flag to indicate we're explicitly waiting for input
         this.inputResolver = null; // Promise resolver for input
         this.initialUserPrompt = null; // Store initial prompt for error recovery
+        this.baseDesignPrompt = null;
         this.consecutiveErrorCount = 0;
         this.maxConsecutiveErrors = 3;
     }
@@ -30,7 +31,10 @@ export class DesignModeInk {
     async start() {
         const addOutput = this.context.addOutput;
         // Set design mode system prompt
-        const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
+        const designPrompt = buildDesignModePrompt(this.session.deviceInfo, {}, {
+            strictMode: Boolean(this.engine?.strictMode)
+        });
+        this.baseDesignPrompt = designPrompt;
         this.session.setSystemPrompt(designPrompt);
         // Update UI
         if (this.context.setMode) {
@@ -330,21 +334,12 @@ export class DesignModeInk {
                 }
                 // Automatic recovery - continue from where we left off using transcript
                 addOutput({ type: 'info', text: 'Recovering from error and continuing...' });
-                // Build recovery context with transcript
-                const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
-                const recoveryContext = `${designPrompt}
-RECOVERY MODE:
-The previous session encountered an error and was interrupted. Here is everything that happened so far:
-${this.session.getTranscriptText()}
-Continue from where we left off and complete the original task: "${this.initialUserPrompt}"
-Remember:
-- Don't repeat actions that already succeeded
-- Continue towards generating the test script
-- If the flow was complete before the error, generate the script now`;
+                const recoveryContext = buildDesignRecoveryPrompt({
+                    basePrompt: this.baseDesignPrompt || this.session.systemPrompt || buildDesignModePrompt(this.session.deviceInfo),
+                    transcript: this.session.getTranscriptText(),
+                    objective: this.initialUserPrompt,
+                    errorMessage: err.message
+                });
                 // Reset conversation state for fresh API call
                 this.session.clearMessages();
                 this.session.addMessage("system", recoveryContext);

package/build/src/modes/design-mode.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import readline from "readline";
 import { getScreenshotAsBase64 } from "../device/connection.js";
 import { sendCUARequest, reviseTestScript } from "../device/openai.js";
-import { buildDesignModePrompt } from "../core/prompts.js";
+import { buildDesignModePrompt, buildDesignRecoveryPrompt } from "../core/prompts.js";
 import { saveTest } from "../test-store/test-manager.js";
 import { logger } from "../utils/logger.js";
 /**
@@ -17,6 +17,7 @@ export class DesignMode {
         this.escPressed = false;
         this.recentActions = []; // Track recent actions for stuck detection
         this.initialUserPrompt = null; // Store initial prompt for error recovery
+        this.baseDesignPrompt = null;
         this.consecutiveErrorCount = 0;
         this.maxConsecutiveErrors = 3;
     }
@@ -27,7 +28,10 @@ export class DesignMode {
      */
     async start(context) {
         // Set design mode system prompt
-        const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
+        const designPrompt = buildDesignModePrompt(this.session.deviceInfo, {}, {
+            strictMode: Boolean(this.engine?.strictMode)
+        });
+        this.baseDesignPrompt = designPrompt;
         this.session.setSystemPrompt(designPrompt);
         console.log(`\n=== Design Mode: Creating test "${this.testName}" ===`);
         console.log("Describe what you want to test. The agent will explore autonomously.");
@@ -314,21 +318,12 @@ export class DesignMode {
                 }
                 // Automatic recovery - continue from where we left off using transcript
                 console.log("\nRecovering from error and continuing...");
-                // Build recovery context with transcript
-                const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
-                const recoveryContext = `${designPrompt}
-RECOVERY MODE:
-The previous session encountered an error and was interrupted. Here is everything that happened so far:
-${this.session.getTranscriptText()}
-Continue from where we left off and complete the original task: "${this.initialUserPrompt}"
-Remember:
-- Don't repeat actions that already succeeded
-- Continue towards generating the test script
-- If the flow was complete before the error, generate the script now`;
+                const recoveryContext = buildDesignRecoveryPrompt({
+                    basePrompt: this.baseDesignPrompt || this.session.systemPrompt || buildDesignModePrompt(this.session.deviceInfo),
+                    transcript: this.session.getTranscriptText(),
+                    objective: this.initialUserPrompt,
+                    errorMessage: err.message
+                });
                 // Reset conversation state for fresh API call
                 this.session.clearMessages();
                 this.session.addMessage("system", recoveryContext);

package/build/src/modes/execution-mode.js CHANGED Viewed

@@ -1,10 +1,19 @@
 import { getScreenshotAsBase64, connectToDevice, getDeviceInfo, getCurrentPlatform } from "../device/connection.js";
 import { sendCUARequest } from "../device/openai.js";
+import { buildExecutionRecoveryPrompt } from "../core/prompts.js";
 import { isAssertion, extractAssertionPrompt, buildAssertionSystemPrompt, checkAssertionResult, handleAssertionFailure, handleAssertionSuccess, } from "../device/assertions.js";
 import { isLoadmillInstruction, extractLoadmillCommand, executeLoadmillInstruction, } from "../device/loadmill.js";
 import { logger } from "../utils/logger.js";
 import { emitDesktopDebug } from "../utils/desktop-debug.js";
 import { printCliOutput } from "../utils/console-output.js";
+export function buildExecutionRequestPayload({ instruction, isAssertionStep, messages, previousResponseId }) {
+    return {
+        messagesToSend: previousResponseId && !isAssertionStep
+            ? [{ role: "user", content: instruction }]
+            : messages,
+        previousResponseIdToSend: previousResponseId,
+    };
+}
 /**
  * Execution Mode - Run test scripts line-by-line
  * Each instruction is executed in isolation (messages cleared after each turn)
@@ -210,18 +219,13 @@ export class ExecutionMode {
                 instructionIndex: stepContext?.instructionIndex,
                 captureSource: isAssertionStep ? "instruction-input-assertion" : "instruction-input"
             });
-            // When continuing with previousResponseId, only send the new instruction
-            // The server already has full context from previous responses
-            let messagesToSend;
-            const previousResponseIdToSend = isAssertionStep ? null : this.session.previousResponseId;
-            if (this.session.previousResponseId && !isAssertionStep) {
-                // Only send the new user instruction
-                messagesToSend = [{ role: "user", content: instruction }];
-            }
-            else {
-                // Fresh start or assertion - send full messages (system + user)
-                messagesToSend = this.session.messages;
-            }
+            // Assertions rely on the prior response chain for earlier execution context.
+            const { messagesToSend, previousResponseIdToSend } = buildExecutionRequestPayload({
+                instruction,
+                isAssertionStep,
+                messages: this.session.messages,
+                previousResponseId: this.session.previousResponseId
+            });
             const response = await sendCUARequest({
                 messages: messagesToSend,
                 screenshotBase64,
@@ -410,11 +414,10 @@ export class ExecutionMode {
             const transcriptContext = this.session.getTranscriptText();
             this.session.clearMessages();
             // clearMessages() restores the base system prompt, but we need to add context
-            // Build enhanced system prompt with recovery context
-            let recoverySystemPrompt = this.initialSystemText;
-            if (transcriptContext) {
-                recoverySystemPrompt += `\n\n[SESSION RECOVERY - Connection was lost. Previous actions completed before the error:]\n${transcriptContext}\n\n[IMPORTANT: Resume execution silently. Do NOT narrate or explain. Just execute the next instruction.]`;
-            }
+            const recoverySystemPrompt = buildExecutionRecoveryPrompt({
+                basePrompt: this.initialSystemText,
+                transcript: transcriptContext
+            });
             // Replace the system message with the enhanced one
             this.session.messages = [{ role: "system", content: recoverySystemPrompt }];
             this.session.updateResponseId(undefined);