@loadmill/droid-cua 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
2
2
  * Assertion handling for script validation
3
3
  */
4
4
  import { printCliOutput } from "../utils/console-output.js";
5
+ export { buildAssertionSystemPrompt } from "../prompts/execution.js";
5
6
  export function isAssertion(userInput) {
6
7
  const trimmed = userInput.trim();
7
8
  const lower = trimmed.toLowerCase();
@@ -20,27 +21,6 @@ export function extractAssertionPrompt(userInput) {
20
21
  }
21
22
  return trimmed;
22
23
  }
23
- export function buildAssertionSystemPrompt(baseSystemPrompt, assertionPrompt) {
24
- return `${baseSystemPrompt}
25
-
26
- ASSERTION MODE:
27
- You are now validating an assertion. The user has provided an assertion statement that you must verify.
28
-
29
- Your task:
30
- 1. Take screenshots and perform LIMITED actions if needed to validate the assertion.
31
- 2. Determine if the assertion is TRUE or FALSE based on the current state.
32
- 3. You MUST respond with a clear verdict in this exact format:
33
- - If the assertion is true, include the text: "ASSERTION RESULT: PASS"
34
- - If the assertion is false or cannot be confidently validated, include: "ASSERTION RESULT: FAIL"
35
- 4. After the verdict, provide a brief explanation (1-2 sentences) of why it passed or failed.
36
-
37
- The assertion to validate is: "${assertionPrompt}"
38
-
39
- Remember:
40
- - If you cannot confidently validate the assertion, treat it as FAIL.
41
- - You must include either "ASSERTION RESULT: PASS" or "ASSERTION RESULT: FAIL" in your response.
42
- - Be thorough but efficient. Only take the actions necessary to validate the assertion.`;
43
- }
44
24
  export function checkAssertionResult(transcript) {
45
25
  const transcriptText = transcript.join("\n");
46
26
  const hasPassed = transcriptText.includes("ASSERTION RESULT: PASS");
@@ -2,6 +2,7 @@ import { logger } from "../../utils/logger.js";
2
2
  import { emitDesktopDebug, truncateForDebug } from "../../utils/desktop-debug.js";
3
3
  import { getConfiguredStepDelayMs } from "../../utils/step-delay.js";
4
4
  import { getActiveSession, getDevicePixelRatio } from "./connection.js";
5
+ import { resolveScrollGesture } from "../scroll-gesture.js";
5
6
  function normalizeMobileKeypress(platform, keys = []) {
6
7
  if (!Array.isArray(keys) || keys.length === 0) {
7
8
  throw new Error("Keypress action is missing keys");
@@ -93,14 +94,18 @@ export async function handleModelAction(deviceId, action, scale = 1.0, context =
93
94
  break;
94
95
  }
95
96
  case "scroll": {
96
- const scrollX = Math.round((action.scroll_x / scale) / dpr);
97
- const scrollY = Math.round((action.scroll_y / scale) / dpr);
98
- const centerX = 200;
99
- const centerY = 400;
100
- const endX = centerX + scrollX;
101
- const endY = centerY - scrollY;
102
- addOutput({ type: "action", text: `Scrolling by (${scrollX}, ${scrollY})`, ...meta({ scrollX, scrollY }) });
103
- await session.client.scroll(session.sessionId, centerX, centerY, endX, endY);
97
+ const { scrollX, scrollY, startX, startY, endX, endY, hasAnchor } = resolveScrollGesture(action, {
98
+ scale,
99
+ dpr,
100
+ fallbackStartX: 200,
101
+ fallbackStartY: 400
102
+ });
103
+ addOutput({
104
+ type: "action",
105
+ text: `Scrolling from (${startX}, ${startY}) to (${endX}, ${endY}) by (${scrollX}, ${scrollY})`,
106
+ ...meta({ scrollX, scrollY, startX, startY, endX, endY, anchorSource: hasAnchor ? "action" : "fallback" })
107
+ });
108
+ await session.client.scroll(session.sessionId, startX, startY, endX, endY);
104
109
  break;
105
110
  }
106
111
  case "drag": {
@@ -8,6 +8,7 @@ import { getActiveSession, getDevicePixelRatio } from "./connection.js";
8
8
  import { logger } from "../../utils/logger.js";
9
9
  import { emitDesktopDebug, truncateForDebug } from "../../utils/desktop-debug.js";
10
10
  import { getConfiguredStepDelayMs } from "../../utils/step-delay.js";
11
+ import { resolveScrollGesture } from "../scroll-gesture.js";
11
12
  function normalizeMobileKeypress(keys = []) {
12
13
  if (!Array.isArray(keys) || keys.length === 0) {
13
14
  throw new Error("Keypress action is missing keys");
@@ -92,15 +93,18 @@ export async function handleModelAction(simulatorId, action, scale = 1.0, contex
92
93
  }
93
94
  case "scroll": {
94
95
  const dpr = getDevicePixelRatio();
95
- const scrollX = Math.round((action.scroll_x / scale) / dpr);
96
- const scrollY = Math.round((action.scroll_y / scale) / dpr);
97
- addOutput({ type: "action", text: `Scrolling by (${scrollX}, ${scrollY}) points`, ...meta({ scrollX, scrollY, unit: "points" }) });
98
- // Start from center of screen (in logical points)
99
- const centerX = 197; // Center of iPhone 16 (393/2)
100
- const centerY = 426; // Center of iPhone 16 (852/2)
101
- const endX = centerX + scrollX;
102
- const endY = centerY - scrollY; // Invert Y for natural scrolling
103
- await appium.scroll(session.sessionId, centerX, centerY, endX, endY);
96
+ const { scrollX, scrollY, startX, startY, endX, endY, hasAnchor } = resolveScrollGesture(action, {
97
+ scale,
98
+ dpr,
99
+ fallbackStartX: 197,
100
+ fallbackStartY: 426
101
+ });
102
+ addOutput({
103
+ type: "action",
104
+ text: `Scrolling from (${startX}, ${startY}) to (${endX}, ${endY}) by (${scrollX}, ${scrollY}) points`,
105
+ ...meta({ scrollX, scrollY, startX, startY, endX, endY, anchorSource: hasAnchor ? "action" : "fallback", unit: "points" })
106
+ });
107
+ await appium.scroll(session.sessionId, startX, startY, endX, endY);
104
108
  break;
105
109
  }
106
110
  case "drag": {
@@ -1,4 +1,6 @@
1
1
  import OpenAI from "openai";
2
+ import { buildTestRevisionSystemPrompt } from "../prompts/editor.js";
3
+ import { buildAppContextCompactionInput } from "../prompts/execution.js";
2
4
  import { logger } from "../utils/logger.js";
3
5
  import { CuaDebugTracer } from "../utils/cua-debug-tracer.js";
4
6
  let openai = null;
@@ -129,23 +131,7 @@ export async function reviseTestScript(originalScript, revisionRequest) {
129
131
  model: "gpt-4o",
130
132
  messages: [{
131
133
  role: "system",
132
- content: `You are editing a test script based on user feedback.
133
-
134
- Current test script:
135
- ${originalScript}
136
-
137
- User's revision request:
138
- ${revisionRequest}
139
-
140
- Apply the user's changes and output the revised test script.
141
-
142
- FORMAT RULES:
143
- - One simple instruction per line (NO numbers, NO bullets)
144
- - Use imperative commands: "Open X", "Click Y", "Type Z"
145
- - Include "assert: <condition>" lines to validate expected behavior
146
- - End with "exit"
147
-
148
- Output only the revised test script, nothing else.`
134
+ content: buildTestRevisionSystemPrompt(originalScript, revisionRequest)
149
135
  }]
150
136
  });
151
137
  return response.choices[0].message.content.trim();
@@ -154,102 +140,11 @@ export async function compactAppContext({ contextDocument, taskDescription, toke
154
140
  const response = await getOpenAI().responses.create({
155
141
  model: "gpt-5.4",
156
142
  temperature: 0,
157
- input: [
158
- {
159
- role: "system",
160
- content: [{
161
- type: "input_text",
162
- text: `You are compressing an app context document for a mobile testing agent.
163
-
164
- You will receive:
165
- 1. A context document
166
- 2. A test task
167
-
168
- Your job is to SELECT only the facts from the context document that are useful for the given task.
169
- The output will be injected into a system prompt with a strict token budget.
170
-
171
- CRITICAL:
172
- - Use only facts explicitly supported by the context document
173
- - Never invent, infer, normalize, substitute, or improve credentials, labels, screen names, button names, or numeric values
174
- - Preserve exact values verbatim when present in the source
175
- - Prefer facts that help the agent act correctly when they are not obvious from the task alone
176
- - Do not restate, paraphrase, summarize, or reorganize the test task
177
- - The output must not read like instructions or a test plan
178
- - Do not describe what the agent should do
179
- - Output only reference knowledge about the app
180
- - If a line could be copied from the task with minor wording changes, omit it
181
- - Prefer copying source facts verbatim or near-verbatim over rewriting them
182
- - Do not collapse multiple specific source facts into one generic summary if that removes useful distinctions
183
-
184
- Selection priority:
185
- 1. Facts the agent would NOT know from the test script alone
186
- 2. Facts that are hard to infer from screenshots
187
- 3. Non-obvious navigation or interaction details
188
- 4. Exact visible labels needed to act correctly
189
- 5. Credentials and other exact values
190
-
191
- High-value facts:
192
- - exact UI labels
193
- - how state, mode, or account selection is performed
194
- - where logout is located
195
- - hidden or non-obvious navigation
196
- - which menu items are decorative or non-functional
197
- - screen titles and section labels used to confirm location
198
- - exact credentials and role labels
199
-
200
- Low-value facts:
201
- - restating the test steps
202
- - repeating literal values already present in the task
203
- - generic summaries like "approve the transaction"
204
-
205
- When the task involves authentication, switching state or mode, opening menus, or moving between major areas of the app, strongly prefer including:
206
- - how account, state, or mode selection is performed
207
- - exact visible labels for the relevant controls
208
- - where exit or sign-out actions are located
209
- - the screen or section labels that confirm the agent is in the right place
210
-
211
- Rules:
212
- - Output plain text only
213
- - No markdown, no bullet symbols, no numbering, no headers
214
- - Use terse, factual language: one fact per line, no filler words
215
- - Blank lines only to separate logical groups
216
- - Prefer exact visible UI labels over summaries
217
- - Do not describe step-by-step procedures
218
- - Do not restate the test workflow
219
- - State only facts about screens, elements, hidden interactions, entities, credentials, and navigation
220
- - If a useful fact is not explicitly stated in the context document, omit it
221
- - Include only information relevant to this task
222
- - Do not waste space repeating the task itself
223
- - If the task already states a value or action, include it only when the context adds non-obvious execution details
224
- - Return a short result or an empty string if little is relevant
225
- - Target: under ${tokenBudget} tokens
226
-
227
- Bad output patterns to avoid:
228
- - generic summaries that remove actionable details
229
- - lines that restate the task in generic prose
230
- - lines that describe obvious workflow steps instead of app knowledge
231
- - lines that replace exact source labels or mechanisms with broad summaries
232
-
233
- Good output characteristics:
234
- - preserves the exact label or mechanism from the source when it matters
235
- - keeps distinctions like dropdown vs tabs, drawer vs visible button, exact section titles, exact button text
236
- - includes hidden or non-obvious navigation details when relevant
237
-
238
- Return only the briefing text.`
239
- }]
240
- },
241
- {
242
- role: "user",
243
- content: [{
244
- type: "input_text",
245
- text: `APP CONTEXT DOCUMENT:
246
- ${contextDocument}
247
-
248
- TASK:
249
- ${taskDescription}`
250
- }]
251
- }
252
- ]
143
+ input: buildAppContextCompactionInput({
144
+ contextDocument,
145
+ taskDescription,
146
+ tokenBudget,
147
+ })
253
148
  });
254
149
  return {
255
150
  briefing: typeof response.output_text === "string" ? response.output_text.trim() : "",
@@ -0,0 +1,33 @@
1
/** Width, in pixels, that screenshots are scaled down to in "downscaled" mode. */
export const TARGET_SCALED_WIDTH = 400;
/** Mode in which screenshots wider than TARGET_SCALED_WIDTH are scaled down. */
export const SCREENSHOT_RESOLUTION_MODE_DOWNSCALED = "downscaled";
/** Mode in which screenshots keep their original size (scale is always 1.0). */
export const SCREENSHOT_RESOLUTION_MODE_NATIVE = "native";

/**
 * Coerce an arbitrary value to a known screenshot resolution mode.
 *
 * @param {unknown} value - Candidate mode.
 * @returns {string} "native" only when `value` is exactly "native"; every
 *   other value (including undefined) maps to "downscaled".
 */
export function normalizeScreenshotResolutionMode(value) {
  return value === SCREENSHOT_RESOLUTION_MODE_NATIVE
    ? SCREENSHOT_RESOLUTION_MODE_NATIVE
    : SCREENSHOT_RESOLUTION_MODE_DOWNSCALED;
}

/**
 * Strictly validate a screenshot resolution mode.
 *
 * Unlike normalizeScreenshotResolutionMode, unknown values are rejected
 * instead of being mapped to the default.
 *
 * @param {unknown} value - Candidate mode.
 * @param {string} label - Name of the setting, used as the error message prefix.
 * @returns {string} `value`, unchanged, when it is one of the two known modes.
 * @throws {Error} When `value` is not exactly "downscaled" or "native".
 */
export function validateScreenshotResolutionMode(value, label) {
  const isKnownMode = value === SCREENSHOT_RESOLUTION_MODE_DOWNSCALED
    || value === SCREENSHOT_RESOLUTION_MODE_NATIVE;
  if (!isKnownMode) {
    // The message is built from the constants so it cannot drift from the accepted values.
    throw new Error(
      `${label} must be one of: ${SCREENSHOT_RESOLUTION_MODE_DOWNSCALED}, ${SCREENSHOT_RESOLUTION_MODE_NATIVE}.`
    );
  }
  return value;
}

/**
 * Compute the screenshot scale factor and the resulting scaled dimensions.
 *
 * @param {object} options
 * @param {number} options.width - Original screenshot width.
 * @param {number} options.height - Original screenshot height.
 * @param {string} [options.screenshotResolutionMode] - Requested mode; values
 *   other than "native" are treated as "downscaled".
 * @returns {{scaled_width: number, scaled_height: number, scale: number, screenshot_resolution_mode: string}}
 *   `scale` is TARGET_SCALED_WIDTH / width when downscaling applies, else 1.0.
 */
export function buildResolutionAwareDeviceInfo({ width, height, screenshotResolutionMode, }) {
  const normalizedMode = normalizeScreenshotResolutionMode(screenshotResolutionMode);
  const numericWidth = Number(width);
  // Downscale only for a finite width above the target. A missing, NaN or
  // infinite width would otherwise yield a NaN or zero scale.
  const shouldDownscale = normalizedMode !== SCREENSHOT_RESOLUTION_MODE_NATIVE
    && Number.isFinite(numericWidth)
    && numericWidth > TARGET_SCALED_WIDTH;
  const scale = shouldDownscale ? TARGET_SCALED_WIDTH / numericWidth : 1.0;
  return {
    scaled_width: Math.round(width * scale),
    scaled_height: Math.round(height * scale),
    scale,
    screenshot_resolution_mode: normalizedMode,
  };
}

/**
 * Read the screenshot resolution mode from the
 * DROID_CUA_SCREENSHOT_RESOLUTION_MODE environment variable.
 *
 * @returns {string} "native" when the variable is exactly "native"; otherwise
 *   "downscaled" (including when the variable is unset).
 */
export function readScreenshotResolutionModeFromEnv() {
  return normalizeScreenshotResolutionMode(process.env.DROID_CUA_SCREENSHOT_RESOLUTION_MODE);
}
@@ -0,0 +1,20 @@
1
/**
 * Convert a model scroll action into backend gesture coordinates.
 *
 * The model returns scroll actions with `scroll_x` / `scroll_y` plus optional
 * anchor coordinates `x` / `y`. All model values are divided by `scale * dpr`
 * to obtain backend coordinates. When the action carries no finite anchor,
 * the gesture starts at the supplied fallback point, which is used as-is
 * (rounded, not divided).
 *
 * @param {object} [action] - Model action; may carry `scroll_x`, `scroll_y`, `x`, `y`.
 * @param {object} [options]
 * @param {number} [options.scale=1.0] - Screenshot scale factor.
 * @param {number} [options.dpr=1.0] - Device pixel ratio.
 * @param {number} [options.fallbackStartX=0] - Start X when the action has no anchor.
 * @param {number} [options.fallbackStartY=0] - Start Y when the action has no anchor.
 * @returns {{scrollX: number, scrollY: number, startX: number, startY: number, endX: number, endY: number, hasAnchor: boolean}}
 *   Rounded scroll deltas, gesture start/end points, and whether the start
 *   point came from the action (`true`) or from the fallback (`false`).
 */
export function resolveScrollGesture(action, { scale = 1.0, dpr = 1.0, fallbackStartX = 0, fallbackStartY = 0 } = {}) {
  // Parameter defaults only cover `undefined`. An explicit null, 0 or NaN would
  // make the divisor 0 or NaN and turn every coordinate into Infinity or NaN.
  const positiveOr = (value, fallback) => {
    const numeric = Number(value);
    return Number.isFinite(numeric) && numeric > 0 ? numeric : fallback;
  };
  const finiteOr = (value, fallback) => {
    const numeric = Number(value);
    return Number.isFinite(numeric) ? numeric : fallback;
  };

  // Re-check the product as well: two tiny factors can still underflow to 0.
  const divisor = positiveOr(positiveOr(scale, 1.0) * positiveOr(dpr, 1.0), 1.0);
  const scrollX = Math.round(finiteOr(action?.scroll_x, 0) / divisor);
  const scrollY = Math.round(finiteOr(action?.scroll_y, 0) / divisor);

  // An anchor counts only when both coordinates are real finite numbers.
  const hasAnchor = Number.isFinite(action?.x) && Number.isFinite(action?.y);
  const startX = hasAnchor ? Math.round(action.x / divisor) : Math.round(finiteOr(fallbackStartX, 0));
  const startY = hasAnchor ? Math.round(action.y / divisor) : Math.round(finiteOr(fallbackStartY, 0));

  return {
    scrollX,
    scrollY,
    startX,
    startY,
    endX: startX + scrollX,
    // Y is inverted: a positive scroll_y moves the gesture end point upward.
    endY: startY - scrollY,
    hasAnchor,
  };
}
@@ -2,6 +2,7 @@
2
2
  * AI-powered text interpretation for Loadmill commands
3
3
  */
4
4
  import OpenAI from "openai";
5
+ import { buildLoadmillCommandInterpretationMessages, buildLoadmillFlowSelectionMessages, } from "../../prompts/loadmill.js";
5
6
  let openai = null;
6
7
  function getOpenAI() {
7
8
  if (!openai) {
@@ -19,39 +20,7 @@ function getOpenAI() {
19
20
  export async function interpretLoadmillCommand(userInput) {
20
21
  const response = await getOpenAI().chat.completions.create({
21
22
  model: "gpt-4o-mini",
22
- messages: [
23
- {
24
- role: "system",
25
- content: `You are a parser that extracts structured data from natural language Loadmill commands.
26
-
27
- Extract the following from the user's input:
28
- 1. searchQuery: The flow name or description to search for (required). FIX any obvious typos or misspellings.
29
- 2. parameters: Any key=value pairs mentioned (as an object)
30
- 3. action: Either "run" (if user wants to execute) or "search" (if user just wants to find flows)
31
-
32
- Output JSON only, no markdown or explanation.
33
-
34
- Examples:
35
- Input: "run the checkout flow with user=test123"
36
- Output: {"searchQuery": "checkout flow", "parameters": {"user": "test123"}, "action": "run"}
37
-
38
- Input: "search for login test"
39
- Output: {"searchQuery": "login test", "parameters": {}, "action": "search"}
40
-
41
- Input: "run user authentication with email=test@example.com password=secret123"
42
- Output: {"searchQuery": "user authentication", "parameters": {"email": "test@example.com", "password": "secret123"}, "action": "run"}
43
-
44
- Input: "execute payment flow"
45
- Output: {"searchQuery": "payment flow", "parameters": {}, "action": "run"}
46
-
47
- Input: "create a transction with amount=200"
48
- Output: {"searchQuery": "transaction", "parameters": {"amount": "200"}, "action": "run"}`
49
- },
50
- {
51
- role: "user",
52
- content: userInput
53
- }
54
- ],
23
+ messages: buildLoadmillCommandInterpretationMessages(userInput),
55
24
  response_format: { type: "json_object" }
56
25
  });
57
26
  const content = response.choices[0].message.content;
@@ -84,29 +53,7 @@ export async function selectBestFlow(flows, originalQuery) {
84
53
  }).join("\n");
85
54
  const response = await getOpenAI().chat.completions.create({
86
55
  model: "gpt-4o-mini",
87
- messages: [
88
- {
89
- role: "system",
90
- content: `You are selecting the best matching test flow based on a user query.
91
-
92
- Given the user's query and a list of available flows, select the best match.
93
-
94
- Output JSON with:
95
- - index: 1-based index of the best matching flow
96
- - confidence: number between 0 and 1 indicating how confident you are
97
-
98
- If no flow seems to match well, set confidence to a low value (< 0.5).
99
-
100
- Output JSON only, no markdown.`
101
- },
102
- {
103
- role: "user",
104
- content: `Query: "${originalQuery}"
105
-
106
- Available flows:
107
- ${flowList}`
108
- }
109
- ],
56
+ messages: buildLoadmillFlowSelectionMessages(originalQuery, flowList),
110
57
  response_format: { type: "json_object" }
111
58
  });
112
59
  const content = response.choices[0].message.content;
@@ -1,6 +1,6 @@
1
1
  import { getScreenshotAsBase64 } from "../device/connection.js";
2
2
  import { sendCUARequest, reviseTestScript } from "../device/openai.js";
3
- import { buildDesignModePrompt } from "../core/prompts.js";
3
+ import { buildDesignModePrompt, buildDesignRecoveryPrompt } from "../core/prompts.js";
4
4
  import { saveTest } from "../test-store/test-manager.js";
5
5
  import { logger } from "../utils/logger.js";
6
6
  /**
@@ -20,6 +20,7 @@ export class DesignModeInk {
20
20
  this.waitingForInput = false; // Flag to indicate we're explicitly waiting for input
21
21
  this.inputResolver = null; // Promise resolver for input
22
22
  this.initialUserPrompt = null; // Store initial prompt for error recovery
23
+ this.baseDesignPrompt = null;
23
24
  this.consecutiveErrorCount = 0;
24
25
  this.maxConsecutiveErrors = 3;
25
26
  }
@@ -30,7 +31,10 @@ export class DesignModeInk {
30
31
  async start() {
31
32
  const addOutput = this.context.addOutput;
32
33
  // Set design mode system prompt
33
- const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
34
+ const designPrompt = buildDesignModePrompt(this.session.deviceInfo, {}, {
35
+ strictMode: Boolean(this.engine?.strictMode)
36
+ });
37
+ this.baseDesignPrompt = designPrompt;
34
38
  this.session.setSystemPrompt(designPrompt);
35
39
  // Update UI
36
40
  if (this.context.setMode) {
@@ -330,21 +334,12 @@ export class DesignModeInk {
330
334
  }
331
335
  // Automatic recovery - continue from where we left off using transcript
332
336
  addOutput({ type: 'info', text: 'Recovering from error and continuing...' });
333
- // Build recovery context with transcript
334
- const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
335
- const recoveryContext = `${designPrompt}
336
-
337
- RECOVERY MODE:
338
- The previous session encountered an error and was interrupted. Here is everything that happened so far:
339
-
340
- ${this.session.getTranscriptText()}
341
-
342
- Continue from where we left off and complete the original task: "${this.initialUserPrompt}"
343
-
344
- Remember:
345
- - Don't repeat actions that already succeeded
346
- - Continue towards generating the test script
347
- - If the flow was complete before the error, generate the script now`;
337
+ const recoveryContext = buildDesignRecoveryPrompt({
338
+ basePrompt: this.baseDesignPrompt || this.session.systemPrompt || buildDesignModePrompt(this.session.deviceInfo),
339
+ transcript: this.session.getTranscriptText(),
340
+ objective: this.initialUserPrompt,
341
+ errorMessage: err.message
342
+ });
348
343
  // Reset conversation state for fresh API call
349
344
  this.session.clearMessages();
350
345
  this.session.addMessage("system", recoveryContext);
@@ -1,7 +1,7 @@
1
1
  import readline from "readline";
2
2
  import { getScreenshotAsBase64 } from "../device/connection.js";
3
3
  import { sendCUARequest, reviseTestScript } from "../device/openai.js";
4
- import { buildDesignModePrompt } from "../core/prompts.js";
4
+ import { buildDesignModePrompt, buildDesignRecoveryPrompt } from "../core/prompts.js";
5
5
  import { saveTest } from "../test-store/test-manager.js";
6
6
  import { logger } from "../utils/logger.js";
7
7
  /**
@@ -17,6 +17,7 @@ export class DesignMode {
17
17
  this.escPressed = false;
18
18
  this.recentActions = []; // Track recent actions for stuck detection
19
19
  this.initialUserPrompt = null; // Store initial prompt for error recovery
20
+ this.baseDesignPrompt = null;
20
21
  this.consecutiveErrorCount = 0;
21
22
  this.maxConsecutiveErrors = 3;
22
23
  }
@@ -27,7 +28,10 @@ export class DesignMode {
27
28
  */
28
29
  async start(context) {
29
30
  // Set design mode system prompt
30
- const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
31
+ const designPrompt = buildDesignModePrompt(this.session.deviceInfo, {}, {
32
+ strictMode: Boolean(this.engine?.strictMode)
33
+ });
34
+ this.baseDesignPrompt = designPrompt;
31
35
  this.session.setSystemPrompt(designPrompt);
32
36
  console.log(`\n=== Design Mode: Creating test "${this.testName}" ===`);
33
37
  console.log("Describe what you want to test. The agent will explore autonomously.");
@@ -314,21 +318,12 @@ export class DesignMode {
314
318
  }
315
319
  // Automatic recovery - continue from where we left off using transcript
316
320
  console.log("\nRecovering from error and continuing...");
317
- // Build recovery context with transcript
318
- const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
319
- const recoveryContext = `${designPrompt}
320
-
321
- RECOVERY MODE:
322
- The previous session encountered an error and was interrupted. Here is everything that happened so far:
323
-
324
- ${this.session.getTranscriptText()}
325
-
326
- Continue from where we left off and complete the original task: "${this.initialUserPrompt}"
327
-
328
- Remember:
329
- - Don't repeat actions that already succeeded
330
- - Continue towards generating the test script
331
- - If the flow was complete before the error, generate the script now`;
321
+ const recoveryContext = buildDesignRecoveryPrompt({
322
+ basePrompt: this.baseDesignPrompt || this.session.systemPrompt || buildDesignModePrompt(this.session.deviceInfo),
323
+ transcript: this.session.getTranscriptText(),
324
+ objective: this.initialUserPrompt,
325
+ errorMessage: err.message
326
+ });
332
327
  // Reset conversation state for fresh API call
333
328
  this.session.clearMessages();
334
329
  this.session.addMessage("system", recoveryContext);
@@ -1,10 +1,19 @@
1
1
  import { getScreenshotAsBase64, connectToDevice, getDeviceInfo, getCurrentPlatform } from "../device/connection.js";
2
2
  import { sendCUARequest } from "../device/openai.js";
3
+ import { buildExecutionRecoveryPrompt } from "../core/prompts.js";
3
4
  import { isAssertion, extractAssertionPrompt, buildAssertionSystemPrompt, checkAssertionResult, handleAssertionFailure, handleAssertionSuccess, } from "../device/assertions.js";
4
5
  import { isLoadmillInstruction, extractLoadmillCommand, executeLoadmillInstruction, } from "../device/loadmill.js";
5
6
  import { logger } from "../utils/logger.js";
6
7
  import { emitDesktopDebug } from "../utils/desktop-debug.js";
7
8
  import { printCliOutput } from "../utils/console-output.js";
9
/**
 * Choose the messages and response id to send for one execution step.
 *
 * When a previous response id exists and the step is not an assertion, only
 * the new user instruction is sent. Otherwise the full `messages` array is
 * sent. The previous response id is passed through unchanged in both cases.
 *
 * @param {object} params
 * @param {string} params.instruction - Instruction text for this step.
 * @param {boolean} params.isAssertionStep - Whether the step is an assertion.
 * @param {Array<object>} params.messages - Full message list for the session.
 * @param {string} [params.previousResponseId] - Id of the prior response, if any.
 * @returns {{messagesToSend: Array<object>, previousResponseIdToSend: (string|undefined)}}
 */
export function buildExecutionRequestPayload({ instruction, isAssertionStep, messages, previousResponseId }) {
  const canSendInstructionOnly = Boolean(previousResponseId) && !isAssertionStep;

  let messagesToSend = messages;
  if (canSendInstructionOnly) {
    messagesToSend = [{ role: "user", content: instruction }];
  }

  return { messagesToSend, previousResponseIdToSend: previousResponseId };
}
8
17
  /**
9
18
  * Execution Mode - Run test scripts line-by-line
10
19
  * Each instruction is executed in isolation (messages cleared after each turn)
@@ -210,18 +219,13 @@ export class ExecutionMode {
210
219
  instructionIndex: stepContext?.instructionIndex,
211
220
  captureSource: isAssertionStep ? "instruction-input-assertion" : "instruction-input"
212
221
  });
213
- // When continuing with previousResponseId, only send the new instruction
214
- // The server already has full context from previous responses
215
- let messagesToSend;
216
- const previousResponseIdToSend = isAssertionStep ? null : this.session.previousResponseId;
217
- if (this.session.previousResponseId && !isAssertionStep) {
218
- // Only send the new user instruction
219
- messagesToSend = [{ role: "user", content: instruction }];
220
- }
221
- else {
222
- // Fresh start or assertion - send full messages (system + user)
223
- messagesToSend = this.session.messages;
224
- }
222
+ // Assertions rely on the prior response chain for earlier execution context.
223
+ const { messagesToSend, previousResponseIdToSend } = buildExecutionRequestPayload({
224
+ instruction,
225
+ isAssertionStep,
226
+ messages: this.session.messages,
227
+ previousResponseId: this.session.previousResponseId
228
+ });
225
229
  const response = await sendCUARequest({
226
230
  messages: messagesToSend,
227
231
  screenshotBase64,
@@ -410,11 +414,10 @@ export class ExecutionMode {
410
414
  const transcriptContext = this.session.getTranscriptText();
411
415
  this.session.clearMessages();
412
416
  // clearMessages() restores the base system prompt, but we need to add context
413
- // Build enhanced system prompt with recovery context
414
- let recoverySystemPrompt = this.initialSystemText;
415
- if (transcriptContext) {
416
- recoverySystemPrompt += `\n\n[SESSION RECOVERY - Connection was lost. Previous actions completed before the error:]\n${transcriptContext}\n\n[IMPORTANT: Resume execution silently. Do NOT narrate or explain. Just execute the next instruction.]`;
417
- }
417
+ const recoverySystemPrompt = buildExecutionRecoveryPrompt({
418
+ basePrompt: this.initialSystemText,
419
+ transcript: transcriptContext
420
+ });
418
421
  // Replace the system message with the enhanced one
419
422
  this.session.messages = [{ role: "system", content: recoverySystemPrompt }];
420
423
  this.session.updateResponseId(undefined);