@loadmill/droid-cua 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,6 @@
1
1
  import OpenAI from "openai";
2
+ import { buildTestRevisionSystemPrompt } from "../prompts/editor.js";
3
+ import { buildAppContextCompactionInput } from "../prompts/execution.js";
2
4
  import { logger } from "../utils/logger.js";
3
5
  import { CuaDebugTracer } from "../utils/cua-debug-tracer.js";
4
6
  let openai = null;
@@ -129,23 +131,7 @@ export async function reviseTestScript(originalScript, revisionRequest) {
129
131
  model: "gpt-4o",
130
132
  messages: [{
131
133
  role: "system",
132
- content: `You are editing a test script based on user feedback.
133
-
134
- Current test script:
135
- ${originalScript}
136
-
137
- User's revision request:
138
- ${revisionRequest}
139
-
140
- Apply the user's changes and output the revised test script.
141
-
142
- FORMAT RULES:
143
- - One simple instruction per line (NO numbers, NO bullets)
144
- - Use imperative commands: "Open X", "Click Y", "Type Z"
145
- - Include "assert: <condition>" lines to validate expected behavior
146
- - End with "exit"
147
-
148
- Output only the revised test script, nothing else.`
134
+ content: buildTestRevisionSystemPrompt(originalScript, revisionRequest)
149
135
  }]
150
136
  });
151
137
  return response.choices[0].message.content.trim();
@@ -154,102 +140,11 @@ export async function compactAppContext({ contextDocument, taskDescription, toke
154
140
  const response = await getOpenAI().responses.create({
155
141
  model: "gpt-5.4",
156
142
  temperature: 0,
157
- input: [
158
- {
159
- role: "system",
160
- content: [{
161
- type: "input_text",
162
- text: `You are compressing an app context document for a mobile testing agent.
163
-
164
- You will receive:
165
- 1. A context document
166
- 2. A test task
167
-
168
- Your job is to SELECT only the facts from the context document that are useful for the given task.
169
- The output will be injected into a system prompt with a strict token budget.
170
-
171
- CRITICAL:
172
- - Use only facts explicitly supported by the context document
173
- - Never invent, infer, normalize, substitute, or improve credentials, labels, screen names, button names, or numeric values
174
- - Preserve exact values verbatim when present in the source
175
- - Prefer facts that help the agent act correctly when they are not obvious from the task alone
176
- - Do not restate, paraphrase, summarize, or reorganize the test task
177
- - The output must not read like instructions or a test plan
178
- - Do not describe what the agent should do
179
- - Output only reference knowledge about the app
180
- - If a line could be copied from the task with minor wording changes, omit it
181
- - Prefer copying source facts verbatim or near-verbatim over rewriting them
182
- - Do not collapse multiple specific source facts into one generic summary if that removes useful distinctions
183
-
184
- Selection priority:
185
- 1. Facts the agent would NOT know from the test script alone
186
- 2. Facts that are hard to infer from screenshots
187
- 3. Non-obvious navigation or interaction details
188
- 4. Exact visible labels needed to act correctly
189
- 5. Credentials and other exact values
190
-
191
- High-value facts:
192
- - exact UI labels
193
- - how state, mode, or account selection is performed
194
- - where logout is located
195
- - hidden or non-obvious navigation
196
- - which menu items are decorative or non-functional
197
- - screen titles and section labels used to confirm location
198
- - exact credentials and role labels
199
-
200
- Low-value facts:
201
- - restating the test steps
202
- - repeating literal values already present in the task
203
- - generic summaries like "approve the transaction"
204
-
205
- When the task involves authentication, switching state or mode, opening menus, or moving between major areas of the app, strongly prefer including:
206
- - how account, state, or mode selection is performed
207
- - exact visible labels for the relevant controls
208
- - where exit or sign-out actions are located
209
- - the screen or section labels that confirm the agent is in the right place
210
-
211
- Rules:
212
- - Output plain text only
213
- - No markdown, no bullet symbols, no numbering, no headers
214
- - Use terse, factual language: one fact per line, no filler words
215
- - Blank lines only to separate logical groups
216
- - Prefer exact visible UI labels over summaries
217
- - Do not describe step-by-step procedures
218
- - Do not restate the test workflow
219
- - State only facts about screens, elements, hidden interactions, entities, credentials, and navigation
220
- - If a useful fact is not explicitly stated in the context document, omit it
221
- - Include only information relevant to this task
222
- - Do not waste space repeating the task itself
223
- - If the task already states a value or action, include it only when the context adds non-obvious execution details
224
- - Return a short result or an empty string if little is relevant
225
- - Target: under ${tokenBudget} tokens
226
-
227
- Bad output patterns to avoid:
228
- - generic summaries that remove actionable details
229
- - lines that restate the task in generic prose
230
- - lines that describe obvious workflow steps instead of app knowledge
231
- - lines that replace exact source labels or mechanisms with broad summaries
232
-
233
- Good output characteristics:
234
- - preserves the exact label or mechanism from the source when it matters
235
- - keeps distinctions like dropdown vs tabs, drawer vs visible button, exact section titles, exact button text
236
- - includes hidden or non-obvious navigation details when relevant
237
-
238
- Return only the briefing text.`
239
- }]
240
- },
241
- {
242
- role: "user",
243
- content: [{
244
- type: "input_text",
245
- text: `APP CONTEXT DOCUMENT:
246
- ${contextDocument}
247
-
248
- TASK:
249
- ${taskDescription}`
250
- }]
251
- }
252
- ]
143
+ input: buildAppContextCompactionInput({
144
+ contextDocument,
145
+ taskDescription,
146
+ tokenBudget,
147
+ })
253
148
  });
254
149
  return {
255
150
  briefing: typeof response.output_text === "string" ? response.output_text.trim() : "",
@@ -2,6 +2,7 @@
2
2
  * AI-powered text interpretation for Loadmill commands
3
3
  */
4
4
  import OpenAI from "openai";
5
+ import { buildLoadmillCommandInterpretationMessages, buildLoadmillFlowSelectionMessages, } from "../../prompts/loadmill.js";
5
6
  let openai = null;
6
7
  function getOpenAI() {
7
8
  if (!openai) {
@@ -19,39 +20,7 @@ function getOpenAI() {
19
20
  export async function interpretLoadmillCommand(userInput) {
20
21
  const response = await getOpenAI().chat.completions.create({
21
22
  model: "gpt-4o-mini",
22
- messages: [
23
- {
24
- role: "system",
25
- content: `You are a parser that extracts structured data from natural language Loadmill commands.
26
-
27
- Extract the following from the user's input:
28
- 1. searchQuery: The flow name or description to search for (required). FIX any obvious typos or misspellings.
29
- 2. parameters: Any key=value pairs mentioned (as an object)
30
- 3. action: Either "run" (if user wants to execute) or "search" (if user just wants to find flows)
31
-
32
- Output JSON only, no markdown or explanation.
33
-
34
- Examples:
35
- Input: "run the checkout flow with user=test123"
36
- Output: {"searchQuery": "checkout flow", "parameters": {"user": "test123"}, "action": "run"}
37
-
38
- Input: "search for login test"
39
- Output: {"searchQuery": "login test", "parameters": {}, "action": "search"}
40
-
41
- Input: "run user authentication with email=test@example.com password=secret123"
42
- Output: {"searchQuery": "user authentication", "parameters": {"email": "test@example.com", "password": "secret123"}, "action": "run"}
43
-
44
- Input: "execute payment flow"
45
- Output: {"searchQuery": "payment flow", "parameters": {}, "action": "run"}
46
-
47
- Input: "create a transction with amount=200"
48
- Output: {"searchQuery": "transaction", "parameters": {"amount": "200"}, "action": "run"}`
49
- },
50
- {
51
- role: "user",
52
- content: userInput
53
- }
54
- ],
23
+ messages: buildLoadmillCommandInterpretationMessages(userInput),
55
24
  response_format: { type: "json_object" }
56
25
  });
57
26
  const content = response.choices[0].message.content;
@@ -84,29 +53,7 @@ export async function selectBestFlow(flows, originalQuery) {
84
53
  }).join("\n");
85
54
  const response = await getOpenAI().chat.completions.create({
86
55
  model: "gpt-4o-mini",
87
- messages: [
88
- {
89
- role: "system",
90
- content: `You are selecting the best matching test flow based on a user query.
91
-
92
- Given the user's query and a list of available flows, select the best match.
93
-
94
- Output JSON with:
95
- - index: 1-based index of the best matching flow
96
- - confidence: number between 0 and 1 indicating how confident you are
97
-
98
- If no flow seems to match well, set confidence to a low value (< 0.5).
99
-
100
- Output JSON only, no markdown.`
101
- },
102
- {
103
- role: "user",
104
- content: `Query: "${originalQuery}"
105
-
106
- Available flows:
107
- ${flowList}`
108
- }
109
- ],
56
+ messages: buildLoadmillFlowSelectionMessages(originalQuery, flowList),
110
57
  response_format: { type: "json_object" }
111
58
  });
112
59
  const content = response.choices[0].message.content;
@@ -1,6 +1,6 @@
1
1
  import { getScreenshotAsBase64 } from "../device/connection.js";
2
2
  import { sendCUARequest, reviseTestScript } from "../device/openai.js";
3
- import { buildDesignModePrompt } from "../core/prompts.js";
3
+ import { buildDesignModePrompt, buildDesignRecoveryPrompt } from "../core/prompts.js";
4
4
  import { saveTest } from "../test-store/test-manager.js";
5
5
  import { logger } from "../utils/logger.js";
6
6
  /**
@@ -20,6 +20,7 @@ export class DesignModeInk {
20
20
  this.waitingForInput = false; // Flag to indicate we're explicitly waiting for input
21
21
  this.inputResolver = null; // Promise resolver for input
22
22
  this.initialUserPrompt = null; // Store initial prompt for error recovery
23
+ this.baseDesignPrompt = null;
23
24
  this.consecutiveErrorCount = 0;
24
25
  this.maxConsecutiveErrors = 3;
25
26
  }
@@ -30,7 +31,10 @@ export class DesignModeInk {
30
31
  async start() {
31
32
  const addOutput = this.context.addOutput;
32
33
  // Set design mode system prompt
33
- const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
34
+ const designPrompt = buildDesignModePrompt(this.session.deviceInfo, {}, {
35
+ strictMode: Boolean(this.engine?.strictMode)
36
+ });
37
+ this.baseDesignPrompt = designPrompt;
34
38
  this.session.setSystemPrompt(designPrompt);
35
39
  // Update UI
36
40
  if (this.context.setMode) {
@@ -330,21 +334,12 @@ export class DesignModeInk {
330
334
  }
331
335
  // Automatic recovery - continue from where we left off using transcript
332
336
  addOutput({ type: 'info', text: 'Recovering from error and continuing...' });
333
- // Build recovery context with transcript
334
- const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
335
- const recoveryContext = `${designPrompt}
336
-
337
- RECOVERY MODE:
338
- The previous session encountered an error and was interrupted. Here is everything that happened so far:
339
-
340
- ${this.session.getTranscriptText()}
341
-
342
- Continue from where we left off and complete the original task: "${this.initialUserPrompt}"
343
-
344
- Remember:
345
- - Don't repeat actions that already succeeded
346
- - Continue towards generating the test script
347
- - If the flow was complete before the error, generate the script now`;
337
+ const recoveryContext = buildDesignRecoveryPrompt({
338
+ basePrompt: this.baseDesignPrompt || this.session.systemPrompt || buildDesignModePrompt(this.session.deviceInfo),
339
+ transcript: this.session.getTranscriptText(),
340
+ objective: this.initialUserPrompt,
341
+ errorMessage: err.message
342
+ });
348
343
  // Reset conversation state for fresh API call
349
344
  this.session.clearMessages();
350
345
  this.session.addMessage("system", recoveryContext);
@@ -1,7 +1,7 @@
1
1
  import readline from "readline";
2
2
  import { getScreenshotAsBase64 } from "../device/connection.js";
3
3
  import { sendCUARequest, reviseTestScript } from "../device/openai.js";
4
- import { buildDesignModePrompt } from "../core/prompts.js";
4
+ import { buildDesignModePrompt, buildDesignRecoveryPrompt } from "../core/prompts.js";
5
5
  import { saveTest } from "../test-store/test-manager.js";
6
6
  import { logger } from "../utils/logger.js";
7
7
  /**
@@ -17,6 +17,7 @@ export class DesignMode {
17
17
  this.escPressed = false;
18
18
  this.recentActions = []; // Track recent actions for stuck detection
19
19
  this.initialUserPrompt = null; // Store initial prompt for error recovery
20
+ this.baseDesignPrompt = null;
20
21
  this.consecutiveErrorCount = 0;
21
22
  this.maxConsecutiveErrors = 3;
22
23
  }
@@ -27,7 +28,10 @@ export class DesignMode {
27
28
  */
28
29
  async start(context) {
29
30
  // Set design mode system prompt
30
- const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
31
+ const designPrompt = buildDesignModePrompt(this.session.deviceInfo, {}, {
32
+ strictMode: Boolean(this.engine?.strictMode)
33
+ });
34
+ this.baseDesignPrompt = designPrompt;
31
35
  this.session.setSystemPrompt(designPrompt);
32
36
  console.log(`\n=== Design Mode: Creating test "${this.testName}" ===`);
33
37
  console.log("Describe what you want to test. The agent will explore autonomously.");
@@ -314,21 +318,12 @@ export class DesignMode {
314
318
  }
315
319
  // Automatic recovery - continue from where we left off using transcript
316
320
  console.log("\nRecovering from error and continuing...");
317
- // Build recovery context with transcript
318
- const designPrompt = buildDesignModePrompt(this.session.deviceInfo);
319
- const recoveryContext = `${designPrompt}
320
-
321
- RECOVERY MODE:
322
- The previous session encountered an error and was interrupted. Here is everything that happened so far:
323
-
324
- ${this.session.getTranscriptText()}
325
-
326
- Continue from where we left off and complete the original task: "${this.initialUserPrompt}"
327
-
328
- Remember:
329
- - Don't repeat actions that already succeeded
330
- - Continue towards generating the test script
331
- - If the flow was complete before the error, generate the script now`;
321
+ const recoveryContext = buildDesignRecoveryPrompt({
322
+ basePrompt: this.baseDesignPrompt || this.session.systemPrompt || buildDesignModePrompt(this.session.deviceInfo),
323
+ transcript: this.session.getTranscriptText(),
324
+ objective: this.initialUserPrompt,
325
+ errorMessage: err.message
326
+ });
332
327
  // Reset conversation state for fresh API call
333
328
  this.session.clearMessages();
334
329
  this.session.addMessage("system", recoveryContext);
@@ -1,10 +1,19 @@
1
1
  import { getScreenshotAsBase64, connectToDevice, getDeviceInfo, getCurrentPlatform } from "../device/connection.js";
2
2
  import { sendCUARequest } from "../device/openai.js";
3
+ import { buildExecutionRecoveryPrompt } from "../core/prompts.js";
3
4
  import { isAssertion, extractAssertionPrompt, buildAssertionSystemPrompt, checkAssertionResult, handleAssertionFailure, handleAssertionSuccess, } from "../device/assertions.js";
4
5
  import { isLoadmillInstruction, extractLoadmillCommand, executeLoadmillInstruction, } from "../device/loadmill.js";
5
6
  import { logger } from "../utils/logger.js";
6
7
  import { emitDesktopDebug } from "../utils/desktop-debug.js";
7
8
  import { printCliOutput } from "../utils/console-output.js";
9
/**
 * Decide which messages and which response id accompany the next model request.
 *
 * When a previous response id exists and the step is not an assertion, only the
 * new instruction is sent, wrapped as a single user message. In every other case
 * (no previous response id, or an assertion step) the supplied `messages` value
 * is sent unchanged. The previous response id is always passed through as-is.
 *
 * @param {object} params
 * @param {string} params.instruction - Instruction text for the current step.
 * @param {boolean} params.isAssertionStep - Whether the current step is an assertion.
 * @param {Array} params.messages - Messages to send when a delta is not possible.
 * @param {*} params.previousResponseId - Id of the previous response, if any.
 * @returns {{ messagesToSend: Array, previousResponseIdToSend: * }}
 */
export function buildExecutionRequestPayload({ instruction, isAssertionStep, messages, previousResponseId }) {
  let messagesToSend = messages;
  if (previousResponseId && !isAssertionStep) {
    // Continuing an existing chain: the new instruction alone is enough.
    messagesToSend = [{ role: "user", content: instruction }];
  }
  return { messagesToSend, previousResponseIdToSend: previousResponseId };
}
8
17
  /**
9
18
  * Execution Mode - Run test scripts line-by-line
10
19
  * Each instruction is executed in isolation (messages cleared after each turn)
@@ -210,18 +219,13 @@ export class ExecutionMode {
210
219
  instructionIndex: stepContext?.instructionIndex,
211
220
  captureSource: isAssertionStep ? "instruction-input-assertion" : "instruction-input"
212
221
  });
213
- // When continuing with previousResponseId, only send the new instruction
214
- // The server already has full context from previous responses
215
- let messagesToSend;
216
- const previousResponseIdToSend = isAssertionStep ? null : this.session.previousResponseId;
217
- if (this.session.previousResponseId && !isAssertionStep) {
218
- // Only send the new user instruction
219
- messagesToSend = [{ role: "user", content: instruction }];
220
- }
221
- else {
222
- // Fresh start or assertion - send full messages (system + user)
223
- messagesToSend = this.session.messages;
224
- }
222
+ // Assertions rely on the prior response chain for earlier execution context.
223
+ const { messagesToSend, previousResponseIdToSend } = buildExecutionRequestPayload({
224
+ instruction,
225
+ isAssertionStep,
226
+ messages: this.session.messages,
227
+ previousResponseId: this.session.previousResponseId
228
+ });
225
229
  const response = await sendCUARequest({
226
230
  messages: messagesToSend,
227
231
  screenshotBase64,
@@ -410,11 +414,10 @@ export class ExecutionMode {
410
414
  const transcriptContext = this.session.getTranscriptText();
411
415
  this.session.clearMessages();
412
416
  // clearMessages() restores the base system prompt, but we need to add context
413
- // Build enhanced system prompt with recovery context
414
- let recoverySystemPrompt = this.initialSystemText;
415
- if (transcriptContext) {
416
- recoverySystemPrompt += `\n\n[SESSION RECOVERY - Connection was lost. Previous actions completed before the error:]\n${transcriptContext}\n\n[IMPORTANT: Resume execution silently. Do NOT narrate or explain. Just execute the next instruction.]`;
417
- }
417
+ const recoverySystemPrompt = buildExecutionRecoveryPrompt({
418
+ basePrompt: this.initialSystemText,
419
+ transcript: transcriptContext
420
+ });
418
421
  // Replace the system message with the enhanced one
419
422
  this.session.messages = [{ role: "system", content: recoverySystemPrompt }];
420
423
  this.session.updateResponseId(undefined);
@@ -0,0 +1,139 @@
1
/**
 * Render user-configured instruction sections into one prompt block.
 *
 * Each section contributes a "<title>:\n<text>" entry. A section is dropped when
 * its title or text is not a string or is empty after trimming.
 *
 * @param {Array<{title?: string, text?: string}>} [sections] - Candidate sections.
 *   `null`, `undefined`, and non-array values are treated as "no sections".
 * @returns {string} The rendered block, or "" when no section is usable.
 */
function buildCustomInstructionsSection(sections = []) {
  // A default parameter only covers `undefined`; guard null and non-array input too.
  const candidates = Array.isArray(sections) ? sections : [];
  const usableSections = candidates
    .map((section) => ({
      // Validate the title the same way as the text so a blank or non-string
      // title cannot produce a heading such as "   :" or "[object Object]:".
      title: typeof section?.title === "string" ? section.title.trim() : "",
      text: typeof section?.text === "string" ? section.text.trim() : "",
    }))
    .filter(({ title, text }) => title !== "" && text !== "");
  if (usableSections.length === 0) {
    return "";
  }
  const renderedSections = usableSections
    .map(({ title, text }) => `${title}:\n${text}`)
    .join("\n\n");
  return [
    "USER CUSTOM INSTRUCTIONS:",
    "Follow these user-configured instructions in addition to the default behavior below.",
    "Prefer these custom instructions when deciding how to behave.",
    "",
    renderedSections,
  ].join("\n");
}
20
/**
 * Append the rendered custom-instruction block to a prompt.
 *
 * @param {string} prompt - Prompt text to extend.
 * @param {Array<{title?: string, text?: string}>} [sections] - Sections handed to
 *   buildCustomInstructionsSection for rendering.
 * @returns {string} `prompt` unchanged when nothing is rendered; otherwise the
 *   prompt, a blank line, the rendered block, and a trailing newline.
 */
function appendCustomSections(prompt, sections = []) {
  const renderedCustomBlock = buildCustomInstructionsSection(sections);
  return renderedCustomBlock
    ? `${prompt}\n\n${renderedCustomBlock}\n`
    : prompt;
}
30
/**
 * Build the prompt section describing the strict, observation-first runtime.
 *
 * @param {{strictMode?: boolean}} [runtimeOptions] - Runtime flags; the section is
 *   produced only when `strictMode` is exactly `true`.
 * @returns {string} The strict-mode section, or "" when strict mode is not enabled.
 */
function buildStrictModeRuntimeSection(runtimeOptions = {}) {
  const strictModeEnabled = runtimeOptions?.strictMode === true;
  return strictModeEnabled
    ? [
        "STRICT MODE - Observation-First Runtime:",
        "- The runtime may execute only the first meaningful action from any multi-step action chain you propose.",
        "- After that first action, it will re-observe the device and continue from a fresh screenshot.",
        "- If you propose multiple steps, assume only the first may happen before the next turn.",
        "- Do not assume later proposed actions succeeded unless the next screenshot confirms them.",
      ].join("\n")
    : "";
}
40
/**
 * Produce a short noun phrase describing the device under control.
 *
 * @param {{platform?: string, device_name?: string}} [deviceInfo] - Device
 *   metadata. `null` and `undefined` are treated as an empty object.
 * @returns {string} A phrase such as "an Android device (Pixel)", or
 *   "a mobile device" when the platform is neither "ios" nor "android".
 */
function describeControlledDevice(deviceInfo = {}) {
  // The default parameter only covers `undefined`; an explicit null would
  // otherwise throw on the property reads below.
  const info = deviceInfo ?? {};
  const platform = typeof info.platform === "string" ? info.platform.trim().toLowerCase() : "";
  const deviceName = typeof info.device_name === "string" ? info.device_name.trim() : "";
  if (platform === "ios") {
    return deviceName ? `an iOS simulator (${deviceName})` : "an iOS device";
  }
  if (platform === "android") {
    return deviceName ? `an Android device (${deviceName})` : "an Android device";
  }
  return "a mobile device";
}
51
/**
 * Build the base system prompt shared by the prompt builders in this package.
 *
 * @param {{platform?: string, device_name?: string, scaled_width?: number, scaled_height?: number}} deviceInfo
 *   Device metadata. `null` and `undefined` are tolerated and treated as an empty object.
 * @param {object} [customInstructions] - Accepted for signature compatibility;
 *   this function does not read it.
 * @returns {string} The prompt text. It begins and ends with a newline.
 */
function buildBaseSystemPrompt(deviceInfo, customInstructions = {}) {
  // Normalize once so a missing deviceInfo does not throw on the property reads
  // below, and hand the normalized object to the helper as well.
  const info = deviceInfo ?? {};
  const controlledDevice = describeControlledDevice(info);
  // Do not tell the model the screen is "undefined x undefined pixels" when the
  // scaled dimensions were not supplied.
  const screenSizeLine = info.scaled_width != null && info.scaled_height != null
    ? `The screen you see is ${info.scaled_width} x ${info.scaled_height} pixels.`
    : "The exact screen dimensions are unavailable.";
  const prompt = `
You are controlling ${controlledDevice} in a sandboxed testing environment.
Follow the user's instructions to interact with the device.

The device screen has been scaled down for display.
You can interact with any part of the visible phone screen, including system UI, browser UI, and app content.

${screenSizeLine}
Pixel (0,0) is at the top-left corner.

When aiming for visual targets:
- Reason carefully about the approximate pixel position.
- Click precisely based on your visual estimate.

Available actions: click, scroll, type, keypress, wait, screenshot.

CRITICAL - Mobile Input Constraints:
- This is a mobile device, not a desktop. Do NOT use desktop keyboard shortcuts or modifier chords.
- NEVER emit key combinations such as CTRL+A, CMD+A, CTRL+C, CTRL+V, ALT+TAB, SHIFT+ENTER, or similar shortcuts.
- Use 'keypress' only for a single mobile-safe key when absolutely necessary.
- To replace text, tap into the field and type the desired value. If correction is needed, use mobile-safe deletion only.
- Prefer tapping visible controls over hardware key events.
- Prefer on-screen navigation controls such as menus, tabs, drawer items, back arrows, close buttons, and explicit logout buttons over keypress actions.
- Do NOT use Back or ESC for normal app navigation when a reliable on-screen control is visible.
- Avoid using Back or ESC from a main or root screen, because it may leave the app.
- Exception: if the software keyboard is open and blocking the next needed control, Back or ESC may be used to dismiss the keyboard before continuing.
- Treat keypress actions as a fallback for limited cases only, such as a clearly needed single mobile-safe key or dismissing transient UI when no better visible control exists.

CRITICAL - Automatic Timing:
- After EVERY action (click, type, keypress, scroll), there is an automatic 500ms delay
- This 500ms is sufficient for normal UI updates and animations
- DO NOT add 'wait' actions unnecessarily - trust the automatic delay

CRITICAL - Mutating Actions:
- Mutating actions change app state. Examples: submit, create, save, confirm, approve, reject, login, logout, send, place order, initiate transfer
- Before tapping a mutating action button, dismiss the software keyboard first when it is open and not required for the tap
- After performing a mutating action once, do NOT repeat the same mutating action unless the UI clearly shows the first attempt failed or had no effect
- Treat visible state change as success. Examples: form fields clear, submit button returns to normal, status changes, list refreshes, new row appears, success message appears, screen changes
- For form submissions specifically, if the relevant fields clear and the action button returns to its normal idle state, treat that as success even if the new row or confirmation is not obvious yet
- If the UI shows signs that the mutating action succeeded, stop acting for that instruction

Use explicit 'wait' action ONLY in these specific cases:
1. After launching apps from home screen or app drawer
2. After pressing ENTER that triggers navigation (search, URL, form submit)
3. After clicking links that open new apps or pages
4. After actions that trigger heavy loading (camera, maps, etc.)

When you MUST wait:
- Click app icon from home → wait → Continue
- Type in search box → Press ENTER → wait → Continue
- Click link that opens new page/app → wait → Continue
- Open camera/maps/heavy feature → wait → Continue

When you should NOT wait (automatic 500ms handles it):
- Clicking UI buttons within a running app (click button - no wait needed)
- Typing in text fields (type text - no wait needed)
- Scrolling (scroll - no wait needed)
- Clicking tabs or menu items within an app (click - no wait needed)

Rule of thumb: Wait for app launches and navigation. Everything else has automatic timing.

Perform the user's requested actions within the current view.

If unsure about visual elements, take a screenshot to improve your reasoning.
If unsure about the user's intent, make the best decision you can based on context and continue automatically.

CRITICAL - Never Ask Questions:
- NEVER ask the user for confirmation, clarification, or next steps
- NEVER ask questions like "Should I...", "Would you like...", "Do you want me to..."
- NEVER wait for user guidance - make autonomous decisions
- If stuck, try alternative approaches (go back, try different UI element, restart app)
- ONLY stop when the task is complete or you've exhausted reasonable approaches

Act decisively to complete the task.

Stop acting once the task appears complete.
Only complete the current instruction. Do not proceed beyond the current step unless asked.

Mobile-Specific Notes:
- HOME key returns to the home screen
- On Android, ESC key maps to Back
- On iOS, ESC has no effect; use visible on-screen controls instead
- Never use CTRL, CMD, ALT, OPTION, or SHIFT in a keypress action
`;
  return prompt;
}
139
+ export { appendCustomSections, buildBaseSystemPrompt, buildCustomInstructionsSection, describeControlledDevice, buildStrictModeRuntimeSection, };
@@ -0,0 +1,115 @@
1
+ import { appendCustomSections, buildBaseSystemPrompt, buildStrictModeRuntimeSection } from "./base.js";
2
/**
 * Build the system prompt used while designing a test script.
 *
 * The result is the base system prompt, followed by the design-mode rules, the
 * strict-mode section when one is produced, and any non-empty custom sections.
 *
 * @param {{platform?: string}} deviceInfo - Device metadata, forwarded to
 *   buildBaseSystemPrompt. Its `platform` selects the app wording in the
 *   design-mode section.
 * @param {{basePromptInstructions?: string, designModeInstructions?: string}} [customInstructions]
 *   User-configured instruction text. `null` is tolerated.
 * @param {{strictMode?: boolean}} [runtimeOptions] - Forwarded to
 *   buildStrictModeRuntimeSection.
 * @returns {string} The complete design-mode system prompt.
 */
export function buildDesignModePrompt(deviceInfo, customInstructions = {}, runtimeOptions = {}) {
  const designCustomText = typeof customInstructions?.designModeInstructions === "string"
    ? customInstructions.designModeInstructions.trim()
    : "";
  const basePrompt = buildBaseSystemPrompt(deviceInfo, customInstructions);
  const strictModeSection = buildStrictModeRuntimeSection(runtimeOptions);
  // The prompt previously said "an Android app" for every device; pick the
  // wording from the platform so iOS sessions are not described as Android.
  const platform = typeof deviceInfo?.platform === "string" ? deviceInfo.platform.trim().toLowerCase() : "";
  let targetApp = "a mobile app";
  if (platform === "ios") {
    targetApp = "an iOS app";
  } else if (platform === "android") {
    targetApp = "an Android app";
  }
  const prompt = `${basePrompt}

DESIGN MODE:
You are helping design a test script for ${targetApp}.
Some tests intentionally validate negative outcomes (errors, failures, rejected inputs). These are expected and should be treated as successful progress when they match the test goal.

Your task:
1. Understand what the user wants to test from their initial instruction
2. Explore the app autonomously to understand the flows
3. Take screenshots and interact as needed to discover the UI and behavior
4. Once you've successfully completed the user's requested flow, immediately generate the test script

CRITICAL - After Completing the Task:
- DO NOT navigate back or away from the final screen
- The final screen state is what matters for verification
- Generate the test script immediately showing the current state
- Use assertions to verify state, not navigation
- "Check that it changed" means verify the current visual state, not navigate elsewhere
- If the target validation state is visible (including expected error states), STOP actions and immediately output the final test script

CRITICAL - Recognizing When You Are Stuck:
If you find yourself:
- Repeating similar actions multiple times (e.g., opening/closing the same app repeatedly)
- Not reaching a new screen or state after several attempts
- Unsure about a higher-level decision (which tab to use, which mode to enter, where to start)
- Unable to find the UI element or feature the user mentioned

THEN STOP ACTING IMMEDIATELY and ask the user for guidance:
1. Briefly describe what you see on screen now
2. Explain what you were trying to do and why you're stuck
3. Ask a single, concrete question to unblock the next step

Example:
"Chrome is open but I don't see a search bar or new tab button. Should I open a new tab, or is there a specific way you'd like me to navigate?"

DO NOT continue brute-forcing the UI when stuck. The user prefers being asked over watching repeated failed attempts.
DO NOT ask if the user wants a script after successfully completing the flow - just generate it automatically.

CRITICAL - Off-Screen Element Discovery:
- If a required element is not visible, assume it may be off-screen before changing strategy
- Humans naturally scroll when UI appears cropped; do the same
- Use this discovery sequence before retries or fallback navigation:
1. Scroll the screen in the likely direction to reveal hidden content
2. If still missing, do one minimal fallback (e.g., close overlay or go back once), then retry discovery
- Do not repeat already-successful actions while searching for an off-screen target

CRITICAL - Test Script Format Rules:
- One simple instruction per line (NO numbers, NO bullets)
- Use imperative commands: "Open X", "Click Y", "Type Z"
- Include "assert: <condition>" lines to validate expected behavior
- Normalize validation wording into assertions:
- Convert "check", "verify", "ensure", "fetch", and "compare" intent into explicit "assert: ..." lines
- Do not leave standalone "Check ..." or "Verify ..." lines in the final script
- Merge duplicate or near-duplicate validation lines into one clear assertion
- End with "exit"
- Keep it simple and executable
- When you generate the final result, include a suggested test name before the script
- The suggested test name must be very short: prefer 2 to 4 words
- Focus on the main user goal, not every assertion or detail
- The suggested test name must be lowercase, kebab-case, and filename-safe
- Use this exact final format:
Suggested test name: short-kebab-case-name

\`\`\`
<test script here>
\`\`\`

CORRECT Example:
Suggested test name: calculator-addition

\`\`\`
Open Calculator app
assert: Calculator app is visible
Type "2"
Click the plus button
Type "3"
Click the equals button
assert: result shows 5
exit
\`\`\`

WRONG Example (DON'T DO THIS):
\`\`\`
1. Open Calculator app
2. Verify the app opened
3. etc...
\`\`\`

Remember: You are autonomous. Explore confidently. Generate simple, executable test scripts.
` + (strictModeSection ? `\n\n${strictModeSection}` : "");
  return appendCustomSections(prompt, [
    { title: "Base Prompt Instructions", text: customInstructions?.basePromptInstructions },
    { title: "Design Mode Instructions", text: designCustomText }
  ]);
}
101
/**
 * Build the system prompt used to resume a design session after a failed turn.
 *
 * @param {object} params
 * @param {string} params.basePrompt - Prompt text placed ahead of the recovery section.
 * @param {string} [params.transcript] - Transcript of the session so far;
 *   "(no transcript available)" is used when it is null or undefined.
 * @param {string} [params.objective] - The original objective; "(not provided)"
 *   is used when it is null or undefined.
 * @param {string} [params.errorMessage] - Message of the error that interrupted
 *   the previous turn; "unknown error" is used when it is null or undefined.
 * @returns {string} The recovery prompt.
 */
export function buildDesignRecoveryPrompt({ basePrompt, transcript, objective, errorMessage }) {
  // `objective` already had a nullish fallback; give the other optional fields
  // one too so the literal text "undefined" never reaches the prompt (for
  // example when the thrown value has no `message`).
  const failure = errorMessage ?? "unknown error";
  const history = transcript ?? "(no transcript available)";
  const goal = objective ?? "(not provided)";
  return [
    `${basePrompt}`,
    "",
    "RECOVERY MODE:",
    `The previous turn failed with error: "${failure}".`,
    "Continue from the current app state without repeating completed steps unless needed.",
    "",
    "Transcript so far:",
    `${history}`,
    "",
    "Original objective:",
    `${goal}`,
    "",
    "If the objective is already completed, generate the final test script now.",
  ].join("\n");
}
@@ -0,0 +1,19 @@
1
/**
 * Build the system prompt that asks the model to revise a test script.
 *
 * @param {string} originalScript - The current test script text.
 * @param {string} revisionRequest - The user's requested change.
 * @returns {string} Prompt embedding both inputs followed by the script format rules.
 */
export function buildTestRevisionSystemPrompt(originalScript, revisionRequest) {
  const promptLines = [
    "You are editing a test script based on user feedback.",
    "",
    "Current test script:",
    `${originalScript}`,
    "",
    "User's revision request:",
    `${revisionRequest}`,
    "",
    "Apply the user's changes and output the revised test script.",
    "",
    "FORMAT RULES:",
    "- One simple instruction per line (NO numbers, NO bullets)",
    '- Use imperative commands: "Open X", "Click Y", "Type Z"',
    '- Include "assert: <condition>" lines to validate expected behavior',
    '- End with "exit"',
    "",
    "Output only the revised test script, nothing else.",
  ];
  return promptLines.join("\n");
}