@loadmill/droid-cua 2.2.2 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/README.md +69 -0
  2. package/build/index.js +177 -24
  3. package/build/src/cli/headless-debug.js +55 -0
  4. package/build/src/cli/headless-execution-config.js +203 -0
  5. package/build/src/cli/ink-shell.js +8 -2
  6. package/build/src/commands/help.js +13 -1
  7. package/build/src/commands/run.js +30 -1
  8. package/build/src/core/app-context.js +57 -0
  9. package/build/src/core/execution-engine.js +151 -20
  10. package/build/src/core/prompts.js +3 -247
  11. package/build/src/device/android/actions.js +2 -2
  12. package/build/src/device/assertions.js +4 -23
  13. package/build/src/device/cloud/browserstack/adapter.js +1 -0
  14. package/build/src/device/cloud/lambdatest/adapter.js +402 -0
  15. package/build/src/device/cloud/registry.js +2 -1
  16. package/build/src/device/interface.js +1 -1
  17. package/build/src/device/ios/actions.js +8 -2
  18. package/build/src/device/loadmill.js +4 -3
  19. package/build/src/device/openai.js +32 -26
  20. package/build/src/integrations/loadmill/interpreter.js +3 -56
  21. package/build/src/modes/design-mode-ink.js +12 -17
  22. package/build/src/modes/design-mode.js +12 -17
  23. package/build/src/modes/execution-mode.js +32 -22
  24. package/build/src/prompts/base.js +139 -0
  25. package/build/src/prompts/design.js +115 -0
  26. package/build/src/prompts/editor.js +19 -0
  27. package/build/src/prompts/execution.js +182 -0
  28. package/build/src/prompts/loadmill.js +60 -0
  29. package/build/src/utils/console-output.js +35 -0
  30. package/build/src/utils/run-screenshot-recorder.js +98 -0
  31. package/build/src/utils/structured-debug-log-manager.js +325 -0
  32. package/package.json +2 -1
@@ -4,6 +4,8 @@
4
4
  import { loadTest, listTests, testExists } from "../test-store/test-manager.js";
5
5
  import { ExecutionMode } from "../modes/execution-mode.js";
6
6
  import { buildExecutionModePrompt } from "../core/prompts.js";
7
+ import { buildAppContextBriefing } from "../core/app-context.js";
8
+ import { logger } from "../utils/logger.js";
7
9
  /**
8
10
  * Handle /run command
9
11
  * @param {string} args - Test name
@@ -49,6 +51,7 @@ export async function handleRun(args, session, context) {
49
51
  // Load test instructions
50
52
  addOutput({ type: 'system', text: `Loading test: ${testName}` });
51
53
  const instructions = await loadTest(testName);
54
+ const taskText = instructions.join("\n");
52
55
  addOutput({ type: 'info', text: `Loaded ${instructions.length} instructions` });
53
56
  addOutput({ type: 'info', text: '' });
54
57
  // Disable free-form input during execution (only allow commands like /exit)
@@ -69,8 +72,34 @@ export async function handleRun(args, session, context) {
69
72
  // Each test instruction should execute in isolation
70
73
  session.updateResponseId(undefined);
71
74
  session.clearMessages();
75
+ let appContextBriefing = '';
76
+ if (context.appContextPath) {
77
+ try {
78
+ const result = await buildAppContextBriefing({
79
+ contextPath: context.appContextPath,
80
+ taskText,
81
+ budget: context.appContextBudget,
82
+ });
83
+ appContextBriefing = result.briefing;
84
+ if (appContextBriefing) {
85
+ addOutput({ type: 'info', text: `Loaded app context briefing from: ${result.contextPath}` });
86
+ }
87
+ }
88
+ catch (error) {
89
+ const message = error instanceof Error ? error.message : 'Unknown app context error';
90
+ logger.error('CLI app context compaction failed', {
91
+ contextPath: context.appContextPath,
92
+ testName,
93
+ message,
94
+ });
95
+ addOutput({
96
+ type: 'warning',
97
+ text: `Warning: could not load app context from ${context.appContextPath}. Running without briefing.`,
98
+ });
99
+ }
100
+ }
72
101
  // Set execution mode system prompt (replaces any design mode prompt)
73
- const executionPrompt = buildExecutionModePrompt(session.deviceInfo);
102
+ const executionPrompt = buildExecutionModePrompt(session.deviceInfo, {}, appContextBriefing, { strictMode: Boolean(context.engine?.strictMode) });
74
103
  session.setSystemPrompt(executionPrompt);
75
104
  // Create execution mode
76
105
  const executionMode = new ExecutionMode(session, context.engine, instructions);
@@ -0,0 +1,57 @@
1
+ import path from "path";
2
+ import { access, readFile } from "fs/promises";
3
+ import { constants as fsConstants } from "fs";
4
+ import { compactAppContext } from "../device/openai.js";
5
/** File name of the app context document looked up inside a project directory. */
export const APP_CONTEXT_FILENAME = "context.md";
/** Token budget used when the caller supplies no usable number. */
export const DEFAULT_APP_CONTEXT_BUDGET = 300;
/** Lower bound enforced by normalizeAppContextBudget. */
export const MIN_APP_CONTEXT_BUDGET = 100;
/** Upper bound enforced by normalizeAppContextBudget. */
export const MAX_APP_CONTEXT_BUDGET = 2000;
/**
 * Turn an arbitrary value into a usable app context token budget.
 *
 * @param {unknown} value - Requested budget.
 * @returns {number} DEFAULT_APP_CONTEXT_BUDGET when `value` is not a finite
 *   number; otherwise `value` rounded to the nearest integer and clamped to
 *   the range [MIN_APP_CONTEXT_BUDGET, MAX_APP_CONTEXT_BUDGET].
 */
export function normalizeAppContextBudget(value) {
    const isUsableNumber = typeof value === "number" && Number.isFinite(value);
    if (!isUsableNumber) {
        return DEFAULT_APP_CONTEXT_BUDGET;
    }
    const rounded = Math.round(value);
    const atLeastMinimum = Math.max(MIN_APP_CONTEXT_BUDGET, rounded);
    return Math.min(MAX_APP_CONTEXT_BUDGET, atLeastMinimum);
}
22
/**
 * Build the default path of a project's app context document.
 *
 * @param {string} projectPath - Project directory.
 * @returns {string} `projectPath` joined with APP_CONTEXT_FILENAME.
 */
export function getDefaultProjectContextPath(projectPath) {
    const segments = [projectPath, APP_CONTEXT_FILENAME];
    return path.join(...segments);
}
25
/**
 * Read the app context document as UTF-8 text.
 *
 * The file is opened directly instead of being probed with `access()` first:
 * a separate readability check can go stale before the read happens, and
 * `readFile` already rejects when the file is missing or unreadable.
 *
 * @param {string} filePath - Path of the context document.
 * @returns {Promise<string>} The file contents.
 * @throws {Error} Rejects with the file-system error (for example `ENOENT`
 *   or `EACCES`) when the file cannot be read.
 */
export async function readAppContextFile(filePath) {
    return await readFile(filePath, "utf-8");
}
29
/**
 * Report whether the context document can be read.
 *
 * @param {string} filePath - Path to probe.
 * @returns {Promise<boolean>} `true` when the read-access check succeeds,
 *   `false` when it fails for any reason.
 */
export async function appContextFileExists(filePath) {
    const onReadable = () => true;
    const onUnreadable = () => false;
    return access(filePath, fsConstants.R_OK).then(onReadable, onUnreadable);
}
38
/**
 * Read the app context document and compact it into a briefing for a task.
 *
 * @param {Object} params
 * @param {string} [params.contextPath] - Path of the context document. When
 *   falsy, no file is read and an empty briefing is returned.
 * @param {string} [params.taskText] - Task description forwarded to the
 *   compaction request.
 * @param {number} [params.budget=DEFAULT_APP_CONTEXT_BUDGET] - Requested token
 *   budget. An explicit `0` disables the briefing; any other value is passed
 *   through normalizeAppContextBudget.
 * @returns {Promise<{briefing: string, outputTokens?: number, contextPath: string|null}>}
 *   The briefing plus the path it was built from. `outputTokens` is present
 *   only when compaction actually ran.
 * @throws {Error} Propagates failures from reading the file or from
 *   compactAppContext.
 */
export async function buildAppContextBriefing({ contextPath, taskText, budget = DEFAULT_APP_CONTEXT_BUDGET, }) {
    if (!contextPath) {
        return { briefing: "", contextPath: null };
    }
    // The zero check must look at the raw value: normalizeAppContextBudget
    // clamps everything up to the minimum budget, so a check on the
    // normalized value could never be true.
    if (budget === 0) {
        return { briefing: "", contextPath };
    }
    const tokenBudget = normalizeAppContextBudget(budget);
    const contextDocument = await readAppContextFile(contextPath);
    const result = await compactAppContext({
        contextDocument,
        taskDescription: taskText,
        tokenBudget,
    });
    return {
        briefing: result.briefing,
        outputTokens: result.outputTokens,
        contextPath,
    };
}
@@ -5,6 +5,7 @@ import { handleModelAction } from "../device/actions.js";
5
5
  import { sendCUARequest } from "../device/openai.js";
6
6
  import { emitDesktopDebug } from "../utils/desktop-debug.js";
7
7
  import { getConfiguredStepDelayMs } from "../utils/step-delay.js";
8
+ import { printCliOutput } from "../utils/console-output.js";
8
9
  function extractComputerCalls(items) {
9
10
  const entries = [];
10
11
  for (const item of items) {
@@ -30,12 +31,105 @@ function extractComputerCalls(items) {
30
31
  }
31
32
  return entries;
32
33
  }
34
/**
 * Derive the debug scope and identifier set for an event.
 *
 * @param {Object|null} context - Caller context; a truthy `sessionId` selects
 *   the "design" scope, anything else selects "execution".
 * @param {Object|null} stepContext - Supplies `stepId` and `instructionIndex`.
 * @returns {{scope: string, ids: Object}} `ids` carries `sessionId` in the
 *   design scope and `runId` in the execution scope, followed by `stepId` and
 *   `instructionIndex`.
 */
function getScopeAndIds(context = null, stepContext = null) {
    const stepIds = {
        stepId: stepContext?.stepId,
        instructionIndex: stepContext?.instructionIndex
    };
    if (context?.sessionId) {
        return {
            scope: "design",
            ids: { sessionId: context.sessionId, ...stepIds }
        };
    }
    return {
        scope: "execution",
        ids: { runId: context?.runId, ...stepIds }
    };
}
49
/**
 * Decide which actions of a model-issued chain to execute.
 *
 * Outside strict mode the chain is returned untouched. In strict mode only
 * the first action is kept; the rest are reported as dropped together with a
 * reason code and a note for the model explaining the truncation.
 *
 * @param {Array<Object>} [actions=[]] - Actions requested by the model. Any
 *   non-array value (including `null`) is treated as an empty chain.
 * @param {boolean} [strictMode=false] - Whether to truncate to one action.
 * @returns {{actionsToExecute: Array<Object>, droppedActions: Array<Object>,
 *   truncationReason: string|null, runtimeNote: string|null}}
 *   `truncationReason` and `runtimeNote` are `null` unless actions were dropped.
 */
function buildStrictModePlan(actions = [], strictMode = false) {
    // The default parameter only covers `undefined`; guard against `null` and
    // other non-array values so `.length` cannot throw.
    const requested = Array.isArray(actions) ? actions : [];
    const untruncated = (actionsToExecute, droppedActions) => ({
        actionsToExecute,
        droppedActions,
        truncationReason: null,
        runtimeNote: null
    });
    if (!strictMode || requested.length === 0) {
        return untruncated(requested, []);
    }
    const actionsToExecute = [requested[0]];
    const droppedActions = requested.slice(1);
    if (droppedActions.length === 0) {
        return untruncated(actionsToExecute, droppedActions);
    }
    const leadsWithScreenshot = actionsToExecute[0]?.type === "screenshot";
    const truncationReason = leadsWithScreenshot
        ? "leading_screenshot_reobserve"
        : "post_first_action_reobserve";
    const droppedActionTypes = droppedActions
        .map((action) => action?.type)
        .filter(Boolean);
    // Leave the parenthetical out when no dropped action has a type, so the
    // note never contains empty parentheses.
    const droppedSummary = droppedActionTypes.length > 0 ? ` (${droppedActionTypes.join(", ")})` : "";
    const runtimeNote = `Strict Mode: I executed only the first ${leadsWithScreenshot ? "screenshot request" : "action"} from your previous chain and intentionally skipped the remaining ${droppedActions.length} action${droppedActions.length === 1 ? "" : "s"}${droppedSummary} so I could re-observe the device before continuing. Base your next step only on what is visible now.`;
    return {
        actionsToExecute,
        droppedActions,
        truncationReason,
        runtimeNote
    };
}
33
83
  export class ExecutionEngine {
34
84
  constructor(session, options = {}) {
35
85
  this.session = session;
36
86
  this.recordScreenshots = options.recordScreenshots || false;
37
87
  this.screenshotDir = options.screenshotDir || null;
88
+ this.screenshotRecorder = options.screenshotRecorder || null;
89
+ this.strictMode = options.strictMode === true;
38
90
  this.stepDelayMs = getConfiguredStepDelayMs();
91
+ this.reportedScreenshotWriteError = false;
92
+ this.getScreenshotAsBase64 = options.getScreenshotAsBase64 || getScreenshotAsBase64;
93
+ this.handleModelAction = options.handleModelAction || handleModelAction;
94
+ this.sendCUARequest = options.sendCUARequest || sendCUARequest;
95
+ this.getCurrentPlatform = options.getCurrentPlatform || getCurrentPlatform;
96
+ }
97
+ async recordScreenshot(screenshotBase64, metadata = {}) {
98
+ if (typeof screenshotBase64 !== "string" || !screenshotBase64) {
99
+ return null;
100
+ }
101
+ try {
102
+ if (this.screenshotRecorder?.saveScreenshot) {
103
+ return await this.screenshotRecorder.saveScreenshot(screenshotBase64, metadata);
104
+ }
105
+ if (this.recordScreenshots && this.screenshotDir) {
106
+ const framePath = path.join(this.screenshotDir, `frame_${String(Date.now())}.png`);
107
+ await writeFile(framePath, Buffer.from(screenshotBase64, "base64"));
108
+ return framePath;
109
+ }
110
+ }
111
+ catch (error) {
112
+ if (!this.reportedScreenshotWriteError) {
113
+ this.reportedScreenshotWriteError = true;
114
+ const scope = typeof metadata.sessionId === "string" ? "design" : "execution";
115
+ const ids = scope === "design"
116
+ ? {
117
+ sessionId: metadata.sessionId,
118
+ stepId: metadata.stepId,
119
+ instructionIndex: metadata.instructionIndex
120
+ }
121
+ : {
122
+ runId: metadata.runId,
123
+ stepId: metadata.stepId,
124
+ instructionIndex: metadata.instructionIndex
125
+ };
126
+ emitDesktopDebug("run.screenshot_write.error", scope, ids, {
127
+ captureSource: metadata.captureSource ?? null,
128
+ message: error instanceof Error ? error.message : "Failed to persist screenshot"
129
+ });
130
+ }
131
+ }
132
+ return null;
39
133
  }
40
134
  /**
41
135
  * Run a full turn with the CUA model
@@ -46,8 +140,10 @@ export class ExecutionEngine {
46
140
  * @param {Object} context - Optional Ink context for output
47
141
  */
48
142
  async runFullTurn(response, trackAction = null, context = null, stepContext = null) {
49
- const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
143
+ const addOutput = context?.addOutput || printCliOutput;
50
144
  let newResponseId = response.id;
145
+ const shouldStop = () => Boolean(trackAction?.());
146
+ const { scope, ids } = getScopeAndIds(context, stepContext);
51
147
  const eventMeta = (extra = {}) => ({
52
148
  runId: context?.runId,
53
149
  stepId: stepContext?.stepId,
@@ -56,11 +152,8 @@ export class ExecutionEngine {
56
152
  });
57
153
  while (true) {
58
154
  // Check for interruption before processing next batch of actions
59
- if (trackAction) {
60
- const shouldStop = trackAction(null); // null action = pre-batch check
61
- if (shouldStop) {
62
- return newResponseId;
63
- }
155
+ if (shouldStop()) {
156
+ return newResponseId;
64
157
  }
65
158
  const items = response.output || [];
66
159
  const computerCalls = extractComputerCalls(items);
@@ -118,8 +211,12 @@ export class ExecutionEngine {
118
211
  for (const { call_id, actions } of computerCalls) {
119
212
  if (!call_id)
120
213
  continue;
214
+ const { actionsToExecute, droppedActions, truncationReason, runtimeNote } = buildStrictModePlan(actions, this.strictMode);
121
215
  let sawExplicitScreenshotAction = false;
122
- for (const action of actions) {
216
+ for (const action of actionsToExecute) {
217
+ if (shouldStop()) {
218
+ return newResponseId;
219
+ }
123
220
  if (action.type === "screenshot") {
124
221
  sawExplicitScreenshotAction = true;
125
222
  addOutput({
@@ -136,18 +233,19 @@ export class ExecutionEngine {
136
233
  });
137
234
  }
138
235
  else {
139
- await handleModelAction(this.session.deviceId, action, this.session.deviceInfo.scale, {
236
+ await this.handleModelAction(this.session.deviceId, action, this.session.deviceInfo.scale, {
140
237
  ...context,
238
+ shouldStop,
141
239
  stepId: stepContext?.stepId,
142
240
  instructionIndex: stepContext?.instructionIndex
143
241
  });
144
242
  // Track action and check for interruption
145
243
  if (trackAction) {
146
- const shouldStop = trackAction(action);
147
- if (shouldStop) {
148
- // User interrupted - stop execution immediately
149
- return newResponseId;
150
- }
244
+ trackAction(action);
245
+ }
246
+ if (shouldStop()) {
247
+ // User interrupted - stop execution immediately
248
+ return newResponseId;
151
249
  }
152
250
  // Add delay after UI-changing actions to let the interface update
153
251
  // before taking the screenshot (except for explicit wait actions which have their own delay)
@@ -156,7 +254,10 @@ export class ExecutionEngine {
156
254
  }
157
255
  }
158
256
  }
159
- const screenshotBase64 = await getScreenshotAsBase64(this.session.deviceId, this.session.deviceInfo);
257
+ if (shouldStop()) {
258
+ return newResponseId;
259
+ }
260
+ const screenshotBase64 = await this.getScreenshotAsBase64(this.session.deviceId, this.session.deviceInfo);
160
261
  emitDesktopDebug("device.screenshot", "device", {
161
262
  runId: context?.runId,
162
263
  stepId: stepContext?.stepId,
@@ -168,9 +269,30 @@ export class ExecutionEngine {
168
269
  height: this.session.deviceInfo?.scaled_height,
169
270
  base64Length: screenshotBase64.length
170
271
  });
171
- if (this.recordScreenshots && this.screenshotDir) {
172
- const framePath = path.join(this.screenshotDir, `frame_${String(Date.now())}.png`);
173
- await writeFile(framePath, Buffer.from(screenshotBase64, "base64"));
272
+ await this.recordScreenshot(screenshotBase64, {
273
+ runId: context?.runId,
274
+ sessionId: context?.sessionId,
275
+ stepId: stepContext?.stepId,
276
+ instructionIndex: stepContext?.instructionIndex,
277
+ callId: call_id,
278
+ captureSource: sawExplicitScreenshotAction ? "call-output-explicit-action" : "call-output-post-action"
279
+ });
280
+ if (runtimeNote) {
281
+ const executedActionTypes = actionsToExecute
282
+ .map((action) => action?.type)
283
+ .filter(Boolean);
284
+ const droppedActionTypes = droppedActions
285
+ .map((action) => action?.type)
286
+ .filter(Boolean);
287
+ emitDesktopDebug("cua.strict_mode.truncation", scope, ids, {
288
+ callId: call_id,
289
+ executedCount: actionsToExecute.length,
290
+ droppedCount: droppedActions.length,
291
+ executedActionTypes,
292
+ droppedActionTypes,
293
+ reason: truncationReason,
294
+ note: runtimeNote
295
+ });
174
296
  }
175
297
  // Build next input: screenshot + any carryover reasoning
176
298
  const selectedCuaModel = process.env.OPENAI_CUA_MODEL === "computer-use-preview" ? "computer-use-preview" : "gpt-5.4";
@@ -182,16 +304,25 @@ export class ExecutionEngine {
182
304
  image_url: `data:image/png;base64,${screenshotBase64}`,
183
305
  },
184
306
  ...(selectedCuaModel === "computer-use-preview"
185
- ? { current_url: getCurrentPlatform() === "ios" ? "ios://simulator" : "android://device" }
307
+ ? { current_url: this.getCurrentPlatform() === "ios" ? "ios://simulator" : "android://device" }
186
308
  : {}),
187
309
  ...(pendingSafetyChecks.length > 0 ? { acknowledged_safety_checks: pendingSafetyChecks } : {})
188
310
  }];
189
- response = await sendCUARequest({
311
+ if (runtimeNote) {
312
+ input.push({
313
+ role: "user",
314
+ content: runtimeNote
315
+ });
316
+ }
317
+ if (shouldStop()) {
318
+ return newResponseId;
319
+ }
320
+ response = await this.sendCUARequest({
190
321
  messages: input,
191
322
  previousResponseId: newResponseId,
192
323
  deviceInfo: this.session.deviceInfo,
193
324
  debugContext: {
194
- scope: context?.sessionId ? "design" : "execution",
325
+ scope,
195
326
  runId: context?.runId,
196
327
  sessionId: context?.sessionId,
197
328
  stepId: stepContext?.stepId,
@@ -1,247 +1,3 @@
1
- /**
2
- * System prompt templates for different modes
3
- */
4
- function buildCustomInstructionsSection(sections = []) {
5
- const nonEmptySections = sections
6
- .map((section) => ({
7
- title: section?.title,
8
- text: typeof section?.text === "string" ? section.text.trim() : ""
9
- }))
10
- .filter((section) => section.title && section.text);
11
- if (nonEmptySections.length === 0) {
12
- return "";
13
- }
14
- const renderedSections = nonEmptySections
15
- .map((section) => `${section.title}:\n${section.text}`)
16
- .join("\n\n");
17
- return `USER CUSTOM INSTRUCTIONS:
18
- Follow these user-configured instructions in addition to the default behavior below.
19
- Prefer these custom instructions when deciding how to behave.
20
-
21
- ${renderedSections}`;
22
- }
23
- function appendCustomSections(prompt, sections = []) {
24
- const customSection = buildCustomInstructionsSection(sections);
25
- if (!customSection) {
26
- return prompt;
27
- }
28
- return `${prompt}
29
-
30
- ${customSection}
31
- `;
32
- }
33
- function describeControlledDevice(deviceInfo = {}) {
34
- const platform = typeof deviceInfo.platform === "string" ? deviceInfo.platform.trim().toLowerCase() : "";
35
- const deviceName = typeof deviceInfo.device_name === "string" ? deviceInfo.device_name.trim() : "";
36
- if (platform === "ios") {
37
- return deviceName ? `an iOS simulator (${deviceName})` : "an iOS device";
38
- }
39
- if (platform === "android") {
40
- return deviceName ? `an Android device (${deviceName})` : "an Android device";
41
- }
42
- return "a mobile device";
43
- }
44
- export function buildBaseSystemPrompt(deviceInfo, customInstructions = {}) {
45
- const controlledDevice = describeControlledDevice(deviceInfo);
46
- const prompt = `
47
- You are controlling ${controlledDevice} in a sandboxed testing environment.
48
- Follow the user's instructions to interact with the device.
49
-
50
- The device screen has been scaled down for display.
51
- You can interact with any part of the visible phone screen, including system UI, browser UI, and app content.
52
-
53
- The screen you see is ${deviceInfo.scaled_width} x ${deviceInfo.scaled_height} pixels.
54
- Pixel (0,0) is at the top-left corner.
55
-
56
- When aiming for visual targets:
57
- - Reason carefully about the approximate pixel position.
58
- - Click precisely based on your visual estimate.
59
-
60
- Available actions: click, scroll, type, keypress, wait, screenshot.
61
-
62
- CRITICAL - Mobile Input Constraints:
63
- - This is a mobile device, not a desktop. Do NOT use desktop keyboard shortcuts or modifier chords.
64
- - NEVER emit key combinations such as CTRL+A, CMD+A, CTRL+C, CTRL+V, ALT+TAB, SHIFT+ENTER, or similar shortcuts.
65
- - Use 'keypress' only for a single mobile-safe key when absolutely necessary.
66
- - To replace text, tap into the field and type the desired value. If correction is needed, use mobile-safe deletion only.
67
- - Prefer tapping visible controls over hardware key events.
68
-
69
- CRITICAL - Automatic Timing:
70
- - After EVERY action (click, type, keypress, scroll), there is an automatic 500ms delay
71
- - This 500ms is sufficient for normal UI updates and animations
72
- - DO NOT add 'wait' actions unnecessarily - trust the automatic delay
73
-
74
- Use explicit 'wait' action ONLY in these specific cases:
75
- 1. After launching apps from home screen or app drawer
76
- 2. After pressing ENTER that triggers navigation (search, URL, form submit)
77
- 3. After clicking links that open new apps or pages
78
- 4. After actions that trigger heavy loading (camera, maps, etc.)
79
-
80
- When you MUST wait:
81
- - Click app icon from home → wait → Continue
82
- - Type in search box → Press ENTER → wait → Continue
83
- - Click link that opens new page/app → wait → Continue
84
- - Open camera/maps/heavy feature → wait → Continue
85
-
86
- When you should NOT wait (automatic 500ms handles it):
87
- - Clicking UI buttons within a running app (click button - no wait needed)
88
- - Typing in text fields (type text - no wait needed)
89
- - Scrolling (scroll - no wait needed)
90
- - Clicking tabs or menu items within an app (click - no wait needed)
91
-
92
- Rule of thumb: Wait for app launches and navigation. Everything else has automatic timing.
93
-
94
- Perform the user's requested actions within the current view.
95
-
96
- If unsure about visual elements, take a screenshot to improve your reasoning.
97
- If unsure about the user's intent, make the best decision you can based on context and continue automatically.
98
-
99
- CRITICAL - Never Ask Questions:
100
- - NEVER ask the user for confirmation, clarification, or next steps
101
- - NEVER ask questions like "Should I...", "Would you like...", "Do you want me to..."
102
- - NEVER wait for user guidance - make autonomous decisions
103
- - If stuck, try alternative approaches (go back, try different UI element, restart app)
104
- - ONLY stop when the task is complete or you've exhausted reasonable approaches
105
-
106
- Act decisively to complete the task.
107
-
108
- Stop acting once the task appears complete.
109
- Only complete the current instruction. Do not proceed beyond the current step unless asked.
110
-
111
- Mobile-Specific Notes:
112
- - ESC key maps to the Home button (return to home screen)
113
- - Use Home button (ESC) to escape from stuck situations and restart
114
- - Never use CTRL, CMD, ALT, OPTION, or SHIFT in a keypress action
115
- `;
116
- return prompt;
117
- }
118
- export function buildDesignModePrompt(deviceInfo, customInstructions = {}) {
119
- const designCustomText = typeof customInstructions.designModeInstructions === "string" ? customInstructions.designModeInstructions.trim() : "";
120
- const basePrompt = buildBaseSystemPrompt(deviceInfo, customInstructions);
121
- const prompt = `${basePrompt}
122
-
123
- DESIGN MODE:
124
- You are helping design a test script for an Android app.
125
- Some tests intentionally validate negative outcomes (errors, failures, rejected inputs). These are expected and should be treated as successful progress when they match the test goal.
126
-
127
- Your task:
128
- 1. Understand what the user wants to test from their initial instruction
129
- 2. Explore the app autonomously to understand the flows
130
- 3. Take screenshots and interact as needed to discover the UI and behavior
131
- 4. Once you've successfully completed the user's requested flow, immediately generate the test script
132
-
133
- CRITICAL - After Completing the Task:
134
- - DO NOT navigate back or away from the final screen
135
- - The final screen state is what matters for verification
136
- - Generate the test script immediately showing the current state
137
- - Use assertions to verify state, not navigation
138
- - "Check that it changed" means verify the current visual state, not navigate elsewhere
139
- - If the target validation state is visible (including expected error states), STOP actions and immediately output the final test script
140
-
141
- CRITICAL - Recognizing When You Are Stuck:
142
- If you find yourself:
143
- - Repeating similar actions multiple times (e.g., opening/closing the same app repeatedly)
144
- - Not reaching a new screen or state after several attempts
145
- - Unsure about a higher-level decision (which tab to use, which mode to enter, where to start)
146
- - Unable to find the UI element or feature the user mentioned
147
-
148
- THEN STOP ACTING IMMEDIATELY and ask the user for guidance:
149
- 1. Briefly describe what you see on screen now
150
- 2. Explain what you were trying to do and why you're stuck
151
- 3. Ask a single, concrete question to unblock the next step
152
-
153
- Example:
154
- "Chrome is open but I don't see a search bar or new tab button. Should I open a new tab, or is there a specific way you'd like me to navigate?"
155
-
156
- DO NOT continue brute-forcing the UI when stuck. The user prefers being asked over watching repeated failed attempts.
157
- DO NOT ask if the user wants a script after successfully completing the flow - just generate it automatically.
158
-
159
- CRITICAL - Off-Screen Element Discovery:
160
- - If a required element is not visible, assume it may be off-screen before changing strategy
161
- - Humans naturally scroll when UI appears cropped; do the same
162
- - Use this discovery sequence before retries or fallback navigation:
163
- 1. Scroll the screen in the likely direction to reveal hidden content
164
- 2. If still missing, do one minimal fallback (e.g., close overlay or go back once), then retry discovery
165
- - Do not repeat already-successful actions while searching for an off-screen target
166
-
167
- CRITICAL - Test Script Format Rules:
168
- - One simple instruction per line (NO numbers, NO bullets)
169
- - Use imperative commands: "Open X", "Click Y", "Type Z"
170
- - Include "assert: <condition>" lines to validate expected behavior
171
- - Normalize validation wording into assertions:
172
- - Convert "check", "verify", "ensure", "fetch", and "compare" intent into explicit "assert: ..." lines
173
- - Do not leave standalone "Check ..." or "Verify ..." lines in the final script
174
- - Merge duplicate or near-duplicate validation lines into one clear assertion
175
- - End with "exit"
176
- - Keep it simple and executable
177
- - When you generate the final result, include a suggested test name before the script
178
- - The suggested test name must be very short: prefer 2 to 4 words
179
- - Focus on the main user goal, not every assertion or detail
180
- - The suggested test name must be lowercase, kebab-case, and filename-safe
181
- - Use this exact final format:
182
- Suggested test name: short-kebab-case-name
183
-
184
- \`\`\`
185
- <test script here>
186
- \`\`\`
187
-
188
- CORRECT Example:
189
- Suggested test name: calculator-addition
190
-
191
- \`\`\`
192
- Open Calculator app
193
- assert: Calculator app is visible
194
- Type "2"
195
- Click the plus button
196
- Type "3"
197
- Click the equals button
198
- assert: result shows 5
199
- exit
200
- \`\`\`
201
-
202
- WRONG Example (DON'T DO THIS):
203
- \`\`\`
204
- 1. Open Calculator app
205
- 2. Verify the app opened
206
- 3. etc...
207
- \`\`\`
208
-
209
- Remember: You are autonomous. Explore confidently. Generate simple, executable test scripts.
210
- `;
211
- return appendCustomSections(prompt, [
212
- { title: "Base Prompt Instructions", text: customInstructions.basePromptInstructions },
213
- { title: "Design Mode Instructions", text: designCustomText }
214
- ]);
215
- }
216
- export function buildExecutionModePrompt(deviceInfo, customInstructions = {}) {
217
- const executionCustomText = typeof customInstructions.executionModeInstructions === "string" ? customInstructions.executionModeInstructions.trim() : "";
218
- const basePrompt = buildBaseSystemPrompt(deviceInfo, customInstructions);
219
- const prompt = `${basePrompt}
220
-
221
- EXECUTION MODE - Critical Behavior:
222
- You are executing test script commands one at a time. This is NOT a conversation.
223
-
224
- CRITICAL RULES:
225
- - DO NOT generate conversational text or narration
226
- - DO NOT ask questions like "What should I do next?", "Would you like...", "Can I assist...?"
227
- - DO NOT describe what you see on screen
228
- - DO NOT say "Let me know if you need help" or similar phrases
229
- - Just execute the action silently and stop immediately
230
- - Only generate text if the action FAILED or cannot be completed
231
- - Never emit desktop keyboard shortcuts or modifier combos; mobile execution only supports mobile-safe single-key presses
232
- - If target is not visible, perform bounded off-screen discovery first:
233
- 1. Scroll the screen in the likely direction to reveal hidden controls
234
- 2. If still missing, do one minimal fallback (e.g., close overlay or go back once), then retry
235
-
236
- Your process:
237
- 1. Read the instruction
238
- 2. Execute the required actions
239
- 3. Stop immediately - no commentary, no questions
240
-
241
- Each instruction is independent. Do not reference previous instructions or ask about next steps.
242
- `;
243
- return appendCustomSections(prompt, [
244
- { title: "Base Prompt Instructions", text: customInstructions.basePromptInstructions },
245
- { title: "Execution Mode Instructions", text: executionCustomText }
246
- ]);
247
- }
1
+ export { buildBaseSystemPrompt } from "../prompts/base.js";
2
+ export { buildDesignModePrompt, buildDesignRecoveryPrompt } from "../prompts/design.js";
3
+ export { buildAppContextSection, buildAssertionSystemPrompt, buildExecutionModePrompt, buildExecutionRecoveryPrompt, } from "../prompts/execution.js";
@@ -14,8 +14,8 @@ function normalizeMobileKeypress(keys = []) {
14
14
  }
15
15
  const key = String(keys[0]).trim().toUpperCase();
16
16
  const mobileKeyMap = {
17
- ESC: "KEYCODE_HOME",
18
- ESCAPE: "KEYCODE_HOME",
17
+ ESC: "KEYCODE_BACK",
18
+ ESCAPE: "KEYCODE_BACK",
19
19
  HOME: "KEYCODE_HOME",
20
20
  BACK: "KEYCODE_BACK",
21
21
  ENTER: "KEYCODE_ENTER",