@loadmill/droid-cua 1.1.1 → 2.0.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/README.md +71 -197
- package/build/index.js +2 -0
- package/build/src/cli/app.js +60 -3
- package/build/src/cli/components/CommandSuggestions.js +46 -6
- package/build/src/cli/components/OutputPanel.js +16 -0
- package/build/src/cli/device-selector.js +55 -28
- package/build/src/commands/help.js +4 -3
- package/build/src/core/execution-engine.js +127 -25
- package/build/src/core/prompts.js +71 -10
- package/build/src/device/actions.js +1 -1
- package/build/src/device/android/actions.js +97 -20
- package/build/src/device/android/connection.js +176 -73
- package/build/src/device/android/tools.js +21 -0
- package/build/src/device/assertions.js +28 -6
- package/build/src/device/connection.js +2 -2
- package/build/src/device/factory.js +1 -1
- package/build/src/device/interface.js +6 -2
- package/build/src/device/ios/actions.js +87 -26
- package/build/src/device/ios/appium-server.js +62 -8
- package/build/src/device/ios/connection.js +41 -3
- package/build/src/device/loadmill.js +66 -17
- package/build/src/device/openai.js +84 -73
- package/build/src/integrations/loadmill/client.js +24 -3
- package/build/src/integrations/loadmill/executor.js +2 -2
- package/build/src/integrations/loadmill/interpreter.js +11 -7
- package/build/src/modes/design-mode-ink.js +13 -0
- package/build/src/modes/design-mode.js +9 -0
- package/build/src/modes/execution-mode.js +225 -29
- package/build/src/test-store/test-manager.js +12 -4
- package/build/src/utils/cua-debug-tracer.js +362 -0
- package/build/src/utils/desktop-debug.js +36 -0
- package/package.json +1 -1
|
@@ -51,49 +51,76 @@ function renderSelection(title, items) {
|
|
|
51
51
|
});
|
|
52
52
|
}
|
|
53
53
|
/**
|
|
54
|
-
* Get list
|
|
54
|
+
* Get a unified Android list: connected ADB devices + launchable AVDs
|
|
55
55
|
*/
|
|
56
56
|
async function getAndroidDevices() {
|
|
57
|
-
const
|
|
58
|
-
|
|
57
|
+
const connectedDevices = [];
|
|
58
|
+
const availableAvds = [];
|
|
59
|
+
const avdInventory = new Set();
|
|
60
|
+
const connectedAvdNames = new Set();
|
|
59
61
|
try {
|
|
60
62
|
const { stdout: adbOutput } = await execAsync("adb devices");
|
|
61
|
-
const
|
|
62
|
-
.trim()
|
|
63
|
+
const connectedIds = adbOutput
|
|
63
64
|
.split("\n")
|
|
64
65
|
.slice(1)
|
|
65
|
-
.map((line) => line.
|
|
66
|
-
.filter(
|
|
67
|
-
|
|
66
|
+
.map((line) => line.trim())
|
|
67
|
+
.filter(Boolean)
|
|
68
|
+
.map((line) => {
|
|
69
|
+
const [id, state] = line.split("\t");
|
|
70
|
+
return { id: id?.trim() || "", state: state?.trim() || "" };
|
|
71
|
+
})
|
|
72
|
+
.filter((entry) => entry.id.length > 0 && entry.state === "device");
|
|
73
|
+
for (const entry of connectedIds) {
|
|
74
|
+
const id = entry.id;
|
|
75
|
+
let model = "";
|
|
76
|
+
let avdName = "";
|
|
68
77
|
try {
|
|
69
|
-
const { stdout } = await execAsync(`adb -s ${id}
|
|
70
|
-
|
|
71
|
-
devices.push({
|
|
72
|
-
label: `${name} (running)`,
|
|
73
|
-
value: name,
|
|
74
|
-
running: true,
|
|
75
|
-
});
|
|
78
|
+
const { stdout } = await execAsync(`adb -s "${id}" shell getprop ro.product.model`);
|
|
79
|
+
model = stdout.trim();
|
|
76
80
|
}
|
|
77
81
|
catch { }
|
|
82
|
+
if (id.startsWith("emulator-")) {
|
|
83
|
+
try {
|
|
84
|
+
const { stdout } = await execAsync(`adb -s "${id}" emu avd name`);
|
|
85
|
+
avdName = stdout.trim();
|
|
86
|
+
}
|
|
87
|
+
catch { }
|
|
88
|
+
}
|
|
89
|
+
if (avdName) {
|
|
90
|
+
connectedAvdNames.add(avdName);
|
|
91
|
+
}
|
|
92
|
+
connectedDevices.push({
|
|
93
|
+
label: model ? `${model} (${id})` : id,
|
|
94
|
+
value: `adb:${id}`,
|
|
95
|
+
running: true,
|
|
96
|
+
});
|
|
78
97
|
}
|
|
79
98
|
}
|
|
80
99
|
catch { }
|
|
81
|
-
// Get available AVDs
|
|
82
100
|
try {
|
|
83
101
|
const { stdout } = await execAsync("emulator -list-avds");
|
|
84
|
-
const avds = stdout
|
|
102
|
+
const avds = stdout
|
|
103
|
+
.split("\n")
|
|
104
|
+
.map((line) => line.trim())
|
|
105
|
+
.filter(Boolean);
|
|
85
106
|
for (const avd of avds) {
|
|
86
|
-
|
|
87
|
-
devices.push({
|
|
88
|
-
label: avd,
|
|
89
|
-
value: avd,
|
|
90
|
-
running: false,
|
|
91
|
-
});
|
|
92
|
-
}
|
|
107
|
+
avdInventory.add(avd);
|
|
93
108
|
}
|
|
94
109
|
}
|
|
95
110
|
catch { }
|
|
96
|
-
|
|
111
|
+
for (const connectedAvdName of connectedAvdNames) {
|
|
112
|
+
avdInventory.delete(connectedAvdName);
|
|
113
|
+
}
|
|
114
|
+
for (const avdName of avdInventory) {
|
|
115
|
+
availableAvds.push({
|
|
116
|
+
label: avdName,
|
|
117
|
+
value: `avd:${avdName}`,
|
|
118
|
+
running: false,
|
|
119
|
+
});
|
|
120
|
+
}
|
|
121
|
+
connectedDevices.sort((a, b) => a.label.localeCompare(b.label));
|
|
122
|
+
availableAvds.sort((a, b) => a.label.localeCompare(b.label));
|
|
123
|
+
return [...connectedDevices, ...availableAvds];
|
|
97
124
|
}
|
|
98
125
|
/**
|
|
99
126
|
* Get list of available iOS Simulators (iPhones only)
|
|
@@ -147,7 +174,7 @@ export async function selectDevice() {
|
|
|
147
174
|
const hasIOS = iosDevices.length > 0;
|
|
148
175
|
if (!hasAndroid && !hasIOS) {
|
|
149
176
|
console.error("\nNo devices found!");
|
|
150
|
-
console.error(" Android:
|
|
177
|
+
console.error(" Android: Connect a device or create an AVD, then run droid-cua again");
|
|
151
178
|
console.error(" iOS: Xcode Simulator must be available");
|
|
152
179
|
process.exit(1);
|
|
153
180
|
}
|
|
@@ -156,7 +183,7 @@ export async function selectDevice() {
|
|
|
156
183
|
if (hasAndroid) {
|
|
157
184
|
const runningCount = androidDevices.filter((d) => d.running).length;
|
|
158
185
|
platformOptions.push({
|
|
159
|
-
label: `Android${runningCount > 0 ? ` (${runningCount}
|
|
186
|
+
label: `Android${runningCount > 0 ? ` (${runningCount} connected)` : ""} - ${androidDevices.length} target(s)`,
|
|
160
187
|
value: "android",
|
|
161
188
|
});
|
|
162
189
|
}
|
|
@@ -179,7 +206,7 @@ export async function selectDevice() {
|
|
|
179
206
|
}
|
|
180
207
|
// Select device
|
|
181
208
|
const deviceList = platform === "ios" ? iosDevices : androidDevices;
|
|
182
|
-
const deviceType = platform === "ios" ? "Simulator" : "
|
|
209
|
+
const deviceType = platform === "ios" ? "Simulator" : "Android Target";
|
|
183
210
|
let deviceName;
|
|
184
211
|
if (deviceList.length === 1) {
|
|
185
212
|
deviceName = deviceList[0].value;
|
|
@@ -16,7 +16,7 @@ export async function handleHelp(args, session, context) {
|
|
|
16
16
|
addOutput({ type: 'info', text: ' droid-cua --avd <device-name> [options]' });
|
|
17
17
|
addOutput({ type: 'info', text: '' });
|
|
18
18
|
addOutput({ type: 'info', text: 'Options:' });
|
|
19
|
-
addOutput({ type: 'info', text: ' --avd <name> Device name (Android
|
|
19
|
+
addOutput({ type: 'info', text: ' --avd <name> Device name (Android device ID/serial or iOS Simulator)' });
|
|
20
20
|
addOutput({ type: 'info', text: ' --platform <platform> Force platform: android or ios' });
|
|
21
21
|
addOutput({ type: 'info', text: ' --instructions <file> Run test file in headless mode' });
|
|
22
22
|
addOutput({ type: 'info', text: ' --record Record screenshots during execution' });
|
|
@@ -37,7 +37,7 @@ export async function handleHelp(args, session, context) {
|
|
|
37
37
|
addOutput({ type: 'info', text: ' /loadmill <command> Run Loadmill test flows using natural language' });
|
|
38
38
|
addOutput({ type: 'info', text: '' });
|
|
39
39
|
addOutput({ type: 'info', text: 'Platform Support:' });
|
|
40
|
-
addOutput({ type: 'info', text: ' Android: Uses ADB to communicate with Android
|
|
40
|
+
addOutput({ type: 'info', text: ' Android: Uses ADB to communicate with Android devices (physical or emulator)' });
|
|
41
41
|
addOutput({ type: 'info', text: ' iOS: Uses Appium + XCUITest for iOS Simulator automation' });
|
|
42
42
|
addOutput({ type: 'info', text: '' });
|
|
43
43
|
addOutput({ type: 'info', text: 'Platform Detection:' });
|
|
@@ -53,7 +53,8 @@ export async function handleHelp(args, session, context) {
|
|
|
53
53
|
addOutput({ type: 'info', text: ' Note: Appium server is auto-started when iOS platform is detected' });
|
|
54
54
|
addOutput({ type: 'info', text: '' });
|
|
55
55
|
addOutput({ type: 'info', text: 'Examples:' });
|
|
56
|
-
addOutput({ type: 'info', text: ' droid-cua --avd
|
|
56
|
+
addOutput({ type: 'info', text: ' droid-cua --avd adb:emulator-5554 (Connected Android target by adb serial)' });
|
|
57
|
+
addOutput({ type: 'info', text: ' droid-cua --avd avd:Pixel_8_API_35 (Launch Android AVD then connect)' });
|
|
57
58
|
addOutput({ type: 'info', text: ' droid-cua --avd "iPhone 16" (iOS Simulator, auto-detected)' });
|
|
58
59
|
addOutput({ type: 'info', text: ' droid-cua --platform ios --avd MySim (Force iOS platform)' });
|
|
59
60
|
addOutput({ type: 'info', text: ' /create login-test (design a new test)' });
|
|
@@ -3,6 +3,32 @@ import { writeFile } from "fs/promises";
|
|
|
3
3
|
import { getScreenshotAsBase64, getCurrentPlatform } from "../device/connection.js";
|
|
4
4
|
import { handleModelAction } from "../device/actions.js";
|
|
5
5
|
import { sendCUARequest } from "../device/openai.js";
|
|
6
|
+
import { emitDesktopDebug } from "../utils/desktop-debug.js";
|
|
7
|
+
function extractComputerCalls(items) {
|
|
8
|
+
const entries = [];
|
|
9
|
+
for (const item of items) {
|
|
10
|
+
if (item?.type !== "computer_call")
|
|
11
|
+
continue;
|
|
12
|
+
const baseCallId = item.call_id || item.id;
|
|
13
|
+
if (!baseCallId)
|
|
14
|
+
continue;
|
|
15
|
+
const actions = [];
|
|
16
|
+
if (Array.isArray(item.actions)) {
|
|
17
|
+
for (const actionEntry of item.actions) {
|
|
18
|
+
if (!actionEntry)
|
|
19
|
+
continue;
|
|
20
|
+
const action = actionEntry.action || actionEntry;
|
|
21
|
+
if (action?.type)
|
|
22
|
+
actions.push(action);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
else if (item.action?.type) {
|
|
26
|
+
actions.push(item.action);
|
|
27
|
+
}
|
|
28
|
+
entries.push({ call_id: baseCallId, actions });
|
|
29
|
+
}
|
|
30
|
+
return entries;
|
|
31
|
+
}
|
|
6
32
|
export class ExecutionEngine {
|
|
7
33
|
constructor(session, options = {}) {
|
|
8
34
|
this.session = session;
|
|
@@ -17,9 +43,15 @@ export class ExecutionEngine {
|
|
|
17
43
|
* @param {Function} trackAction - Optional callback to track actions for stuck detection
|
|
18
44
|
* @param {Object} context - Optional Ink context for output
|
|
19
45
|
*/
|
|
20
|
-
async runFullTurn(response, trackAction = null, context = null) {
|
|
46
|
+
async runFullTurn(response, trackAction = null, context = null, stepContext = null) {
|
|
21
47
|
const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
|
|
22
48
|
let newResponseId = response.id;
|
|
49
|
+
const eventMeta = (extra = {}) => ({
|
|
50
|
+
runId: context?.runId,
|
|
51
|
+
stepId: stepContext?.stepId,
|
|
52
|
+
instructionIndex: stepContext?.instructionIndex,
|
|
53
|
+
...extra
|
|
54
|
+
});
|
|
23
55
|
while (true) {
|
|
24
56
|
// Check for interruption before processing next batch of actions
|
|
25
57
|
if (trackAction) {
|
|
@@ -29,7 +61,7 @@ export class ExecutionEngine {
|
|
|
29
61
|
}
|
|
30
62
|
}
|
|
31
63
|
const items = response.output || [];
|
|
32
|
-
const
|
|
64
|
+
const computerCalls = extractComputerCalls(items);
|
|
33
65
|
// ── Collect pending safety checks ──
|
|
34
66
|
const pendingSafetyChecks = items
|
|
35
67
|
.filter(item => item.type === "pending_safety_check")
|
|
@@ -39,7 +71,12 @@ export class ExecutionEngine {
|
|
|
39
71
|
if (item.type === "reasoning") {
|
|
40
72
|
for (const entry of item.summary) {
|
|
41
73
|
if (entry.type === "summary_text") {
|
|
42
|
-
addOutput({
|
|
74
|
+
addOutput({
|
|
75
|
+
type: 'reasoning',
|
|
76
|
+
text: entry.text,
|
|
77
|
+
eventType: 'reasoning',
|
|
78
|
+
...eventMeta()
|
|
79
|
+
});
|
|
43
80
|
this.session.addToTranscript(`[Reasoning] ${entry.text}`);
|
|
44
81
|
}
|
|
45
82
|
}
|
|
@@ -47,45 +84,94 @@ export class ExecutionEngine {
|
|
|
47
84
|
else if (item.type === "message") {
|
|
48
85
|
const textPart = item.content.find(c => c.type === "output_text");
|
|
49
86
|
if (textPart) {
|
|
50
|
-
addOutput({
|
|
87
|
+
addOutput({
|
|
88
|
+
type: 'assistant',
|
|
89
|
+
text: textPart.text,
|
|
90
|
+
eventType: 'assistant_message',
|
|
91
|
+
...eventMeta()
|
|
92
|
+
});
|
|
51
93
|
this.session.addToTranscript(`[Assistant] ${textPart.text}`);
|
|
52
94
|
}
|
|
53
95
|
}
|
|
54
96
|
else if (item.type === "pending_safety_check") {
|
|
55
|
-
addOutput({
|
|
97
|
+
addOutput({
|
|
98
|
+
type: 'warning',
|
|
99
|
+
text: `⚠️ Safety check: ${item.code} - ${item.message}`,
|
|
100
|
+
eventType: 'system_message',
|
|
101
|
+
...eventMeta({
|
|
102
|
+
payload: {
|
|
103
|
+
id: item.id,
|
|
104
|
+
code: item.code,
|
|
105
|
+
message: item.message
|
|
106
|
+
}
|
|
107
|
+
})
|
|
108
|
+
});
|
|
56
109
|
}
|
|
57
110
|
}
|
|
58
|
-
if (
|
|
111
|
+
if (computerCalls.length === 0) {
|
|
59
112
|
// No actions = turn complete
|
|
60
113
|
break;
|
|
61
114
|
}
|
|
62
115
|
// ── Process model actions ──
|
|
63
|
-
for (const {
|
|
64
|
-
if (
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
116
|
+
for (const { call_id, actions } of computerCalls) {
|
|
117
|
+
if (!call_id)
|
|
118
|
+
continue;
|
|
119
|
+
let sawExplicitScreenshotAction = false;
|
|
120
|
+
for (const action of actions) {
|
|
121
|
+
if (action.type === "screenshot") {
|
|
122
|
+
sawExplicitScreenshotAction = true;
|
|
123
|
+
addOutput({
|
|
124
|
+
type: 'info',
|
|
125
|
+
text: 'Capturing screen',
|
|
126
|
+
eventType: 'screenshot_captured',
|
|
127
|
+
actionType: 'screenshot',
|
|
128
|
+
...eventMeta({
|
|
129
|
+
payload: {
|
|
130
|
+
callId: call_id,
|
|
131
|
+
source: 'explicit_action'
|
|
132
|
+
}
|
|
133
|
+
})
|
|
134
|
+
});
|
|
76
135
|
}
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
136
|
+
else {
|
|
137
|
+
await handleModelAction(this.session.deviceId, action, this.session.deviceInfo.scale, {
|
|
138
|
+
...context,
|
|
139
|
+
stepId: stepContext?.stepId,
|
|
140
|
+
instructionIndex: stepContext?.instructionIndex
|
|
141
|
+
});
|
|
142
|
+
// Track action and check for interruption
|
|
143
|
+
if (trackAction) {
|
|
144
|
+
const shouldStop = trackAction(action);
|
|
145
|
+
if (shouldStop) {
|
|
146
|
+
// User interrupted - stop execution immediately
|
|
147
|
+
return newResponseId;
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
// Add delay after UI-changing actions to let the interface update
|
|
151
|
+
// before taking the screenshot (except for explicit wait actions which have their own delay)
|
|
152
|
+
if (action.type !== "wait") {
|
|
153
|
+
await new Promise(resolve => setTimeout(resolve, 500));
|
|
154
|
+
}
|
|
81
155
|
}
|
|
82
156
|
}
|
|
83
157
|
const screenshotBase64 = await getScreenshotAsBase64(this.session.deviceId, this.session.deviceInfo);
|
|
158
|
+
emitDesktopDebug("device.screenshot", "device", {
|
|
159
|
+
runId: context?.runId,
|
|
160
|
+
stepId: stepContext?.stepId,
|
|
161
|
+
instructionIndex: stepContext?.instructionIndex
|
|
162
|
+
}, {
|
|
163
|
+
source: sawExplicitScreenshotAction ? "explicit_action" : "post_action",
|
|
164
|
+
callId: call_id,
|
|
165
|
+
width: this.session.deviceInfo?.scaled_width,
|
|
166
|
+
height: this.session.deviceInfo?.scaled_height,
|
|
167
|
+
base64Length: screenshotBase64.length
|
|
168
|
+
});
|
|
84
169
|
if (this.recordScreenshots && this.screenshotDir) {
|
|
85
170
|
const framePath = path.join(this.screenshotDir, `frame_${String(Date.now())}.png`);
|
|
86
171
|
await writeFile(framePath, Buffer.from(screenshotBase64, "base64"));
|
|
87
172
|
}
|
|
88
173
|
// Build next input: screenshot + any carryover reasoning
|
|
174
|
+
const selectedCuaModel = process.env.OPENAI_CUA_MODEL === "computer-use-preview" ? "computer-use-preview" : "gpt-5.4";
|
|
89
175
|
const input = [{
|
|
90
176
|
type: "computer_call_output",
|
|
91
177
|
call_id,
|
|
@@ -93,21 +179,37 @@ export class ExecutionEngine {
|
|
|
93
179
|
type: "computer_screenshot",
|
|
94
180
|
image_url: `data:image/png;base64,${screenshotBase64}`,
|
|
95
181
|
},
|
|
96
|
-
|
|
182
|
+
...(selectedCuaModel === "computer-use-preview"
|
|
183
|
+
? { current_url: getCurrentPlatform() === "ios" ? "ios://simulator" : "android://device" }
|
|
184
|
+
: {}),
|
|
97
185
|
...(pendingSafetyChecks.length > 0 ? { acknowledged_safety_checks: pendingSafetyChecks } : {})
|
|
98
186
|
}];
|
|
99
187
|
response = await sendCUARequest({
|
|
100
188
|
messages: input,
|
|
101
189
|
previousResponseId: newResponseId,
|
|
102
190
|
deviceInfo: this.session.deviceInfo,
|
|
191
|
+
debugContext: {
|
|
192
|
+
scope: context?.sessionId ? "design" : "execution",
|
|
193
|
+
runId: context?.runId,
|
|
194
|
+
sessionId: context?.sessionId,
|
|
195
|
+
stepId: stepContext?.stepId,
|
|
196
|
+
instructionIndex: stepContext?.instructionIndex
|
|
197
|
+
}
|
|
103
198
|
});
|
|
104
199
|
newResponseId = response.id;
|
|
200
|
+
// Each tool output advances the response chain; process newly returned calls next.
|
|
201
|
+
break;
|
|
105
202
|
}
|
|
106
203
|
}
|
|
107
204
|
// ── At end, if last output was only reasoning ──
|
|
108
205
|
const finalItems = response.output || [];
|
|
109
206
|
if (finalItems.length > 0 && finalItems.at(-1).type === "reasoning") {
|
|
110
|
-
addOutput({
|
|
207
|
+
addOutput({
|
|
208
|
+
type: 'info',
|
|
209
|
+
text: 'Warning: last item was reasoning without follow-up. Dropping to avoid 400 error.',
|
|
210
|
+
eventType: 'system_message',
|
|
211
|
+
...eventMeta()
|
|
212
|
+
});
|
|
111
213
|
}
|
|
112
214
|
return newResponseId;
|
|
113
215
|
}
|
|
@@ -1,9 +1,32 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* System prompt templates for different modes
|
|
3
3
|
*/
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
4
|
+
function appendCustomSection(prompt, customText) {
|
|
5
|
+
const trimmed = typeof customText === "string" ? customText.trim() : "";
|
|
6
|
+
if (!trimmed) {
|
|
7
|
+
return prompt;
|
|
8
|
+
}
|
|
9
|
+
return `${prompt}
|
|
10
|
+
|
|
11
|
+
CUSTOM INSTRUCTIONS:
|
|
12
|
+
${trimmed}
|
|
13
|
+
`;
|
|
14
|
+
}
|
|
15
|
+
function describeControlledDevice(deviceInfo = {}) {
|
|
16
|
+
const platform = typeof deviceInfo.platform === "string" ? deviceInfo.platform.trim().toLowerCase() : "";
|
|
17
|
+
const deviceName = typeof deviceInfo.device_name === "string" ? deviceInfo.device_name.trim() : "";
|
|
18
|
+
if (platform === "ios") {
|
|
19
|
+
return deviceName ? `an iOS simulator (${deviceName})` : "an iOS device";
|
|
20
|
+
}
|
|
21
|
+
if (platform === "android") {
|
|
22
|
+
return deviceName ? `an Android device (${deviceName})` : "an Android device";
|
|
23
|
+
}
|
|
24
|
+
return "a mobile device";
|
|
25
|
+
}
|
|
26
|
+
export function buildBaseSystemPrompt(deviceInfo, customInstructions = {}) {
|
|
27
|
+
const controlledDevice = describeControlledDevice(deviceInfo);
|
|
28
|
+
const prompt = `
|
|
29
|
+
You are controlling ${controlledDevice} in a sandboxed testing environment.
|
|
7
30
|
Follow the user's instructions to interact with the device.
|
|
8
31
|
|
|
9
32
|
The device screen has been scaled down for display.
|
|
@@ -18,6 +41,13 @@ export function buildBaseSystemPrompt(deviceInfo) {
|
|
|
18
41
|
|
|
19
42
|
Available actions: click, scroll, type, keypress, wait, screenshot.
|
|
20
43
|
|
|
44
|
+
CRITICAL - Mobile Input Constraints:
|
|
45
|
+
- This is a mobile device, not a desktop. Do NOT use desktop keyboard shortcuts or modifier chords.
|
|
46
|
+
- NEVER emit key combinations such as CTRL+A, CMD+A, CTRL+C, CTRL+V, ALT+TAB, SHIFT+ENTER, or similar shortcuts.
|
|
47
|
+
- Use 'keypress' only for a single mobile-safe key when absolutely necessary.
|
|
48
|
+
- To replace text, tap into the field and type the desired value. If correction is needed, use mobile-safe deletion only.
|
|
49
|
+
- Prefer tapping visible controls over hardware key events.
|
|
50
|
+
|
|
21
51
|
CRITICAL - Automatic Timing:
|
|
22
52
|
- After EVERY action (click, type, keypress, scroll), there is an automatic 500ms delay
|
|
23
53
|
- This 500ms is sufficient for normal UI updates and animations
|
|
@@ -63,15 +93,22 @@ export function buildBaseSystemPrompt(deviceInfo) {
|
|
|
63
93
|
Mobile-Specific Notes:
|
|
64
94
|
- ESC key maps to the Home button (return to home screen)
|
|
65
95
|
- Use Home button (ESC) to escape from stuck situations and restart
|
|
66
|
-
-
|
|
96
|
+
- Never use CTRL, CMD, ALT, OPTION, or SHIFT in a keypress action
|
|
67
97
|
`;
|
|
98
|
+
return appendCustomSection(prompt, customInstructions.basePromptInstructions);
|
|
68
99
|
}
|
|
69
|
-
export function buildDesignModePrompt(deviceInfo) {
|
|
70
|
-
const
|
|
71
|
-
|
|
100
|
+
export function buildDesignModePrompt(deviceInfo, customInstructions = {}) {
|
|
101
|
+
const designCustomText = typeof customInstructions.designModeInstructions === "string" ? customInstructions.designModeInstructions.trim() : "";
|
|
102
|
+
const mergedBaseInstructions = [customInstructions.basePromptInstructions, designCustomText].filter(Boolean).join("\n\n");
|
|
103
|
+
const basePrompt = buildBaseSystemPrompt(deviceInfo, {
|
|
104
|
+
...customInstructions,
|
|
105
|
+
basePromptInstructions: mergedBaseInstructions
|
|
106
|
+
});
|
|
107
|
+
const prompt = `${basePrompt}
|
|
72
108
|
|
|
73
109
|
DESIGN MODE:
|
|
74
110
|
You are helping design a test script for an Android app.
|
|
111
|
+
Some tests intentionally validate negative outcomes (errors, failures, rejected inputs). These are expected and should be treated as successful progress when they match the test goal.
|
|
75
112
|
|
|
76
113
|
Your task:
|
|
77
114
|
1. Understand what the user wants to test from their initial instruction
|
|
@@ -85,6 +122,7 @@ CRITICAL - After Completing the Task:
|
|
|
85
122
|
- Generate the test script immediately showing the current state
|
|
86
123
|
- Use assertions to verify state, not navigation
|
|
87
124
|
- "Check that it changed" means verify the current visual state, not navigate elsewhere
|
|
125
|
+
- If the target validation state is visible (including expected error states), STOP actions and immediately output the final test script
|
|
88
126
|
|
|
89
127
|
CRITICAL - Recognizing When You Are Stuck:
|
|
90
128
|
If you find yourself:
|
|
@@ -104,10 +142,22 @@ Example:
|
|
|
104
142
|
DO NOT continue brute-forcing the UI when stuck. The user prefers being asked over watching repeated failed attempts.
|
|
105
143
|
DO NOT ask if the user wants a script after successfully completing the flow - just generate it automatically.
|
|
106
144
|
|
|
145
|
+
CRITICAL - Off-Screen Element Discovery:
|
|
146
|
+
- If a required element is not visible, assume it may be off-screen before changing strategy
|
|
147
|
+
- Humans naturally scroll when UI appears cropped; do the same
|
|
148
|
+
- Use this discovery sequence before retries or fallback navigation:
|
|
149
|
+
1. Scroll the screen in the likely direction to reveal hidden content
|
|
150
|
+
2. If still missing, do one minimal fallback (e.g., close overlay or go back once), then retry discovery
|
|
151
|
+
- Do not repeat already-successful actions while searching for an off-screen target
|
|
152
|
+
|
|
107
153
|
CRITICAL - Test Script Format Rules:
|
|
108
154
|
- One simple instruction per line (NO numbers, NO bullets)
|
|
109
155
|
- Use imperative commands: "Open X", "Click Y", "Type Z"
|
|
110
156
|
- Include "assert: <condition>" lines to validate expected behavior
|
|
157
|
+
- Normalize validation wording into assertions:
|
|
158
|
+
- Convert "check", "verify", "ensure", "fetch", and "compare" intent into explicit "assert: ..." lines
|
|
159
|
+
- Do not leave standalone "Check ..." or "Verify ..." lines in the final script
|
|
160
|
+
- Merge duplicate or near-duplicate validation lines into one clear assertion
|
|
111
161
|
- End with "exit"
|
|
112
162
|
- Keep it simple and executable
|
|
113
163
|
|
|
@@ -132,10 +182,16 @@ WRONG Example (DON'T DO THIS):
|
|
|
132
182
|
|
|
133
183
|
Remember: You are autonomous. Explore confidently. Generate simple, executable test scripts.
|
|
134
184
|
`;
|
|
185
|
+
return prompt;
|
|
135
186
|
}
|
|
136
|
-
export function buildExecutionModePrompt(deviceInfo) {
|
|
137
|
-
const
|
|
138
|
-
|
|
187
|
+
export function buildExecutionModePrompt(deviceInfo, customInstructions = {}) {
|
|
188
|
+
const executionCustomText = typeof customInstructions.executionModeInstructions === "string" ? customInstructions.executionModeInstructions.trim() : "";
|
|
189
|
+
const mergedBaseInstructions = [customInstructions.basePromptInstructions, executionCustomText].filter(Boolean).join("\n\n");
|
|
190
|
+
const basePrompt = buildBaseSystemPrompt(deviceInfo, {
|
|
191
|
+
...customInstructions,
|
|
192
|
+
basePromptInstructions: mergedBaseInstructions
|
|
193
|
+
});
|
|
194
|
+
const prompt = `${basePrompt}
|
|
139
195
|
|
|
140
196
|
EXECUTION MODE - Critical Behavior:
|
|
141
197
|
You are executing test script commands one at a time. This is NOT a conversation.
|
|
@@ -147,6 +203,10 @@ CRITICAL RULES:
|
|
|
147
203
|
- DO NOT say "Let me know if you need help" or similar phrases
|
|
148
204
|
- Just execute the action silently and stop immediately
|
|
149
205
|
- Only generate text if the action FAILED or cannot be completed
|
|
206
|
+
- Never emit desktop keyboard shortcuts or modifier combos; mobile execution only supports mobile-safe single-key presses
|
|
207
|
+
- If target is not visible, perform bounded off-screen discovery first:
|
|
208
|
+
1. Scroll the screen in the likely direction to reveal hidden controls
|
|
209
|
+
2. If still missing, do one minimal fallback (e.g., close overlay or go back once), then retry
|
|
150
210
|
|
|
151
211
|
Your process:
|
|
152
212
|
1. Read the instruction
|
|
@@ -155,4 +215,5 @@ Your process:
|
|
|
155
215
|
|
|
156
216
|
Each instruction is independent. Do not reference previous instructions or ask about next steps.
|
|
157
217
|
`;
|
|
218
|
+
return prompt;
|
|
158
219
|
}
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
import { getDeviceBackend, getCurrentPlatform } from "./factory.js";
|
|
8
8
|
/**
|
|
9
9
|
* Handle an action from the CUA model
|
|
10
|
-
* @param {string} deviceId - The device/
|
|
10
|
+
* @param {string} deviceId - The connected device/simulator ID
|
|
11
11
|
* @param {object} action - The action to execute
|
|
12
12
|
* @param {number} scale - Scale factor for coordinates
|
|
13
13
|
* @param {object} context - Context with addOutput function
|