@loadmill/droid-cua 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,402 @@
1
+ import { readFile } from "node:fs/promises";
2
// WebDriver hub endpoint; returned unchanged by lambdaTestAdapter.getHubUrl().
+ const HUB_URL = "https://mobile-hub.lambdatest.com/wd/hub";
3
// Endpoint queried by requestConcurrency() to validate credentials and read account limits.
+ const CONCURRENCY_URL = "https://mobile-api.lambdatest.com/mobile-automation/api/v1/org/concurrency";
4
// Region codes passed one at a time as the `region` query parameter when listing devices.
+ const DEVICE_REGIONS = ["us", "ap", "eu"];
5
// Device catalog endpoint used by requestDevicesForRegion().
+ const DEVICE_LIST_URL = "https://mobile-api.lambdatest.com/mobile-automation/api/v1/list";
6
// Multipart upload endpoint used by uploadRealDeviceApp().
+ const APP_UPLOAD_URL = "https://manual-api.lambdatest.com/app/upload/realDevice";
7
// Uploaded-app listing endpoint used by fetchUploadedApps().
+ const APP_DATA_URL = "https://manual-api.lambdatest.com/app/data";
8
/**
 * Fails loudly for adapter methods that only exist as stubs.
 *
 * @param {string} methodName - Name of the adapter method that was called.
 * @throws {Error} Always; the message names the missing method.
 */
function notImplemented(methodName) {
  const reason = `LambdaTest adapter stub: ${methodName} is not implemented yet.`;
  throw new Error(reason);
}
11
/**
 * Trims a value and returns it only when it is a non-blank string.
 *
 * @param {unknown} value - Candidate value of any type.
 * @returns {string|undefined} The trimmed string, or undefined for non-strings and blank strings.
 */
function normalizeString(value) {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}
14
/**
 * Accepts a value only when it is a non-blank string that starts with the "lt://" scheme.
 *
 * @param {unknown} value - Candidate app reference.
 * @returns {string|undefined} The trimmed reference, or undefined when it is not an "lt://" string.
 */
function normalizeRemoteAppRef(value) {
  const candidate = normalizeString(value);
  if (candidate === undefined || !candidate.startsWith("lt://")) {
    return undefined;
  }
  return candidate;
}
18
/**
 * Extracts a plan label from a string or from a nested plan-like object.
 *
 * Strings are trimmed; blank strings yield undefined. Plain objects are searched
 * recursively through the keys plan, name, label, tier and type (in that order).
 * Arrays, null and every other type yield undefined.
 *
 * @param {unknown} value - Raw plan field from an API payload.
 * @returns {string|undefined} The first non-blank label found, if any.
 */
function normalizePlanLabel(value) {
  if (typeof value === "string") {
    const trimmed = value.trim();
    return trimmed === "" ? undefined : trimmed;
  }
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return undefined;
  }
  for (const key of ["plan", "name", "label", "tier", "type"]) {
    const label = normalizePlanLabel(value[key]);
    if (label !== undefined) {
      return label;
    }
  }
  return undefined;
}
31
/**
 * Looks for a plan label under the known field names of a concurrency payload.
 *
 * @param {object} payload - Parsed response object.
 * @returns {string|undefined} The first field that normalizes to a label, or undefined.
 */
function readPlanLabel(payload) {
  // Field names are tried in this order; the first usable label wins.
  const fieldNames = [
    "plan",
    "plan_name",
    "planName",
    "plan_type",
    "account_plan",
    "accountPlan",
    "tier",
    "tier_name",
    "subscription",
    "organization_plan"
  ];
  for (const fieldName of fieldNames) {
    const label = normalizePlanLabel(payload[fieldName]);
    if (label) {
      return label;
    }
  }
  return undefined;
}
52
/**
 * Coerces a number or a numeric string into a finite number.
 *
 * @param {unknown} value - Raw value from an API payload.
 * @returns {number|undefined} The finite number, or undefined for blank strings,
 *   non-numeric strings, NaN/Infinity and every other type.
 */
function readFiniteNumber(value) {
  let numeric;
  if (typeof value === "number") {
    numeric = value;
  }
  else if (typeof value === "string" && value.trim() !== "") {
    numeric = Number(value);
  }
  else {
    return undefined;
  }
  return Number.isFinite(numeric) ? numeric : undefined;
}
64
/**
 * Looks for a parallel-session limit under the known field names of a concurrency payload.
 *
 * @param {object} payload - Parsed response object.
 * @returns {number|undefined} The first field that parses as a finite number, or undefined.
 */
function readParallelLimit(payload) {
  // Field names are tried in this order; the first finite number wins (0 is a valid hit).
  const fieldNames = [
    "max_concurrency",
    "maxConcurrency",
    "parallel_sessions",
    "parallelSessions",
    "parallel_limit",
    "parallelLimit",
    "concurrency",
    "max_sessions",
    "maxSessions"
  ];
  for (const fieldName of fieldNames) {
    const limit = readFiniteNumber(payload[fieldName]);
    if (limit !== undefined) {
      return limit;
    }
  }
  return undefined;
}
84
/**
 * Produces a one-line account summary for display.
 *
 * A message supplied by the payload (message, summary, status_message or statusMessage)
 * takes precedence. Otherwise a sentence is built from the plan and parallel limit.
 *
 * @param {object} payload - Parsed response object.
 * @param {string|undefined} plan - Plan label, if one was found.
 * @param {number|undefined} parallelLimit - Parallel session limit, if one was found.
 * @returns {string|undefined} The summary, or undefined when there is neither a message nor a limit.
 */
function readSummary(payload, plan, parallelLimit) {
  for (const fieldName of ["message", "summary", "status_message", "statusMessage"]) {
    const text = normalizeString(payload[fieldName]);
    if (text !== undefined) {
      return text;
    }
  }
  if (parallelLimit === undefined) {
    return undefined;
  }
  return plan
    ? `${plan} account with ${parallelLimit} parallel sessions available.`
    : `LambdaTest credentials validated. ${parallelLimit} parallel sessions available.`;
}
100
/**
 * Maps a free-form platform name onto "android" or "ios".
 *
 * Matching is case-insensitive and ignores surrounding whitespace; "iphone" and
 * "ipad" are treated as iOS.
 *
 * @param {unknown} value - Raw platform field.
 * @returns {"android"|"ios"|null} The canonical platform, or null when unrecognized or not a string.
 */
function normalizePlatform(value) {
  if (typeof value !== "string") {
    return null;
  }
  switch (value.trim().toLowerCase()) {
    case "android":
      return "android";
    case "ios":
    case "iphone":
    case "ipad":
      return "ios";
    default:
      return null;
  }
}
113
/**
 * Returns a copy of the given version strings ordered from newest to oldest.
 *
 * Comparison uses numeric-aware, base-sensitivity collation, so "17.10" sorts
 * ahead of "17.2". The input is not modified.
 *
 * @param {Iterable<string>} values - Version strings.
 * @returns {string[]} New array in descending order.
 */
function sortOsVersionsDescending(values) {
  const collation = { numeric: true, sensitivity: "base" };
  const ordered = [...values];
  ordered.sort((a, b) => b.localeCompare(a, undefined, collation));
  return ordered;
}
116
/**
 * Pulls the device array out of a device-list response.
 *
 * A bare array is returned as-is. For an object, the first of devices, data,
 * results or list that is an array is returned. Anything else gives an empty array.
 *
 * @param {unknown} payload - Parsed response body.
 * @returns {Array} The device entries (same array instance as in the payload), or [].
 */
function unwrapDevicePayload(payload) {
  if (Array.isArray(payload)) {
    return payload;
  }
  if (payload === null || typeof payload !== "object") {
    return [];
  }
  const wrapped = [payload.devices, payload.data, payload.results, payload.list]
    .find((candidate) => Array.isArray(candidate));
  return wrapped ?? [];
}
131
/**
 * Calls the LambdaTest concurrency endpoint and condenses the response into account info.
 *
 * @param {object} creds - Credentials passed to lambdaTestAdapter.getAuthHeader.
 * @returns {Promise<{plan: (string|undefined), parallelLimit: (number|undefined), summary: (string|undefined)}>}
 * @throws {Error} On 401/403 (credentials rejected), on any other non-OK status,
 *   or when the body is not a plain JSON object.
 */
async function requestConcurrency(creds) {
  const response = await fetch(CONCURRENCY_URL, {
    method: "GET",
    headers: { Authorization: lambdaTestAdapter.getAuthHeader(creds) }
  });
  const credentialsRejected = response.status === 401 || response.status === 403;
  if (credentialsRejected) {
    throw new Error("LambdaTest rejected these credentials. Check the username and access key and try again.");
  }
  if (!response.ok) {
    throw new Error(`LambdaTest validation failed with status ${response.status}.`);
  }
  const payload = await response.json();
  const isRecord = payload !== null && typeof payload === "object" && !Array.isArray(payload);
  if (!isRecord) {
    throw new Error("LambdaTest returned an unexpected validation response.");
  }
  const plan = readPlanLabel(payload);
  const parallelLimit = readParallelLimit(payload);
  return {
    plan,
    parallelLimit,
    summary: readSummary(payload, plan, parallelLimit)
  };
}
157
/**
 * Fetches the raw device list for a single region.
 *
 * @param {object} creds - Credentials passed to lambdaTestAdapter.getAuthHeader.
 * @param {string} region - Region code; URL-encoded into the `region` query parameter.
 * @returns {Promise<Array>} Device entries as unwrapped by unwrapDevicePayload.
 * @throws {Error} On 401/403 (credentials rejected) or any other non-OK status.
 */
async function requestDevicesForRegion(creds, region) {
  const url = `${DEVICE_LIST_URL}?region=${encodeURIComponent(region)}`;
  const response = await fetch(url, {
    method: "GET",
    headers: { Authorization: lambdaTestAdapter.getAuthHeader(creds) }
  });
  const credentialsRejected = response.status === 401 || response.status === 403;
  if (credentialsRejected) {
    throw new Error("LambdaTest rejected these credentials. Reconnect LambdaTest and try refreshing devices again.");
  }
  if (!response.ok) {
    throw new Error(`LambdaTest device catalog failed with status ${response.status}.`);
  }
  const body = await response.json();
  return unwrapDevicePayload(body);
}
172
/**
 * Builds the device catalog by querying every region in DEVICE_REGIONS in parallel.
 *
 * Entries from all regions are merged and de-duplicated on
 * `platform::deviceName::osVersion`; the first occurrence wins. Entries that are
 * not plain objects, or that lack a recognizable platform, device name or OS
 * version, are skipped.
 *
 * The result is ordered by platform, then device name, then OS version from
 * newest to oldest (numeric-aware, so "17.10" precedes "17.2").
 *
 * @param {object} creds - Credentials forwarded to requestDevicesForRegion.
 * @returns {Promise<Array<{id: string, name: string, deviceName: string, platform: string, osVersion: string}>>}
 * @throws {Error} If any single region request fails (Promise.all rejects on the first failure).
 */
async function requestDevices(creds) {
  const regionPayloads = await Promise.all(DEVICE_REGIONS.map((region) => requestDevicesForRegion(creds, region)));
  const deduped = new Map();
  for (const payload of regionPayloads.flat()) {
    if (typeof payload !== "object" || payload === null || Array.isArray(payload)) {
      continue;
    }
    // The API field names are not fixed, so each attribute is probed under several spellings.
    const platform = normalizePlatform(payload.platform) ??
      normalizePlatform(payload.platformName) ??
      normalizePlatform(payload.os) ??
      normalizePlatform(payload.os_type) ??
      normalizePlatform(payload.osType);
    const deviceName = normalizeString(payload.device_name) ??
      normalizeString(payload.deviceName) ??
      normalizeString(payload.device) ??
      normalizeString(payload.name);
    const osVersion = normalizeString(payload.platform_version) ??
      normalizeString(payload.platformVersion) ??
      normalizeString(payload.os_version) ??
      normalizeString(payload.osVersion) ??
      normalizeString(payload.version);
    if (!platform || !deviceName || !osVersion) {
      continue;
    }
    const key = `${platform}::${deviceName}::${osVersion}`;
    if (!deduped.has(key)) {
      deduped.set(key, {
        id: key,
        name: deviceName,
        deviceName,
        platform,
        osVersion
      });
    }
  }
  const versionCollation = { numeric: true, sensitivity: "base" };
  // Compare directly instead of sorting a throwaway two-element array per call.
  // This keeps the comparator consistent: versions that collate as equal yield 0
  // rather than -1 for both argument orders.
  return [...deduped.values()].sort((left, right) => left.platform.localeCompare(right.platform) ||
    left.name.localeCompare(right.name) ||
    right.osVersion.localeCompare(left.osVersion, undefined, versionCollation));
}
217
/**
 * Pulls the uploaded-app array out of an app-listing response.
 *
 * A bare array is returned as-is. For an object, the first of data, apps, files
 * or results that is an array is returned. Anything else gives an empty array.
 *
 * @param {unknown} payload - Parsed response body.
 * @returns {Array} The app entries (same array instance as in the payload), or [].
 */
function readUploadedAppEntries(payload) {
  if (Array.isArray(payload)) {
    return payload;
  }
  if (payload === null || typeof payload !== "object") {
    return [];
  }
  const wrapped = [payload.data, payload.apps, payload.files, payload.results]
    .find((candidate) => Array.isArray(candidate));
  return wrapped ?? [];
}
232
/**
 * Uploads a local app binary to LambdaTest as multipart form data.
 *
 * The whole file is read into memory and sent under the form field "appFile",
 * named after the last path segment (both "/" and "\" count as separators).
 *
 * @param {object} creds - Credentials passed to lambdaTestAdapter.getAuthHeader.
 * @param {string} localPath - Path of the app file on disk.
 * @returns {Promise<{id: string, remotePath: string}>} The "lt://" reference, plus an id
 *   that falls back to the reference when the response carries no id field.
 * @throws {Error} On 401/403, any other non-OK status, a non-object body, or a
 *   response without an "lt://" reference. File read errors propagate from readFile.
 */
async function uploadRealDeviceApp(creds, localPath) {
  const bytes = await readFile(localPath);
  const pathSegments = localPath.split(/[\\/]/);
  const fileName = pathSegments.pop() ?? "app";
  const form = new FormData();
  form.append("appFile", new Blob([new Uint8Array(bytes)]), fileName);
  const response = await fetch(APP_UPLOAD_URL, {
    method: "POST",
    headers: { Authorization: lambdaTestAdapter.getAuthHeader(creds) },
    body: form
  });
  const credentialsRejected = response.status === 401 || response.status === 403;
  if (credentialsRejected) {
    throw new Error("LambdaTest rejected these credentials. Reconnect LambdaTest and try again.");
  }
  if (!response.ok) {
    throw new Error(`LambdaTest app upload failed with status ${response.status}.`);
  }
  const payload = await response.json();
  const isRecord = payload !== null && typeof payload === "object" && !Array.isArray(payload);
  if (!isRecord) {
    throw new Error("LambdaTest returned an unexpected app upload response.");
  }
  // The reference may arrive under several field names; take the first "lt://" value.
  let remotePath;
  for (const fieldName of ["app_url", "appUrl", "appURL", "value", "id"]) {
    remotePath = normalizeRemoteAppRef(payload[fieldName]);
    if (remotePath !== undefined) {
      break;
    }
  }
  if (!remotePath) {
    throw new Error("LambdaTest did not return a valid app reference.");
  }
  let id = remotePath;
  for (const fieldName of ["app_id", "appId", "id"]) {
    const candidate = normalizeString(payload[fieldName]);
    if (candidate !== undefined) {
      id = candidate;
      break;
    }
  }
  return { id, remotePath };
}
271
/**
 * Lists the user-level uploaded apps for one platform type.
 *
 * @param {object} creds - Credentials passed to lambdaTestAdapter.getAuthHeader.
 * @param {string} platformType - Value for the `type` query parameter (URL-encoded).
 * @returns {Promise<Array>} App entries as unwrapped by readUploadedAppEntries.
 * @throws {Error} On 401/403 (credentials rejected) or any other non-OK status.
 */
async function fetchUploadedApps(creds, platformType) {
  const url = `${APP_DATA_URL}?type=${encodeURIComponent(platformType)}&level=user`;
  const response = await fetch(url, {
    method: "GET",
    headers: { Authorization: lambdaTestAdapter.getAuthHeader(creds) }
  });
  const credentialsRejected = response.status === 401 || response.status === 403;
  if (credentialsRejected) {
    throw new Error("LambdaTest rejected these credentials. Reconnect LambdaTest and try again.");
  }
  if (!response.ok) {
    throw new Error(`LambdaTest uploaded-app lookup failed with status ${response.status}.`);
  }
  const body = await response.json();
  return readUploadedAppEntries(body);
}
286
/**
 * Searches the Android and iOS uploaded-app lists for an entry matching a stored reference.
 *
 * An entry matches when its "lt://" URL equals ref.remotePath, or when its id
 * (app_id, appId, id or name) equals ref.id. Entries without an "lt://" URL are ignored.
 *
 * @param {object} creds - Credentials forwarded to fetchUploadedApps.
 * @param {{id?: string, remotePath?: string}} ref - Reference returned by an earlier upload.
 * @returns {Promise<object|null>} The raw matching entry, or null when nothing matches.
 * @throws {Error} If either platform listing request fails.
 */
async function lookupUploadedApp(creds, ref) {
  const platformTypes = ["android", "ios"];
  const perPlatform = await Promise.all(platformTypes.map((platformType) => fetchUploadedApps(creds, platformType)));
  // Returns the first field of `source` that `normalize` accepts, or undefined.
  const firstAccepted = (source, fieldNames, normalize) => {
    for (const fieldName of fieldNames) {
      const accepted = normalize(source[fieldName]);
      if (accepted !== undefined) {
        return accepted;
      }
    }
    return undefined;
  };
  const match = perPlatform.flat().find((item) => {
    if (item === null || typeof item !== "object" || Array.isArray(item)) {
      return false;
    }
    const remotePath = firstAccepted(item, ["app_url", "appUrl", "appURL", "url", "value"], normalizeRemoteAppRef);
    if (!remotePath) {
      return false;
    }
    if (remotePath === ref.remotePath) {
      return true;
    }
    const customId = firstAccepted(item, ["app_id", "appId", "id", "name"], normalizeString);
    return customId !== undefined && customId === ref.id;
  });
  return match ?? null;
}
310
/**
 * Cloud provider adapter for LambdaTest real devices.
 *
 * deleteApp and setSessionStatus are stubs that always reject.
 *
 * @type {import("../adapter").CloudProviderAdapter}
 */
export const lambdaTestAdapter = {
  id: "lambdatest",
  displayName: "LambdaTest",

  /**
   * Checks credentials against the concurrency endpoint. Never throws: failures
   * are reported as `{ ok: false, message }`, with network-looking errors
   * replaced by a generic connectivity message.
   */
  async validateCredentials(creds) {
    try {
      const account = await requestConcurrency(creds);
      const message = account.summary ?? "LambdaTest credentials validated successfully.";
      return { ok: true, message, account };
    }
    catch (error) {
      const message = error instanceof Error ? error.message : "Failed to validate LambdaTest credentials.";
      const looksLikeNetworkFailure = /ENOTFOUND|fetch failed|network|timed out|ECONN/i.test(message);
      return {
        ok: false,
        message: looksLikeNetworkFailure
          ? "Could not reach LambdaTest. Check your network connection and try again."
          : message
      };
    }
  },

  /** Returns plan, parallel limit and summary; unlike validateCredentials, errors propagate. */
  async getAccountInfo(creds) {
    return requestConcurrency(creds);
  },

  /** Returns the merged, de-duplicated device catalog across all regions. */
  async getAvailableDevices(creds) {
    return requestDevices(creds);
  },

  /** Uploads a local app file and returns its `{ id, remotePath }` reference. */
  async uploadApp(creds, localPath) {
    return uploadRealDeviceApp(creds, localPath);
  },

  /** Reports whether a previously uploaded app is still listed on LambdaTest. */
  async getAppStatus(creds, ref) {
    const match = await lookupUploadedApp(creds, ref);
    if (match) {
      return {
        status: "uploaded",
        message: "Uploaded app reference is still available on LambdaTest."
      };
    }
    return {
      status: "missing",
      message: "Uploaded app reference is missing or has expired on LambdaTest."
    };
  },

  /** Stub: always rejects. */
  async deleteApp(_creds, _ref) {
    return notImplemented("deleteApp");
  },

  /**
   * Builds the `lt:options` capability object for a session. Android sessions
   * add `visual` and `devicelog`; every other platform sets `network: false`.
   */
  buildCapabilities(opts) {
    const platformLabel = opts.platform === "ios" ? "iOS" : "Android";
    const platformExtras = opts.platform === "android"
      ? { visual: true, devicelog: true }
      : { network: false };
    return {
      "lt:options": {
        platformName: opts.platform,
        deviceName: opts.deviceName,
        platformVersion: opts.osVersion,
        app: opts.app,
        isRealMobile: true,
        w3c: true,
        build: opts.buildName ?? "droid-cua",
        name: opts.sessionName ?? `${opts.deviceName} ${platformLabel} ${opts.osVersion ?? ""}`.trim(),
        video: true,
        console: true,
        ...platformExtras
      }
    };
  },

  /** Returns the WebDriver hub URL. */
  getHubUrl() {
    return HUB_URL;
  },

  /** Builds an HTTP Basic header; non-string username/accessKey are treated as empty. */
  getAuthHeader(creds) {
    const asText = (value) => (typeof value === "string" ? value : "");
    const pair = `${asText(creds.username)}:${asText(creds.accessKey)}`;
    return `Basic ${Buffer.from(pair).toString("base64")}`;
  },

  /** Returns the dashboard URL for a session; no request is made. */
  async getSessionArtifacts(_creds, sessionId) {
    const dashboardUrl = `https://automation.lambdatest.com/logs/?sessionID=${encodeURIComponent(sessionId)}`;
    return { dashboardUrl };
  },

  /** Stub: always rejects. */
  async setSessionStatus(_creds, _sessionId, _status) {
    return notImplemented("setSessionStatus");
  }
};
@@ -1,5 +1,6 @@
1
1
  import { browserStackAdapter } from "./browserstack/adapter.js";
2
- const availableAdapters = [browserStackAdapter];
2
+ import { lambdaTestAdapter } from "./lambdatest/adapter.js";
3
+ const availableAdapters = [browserStackAdapter, lambdaTestAdapter];
3
4
  export function listCloudProviderAdapters() {
4
5
  return availableAdapters;
5
6
  }
@@ -47,7 +47,7 @@ export const SUPPORTED_ACTIONS = [
47
47
  'type', // Enter text
48
48
  'scroll', // Scroll by (scroll_x, scroll_y)
49
49
  'drag', // Drag from start to end via path
50
- 'keypress', // Press a single mobile-safe key (ESC/ESCAPE maps to home)
50
+ 'keypress', // Press a single mobile-safe key (Android ESC/ESCAPE maps to Back; iOS ignores ESC/ESCAPE)
51
51
  'wait', // Wait for UI to settle
52
52
  'screenshot' // Capture screen (handled by engine, not backend)
53
53
  ];
@@ -16,7 +16,10 @@ function normalizeMobileKeypress(keys = []) {
16
16
  throw new Error(`Unsupported mobile key chord: ${keys.join(", ")}. Use taps and text entry instead.`);
17
17
  }
18
18
  const key = String(keys[0]).trim().toUpperCase();
19
- if (key === "ESC" || key === "ESCAPE" || key === "HOME") {
19
+ if (key === "ESC" || key === "ESCAPE") {
20
+ return { kind: "noop", originalKey: keys[0], label: "Ignored ESC key" };
21
+ }
22
+ if (key === "HOME") {
20
23
  return { kind: "button", originalKey: keys[0], mapped: "home" };
21
24
  }
22
25
  if (key === "ENTER" || key === "RETURN") {
@@ -125,7 +128,10 @@ export async function handleModelAction(simulatorId, action, scale = 1.0, contex
125
128
  }
126
129
  case "keypress": {
127
130
  const normalized = normalizeMobileKeypress(action.keys);
128
- if (normalized.kind === "button") {
131
+ if (normalized.kind === "noop") {
132
+ addOutput({ type: "info", text: `Ignoring keypress: ${normalized.originalKey}`, ...meta({ keys: [normalized.originalKey], ignored: true }) });
133
+ }
134
+ else if (normalized.kind === "button") {
129
135
  addOutput({ type: "action", text: "Pressing Home button", ...meta({ keys: [normalized.originalKey], mapped: normalized.mapped }) });
130
136
  await appium.pressButton(session.sessionId, normalized.mapped);
131
137
  }
@@ -2,6 +2,7 @@
2
2
  * Loadmill instruction handling for script execution
3
3
  */
4
4
  import { executeLoadmillCommand } from "../integrations/loadmill/index.js";
5
+ import { printCliOutput } from "../utils/console-output.js";
5
6
  function getLoadmillSiteBaseUrl() {
6
7
  const rawBaseUrl = process.env.LOADMILL_BASE_URL || "https://app.loadmill.com/api";
7
8
  return rawBaseUrl.replace(/\/api\/?$/, "");
@@ -43,7 +44,7 @@ export function extractLoadmillCommand(userInput) {
43
44
  * @returns {Promise<{success: boolean, error?: string}>}
44
45
  */
45
46
  export async function executeLoadmillInstruction(command, isHeadlessMode, context, stepContext = null) {
46
- const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
47
+ const addOutput = context?.addOutput || printCliOutput;
47
48
  const meta = {
48
49
  runId: context?.runId,
49
50
  stepId: stepContext?.stepId,
@@ -82,7 +83,7 @@ export async function executeLoadmillInstruction(command, isHeadlessMode, contex
82
83
  * @returns {Promise<{success: boolean, error?: string}>}
83
84
  */
84
85
  export async function handleLoadmillFailure(command, error, isHeadlessMode, context, stepContext = null, suiteRunId = null) {
85
- const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
86
+ const addOutput = context?.addOutput || printCliOutput;
86
87
  const meta = {
87
88
  runId: context?.runId,
88
89
  stepId: stepContext?.stepId,
@@ -146,7 +147,7 @@ export async function handleLoadmillFailure(command, error, isHeadlessMode, cont
146
147
  * @param {Object|null} stepContext - Optional step context metadata
147
148
  */
148
149
  export function handleLoadmillSuccess(command, result, context, stepContext = null) {
149
- const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
150
+ const addOutput = context?.addOutput || printCliOutput;
150
151
  const meta = {
151
152
  runId: context?.runId,
152
153
  stepId: stepContext?.stepId,
@@ -97,7 +97,18 @@ function mapCuaError(err, cuaModel) {
97
97
  return err;
98
98
  }
99
99
  export function isNonRetryableCuaError(err) {
100
- return err?.status === 400 && err?.type === "invalid_request_error";
100
+ const status = Number(err?.status);
101
+ const code = typeof err?.code === "string" ? err.code.toLowerCase() : "";
102
+ const type = typeof err?.type === "string" ? err.type.toLowerCase() : "";
103
+ const message = typeof err?.message === "string" ? err.message.toLowerCase() : "";
104
+ if ([401, 403, 404].includes(status)) {
105
+ return true;
106
+ }
107
+ return (code.includes("model_not_found") ||
108
+ code.includes("permission") ||
109
+ type.includes("permission") ||
110
+ message.includes("does not have access to computer-use-preview") ||
111
+ message.includes("switch to gpt-5.4 in settings > cua model"));
101
112
  }
102
113
  function getOpenAI() {
103
114
  if (!openai) {
@@ -139,6 +150,112 @@ Output only the revised test script, nothing else.`
139
150
  });
140
151
  return response.choices[0].message.content.trim();
141
152
  }
153
/**
 * Asks the model to condense an app context document down to the facts relevant to one task.
 *
 * Sends a single `responses.create` request (model "gpt-5.4", temperature 0) with a
 * fixed system prompt and a user message containing the context document and the task.
 *
 * NOTE(review): `tokenBudget` is only interpolated into the prompt text as a target;
 * the visible request sets no hard output limit — confirm that is intended.
 *
 * @param {object} params
 * @param {string} params.contextDocument - Full app context document, inserted verbatim into the user message.
 * @param {string} params.taskDescription - Test task, inserted verbatim into the user message.
 * @param {number} params.tokenBudget - Target size quoted to the model in the system prompt.
 * @returns {Promise<{briefing: string, outputTokens: (number|null)}>} The trimmed model output
 *   and the reported output token count.
 */
+ export async function compactAppContext({ contextDocument, taskDescription, tokenBudget }) {
154
+ const response = await getOpenAI().responses.create({
155
+ model: "gpt-5.4",
156
+ temperature: 0,
157
+ input: [
158
+ {
159
+ role: "system",
160
+ content: [{
161
+ type: "input_text",
162
+ text: `You are compressing an app context document for a mobile testing agent.
163
+
164
+ You will receive:
165
+ 1. A context document
166
+ 2. A test task
167
+
168
+ Your job is to SELECT only the facts from the context document that are useful for the given task.
169
+ The output will be injected into a system prompt with a strict token budget.
170
+
171
+ CRITICAL:
172
+ - Use only facts explicitly supported by the context document
173
+ - Never invent, infer, normalize, substitute, or improve credentials, labels, screen names, button names, or numeric values
174
+ - Preserve exact values verbatim when present in the source
175
+ - Prefer facts that help the agent act correctly when they are not obvious from the task alone
176
+ - Do not restate, paraphrase, summarize, or reorganize the test task
177
+ - The output must not read like instructions or a test plan
178
+ - Do not describe what the agent should do
179
+ - Output only reference knowledge about the app
180
+ - If a line could be copied from the task with minor wording changes, omit it
181
+ - Prefer copying source facts verbatim or near-verbatim over rewriting them
182
+ - Do not collapse multiple specific source facts into one generic summary if that removes useful distinctions
183
+
184
+ Selection priority:
185
+ 1. Facts the agent would NOT know from the test script alone
186
+ 2. Facts that are hard to infer from screenshots
187
+ 3. Non-obvious navigation or interaction details
188
+ 4. Exact visible labels needed to act correctly
189
+ 5. Credentials and other exact values
190
+
191
+ High-value facts:
192
+ - exact UI labels
193
+ - how state, mode, or account selection is performed
194
+ - where logout is located
195
+ - hidden or non-obvious navigation
196
+ - which menu items are decorative or non-functional
197
+ - screen titles and section labels used to confirm location
198
+ - exact credentials and role labels
199
+
200
+ Low-value facts:
201
+ - restating the test steps
202
+ - repeating literal values already present in the task
203
+ - generic summaries like "approve the transaction"
204
+
205
+ When the task involves authentication, switching state or mode, opening menus, or moving between major areas of the app, strongly prefer including:
206
+ - how account, state, or mode selection is performed
207
+ - exact visible labels for the relevant controls
208
+ - where exit or sign-out actions are located
209
+ - the screen or section labels that confirm the agent is in the right place
210
+
211
+ Rules:
212
+ - Output plain text only
213
+ - No markdown, no bullet symbols, no numbering, no headers
214
+ - Use terse, factual language: one fact per line, no filler words
215
+ - Blank lines only to separate logical groups
216
+ - Prefer exact visible UI labels over summaries
217
+ - Do not describe step-by-step procedures
218
+ - Do not restate the test workflow
219
+ - State only facts about screens, elements, hidden interactions, entities, credentials, and navigation
220
+ - If a useful fact is not explicitly stated in the context document, omit it
221
+ - Include only information relevant to this task
222
+ - Do not waste space repeating the task itself
223
+ - If the task already states a value or action, include it only when the context adds non-obvious execution details
224
+ - Return a short result or an empty string if little is relevant
225
+ - Target: under ${tokenBudget} tokens
226
+
227
+ Bad output patterns to avoid:
228
+ - generic summaries that remove actionable details
229
+ - lines that restate the task in generic prose
230
+ - lines that describe obvious workflow steps instead of app knowledge
231
+ - lines that replace exact source labels or mechanisms with broad summaries
232
+
233
+ Good output characteristics:
234
+ - preserves the exact label or mechanism from the source when it matters
235
+ - keeps distinctions like dropdown vs tabs, drawer vs visible button, exact section titles, exact button text
236
+ - includes hidden or non-obvious navigation details when relevant
237
+
238
+ Return only the briefing text.`
239
+ }]
240
+ },
241
+ {
242
+ role: "user",
243
+ content: [{
244
+ type: "input_text",
245
+ text: `APP CONTEXT DOCUMENT:
246
+ ${contextDocument}
247
+
248
+ TASK:
249
+ ${taskDescription}`
250
+ }]
251
+ }
252
+ ]
253
+ });
254
// Missing or malformed fields are normalized: a non-string output_text becomes "",
// and a non-numeric usage.output_tokens becomes null.
+ return {
255
+ briefing: typeof response.output_text === "string" ? response.output_text.trim() : "",
256
+ outputTokens: typeof response.usage?.output_tokens === "number" ? response.usage.output_tokens : null,
257
+ };
258
+ }
142
259
  export async function sendCUARequest({ messages, screenshotBase64, previousResponseId, callId, deviceInfo, debugContext, }) {
143
260
  const cuaModel = getSelectedCuaModel();
144
261
  const includeInitialScreenshot = cuaModel === "computer-use-preview" && !previousResponseId && !callId;
@@ -1,9 +1,10 @@
1
1
  import { getScreenshotAsBase64, connectToDevice, getDeviceInfo, getCurrentPlatform } from "../device/connection.js";
2
- import { isNonRetryableCuaError, sendCUARequest } from "../device/openai.js";
2
+ import { sendCUARequest } from "../device/openai.js";
3
3
  import { isAssertion, extractAssertionPrompt, buildAssertionSystemPrompt, checkAssertionResult, handleAssertionFailure, handleAssertionSuccess, } from "../device/assertions.js";
4
4
  import { isLoadmillInstruction, extractLoadmillCommand, executeLoadmillInstruction, } from "../device/loadmill.js";
5
5
  import { logger } from "../utils/logger.js";
6
6
  import { emitDesktopDebug } from "../utils/desktop-debug.js";
7
+ import { printCliOutput } from "../utils/console-output.js";
7
8
  /**
8
9
  * Execution Mode - Run test scripts line-by-line
9
10
  * Each instruction is executed in isolation (messages cleared after each turn)
@@ -84,7 +85,7 @@ export class ExecutionMode {
84
85
  ...context,
85
86
  runId: context.runId || `run-${Date.now()}`
86
87
  };
87
- const addOutput = runContext.addOutput || ((item) => console.log(item.text || item));
88
+ const addOutput = runContext.addOutput || printCliOutput;
88
89
  // Start timing
89
90
  this.stats.startTime = Date.now();
90
91
  for (let i = 0; i < this.instructions.length; i++) {
@@ -172,7 +173,7 @@ export class ExecutionMode {
172
173
  */
173
174
  async executeInstruction(instruction, context, retryCount = 0, stepContext = null) {
174
175
  const MAX_RETRIES = 10;
175
- const addOutput = context.addOutput || ((item) => console.log(item.text || item));
176
+ const addOutput = context.addOutput || printCliOutput;
176
177
  // ── Check for Loadmill instruction ──
177
178
  if (isLoadmillInstruction(instruction)) {
178
179
  const loadmillCommand = extractLoadmillCommand(instruction);
@@ -203,6 +204,12 @@ export class ExecutionMode {
203
204
  }
204
205
  try {
205
206
  const screenshotBase64 = await getScreenshotAsBase64(this.session.deviceId, this.session.deviceInfo);
207
+ await this.engine.recordScreenshot?.(screenshotBase64, {
208
+ runId: context?.runId,
209
+ stepId: stepContext?.stepId,
210
+ instructionIndex: stepContext?.instructionIndex,
211
+ captureSource: isAssertionStep ? "instruction-input-assertion" : "instruction-input"
212
+ });
206
213
  // When continuing with previousResponseId, only send the new instruction
207
214
  // The server already has full context from previous responses
208
215
  let messagesToSend;
@@ -228,11 +235,11 @@ export class ExecutionMode {
228
235
  }
229
236
  });
230
237
  // Track actions for stats
231
- const trackAction = (action) => {
238
+ const trackAction = (action = null) => {
232
239
  if (action && action.type !== 'screenshot') {
233
240
  this.stats.actionCount++;
234
241
  }
235
- return false; // Don't stop execution
242
+ return this.shouldStop;
236
243
  };
237
244
  const newResponseId = await this.engine.runFullTurn(response, trackAction, context, stepContext);
238
245
  this.session.updateResponseId(newResponseId);
@@ -307,19 +314,7 @@ export class ExecutionMode {
307
314
  error: err.error,
308
315
  stack: err.stack
309
316
  });
310
- const addOutput = context.addOutput || ((item) => console.log(item.text || item));
311
- if (isNonRetryableCuaError(err)) {
312
- const message = `CUA request was rejected by the API: ${err.message}`;
313
- this.emit(addOutput, 'error', message, context, stepContext, {
314
- eventType: 'error',
315
- payload: {
316
- message: err.message,
317
- status: err.status,
318
- type: err.type
319
- }
320
- });
321
- return { success: false, error: message };
322
- }
317
+ const addOutput = context.addOutput || printCliOutput;
323
318
  // Check if we've exceeded max retries
324
319
  if (retryCount >= MAX_RETRIES) {
325
320
  emitDesktopDebug("reconnect.attempt", "device", {