npm - @loadmill/droid-cua - Versions diffs - 2.2.1 → 2.3.0 - Mend

@loadmill/droid-cua 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/README.md +56 -0
package/build/index.js +169 -24
package/build/src/cli/headless-debug.js +55 -0
package/build/src/cli/headless-execution-config.js +171 -0
package/build/src/cli/ink-shell.js +8 -2
package/build/src/commands/help.js +9 -1
package/build/src/commands/run.js +30 -1
package/build/src/core/app-context.js +57 -0
package/build/src/core/execution-engine.js +67 -15
package/build/src/core/prompts.js +37 -5
package/build/src/device/android/actions.js +2 -2
package/build/src/device/assertions.js +3 -2
package/build/src/device/cloud/browserstack/adapter.js +1 -0
package/build/src/device/cloud/lambdatest/adapter.js +402 -0
package/build/src/device/cloud/registry.js +2 -1
package/build/src/device/interface.js +1 -1
package/build/src/device/ios/actions.js +8 -2
package/build/src/device/loadmill.js +4 -3
package/build/src/device/openai.js +118 -1
package/build/src/modes/execution-mode.js +13 -18
package/build/src/utils/console-output.js +35 -0
package/build/src/utils/run-screenshot-recorder.js +98 -0
package/build/src/utils/structured-debug-log-manager.js +325 -0
package/package.json +2 -1

package/build/src/device/cloud/lambdatest/adapter.js ADDED

@@ -0,0 +1,402 @@
+import { readFile } from "node:fs/promises";
+const HUB_URL = "https://mobile-hub.lambdatest.com/wd/hub";
+const CONCURRENCY_URL = "https://mobile-api.lambdatest.com/mobile-automation/api/v1/org/concurrency";
+const DEVICE_REGIONS = ["us", "ap", "eu"];
+const DEVICE_LIST_URL = "https://mobile-api.lambdatest.com/mobile-automation/api/v1/list";
+const APP_UPLOAD_URL = "https://manual-api.lambdatest.com/app/upload/realDevice";
+const APP_DATA_URL = "https://manual-api.lambdatest.com/app/data";
+function notImplemented(methodName) {
+    throw new Error(`LambdaTest adapter stub: ${methodName} is not implemented yet.`);
+}
+function normalizeString(value) {
+    return typeof value === "string" && value.trim().length > 0 ? value.trim() : undefined;
+}
+function normalizeRemoteAppRef(value) {
+    const normalized = normalizeString(value);
+    return normalized?.startsWith("lt://") ? normalized : undefined;
+}
+function normalizePlanLabel(value) {
+    if (typeof value === "string") {
+        return value.trim() || undefined;
+    }
+    if (typeof value === "object" && value !== null && !Array.isArray(value)) {
+        return (normalizePlanLabel(value.plan) ??
+            normalizePlanLabel(value.name) ??
+            normalizePlanLabel(value.label) ??
+            normalizePlanLabel(value.tier) ??
+            normalizePlanLabel(value.type));
+    }
+    return undefined;
+}
+function readPlanLabel(payload) {
+    const candidates = [
+        payload.plan,
+        payload.plan_name,
+        payload.planName,
+        payload.plan_type,
+        payload.account_plan,
+        payload.accountPlan,
+        payload.tier,
+        payload.tier_name,
+        payload.subscription,
+        payload.organization_plan
+    ];
+    for (const candidate of candidates) {
+        const normalized = normalizePlanLabel(candidate);
+        if (normalized) {
+            return normalized;
+        }
+    }
+    return undefined;
+}
+function readFiniteNumber(value) {
+    if (typeof value === "number" && Number.isFinite(value)) {
+        return value;
+    }
+    if (typeof value === "string" && value.trim().length > 0) {
+        const parsed = Number(value);
+        if (Number.isFinite(parsed)) {
+            return parsed;
+        }
+    }
+    return undefined;
+}
+function readParallelLimit(payload) {
+    const candidates = [
+        payload.max_concurrency,
+        payload.maxConcurrency,
+        payload.parallel_sessions,
+        payload.parallelSessions,
+        payload.parallel_limit,
+        payload.parallelLimit,
+        payload.concurrency,
+        payload.max_sessions,
+        payload.maxSessions
+    ];
+    for (const candidate of candidates) {
+        const normalized = readFiniteNumber(candidate);
+        if (normalized !== undefined) {
+            return normalized;
+        }
+    }
+    return undefined;
+}
+function readSummary(payload, plan, parallelLimit) {
+    const message = normalizeString(payload.message) ??
+        normalizeString(payload.summary) ??
+        normalizeString(payload.status_message) ??
+        normalizeString(payload.statusMessage);
+    if (message) {
+        return message;
+    }
+    if (plan && parallelLimit !== undefined) {
+        return `${plan} account with ${parallelLimit} parallel sessions available.`;
+    }
+    if (parallelLimit !== undefined) {
+        return `LambdaTest credentials validated. ${parallelLimit} parallel sessions available.`;
+    }
+    return undefined;
+}
+function normalizePlatform(value) {
+    if (typeof value !== "string") {
+        return null;
+    }
+    const normalized = value.trim().toLowerCase();
+    if (normalized === "android") {
+        return "android";
+    }
+    if (normalized === "ios" || normalized === "iphone" || normalized === "ipad") {
+        return "ios";
+    }
+    return null;
+}
+function sortOsVersionsDescending(values) {
+    return [...values].sort((left, right) => right.localeCompare(left, undefined, { numeric: true, sensitivity: "base" }));
+}
+function unwrapDevicePayload(payload) {
+    if (Array.isArray(payload)) {
+        return payload;
+    }
+    if (typeof payload !== "object" || payload === null) {
+        return [];
+    }
+    const candidates = [payload.devices, payload.data, payload.results, payload.list];
+    for (const candidate of candidates) {
+        if (Array.isArray(candidate)) {
+            return candidate;
+        }
+    }
+    return [];
+}
+async function requestConcurrency(creds) {
+    const response = await fetch(CONCURRENCY_URL, {
+        method: "GET",
+        headers: {
+            Authorization: lambdaTestAdapter.getAuthHeader(creds)
+        }
+    });
+    if (response.status === 401 || response.status === 403) {
+        throw new Error("LambdaTest rejected these credentials. Check the username and access key and try again.");
+    }
+    if (!response.ok) {
+        throw new Error(`LambdaTest validation failed with status ${response.status}.`);
+    }
+    const payload = await response.json();
+    if (typeof payload !== "object" || payload === null || Array.isArray(payload)) {
+        throw new Error("LambdaTest returned an unexpected validation response.");
+    }
+    const plan = readPlanLabel(payload);
+    const parallelLimit = readParallelLimit(payload);
+    const summary = readSummary(payload, plan, parallelLimit);
+    return {
+        plan,
+        parallelLimit,
+        summary
+    };
+}
+async function requestDevicesForRegion(creds, region) {
+    const response = await fetch(`${DEVICE_LIST_URL}?region=${encodeURIComponent(region)}`, {
+        method: "GET",
+        headers: {
+            Authorization: lambdaTestAdapter.getAuthHeader(creds)
+        }
+    });
+    if (response.status === 401 || response.status === 403) {
+        throw new Error("LambdaTest rejected these credentials. Reconnect LambdaTest and try refreshing devices again.");
+    }
+    if (!response.ok) {
+        throw new Error(`LambdaTest device catalog failed with status ${response.status}.`);
+    }
+    return unwrapDevicePayload(await response.json());
+}
+async function requestDevices(creds) {
+    const regionPayloads = await Promise.all(DEVICE_REGIONS.map((region) => requestDevicesForRegion(creds, region)));
+    const deduped = new Map();
+    for (const payload of regionPayloads.flat()) {
+        if (typeof payload !== "object" || payload === null || Array.isArray(payload)) {
+            continue;
+        }
+        const platform = normalizePlatform(payload.platform) ??
+            normalizePlatform(payload.platformName) ??
+            normalizePlatform(payload.os) ??
+            normalizePlatform(payload.os_type) ??
+            normalizePlatform(payload.osType);
+        const deviceName = normalizeString(payload.device_name) ??
+            normalizeString(payload.deviceName) ??
+            normalizeString(payload.device) ??
+            normalizeString(payload.name);
+        const osVersion = normalizeString(payload.platform_version) ??
+            normalizeString(payload.platformVersion) ??
+            normalizeString(payload.os_version) ??
+            normalizeString(payload.osVersion) ??
+            normalizeString(payload.version);
+        if (!platform || !deviceName || !osVersion) {
+            continue;
+        }
+        const key = `${platform}::${deviceName}::${osVersion}`;
+        if (!deduped.has(key)) {
+            deduped.set(key, {
+                id: key,
+                name: deviceName,
+                deviceName,
+                platform,
+                osVersion
+            });
+        }
+    }
+    return [...deduped.values()].sort((left, right) => {
+        if (left.platform !== right.platform) {
+            return left.platform.localeCompare(right.platform);
+        }
+        if (left.name !== right.name) {
+            return left.name.localeCompare(right.name);
+        }
+        return sortOsVersionsDescending([left.osVersion ?? "", right.osVersion ?? ""])[0] === (left.osVersion ?? "") ? -1 : 1;
+    });
+}
+function readUploadedAppEntries(payload) {
+    if (Array.isArray(payload)) {
+        return payload;
+    }
+    if (typeof payload !== "object" || payload === null) {
+        return [];
+    }
+    const candidates = [payload.data, payload.apps, payload.files, payload.results];
+    for (const candidate of candidates) {
+        if (Array.isArray(candidate)) {
+            return candidate;
+        }
+    }
+    return [];
+}
+async function uploadRealDeviceApp(creds, localPath) {
+    const fileContents = await readFile(localPath);
+    const fileName = localPath.split(/[\\/]/).pop() ?? "app";
+    const form = new FormData();
+    form.append("appFile", new Blob([new Uint8Array(fileContents)]), fileName);
+    const response = await fetch(APP_UPLOAD_URL, {
+        method: "POST",
+        headers: {
+            Authorization: lambdaTestAdapter.getAuthHeader(creds)
+        },
+        body: form
+    });
+    if (response.status === 401 || response.status === 403) {
+        throw new Error("LambdaTest rejected these credentials. Reconnect LambdaTest and try again.");
+    }
+    if (!response.ok) {
+        throw new Error(`LambdaTest app upload failed with status ${response.status}.`);
+    }
+    const payload = await response.json();
+    if (typeof payload !== "object" || payload === null || Array.isArray(payload)) {
+        throw new Error("LambdaTest returned an unexpected app upload response.");
+    }
+    const remotePath = normalizeRemoteAppRef(payload.app_url) ??
+        normalizeRemoteAppRef(payload.appUrl) ??
+        normalizeRemoteAppRef(payload.appURL) ??
+        normalizeRemoteAppRef(payload.value) ??
+        normalizeRemoteAppRef(payload.id);
+    if (!remotePath) {
+        throw new Error("LambdaTest did not return a valid app reference.");
+    }
+    const id = normalizeString(payload.app_id) ??
+        normalizeString(payload.appId) ??
+        normalizeString(payload.id) ??
+        remotePath;
+    return {
+        id,
+        remotePath
+    };
+}
+async function fetchUploadedApps(creds, platformType) {
+    const response = await fetch(`${APP_DATA_URL}?type=${encodeURIComponent(platformType)}&level=user`, {
+        method: "GET",
+        headers: {
+            Authorization: lambdaTestAdapter.getAuthHeader(creds)
+        }
+    });
+    if (response.status === 401 || response.status === 403) {
+        throw new Error("LambdaTest rejected these credentials. Reconnect LambdaTest and try again.");
+    }
+    if (!response.ok) {
+        throw new Error(`LambdaTest uploaded-app lookup failed with status ${response.status}.`);
+    }
+    return readUploadedAppEntries(await response.json());
+}
+async function lookupUploadedApp(creds, ref) {
+    const lists = await Promise.all(["android", "ios"].map((platformType) => fetchUploadedApps(creds, platformType)));
+    for (const item of lists.flat()) {
+        if (typeof item !== "object" || item === null || Array.isArray(item)) {
+            continue;
+        }
+        const remotePath = normalizeRemoteAppRef(item.app_url) ??
+            normalizeRemoteAppRef(item.appUrl) ??
+            normalizeRemoteAppRef(item.appURL) ??
+            normalizeRemoteAppRef(item.url) ??
+            normalizeRemoteAppRef(item.value);
+        if (!remotePath) {
+            continue;
+        }
+        const customId = normalizeString(item.app_id) ??
+            normalizeString(item.appId) ??
+            normalizeString(item.id) ??
+            normalizeString(item.name);
+        if (remotePath === ref.remotePath || (customId && customId === ref.id)) {
+            return item;
+        }
+    }
+    return null;
+}
+/** @type {import("../adapter").CloudProviderAdapter} */
+export const lambdaTestAdapter = {
+    id: "lambdatest",
+    displayName: "LambdaTest",
+    async validateCredentials(creds) {
+        try {
+            const account = await requestConcurrency(creds);
+            return {
+                ok: true,
+                message: account.summary ?? "LambdaTest credentials validated successfully.",
+                account
+            };
+        }
+        catch (error) {
+            const message = error instanceof Error ? error.message : "Failed to validate LambdaTest credentials.";
+            if (/ENOTFOUND|fetch failed|network|timed out|ECONN/i.test(message)) {
+                return {
+                    ok: false,
+                    message: "Could not reach LambdaTest. Check your network connection and try again."
+                };
+            }
+            return {
+                ok: false,
+                message
+            };
+        }
+    },
+    async getAccountInfo(creds) {
+        return requestConcurrency(creds);
+    },
+    async getAvailableDevices(creds) {
+        return requestDevices(creds);
+    },
+    async uploadApp(creds, localPath) {
+        return uploadRealDeviceApp(creds, localPath);
+    },
+    async getAppStatus(creds, ref) {
+        const match = await lookupUploadedApp(creds, ref);
+        if (!match) {
+            return {
+                status: "missing",
+                message: "Uploaded app reference is missing or has expired on LambdaTest."
+            };
+        }
+        return {
+            status: "uploaded",
+            message: "Uploaded app reference is still available on LambdaTest."
+        };
+    },
+    async deleteApp(_creds, _ref) {
+        return notImplemented("deleteApp");
+    },
+    buildCapabilities(opts) {
+        const ltOptions = {
+            platformName: opts.platform,
+            deviceName: opts.deviceName,
+            platformVersion: opts.osVersion,
+            app: opts.app,
+            isRealMobile: true,
+            w3c: true,
+            build: opts.buildName ?? "droid-cua",
+            name: opts.sessionName ?? `${opts.deviceName} ${opts.platform === "ios" ? "iOS" : "Android"} ${opts.osVersion ?? ""}`.trim(),
+            video: true,
+            console: true
+        };
+        if (opts.platform === "android") {
+            ltOptions.visual = true;
+            ltOptions.devicelog = true;
+        }
+        else {
+            ltOptions.network = false;
+        }
+        return {
+            "lt:options": ltOptions
+        };
+    },
+    getHubUrl() {
+        return HUB_URL;
+    },
+    getAuthHeader(creds) {
+        const username = typeof creds.username === "string" ? creds.username : "";
+        const accessKey = typeof creds.accessKey === "string" ? creds.accessKey : "";
+        return `Basic ${Buffer.from(`${username}:${accessKey}`).toString("base64")}`;
+    },
+    async getSessionArtifacts(_creds, sessionId) {
+        return {
+            dashboardUrl: `https://automation.lambdatest.com/logs/?sessionID=${encodeURIComponent(sessionId)}`
+        };
+    },
+    async setSessionStatus(_creds, _sessionId, _status) {
+        return notImplemented("setSessionStatus");
+    }
+};

package/build/src/device/cloud/registry.js CHANGED

@@ -1,5 +1,6 @@
 import { browserStackAdapter } from "./browserstack/adapter.js";
-const availableAdapters = [browserStackAdapter];
+import { lambdaTestAdapter } from "./lambdatest/adapter.js";
+const availableAdapters = [browserStackAdapter, lambdaTestAdapter];
 export function listCloudProviderAdapters() {
     return availableAdapters;
 }

package/build/src/device/interface.js CHANGED

@@ -47,7 +47,7 @@ export const SUPPORTED_ACTIONS = [
     'type', // Enter text
     'scroll', // Scroll by (scroll_x, scroll_y)
     'drag', // Drag from start to end via path
-    'keypress', // Press a single mobile-safe key (ESC/ESCAPE maps to home)
+    'keypress', // Press a single mobile-safe key (Android ESC/ESCAPE maps to Back; iOS ignores ESC/ESCAPE)
     'wait', // Wait for UI to settle
     'screenshot' // Capture screen (handled by engine, not backend)
 ];

package/build/src/device/ios/actions.js CHANGED

@@ -16,7 +16,10 @@ function normalizeMobileKeypress(keys = []) {
         throw new Error(`Unsupported mobile key chord: ${keys.join(", ")}. Use taps and text entry instead.`);
     }
     const key = String(keys[0]).trim().toUpperCase();
-    if (key === "ESC" || key === "ESCAPE" || key === "HOME") {
+    if (key === "ESC" || key === "ESCAPE") {
+        return { kind: "noop", originalKey: keys[0], label: "Ignored ESC key" };
+    }
+    if (key === "HOME") {
         return { kind: "button", originalKey: keys[0], mapped: "home" };
     }
     if (key === "ENTER" || key === "RETURN") {
@@ -125,7 +128,10 @@ export async function handleModelAction(simulatorId, action, scale = 1.0, contex
             }
             case "keypress": {
                 const normalized = normalizeMobileKeypress(action.keys);
-                if (normalized.kind === "button") {
+                if (normalized.kind === "noop") {
+                    addOutput({ type: "info", text: `Ignoring keypress: ${normalized.originalKey}`, ...meta({ keys: [normalized.originalKey], ignored: true }) });
+                }
+                else if (normalized.kind === "button") {
                     addOutput({ type: "action", text: "Pressing Home button", ...meta({ keys: [normalized.originalKey], mapped: normalized.mapped }) });
                     await appium.pressButton(session.sessionId, normalized.mapped);
                 }

package/build/src/device/loadmill.js CHANGED

@@ -2,6 +2,7 @@
  * Loadmill instruction handling for script execution
  */
 import { executeLoadmillCommand } from "../integrations/loadmill/index.js";
+import { printCliOutput } from "../utils/console-output.js";
 function getLoadmillSiteBaseUrl() {
     const rawBaseUrl = process.env.LOADMILL_BASE_URL || "https://app.loadmill.com/api";
     return rawBaseUrl.replace(/\/api\/?$/, "");
@@ -43,7 +44,7 @@ export function extractLoadmillCommand(userInput) {
  * @returns {Promise<{success: boolean, error?: string}>}
  */
 export async function executeLoadmillInstruction(command, isHeadlessMode, context, stepContext = null) {
-    const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
+    const addOutput = context?.addOutput || printCliOutput;
     const meta = {
         runId: context?.runId,
         stepId: stepContext?.stepId,
@@ -82,7 +83,7 @@ export async function executeLoadmillInstruction(command, isHeadlessMode, contex
  * @returns {Promise<{success: boolean, error?: string}>}
  */
 export async function handleLoadmillFailure(command, error, isHeadlessMode, context, stepContext = null, suiteRunId = null) {
-    const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
+    const addOutput = context?.addOutput || printCliOutput;
     const meta = {
         runId: context?.runId,
         stepId: stepContext?.stepId,
@@ -146,7 +147,7 @@ export async function handleLoadmillFailure(command, error, isHeadlessMode, cont
  * @param {Object|null} stepContext - Optional step context metadata
  */
 export function handleLoadmillSuccess(command, result, context, stepContext = null) {
-    const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
+    const addOutput = context?.addOutput || printCliOutput;
     const meta = {
         runId: context?.runId,
         stepId: stepContext?.stepId,

package/build/src/device/openai.js CHANGED

@@ -97,7 +97,18 @@ function mapCuaError(err, cuaModel) {
     return err;
 }
 export function isNonRetryableCuaError(err) {
-    return err?.status === 400 && err?.type === "invalid_request_error";
+    const status = Number(err?.status);
+    const code = typeof err?.code === "string" ? err.code.toLowerCase() : "";
+    const type = typeof err?.type === "string" ? err.type.toLowerCase() : "";
+    const message = typeof err?.message === "string" ? err.message.toLowerCase() : "";
+    if ([401, 403, 404].includes(status)) {
+        return true;
+    }
+    return (code.includes("model_not_found") ||
+        code.includes("permission") ||
+        type.includes("permission") ||
+        message.includes("does not have access to computer-use-preview") ||
+        message.includes("switch to gpt-5.4 in settings > cua model"));
 }
 function getOpenAI() {
     if (!openai) {
@@ -139,6 +150,112 @@ Output only the revised test script, nothing else.`
     });
     return response.choices[0].message.content.trim();
 }
+export async function compactAppContext({ contextDocument, taskDescription, tokenBudget }) {
+    const response = await getOpenAI().responses.create({
+        model: "gpt-5.4",
+        temperature: 0,
+        input: [
+            {
+                role: "system",
+                content: [{
+                        type: "input_text",
+                        text: `You are compressing an app context document for a mobile testing agent.
+You will receive:
+1. A context document
+2. A test task
+Your job is to SELECT only the facts from the context document that are useful for the given task.
+The output will be injected into a system prompt with a strict token budget.
+CRITICAL:
+- Use only facts explicitly supported by the context document
+- Never invent, infer, normalize, substitute, or improve credentials, labels, screen names, button names, or numeric values
+- Preserve exact values verbatim when present in the source
+- Prefer facts that help the agent act correctly when they are not obvious from the task alone
+- Do not restate, paraphrase, summarize, or reorganize the test task
+- The output must not read like instructions or a test plan
+- Do not describe what the agent should do
+- Output only reference knowledge about the app
+- If a line could be copied from the task with minor wording changes, omit it
+- Prefer copying source facts verbatim or near-verbatim over rewriting them
+- Do not collapse multiple specific source facts into one generic summary if that removes useful distinctions
+Selection priority:
+1. Facts the agent would NOT know from the test script alone
+2. Facts that are hard to infer from screenshots
+3. Non-obvious navigation or interaction details
+4. Exact visible labels needed to act correctly
+5. Credentials and other exact values
+High-value facts:
+- exact UI labels
+- how state, mode, or account selection is performed
+- where logout is located
+- hidden or non-obvious navigation
+- which menu items are decorative or non-functional
+- screen titles and section labels used to confirm location
+- exact credentials and role labels
+Low-value facts:
+- restating the test steps
+- repeating literal values already present in the task
+- generic summaries like "approve the transaction"
+When the task involves authentication, switching state or mode, opening menus, or moving between major areas of the app, strongly prefer including:
+- how account, state, or mode selection is performed
+- exact visible labels for the relevant controls
+- where exit or sign-out actions are located
+- the screen or section labels that confirm the agent is in the right place
+Rules:
+- Output plain text only
+- No markdown, no bullet symbols, no numbering, no headers
+- Use terse, factual language: one fact per line, no filler words
+- Blank lines only to separate logical groups
+- Prefer exact visible UI labels over summaries
+- Do not describe step-by-step procedures
+- Do not restate the test workflow
+- State only facts about screens, elements, hidden interactions, entities, credentials, and navigation
+- If a useful fact is not explicitly stated in the context document, omit it
+- Include only information relevant to this task
+- Do not waste space repeating the task itself
+- If the task already states a value or action, include it only when the context adds non-obvious execution details
+- Return a short result or an empty string if little is relevant
+- Target: under ${tokenBudget} tokens
+Bad output patterns to avoid:
+- generic summaries that remove actionable details
+- lines that restate the task in generic prose
+- lines that describe obvious workflow steps instead of app knowledge
+- lines that replace exact source labels or mechanisms with broad summaries
+Good output characteristics:
+- preserves the exact label or mechanism from the source when it matters
+- keeps distinctions like dropdown vs tabs, drawer vs visible button, exact section titles, exact button text
+- includes hidden or non-obvious navigation details when relevant
+Return only the briefing text.`
+                    }]
+            },
+            {
+                role: "user",
+                content: [{
+                        type: "input_text",
+                        text: `APP CONTEXT DOCUMENT:
+${contextDocument}
+TASK:
+${taskDescription}`
+                    }]
+            }
+        ]
+    });
+    return {
+        briefing: typeof response.output_text === "string" ? response.output_text.trim() : "",
+        outputTokens: typeof response.usage?.output_tokens === "number" ? response.usage.output_tokens : null,
+    };
+}
 export async function sendCUARequest({ messages, screenshotBase64, previousResponseId, callId, deviceInfo, debugContext, }) {
     const cuaModel = getSelectedCuaModel();
     const includeInitialScreenshot = cuaModel === "computer-use-preview" && !previousResponseId && !callId;

package/build/src/modes/execution-mode.js CHANGED

@@ -1,9 +1,10 @@
 import { getScreenshotAsBase64, connectToDevice, getDeviceInfo, getCurrentPlatform } from "../device/connection.js";
-import { isNonRetryableCuaError, sendCUARequest } from "../device/openai.js";
+import { sendCUARequest } from "../device/openai.js";
 import { isAssertion, extractAssertionPrompt, buildAssertionSystemPrompt, checkAssertionResult, handleAssertionFailure, handleAssertionSuccess, } from "../device/assertions.js";
 import { isLoadmillInstruction, extractLoadmillCommand, executeLoadmillInstruction, } from "../device/loadmill.js";
 import { logger } from "../utils/logger.js";
 import { emitDesktopDebug } from "../utils/desktop-debug.js";
+import { printCliOutput } from "../utils/console-output.js";
 /**
  * Execution Mode - Run test scripts line-by-line
  * Each instruction is executed in isolation (messages cleared after each turn)
@@ -84,7 +85,7 @@ export class ExecutionMode {
             ...context,
             runId: context.runId || `run-${Date.now()}`
         };
-        const addOutput = runContext.addOutput || ((item) => console.log(item.text || item));
+        const addOutput = runContext.addOutput || printCliOutput;
         // Start timing
         this.stats.startTime = Date.now();
         for (let i = 0; i < this.instructions.length; i++) {
@@ -172,7 +173,7 @@ export class ExecutionMode {
      */
     async executeInstruction(instruction, context, retryCount = 0, stepContext = null) {
         const MAX_RETRIES = 10;
-        const addOutput = context.addOutput || ((item) => console.log(item.text || item));
+        const addOutput = context.addOutput || printCliOutput;
         // ── Check for Loadmill instruction ──
         if (isLoadmillInstruction(instruction)) {
             const loadmillCommand = extractLoadmillCommand(instruction);
@@ -203,6 +204,12 @@ export class ExecutionMode {
         }
         try {
             const screenshotBase64 = await getScreenshotAsBase64(this.session.deviceId, this.session.deviceInfo);
+            await this.engine.recordScreenshot?.(screenshotBase64, {
+                runId: context?.runId,
+                stepId: stepContext?.stepId,
+                instructionIndex: stepContext?.instructionIndex,
+                captureSource: isAssertionStep ? "instruction-input-assertion" : "instruction-input"
+            });
             // When continuing with previousResponseId, only send the new instruction
             // The server already has full context from previous responses
             let messagesToSend;
@@ -228,11 +235,11 @@ export class ExecutionMode {
                 }
             });
             // Track actions for stats
-            const trackAction = (action) => {
+            const trackAction = (action = null) => {
                 if (action && action.type !== 'screenshot') {
                     this.stats.actionCount++;
                 }
-                return false; // Don't stop execution
+                return this.shouldStop;
             };
             const newResponseId = await this.engine.runFullTurn(response, trackAction, context, stepContext);
             this.session.updateResponseId(newResponseId);
@@ -307,19 +314,7 @@ export class ExecutionMode {
                 error: err.error,
                 stack: err.stack
             });
-            const addOutput = context.addOutput || ((item) => console.log(item.text || item));
-            if (isNonRetryableCuaError(err)) {
-                const message = `CUA request was rejected by the API: ${err.message}`;
-                this.emit(addOutput, 'error', message, context, stepContext, {
-                    eventType: 'error',
-                    payload: {
-                        message: err.message,
-                        status: err.status,
-                        type: err.type
-                    }
-                });
-                return { success: false, error: message };
-            }
+            const addOutput = context.addOutput || printCliOutput;
             // Check if we've exceeded max retries
             if (retryCount >= MAX_RETRIES) {
                 emitDesktopDebug("reconnect.attempt", "device", {