npm - @loadmill/droid-cua - Versions diffs - 2.2.0 → 2.2.1 - Mend

@loadmill/droid-cua 2.2.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/build/src/core/execution-engine.js +3 -1
package/build/src/core/prompts.js +34 -18
package/build/src/device/android/actions.js +2 -1
package/build/src/device/cloud/actions.js +2 -1
package/build/src/device/ios/actions.js +2 -1
package/build/src/device/openai.js +58 -11
package/build/src/modes/execution-mode.js +13 -1
package/build/src/utils/step-delay.js +20 -0
package/package.json +1 -1

package/build/src/core/execution-engine.js CHANGED

@@ -4,6 +4,7 @@ import { getScreenshotAsBase64, getCurrentPlatform } from "../device/connection.
 import { handleModelAction } from "../device/actions.js";
 import { sendCUARequest } from "../device/openai.js";
 import { emitDesktopDebug } from "../utils/desktop-debug.js";
+import { getConfiguredStepDelayMs } from "../utils/step-delay.js";
 function extractComputerCalls(items) {
     const entries = [];
     for (const item of items) {
@@ -34,6 +35,7 @@ export class ExecutionEngine {
         this.session = session;
         this.recordScreenshots = options.recordScreenshots || false;
         this.screenshotDir = options.screenshotDir || null;
+        this.stepDelayMs = getConfiguredStepDelayMs();
     }
     /**
      * Run a full turn with the CUA model
@@ -150,7 +152,7 @@ export class ExecutionEngine {
                         // Add delay after UI-changing actions to let the interface update
                         // before taking the screenshot (except for explicit wait actions which have their own delay)
                         if (action.type !== "wait") {
-                            await new Promise(resolve => setTimeout(resolve, 500));
+                            await new Promise(resolve => setTimeout(resolve, this.stepDelayMs));
                         }
                     }
                 }

package/build/src/core/prompts.js CHANGED

@@ -1,15 +1,33 @@
 /**
  * System prompt templates for different modes
  */
-function appendCustomSection(prompt, customText) {
-    const trimmed = typeof customText === "string" ? customText.trim() : "";
-    if (!trimmed) {
+function buildCustomInstructionsSection(sections = []) {
+    const nonEmptySections = sections
+        .map((section) => ({
+        title: section?.title,
+        text: typeof section?.text === "string" ? section.text.trim() : ""
+    }))
+        .filter((section) => section.title && section.text);
+    if (nonEmptySections.length === 0) {
+        return "";
+    }
+    const renderedSections = nonEmptySections
+        .map((section) => `${section.title}:\n${section.text}`)
+        .join("\n\n");
+    return `USER CUSTOM INSTRUCTIONS:
+Follow these user-configured instructions in addition to the default behavior below.
+Prefer these custom instructions when deciding how to behave.
+${renderedSections}`;
+}
+function appendCustomSections(prompt, sections = []) {
+    const customSection = buildCustomInstructionsSection(sections);
+    if (!customSection) {
         return prompt;
     }
     return `${prompt}
-CUSTOM INSTRUCTIONS:
-${trimmed}
+${customSection}
 `;
 }
 function describeControlledDevice(deviceInfo = {}) {
@@ -95,15 +113,11 @@ export function buildBaseSystemPrompt(deviceInfo, customInstructions = {}) {
   - Use Home button (ESC) to escape from stuck situations and restart
   - Never use CTRL, CMD, ALT, OPTION, or SHIFT in a keypress action
   `;
-    return appendCustomSection(prompt, customInstructions.basePromptInstructions);
+    return prompt;
 }
 export function buildDesignModePrompt(deviceInfo, customInstructions = {}) {
     const designCustomText = typeof customInstructions.designModeInstructions === "string" ? customInstructions.designModeInstructions.trim() : "";
-    const mergedBaseInstructions = [customInstructions.basePromptInstructions, designCustomText].filter(Boolean).join("\n\n");
-    const basePrompt = buildBaseSystemPrompt(deviceInfo, {
-        ...customInstructions,
-        basePromptInstructions: mergedBaseInstructions
-    });
+    const basePrompt = buildBaseSystemPrompt(deviceInfo, customInstructions);
     const prompt = `${basePrompt}
 DESIGN MODE:
@@ -194,15 +208,14 @@ WRONG Example (DON'T DO THIS):
 Remember: You are autonomous. Explore confidently. Generate simple, executable test scripts.
 `;
-    return prompt;
+    return appendCustomSections(prompt, [
+        { title: "Base Prompt Instructions", text: customInstructions.basePromptInstructions },
+        { title: "Design Mode Instructions", text: designCustomText }
+    ]);
 }
 export function buildExecutionModePrompt(deviceInfo, customInstructions = {}) {
     const executionCustomText = typeof customInstructions.executionModeInstructions === "string" ? customInstructions.executionModeInstructions.trim() : "";
-    const mergedBaseInstructions = [customInstructions.basePromptInstructions, executionCustomText].filter(Boolean).join("\n\n");
-    const basePrompt = buildBaseSystemPrompt(deviceInfo, {
-        ...customInstructions,
-        basePromptInstructions: mergedBaseInstructions
-    });
+    const basePrompt = buildBaseSystemPrompt(deviceInfo, customInstructions);
     const prompt = `${basePrompt}
 EXECUTION MODE - Critical Behavior:
@@ -227,5 +240,8 @@ Your process:
 Each instruction is independent. Do not reference previous instructions or ask about next steps.
 `;
-    return prompt;
+    return appendCustomSections(prompt, [
+        { title: "Base Prompt Instructions", text: customInstructions.basePromptInstructions },
+        { title: "Execution Mode Instructions", text: executionCustomText }
+    ]);
 }

package/build/src/device/android/actions.js CHANGED

@@ -1,5 +1,6 @@
 import { logger } from "../../utils/logger.js";
 import { emitDesktopDebug, truncateForDebug } from "../../utils/desktop-debug.js";
+import { getConfiguredStepDelayMs } from "../../utils/step-delay.js";
 import { execAdb } from "./tools.js";
 function adbShell(deviceId, command) {
     return execAdb(["-s", deviceId, "shell", command]);
@@ -115,7 +116,7 @@ export async function handleModelAction(deviceId, action, scale = 1.0, context =
                 break;
             case "wait":
                 addOutput({ type: 'action', text: 'Waiting...', ...meta({}) });
-                await new Promise(res => setTimeout(res, 1000));
+                await new Promise(res => setTimeout(res, getConfiguredStepDelayMs()));
                 break;
             default:
                 addOutput({ type: 'info', text: `Unknown action: ${JSON.stringify(action)}` });

package/build/src/device/cloud/actions.js CHANGED

@@ -1,5 +1,6 @@
 import { logger } from "../../utils/logger.js";
 import { emitDesktopDebug, truncateForDebug } from "../../utils/desktop-debug.js";
+import { getConfiguredStepDelayMs } from "../../utils/step-delay.js";
 import { getActiveSession, getDevicePixelRatio } from "./connection.js";
 function normalizeMobileKeypress(platform, keys = []) {
     if (!Array.isArray(keys) || keys.length === 0) {
@@ -140,7 +141,7 @@ export async function handleModelAction(deviceId, action, scale = 1.0, context =
             }
             case "wait":
                 addOutput({ type: "action", text: "Waiting...", ...meta({}) });
-                await new Promise((resolve) => setTimeout(resolve, 1000));
+                await new Promise((resolve) => setTimeout(resolve, getConfiguredStepDelayMs()));
                 break;
             default:
                 addOutput({ type: "info", text: `Unknown action: ${JSON.stringify(action)}` });

package/build/src/device/ios/actions.js CHANGED

@@ -7,6 +7,7 @@ import * as appium from "./appium-client.js";
 import { getActiveSession, getDevicePixelRatio } from "./connection.js";
 import { logger } from "../../utils/logger.js";
 import { emitDesktopDebug, truncateForDebug } from "../../utils/desktop-debug.js";
+import { getConfiguredStepDelayMs } from "../../utils/step-delay.js";
 function normalizeMobileKeypress(keys = []) {
     if (!Array.isArray(keys) || keys.length === 0) {
         throw new Error("Keypress action is missing keys");
@@ -136,7 +137,7 @@ export async function handleModelAction(simulatorId, action, scale = 1.0, contex
             }
             case "wait": {
                 addOutput({ type: "action", text: "Waiting...", ...meta({}) });
-                await new Promise((resolve) => setTimeout(resolve, 1000));
+                await new Promise((resolve) => setTimeout(resolve, getConfiguredStepDelayMs()));
                 break;
             }
             default:

package/build/src/device/openai.js CHANGED

@@ -6,28 +6,69 @@ const cuaDebugTracer = new CuaDebugTracer(logger);
 function getSelectedCuaModel() {
     return process.env.OPENAI_CUA_MODEL === "computer-use-preview" ? "computer-use-preview" : "gpt-5.4";
 }
+function normalizePreviewMessages(messages, screenshotBase64, includeInitialScreenshot) {
+    const normalized = messages.map((message) => {
+        if (!message?.role) {
+            return message;
+        }
+        const contentItems = Array.isArray(message.content)
+            ? message.content.map((item) => {
+                if (typeof item === "string") {
+                    return { type: "input_text", text: item };
+                }
+                return item;
+            })
+            : [{ type: "input_text", text: String(message.content ?? "") }];
+        return {
+            role: message.role,
+            content: contentItems,
+        };
+    });
+    if (!includeInitialScreenshot || !screenshotBase64) {
+        return normalized;
+    }
+    for (let index = normalized.length - 1; index >= 0; index -= 1) {
+        const item = normalized[index];
+        if (item?.role !== "user" || !Array.isArray(item.content))
+            continue;
+        item.content.push({
+            type: "input_image",
+            image_url: `data:image/png;base64,${screenshotBase64}`,
+        });
+        return normalized;
+    }
+    normalized.push({
+        role: "user",
+        content: [{
+                type: "input_image",
+                image_url: `data:image/png;base64,${screenshotBase64}`,
+            }],
+    });
+    return normalized;
+}
 function buildCuaRequestParams({ cuaModel, previousResponseId, deviceInfo, input }) {
-    const common = {
-        model: cuaModel,
-        previous_response_id: previousResponseId || undefined,
-        input,
-        store: true,
-        truncation: "auto",
-    };
     if (cuaModel === "computer-use-preview") {
         return {
-            ...common,
+            model: cuaModel,
+            previous_response_id: previousResponseId || undefined,
+            input,
+            store: true,
+            truncation: "auto",
             tools: [{
                     type: "computer_use_preview",
                     display_width: deviceInfo.scaled_width,
                     display_height: deviceInfo.scaled_height,
                     environment: "browser",
                 }],
-            reasoning: { generate_summary: "concise" },
+            reasoning: { summary: "concise" },
         };
     }
     return {
-        ...common,
+        model: cuaModel,
+        previous_response_id: previousResponseId || undefined,
+        input,
+        store: true,
+        truncation: "auto",
         tools: [{
                 type: "computer",
             }],
@@ -55,6 +96,9 @@ function mapCuaError(err, cuaModel) {
     }
     return err;
 }
+export function isNonRetryableCuaError(err) {
+    return err?.status === 400 && err?.type === "invalid_request_error";
+}
 function getOpenAI() {
     if (!openai) {
         openai = new OpenAI({
@@ -97,7 +141,10 @@ Output only the revised test script, nothing else.`
 }
 export async function sendCUARequest({ messages, screenshotBase64, previousResponseId, callId, deviceInfo, debugContext, }) {
     const cuaModel = getSelectedCuaModel();
-    const input = [...messages];
+    const includeInitialScreenshot = cuaModel === "computer-use-preview" && !previousResponseId && !callId;
+    const input = cuaModel === "computer-use-preview"
+        ? normalizePreviewMessages(messages, screenshotBase64, includeInitialScreenshot)
+        : [...messages];
     if (callId && screenshotBase64) {
         input.push({
             type: "computer_call_output",

package/build/src/modes/execution-mode.js CHANGED

@@ -1,5 +1,5 @@
 import { getScreenshotAsBase64, connectToDevice, getDeviceInfo, getCurrentPlatform } from "../device/connection.js";
-import { sendCUARequest } from "../device/openai.js";
+import { isNonRetryableCuaError, sendCUARequest } from "../device/openai.js";
 import { isAssertion, extractAssertionPrompt, buildAssertionSystemPrompt, checkAssertionResult, handleAssertionFailure, handleAssertionSuccess, } from "../device/assertions.js";
 import { isLoadmillInstruction, extractLoadmillCommand, executeLoadmillInstruction, } from "../device/loadmill.js";
 import { logger } from "../utils/logger.js";
@@ -308,6 +308,18 @@ export class ExecutionMode {
                 stack: err.stack
             });
             const addOutput = context.addOutput || ((item) => console.log(item.text || item));
+            if (isNonRetryableCuaError(err)) {
+                const message = `CUA request was rejected by the API: ${err.message}`;
+                this.emit(addOutput, 'error', message, context, stepContext, {
+                    eventType: 'error',
+                    payload: {
+                        message: err.message,
+                        status: err.status,
+                        type: err.type
+                    }
+                });
+                return { success: false, error: message };
+            }
             // Check if we've exceeded max retries
             if (retryCount >= MAX_RETRIES) {
                 emitDesktopDebug("reconnect.attempt", "device", {

package/build/src/utils/step-delay.js ADDED

@@ -0,0 +1,20 @@
+const DEFAULT_STEP_DELAY_MS = 1000;
+const MAX_STEP_DELAY_MS = 10000;
+export function normalizeStepDelayMs(value) {
+    if (typeof value !== "number" || !Number.isFinite(value)) {
+        return DEFAULT_STEP_DELAY_MS;
+    }
+    const normalized = Math.round(value);
+    if (normalized < 0) {
+        return 0;
+    }
+    if (normalized > MAX_STEP_DELAY_MS) {
+        return MAX_STEP_DELAY_MS;
+    }
+    return normalized;
+}
+export function getConfiguredStepDelayMs() {
+    const raw = Number.parseInt(process.env.DROID_CUA_STEP_DELAY_MS ?? "", 10);
+    return normalizeStepDelayMs(raw);
+}
+export { DEFAULT_STEP_DELAY_MS, MAX_STEP_DELAY_MS };

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@loadmill/droid-cua",
-  "version": "2.2.0",
+  "version": "2.2.1",
   "description": "AI-powered Android testing agent using OpenAI's computer-use model and ADB",
   "main": "build/index.js",
   "type": "module",