@loadmill/droid-cua 1.1.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/README.md +71 -197
  2. package/build/index.js +2 -0
  3. package/build/src/cli/app.js +60 -3
  4. package/build/src/cli/components/CommandSuggestions.js +46 -6
  5. package/build/src/cli/components/OutputPanel.js +16 -0
  6. package/build/src/cli/device-selector.js +55 -28
  7. package/build/src/commands/help.js +4 -3
  8. package/build/src/core/execution-engine.js +127 -25
  9. package/build/src/core/prompts.js +71 -10
  10. package/build/src/device/actions.js +1 -1
  11. package/build/src/device/android/actions.js +97 -20
  12. package/build/src/device/android/connection.js +176 -73
  13. package/build/src/device/android/tools.js +21 -0
  14. package/build/src/device/assertions.js +28 -6
  15. package/build/src/device/connection.js +2 -2
  16. package/build/src/device/factory.js +1 -1
  17. package/build/src/device/interface.js +6 -2
  18. package/build/src/device/ios/actions.js +87 -26
  19. package/build/src/device/ios/appium-server.js +62 -8
  20. package/build/src/device/ios/connection.js +41 -3
  21. package/build/src/device/loadmill.js +66 -17
  22. package/build/src/device/openai.js +84 -73
  23. package/build/src/integrations/loadmill/client.js +24 -3
  24. package/build/src/integrations/loadmill/executor.js +2 -2
  25. package/build/src/integrations/loadmill/interpreter.js +11 -7
  26. package/build/src/modes/design-mode-ink.js +13 -0
  27. package/build/src/modes/design-mode.js +9 -0
  28. package/build/src/modes/execution-mode.js +225 -29
  29. package/build/src/test-store/test-manager.js +12 -4
  30. package/build/src/utils/cua-debug-tracer.js +362 -0
  31. package/build/src/utils/desktop-debug.js +36 -0
  32. package/package.json +1 -1
@@ -1,10 +1,68 @@
1
1
  import OpenAI from "openai";
2
- import dotenv from "dotenv";
3
2
  import { logger } from "../utils/logger.js";
4
- dotenv.config();
5
- const openai = new OpenAI({
6
- apiKey: process.env.OPENAI_API_KEY,
7
- });
3
+ import { CuaDebugTracer } from "../utils/cua-debug-tracer.js";
4
+ let openai = null;
5
+ const cuaDebugTracer = new CuaDebugTracer(logger);
6
+ function getSelectedCuaModel() {
7
+ return process.env.OPENAI_CUA_MODEL === "computer-use-preview" ? "computer-use-preview" : "gpt-5.4";
8
+ }
9
+ function buildCuaRequestParams({ cuaModel, previousResponseId, deviceInfo, input }) {
10
+ const common = {
11
+ model: cuaModel,
12
+ previous_response_id: previousResponseId || undefined,
13
+ input,
14
+ store: true,
15
+ truncation: "auto",
16
+ };
17
+ if (cuaModel === "computer-use-preview") {
18
+ return {
19
+ ...common,
20
+ tools: [{
21
+ type: "computer_use_preview",
22
+ display_width: deviceInfo.scaled_width,
23
+ display_height: deviceInfo.scaled_height,
24
+ environment: "browser",
25
+ }],
26
+ reasoning: { generate_summary: "concise" },
27
+ };
28
+ }
29
+ return {
30
+ ...common,
31
+ tools: [{
32
+ type: "computer",
33
+ }],
34
+ };
35
+ }
36
+ function shouldMapPreviewAccessError(err) {
37
+ const status = err?.status;
38
+ const code = typeof err?.code === "string" ? err.code.toLowerCase() : "";
39
+ const type = typeof err?.type === "string" ? err.type.toLowerCase() : "";
40
+ const message = typeof err?.message === "string" ? err.message.toLowerCase() : "";
41
+ if (status === 403 || status === 404)
42
+ return true;
43
+ if (code.includes("model_not_found") || code.includes("permission"))
44
+ return true;
45
+ if (type.includes("permission"))
46
+ return true;
47
+ return (message.includes("computer-use-preview") &&
48
+ (message.includes("access") || message.includes("permission") || message.includes("not found") || message.includes("unsupported")));
49
+ }
50
+ function mapCuaError(err, cuaModel) {
51
+ if (cuaModel === "computer-use-preview" && shouldMapPreviewAccessError(err)) {
52
+ const mapped = new Error("OpenAI API key does not have access to computer-use-preview. Switch to gpt-5.4 in Settings > CUA Model.");
53
+ mapped.cause = err;
54
+ return mapped;
55
+ }
56
+ return err;
57
+ }
58
+ function getOpenAI() {
59
+ if (!openai) {
60
+ openai = new OpenAI({
61
+ apiKey: process.env.OPENAI_API_KEY,
62
+ });
63
+ }
64
+ return openai;
65
+ }
8
66
  /**
9
67
  * Revise a test script based on user feedback using simple chat completion
10
68
  * @param {string} originalScript - The original test script
@@ -12,7 +70,7 @@ const openai = new OpenAI({
12
70
  * @returns {Promise<string>} - The revised test script
13
71
  */
14
72
  export async function reviseTestScript(originalScript, revisionRequest) {
15
- const response = await openai.chat.completions.create({
73
+ const response = await getOpenAI().chat.completions.create({
16
74
  model: "gpt-4o",
17
75
  messages: [{
18
76
  role: "system",
@@ -37,7 +95,8 @@ Output only the revised test script, nothing else.`
37
95
  });
38
96
  return response.choices[0].message.content.trim();
39
97
  }
40
- export async function sendCUARequest({ messages, screenshotBase64, previousResponseId, callId, deviceInfo, }) {
98
+ export async function sendCUARequest({ messages, screenshotBase64, previousResponseId, callId, deviceInfo, debugContext, }) {
99
+ const cuaModel = getSelectedCuaModel();
41
100
  const input = [...messages];
42
101
  if (callId && screenshotBase64) {
43
102
  input.push({
@@ -49,76 +108,28 @@ export async function sendCUARequest({ messages, screenshotBase64, previousRespo
49
108
  },
50
109
  });
51
110
  }
52
- const requestParams = {
53
- model: "computer-use-preview",
54
- previous_response_id: previousResponseId || undefined,
55
- tools: [{
56
- type: "computer_use_preview",
57
- display_width: deviceInfo.scaled_width,
58
- display_height: deviceInfo.scaled_height,
59
- environment: "browser",
60
- }],
111
+ const requestParams = buildCuaRequestParams({
112
+ cuaModel,
113
+ previousResponseId,
114
+ deviceInfo,
61
115
  input,
62
- store: true,
63
- reasoning: { generate_summary: "concise" },
64
- truncation: "auto",
65
- };
66
- // Log request details (without full screenshot to avoid clutter)
67
- const requestLog = {
68
- ...requestParams,
69
- input: input.map(item => {
70
- if (item.type === "computer_call_output" && item.output?.image_url) {
71
- // Extract actual base64 length from the image_url
72
- const imageUrl = item.output.image_url;
73
- const base64Data = imageUrl.replace('data:image/png;base64,', '');
74
- return {
75
- ...item,
76
- output: {
77
- ...item.output,
78
- image_url: `data:image/png;base64,[${base64Data.length} chars]`
79
- },
80
- current_url: item.current_url,
81
- acknowledged_safety_checks: item.acknowledged_safety_checks
82
- };
83
- }
84
- return item;
85
- })
86
- };
87
- logger.debug('CUA Request:', requestLog);
116
+ });
117
+ const trace = cuaDebugTracer.startTurn({
118
+ requestParams,
119
+ input,
120
+ screenshotBase64,
121
+ deviceInfo,
122
+ debugContext,
123
+ previousResponseId
124
+ });
125
+ logger.debug("CUA Request:", trace.requestLog);
88
126
  try {
89
- const response = await openai.responses.create(requestParams);
90
- // Log ALL output item types to catch everything
91
- const outputTypes = (response.output || []).map(item => item.type);
92
- const toolCalls = (response.output || [])
93
- .filter(item => item.type === 'computer_call')
94
- .map(item => ({
95
- call_id: item.call_id,
96
- action_type: item.action?.type
97
- }));
98
- const safetyChecks = (response.output || [])
99
- .filter(item => item.type === 'pending_safety_check')
100
- .map(item => ({
101
- id: item.id,
102
- code: item.code
103
- }));
104
- // Log full output array if there are unaccounted items
105
- const accountedItems = toolCalls.length + safetyChecks.length;
106
- const totalItems = response.output?.length || 0;
107
- logger.debug('CUA Response:', {
108
- id: response.id,
109
- output_length: totalItems,
110
- output_types: outputTypes,
111
- tool_calls: toolCalls.length > 0 ? toolCalls : 'none',
112
- pending_safety_checks: safetyChecks.length > 0 ? safetyChecks : 'none'
113
- });
114
- // If we're missing items in our logging, log the full output for investigation
115
- if (accountedItems < totalItems) {
116
- logger.debug('UNACCOUNTED OUTPUT ITEMS - Full output array:', response.output);
117
- }
127
+ const response = await getOpenAI().responses.create(requestParams);
128
+ cuaDebugTracer.onResponse(trace, response);
118
129
  return response;
119
130
  }
120
131
  catch (err) {
121
- logger.error('CUA Request failed', { request: requestLog, error: err });
122
- throw err;
132
+ cuaDebugTracer.onError(trace, err);
133
+ throw mapCuaError(err, cuaModel);
123
134
  }
124
135
  }
@@ -1,9 +1,24 @@
1
1
  /**
2
2
  * Loadmill API client for interacting with test flows
3
3
  */
4
- import dotenv from "dotenv";
5
- dotenv.config();
6
4
  const DEFAULT_BASE_URL = "https://app.loadmill.com/api";
5
+ function normalizeApiBaseUrl(rawBaseUrl) {
6
+ const candidate = (rawBaseUrl || DEFAULT_BASE_URL).trim();
7
+ if (!candidate) {
8
+ return DEFAULT_BASE_URL;
9
+ }
10
+ try {
11
+ const parsed = new URL(candidate);
12
+ const pathname = parsed.pathname.replace(/\/+$/, "");
13
+ parsed.pathname = pathname.endsWith("/api") ? pathname : `${pathname}/api`;
14
+ parsed.search = "";
15
+ parsed.hash = "";
16
+ return parsed.toString().replace(/\/$/, "");
17
+ }
18
+ catch {
19
+ return DEFAULT_BASE_URL;
20
+ }
21
+ }
7
22
  /**
8
23
  * Get Loadmill API token from environment
9
24
  * @returns {string|null}
@@ -16,7 +31,7 @@ export function getApiToken() {
16
31
  * @returns {string}
17
32
  */
18
33
  export function getBaseUrl() {
19
- return process.env.LOADMILL_BASE_URL || DEFAULT_BASE_URL;
34
+ return normalizeApiBaseUrl(process.env.LOADMILL_BASE_URL);
20
35
  }
21
36
  /**
22
37
  * Make an authenticated request to Loadmill API
@@ -39,6 +54,12 @@ async function apiRequest(endpoint, options = {}) {
39
54
  ...options.headers,
40
55
  },
41
56
  });
57
+ if (response.status === 401 || response.status === 403) {
58
+ const unauthorizedHandler = globalThis.__DROID_CUA_HANDLE_LOADMILL_UNAUTHORIZED__;
59
+ if (typeof unauthorizedHandler === "function") {
60
+ await unauthorizedHandler();
61
+ }
62
+ }
42
63
  if (!response.ok) {
43
64
  const errorText = await response.text();
44
65
  throw new Error(`Loadmill API error (${response.status}): ${errorText}`);
@@ -130,9 +130,9 @@ export async function executeLoadmillCommand(userInput, options = {}) {
130
130
  };
131
131
  }
132
132
  // Step 5: Poll for completion
133
- onProgress({ step: "polling", message: `Test started (ID: ${runId}). Waiting for completion...` });
133
+ onProgress({ step: "polling", message: `Test started (ID: ${runId}). Waiting for completion...`, runId });
134
134
  const finalResult = await pollForCompletion(runId, (status) => {
135
- onProgress({ step: "polling", message: `Status: ${status.status}...` });
135
+ onProgress({ step: "polling", message: `Status: ${status.status}...`, runId });
136
136
  });
137
137
  return {
138
138
  ...finalResult,
@@ -2,18 +2,22 @@
2
2
  * AI-powered text interpretation for Loadmill commands
3
3
  */
4
4
  import OpenAI from "openai";
5
- import dotenv from "dotenv";
6
- dotenv.config();
7
- const openai = new OpenAI({
8
- apiKey: process.env.OPENAI_API_KEY,
9
- });
5
+ let openai = null;
6
+ function getOpenAI() {
7
+ if (!openai) {
8
+ openai = new OpenAI({
9
+ apiKey: process.env.OPENAI_API_KEY,
10
+ });
11
+ }
12
+ return openai;
13
+ }
10
14
  /**
11
15
  * Interpret a natural language Loadmill command into structured data
12
16
  * @param {string} userInput - Natural language command
13
17
  * @returns {Promise<{searchQuery: string, parameters: Object, action: 'run'|'search'}>}
14
18
  */
15
19
  export async function interpretLoadmillCommand(userInput) {
16
- const response = await openai.chat.completions.create({
20
+ const response = await getOpenAI().chat.completions.create({
17
21
  model: "gpt-4o-mini",
18
22
  messages: [
19
23
  {
@@ -78,7 +82,7 @@ export async function selectBestFlow(flows, originalQuery) {
78
82
  const suite = f.testSuiteDescription || "";
79
83
  return `${i + 1}. ID: ${f.id}, Name: "${name}"${suite ? `, Suite: "${suite}"` : ""}`;
80
84
  }).join("\n");
81
- const response = await openai.chat.completions.create({
85
+ const response = await getOpenAI().chat.completions.create({
82
86
  model: "gpt-4o-mini",
83
87
  messages: [
84
88
  {
@@ -20,6 +20,8 @@ export class DesignModeInk {
20
20
  this.waitingForInput = false; // Flag to indicate we're explicitly waiting for input
21
21
  this.inputResolver = null; // Promise resolver for input
22
22
  this.initialUserPrompt = null; // Store initial prompt for error recovery
23
+ this.consecutiveErrorCount = 0;
24
+ this.maxConsecutiveErrors = 3;
23
25
  }
24
26
  /**
25
27
  * Start design mode conversation
@@ -202,6 +204,7 @@ export class DesignModeInk {
202
204
  return false; // Continue execution
203
205
  }, this.context);
204
206
  this.session.updateResponseId(newResponseId);
207
+ this.consecutiveErrorCount = 0;
205
208
  // Clear agent working status
206
209
  if (this.context.setAgentWorking) {
207
210
  this.context.setAgentWorking(false);
@@ -315,6 +318,16 @@ export class DesignModeInk {
315
318
  });
316
319
  // Show user-friendly error message
317
320
  addOutput({ type: 'error', text: `⚠️ Error in design mode: ${err.message}` });
321
+ this.consecutiveErrorCount += 1;
322
+ if (this.consecutiveErrorCount > this.maxConsecutiveErrors) {
323
+ addOutput({
324
+ type: 'error',
325
+ text: `Design mode could not recover after ${this.maxConsecutiveErrors} consecutive errors and stopped.`
326
+ });
327
+ this.conversationActive = false;
328
+ this.cleanup();
329
+ return;
330
+ }
318
331
  // Automatic recovery - continue from where we left off using transcript
319
332
  addOutput({ type: 'info', text: 'Recovering from error and continuing...' });
320
333
  // Build recovery context with transcript
@@ -17,6 +17,8 @@ export class DesignMode {
17
17
  this.escPressed = false;
18
18
  this.recentActions = []; // Track recent actions for stuck detection
19
19
  this.initialUserPrompt = null; // Store initial prompt for error recovery
20
+ this.consecutiveErrorCount = 0;
21
+ this.maxConsecutiveErrors = 3;
20
22
  }
21
23
  /**
22
24
  * Start design mode conversation
@@ -167,6 +169,7 @@ export class DesignMode {
167
169
  return false; // Continue execution
168
170
  });
169
171
  this.session.updateResponseId(newResponseId);
172
+ this.consecutiveErrorCount = 0;
170
173
  // Cleanup ESC detection
171
174
  this.cleanupEscDetection(keypressHandler);
172
175
  // Check if user pressed ESC
@@ -303,6 +306,12 @@ export class DesignMode {
303
306
  });
304
307
  // Show user-friendly error message
305
308
  console.error("\n⚠️ Error in design mode:", err.message);
309
+ this.consecutiveErrorCount += 1;
310
+ if (this.consecutiveErrorCount > this.maxConsecutiveErrors) {
311
+ console.error(`\nDesign mode could not recover after ${this.maxConsecutiveErrors} consecutive errors and stopped.`);
312
+ this.conversationActive = false;
313
+ return;
314
+ }
306
315
  // Automatic recovery - continue from where we left off using transcript
307
316
  console.log("\nRecovering from error and continuing...");
308
317
  // Build recovery context with transcript