@loadmill/droid-cua 1.1.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/README.md +71 -197
  2. package/build/index.js +2 -0
  3. package/build/src/cli/app.js +60 -3
  4. package/build/src/cli/components/CommandSuggestions.js +46 -6
  5. package/build/src/cli/components/OutputPanel.js +16 -0
  6. package/build/src/cli/device-selector.js +55 -28
  7. package/build/src/commands/help.js +4 -3
  8. package/build/src/core/execution-engine.js +127 -25
  9. package/build/src/core/prompts.js +71 -10
  10. package/build/src/device/actions.js +1 -1
  11. package/build/src/device/android/actions.js +97 -20
  12. package/build/src/device/android/connection.js +176 -73
  13. package/build/src/device/android/tools.js +21 -0
  14. package/build/src/device/assertions.js +28 -6
  15. package/build/src/device/connection.js +2 -2
  16. package/build/src/device/factory.js +1 -1
  17. package/build/src/device/interface.js +6 -2
  18. package/build/src/device/ios/actions.js +87 -26
  19. package/build/src/device/ios/appium-server.js +62 -8
  20. package/build/src/device/ios/connection.js +41 -3
  21. package/build/src/device/loadmill.js +66 -17
  22. package/build/src/device/openai.js +84 -73
  23. package/build/src/integrations/loadmill/client.js +24 -3
  24. package/build/src/integrations/loadmill/executor.js +2 -2
  25. package/build/src/integrations/loadmill/interpreter.js +11 -7
  26. package/build/src/modes/design-mode-ink.js +13 -0
  27. package/build/src/modes/design-mode.js +9 -0
  28. package/build/src/modes/execution-mode.js +225 -29
  29. package/build/src/test-store/test-manager.js +12 -4
  30. package/build/src/utils/cua-debug-tracer.js +362 -0
  31. package/build/src/utils/desktop-debug.js +36 -0
  32. package/package.json +1 -1
@@ -1,10 +1,68 @@
1
1
  import OpenAI from "openai";
2
- import dotenv from "dotenv";
3
2
  import { logger } from "../utils/logger.js";
4
- dotenv.config();
5
- const openai = new OpenAI({
6
- apiKey: process.env.OPENAI_API_KEY,
7
- });
3
+ import { CuaDebugTracer } from "../utils/cua-debug-tracer.js";
4
+ let openai = null;
5
+ const cuaDebugTracer = new CuaDebugTracer(logger);
6
+ function getSelectedCuaModel() {
7
+ return process.env.OPENAI_CUA_MODEL === "computer-use-preview" ? "computer-use-preview" : "gpt-5.4";
8
+ }
9
+ function buildCuaRequestParams({ cuaModel, previousResponseId, deviceInfo, input }) {
10
+ const common = {
11
+ model: cuaModel,
12
+ previous_response_id: previousResponseId || undefined,
13
+ input,
14
+ store: true,
15
+ truncation: "auto",
16
+ };
17
+ if (cuaModel === "computer-use-preview") {
18
+ return {
19
+ ...common,
20
+ tools: [{
21
+ type: "computer_use_preview",
22
+ display_width: deviceInfo.scaled_width,
23
+ display_height: deviceInfo.scaled_height,
24
+ environment: "browser",
25
+ }],
26
+ reasoning: { generate_summary: "concise" },
27
+ };
28
+ }
29
+ return {
30
+ ...common,
31
+ tools: [{
32
+ type: "computer",
33
+ }],
34
+ };
35
+ }
36
+ function shouldMapPreviewAccessError(err) {
37
+ const status = err?.status;
38
+ const code = typeof err?.code === "string" ? err.code.toLowerCase() : "";
39
+ const type = typeof err?.type === "string" ? err.type.toLowerCase() : "";
40
+ const message = typeof err?.message === "string" ? err.message.toLowerCase() : "";
41
+ if (status === 403 || status === 404)
42
+ return true;
43
+ if (code.includes("model_not_found") || code.includes("permission"))
44
+ return true;
45
+ if (type.includes("permission"))
46
+ return true;
47
+ return (message.includes("computer-use-preview") &&
48
+ (message.includes("access") || message.includes("permission") || message.includes("not found") || message.includes("unsupported")));
49
+ }
50
+ function mapCuaError(err, cuaModel) {
51
+ if (cuaModel === "computer-use-preview" && shouldMapPreviewAccessError(err)) {
52
+ const mapped = new Error("OpenAI API key does not have access to computer-use-preview. Switch to gpt-5.4 in Settings > CUA Model.");
53
+ mapped.cause = err;
54
+ return mapped;
55
+ }
56
+ return err;
57
+ }
58
+ function getOpenAI() {
59
+ if (!openai) {
60
+ openai = new OpenAI({
61
+ apiKey: process.env.OPENAI_API_KEY,
62
+ });
63
+ }
64
+ return openai;
65
+ }
8
66
  /**
9
67
  * Revise a test script based on user feedback using simple chat completion
10
68
  * @param {string} originalScript - The original test script
@@ -12,7 +70,7 @@ const openai = new OpenAI({
12
70
  * @returns {Promise<string>} - The revised test script
13
71
  */
14
72
  export async function reviseTestScript(originalScript, revisionRequest) {
15
- const response = await openai.chat.completions.create({
73
+ const response = await getOpenAI().chat.completions.create({
16
74
  model: "gpt-4o",
17
75
  messages: [{
18
76
  role: "system",
@@ -37,7 +95,8 @@ Output only the revised test script, nothing else.`
37
95
  });
38
96
  return response.choices[0].message.content.trim();
39
97
  }
40
- export async function sendCUARequest({ messages, screenshotBase64, previousResponseId, callId, deviceInfo, }) {
98
+ export async function sendCUARequest({ messages, screenshotBase64, previousResponseId, callId, deviceInfo, debugContext, }) {
99
+ const cuaModel = getSelectedCuaModel();
41
100
  const input = [...messages];
42
101
  if (callId && screenshotBase64) {
43
102
  input.push({
@@ -49,76 +108,28 @@ export async function sendCUARequest({ messages, screenshotBase64, previousRespo
49
108
  },
50
109
  });
51
110
  }
52
- const requestParams = {
53
- model: "computer-use-preview",
54
- previous_response_id: previousResponseId || undefined,
55
- tools: [{
56
- type: "computer_use_preview",
57
- display_width: deviceInfo.scaled_width,
58
- display_height: deviceInfo.scaled_height,
59
- environment: "browser",
60
- }],
111
+ const requestParams = buildCuaRequestParams({
112
+ cuaModel,
113
+ previousResponseId,
114
+ deviceInfo,
61
115
  input,
62
- store: true,
63
- reasoning: { generate_summary: "concise" },
64
- truncation: "auto",
65
- };
66
- // Log request details (without full screenshot to avoid clutter)
67
- const requestLog = {
68
- ...requestParams,
69
- input: input.map(item => {
70
- if (item.type === "computer_call_output" && item.output?.image_url) {
71
- // Extract actual base64 length from the image_url
72
- const imageUrl = item.output.image_url;
73
- const base64Data = imageUrl.replace('data:image/png;base64,', '');
74
- return {
75
- ...item,
76
- output: {
77
- ...item.output,
78
- image_url: `data:image/png;base64,[${base64Data.length} chars]`
79
- },
80
- current_url: item.current_url,
81
- acknowledged_safety_checks: item.acknowledged_safety_checks
82
- };
83
- }
84
- return item;
85
- })
86
- };
87
- logger.debug('CUA Request:', requestLog);
116
+ });
117
+ const trace = cuaDebugTracer.startTurn({
118
+ requestParams,
119
+ input,
120
+ screenshotBase64,
121
+ deviceInfo,
122
+ debugContext,
123
+ previousResponseId
124
+ });
125
+ logger.debug("CUA Request:", trace.requestLog);
88
126
  try {
89
- const response = await openai.responses.create(requestParams);
90
- // Log ALL output item types to catch everything
91
- const outputTypes = (response.output || []).map(item => item.type);
92
- const toolCalls = (response.output || [])
93
- .filter(item => item.type === 'computer_call')
94
- .map(item => ({
95
- call_id: item.call_id,
96
- action_type: item.action?.type
97
- }));
98
- const safetyChecks = (response.output || [])
99
- .filter(item => item.type === 'pending_safety_check')
100
- .map(item => ({
101
- id: item.id,
102
- code: item.code
103
- }));
104
- // Log full output array if there are unaccounted items
105
- const accountedItems = toolCalls.length + safetyChecks.length;
106
- const totalItems = response.output?.length || 0;
107
- logger.debug('CUA Response:', {
108
- id: response.id,
109
- output_length: totalItems,
110
- output_types: outputTypes,
111
- tool_calls: toolCalls.length > 0 ? toolCalls : 'none',
112
- pending_safety_checks: safetyChecks.length > 0 ? safetyChecks : 'none'
113
- });
114
- // If we're missing items in our logging, log the full output for investigation
115
- if (accountedItems < totalItems) {
116
- logger.debug('UNACCOUNTED OUTPUT ITEMS - Full output array:', response.output);
117
- }
127
+ const response = await getOpenAI().responses.create(requestParams);
128
+ cuaDebugTracer.onResponse(trace, response);
118
129
  return response;
119
130
  }
120
131
  catch (err) {
121
- logger.error('CUA Request failed', { request: requestLog, error: err });
122
- throw err;
132
+ cuaDebugTracer.onError(trace, err);
133
+ throw mapCuaError(err, cuaModel);
123
134
  }
124
135
  }
@@ -1,9 +1,24 @@
1
1
  /**
2
2
  * Loadmill API client for interacting with test flows
3
3
  */
4
- import dotenv from "dotenv";
5
- dotenv.config();
6
4
  const DEFAULT_BASE_URL = "https://app.loadmill.com/api";
5
+ function normalizeApiBaseUrl(rawBaseUrl) {
6
+ const candidate = (rawBaseUrl || DEFAULT_BASE_URL).trim();
7
+ if (!candidate) {
8
+ return DEFAULT_BASE_URL;
9
+ }
10
+ try {
11
+ const parsed = new URL(candidate);
12
+ const pathname = parsed.pathname.replace(/\/+$/, "");
13
+ parsed.pathname = pathname.endsWith("/api") ? pathname : `${pathname}/api`;
14
+ parsed.search = "";
15
+ parsed.hash = "";
16
+ return parsed.toString().replace(/\/$/, "");
17
+ }
18
+ catch {
19
+ return DEFAULT_BASE_URL;
20
+ }
21
+ }
7
22
  /**
8
23
  * Get Loadmill API token from environment
9
24
  * @returns {string|null}
@@ -16,7 +31,7 @@ export function getApiToken() {
16
31
  * @returns {string}
17
32
  */
18
33
  export function getBaseUrl() {
19
- return process.env.LOADMILL_BASE_URL || DEFAULT_BASE_URL;
34
+ return normalizeApiBaseUrl(process.env.LOADMILL_BASE_URL);
20
35
  }
21
36
  /**
22
37
  * Make an authenticated request to Loadmill API
@@ -39,6 +54,12 @@ async function apiRequest(endpoint, options = {}) {
39
54
  ...options.headers,
40
55
  },
41
56
  });
57
+ if (response.status === 401 || response.status === 403) {
58
+ const unauthorizedHandler = globalThis.__DROID_CUA_HANDLE_LOADMILL_UNAUTHORIZED__;
59
+ if (typeof unauthorizedHandler === "function") {
60
+ await unauthorizedHandler();
61
+ }
62
+ }
42
63
  if (!response.ok) {
43
64
  const errorText = await response.text();
44
65
  throw new Error(`Loadmill API error (${response.status}): ${errorText}`);
@@ -130,9 +130,9 @@ export async function executeLoadmillCommand(userInput, options = {}) {
130
130
  };
131
131
  }
132
132
  // Step 5: Poll for completion
133
- onProgress({ step: "polling", message: `Test started (ID: ${runId}). Waiting for completion...` });
133
+ onProgress({ step: "polling", message: `Test started (ID: ${runId}). Waiting for completion...`, runId });
134
134
  const finalResult = await pollForCompletion(runId, (status) => {
135
- onProgress({ step: "polling", message: `Status: ${status.status}...` });
135
+ onProgress({ step: "polling", message: `Status: ${status.status}...`, runId });
136
136
  });
137
137
  return {
138
138
  ...finalResult,
@@ -2,18 +2,22 @@
2
2
  * AI-powered text interpretation for Loadmill commands
3
3
  */
4
4
  import OpenAI from "openai";
5
- import dotenv from "dotenv";
6
- dotenv.config();
7
- const openai = new OpenAI({
8
- apiKey: process.env.OPENAI_API_KEY,
9
- });
5
+ let openai = null;
6
+ function getOpenAI() {
7
+ if (!openai) {
8
+ openai = new OpenAI({
9
+ apiKey: process.env.OPENAI_API_KEY,
10
+ });
11
+ }
12
+ return openai;
13
+ }
10
14
  /**
11
15
  * Interpret a natural language Loadmill command into structured data
12
16
  * @param {string} userInput - Natural language command
13
17
  * @returns {Promise<{searchQuery: string, parameters: Object, action: 'run'|'search'}>}
14
18
  */
15
19
  export async function interpretLoadmillCommand(userInput) {
16
- const response = await openai.chat.completions.create({
20
+ const response = await getOpenAI().chat.completions.create({
17
21
  model: "gpt-4o-mini",
18
22
  messages: [
19
23
  {
@@ -78,7 +82,7 @@ export async function selectBestFlow(flows, originalQuery) {
78
82
  const suite = f.testSuiteDescription || "";
79
83
  return `${i + 1}. ID: ${f.id}, Name: "${name}"${suite ? `, Suite: "${suite}"` : ""}`;
80
84
  }).join("\n");
81
- const response = await openai.chat.completions.create({
85
+ const response = await getOpenAI().chat.completions.create({
82
86
  model: "gpt-4o-mini",
83
87
  messages: [
84
88
  {
@@ -20,6 +20,8 @@ export class DesignModeInk {
20
20
  this.waitingForInput = false; // Flag to indicate we're explicitly waiting for input
21
21
  this.inputResolver = null; // Promise resolver for input
22
22
  this.initialUserPrompt = null; // Store initial prompt for error recovery
23
+ this.consecutiveErrorCount = 0;
24
+ this.maxConsecutiveErrors = 3;
23
25
  }
24
26
  /**
25
27
  * Start design mode conversation
@@ -202,6 +204,7 @@ export class DesignModeInk {
202
204
  return false; // Continue execution
203
205
  }, this.context);
204
206
  this.session.updateResponseId(newResponseId);
207
+ this.consecutiveErrorCount = 0;
205
208
  // Clear agent working status
206
209
  if (this.context.setAgentWorking) {
207
210
  this.context.setAgentWorking(false);
@@ -315,6 +318,16 @@ export class DesignModeInk {
315
318
  });
316
319
  // Show user-friendly error message
317
320
  addOutput({ type: 'error', text: `⚠️ Error in design mode: ${err.message}` });
321
+ this.consecutiveErrorCount += 1;
322
+ if (this.consecutiveErrorCount > this.maxConsecutiveErrors) {
323
+ addOutput({
324
+ type: 'error',
325
+ text: `Design mode could not recover after ${this.maxConsecutiveErrors} consecutive errors and stopped.`
326
+ });
327
+ this.conversationActive = false;
328
+ this.cleanup();
329
+ return;
330
+ }
318
331
  // Automatic recovery - continue from where we left off using transcript
319
332
  addOutput({ type: 'info', text: 'Recovering from error and continuing...' });
320
333
  // Build recovery context with transcript
@@ -17,6 +17,8 @@ export class DesignMode {
17
17
  this.escPressed = false;
18
18
  this.recentActions = []; // Track recent actions for stuck detection
19
19
  this.initialUserPrompt = null; // Store initial prompt for error recovery
20
+ this.consecutiveErrorCount = 0;
21
+ this.maxConsecutiveErrors = 3;
20
22
  }
21
23
  /**
22
24
  * Start design mode conversation
@@ -167,6 +169,7 @@ export class DesignMode {
167
169
  return false; // Continue execution
168
170
  });
169
171
  this.session.updateResponseId(newResponseId);
172
+ this.consecutiveErrorCount = 0;
170
173
  // Cleanup ESC detection
171
174
  this.cleanupEscDetection(keypressHandler);
172
175
  // Check if user pressed ESC
@@ -303,6 +306,12 @@ export class DesignMode {
303
306
  });
304
307
  // Show user-friendly error message
305
308
  console.error("\n⚠️ Error in design mode:", err.message);
309
+ this.consecutiveErrorCount += 1;
310
+ if (this.consecutiveErrorCount > this.maxConsecutiveErrors) {
311
+ console.error(`\nDesign mode could not recover after ${this.maxConsecutiveErrors} consecutive errors and stopped.`);
312
+ this.conversationActive = false;
313
+ return;
314
+ }
306
315
  // Automatic recovery - continue from where we left off using transcript
307
316
  console.log("\nRecovering from error and continuing...");
308
317
  // Build recovery context with transcript