@loadmill/droid-cua 1.1.2 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -197
- package/build/index.js +2 -0
- package/build/src/cli/app.js +60 -3
- package/build/src/cli/components/CommandSuggestions.js +46 -6
- package/build/src/cli/components/OutputPanel.js +16 -0
- package/build/src/cli/device-selector.js +60 -28
- package/build/src/commands/help.js +4 -3
- package/build/src/core/execution-engine.js +127 -25
- package/build/src/core/prompts.js +71 -10
- package/build/src/device/actions.js +1 -1
- package/build/src/device/android/actions.js +97 -20
- package/build/src/device/android/connection.js +176 -73
- package/build/src/device/android/tools.js +21 -0
- package/build/src/device/assertions.js +28 -6
- package/build/src/device/connection.js +2 -2
- package/build/src/device/factory.js +1 -1
- package/build/src/device/interface.js +6 -2
- package/build/src/device/ios/actions.js +87 -26
- package/build/src/device/ios/appium-server.js +62 -8
- package/build/src/device/ios/connection.js +41 -3
- package/build/src/device/loadmill.js +66 -17
- package/build/src/device/openai.js +84 -73
- package/build/src/integrations/loadmill/client.js +24 -3
- package/build/src/integrations/loadmill/executor.js +2 -2
- package/build/src/integrations/loadmill/interpreter.js +11 -7
- package/build/src/modes/design-mode-ink.js +13 -0
- package/build/src/modes/design-mode.js +9 -0
- package/build/src/modes/execution-mode.js +225 -29
- package/build/src/utils/cua-debug-tracer.js +362 -0
- package/build/src/utils/desktop-debug.js +36 -0
- package/package.json +1 -1
|
@@ -1,10 +1,68 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
|
-
import dotenv from "dotenv";
|
|
3
2
|
import { logger } from "../utils/logger.js";
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
import { CuaDebugTracer } from "../utils/cua-debug-tracer.js";
|
|
4
|
+
let openai = null;
|
|
5
|
+
const cuaDebugTracer = new CuaDebugTracer(logger);
|
|
6
|
+
/**
 * Resolve which OpenAI computer-use model requests should target.
 *
 * Only the exact value "computer-use-preview" in the OPENAI_CUA_MODEL
 * environment variable selects the preview model; any other value
 * (including an unset variable) selects "gpt-5.4".
 *
 * @returns {string} Either "computer-use-preview" or "gpt-5.4".
 */
function getSelectedCuaModel() {
    const previewModel = "computer-use-preview";
    if (process.env.OPENAI_CUA_MODEL === previewModel) {
        return previewModel;
    }
    return "gpt-5.4";
}
|
|
9
|
+
/**
 * Assemble the parameter object passed to the OpenAI Responses API for one
 * computer-use turn.
 *
 * Every request carries the model, the (optional) previous response id, the
 * input items, `store: true` and `truncation: "auto"`. The tool definition
 * depends on the model:
 *  - "computer-use-preview": a `computer_use_preview` tool sized from
 *    `deviceInfo.scaled_width` / `deviceInfo.scaled_height` with environment
 *    "browser", plus a concise reasoning summary request.
 *  - any other model: a bare `computer` tool.
 *
 * @param {Object} args
 * @param {string} args.cuaModel - Model identifier to send.
 * @param {string} [args.previousResponseId] - Falsy values are sent as `undefined`.
 * @param {Object} [args.deviceInfo] - Only read for the preview model.
 * @param {Array} args.input - Input items for this turn.
 * @returns {Object} Request parameters for `responses.create`.
 */
function buildCuaRequestParams({ cuaModel, previousResponseId, deviceInfo, input }) {
    const params = {
        model: cuaModel,
        // The key is always present; a falsy id is normalised to undefined.
        previous_response_id: previousResponseId ? previousResponseId : undefined,
        input,
        store: true,
        truncation: "auto",
    };
    if (cuaModel !== "computer-use-preview") {
        params.tools = [{ type: "computer" }];
        return params;
    }
    const previewTool = {
        type: "computer_use_preview",
        display_width: deviceInfo.scaled_width,
        display_height: deviceInfo.scaled_height,
        environment: "browser",
    };
    params.tools = [previewTool];
    // NOTE(review): `generate_summary` looks to be superseded by `summary` in
    // the Responses API reference — confirm before changing the wire format.
    params.reasoning = { generate_summary: "concise" };
    return params;
}
|
|
36
|
+
/**
 * Decide whether an error from the API looks like "this key cannot use
 * computer-use-preview".
 *
 * The check is heuristic and passes when any of the following holds:
 *  - `err.status` is 403 or 404;
 *  - `err.code` contains "model_not_found" or "permission";
 *  - `err.type` contains "permission";
 *  - `err.message` mentions "computer-use-preview" together with one of
 *    "access", "permission", "not found" or "unsupported".
 * String comparisons are case-insensitive; non-string fields are ignored.
 *
 * @param {*} err - The caught error (may be null/undefined).
 * @returns {boolean} True when the error should be mapped to the access hint.
 */
function shouldMapPreviewAccessError(err) {
    const lowered = (value) => (typeof value === "string" ? value.toLowerCase() : "");
    const httpStatus = err?.status;
    const errorCode = lowered(err?.code);
    const errorType = lowered(err?.type);
    const text = lowered(err?.message);

    if ([403, 404].includes(httpStatus)) {
        return true;
    }
    if (["model_not_found", "permission"].some((needle) => errorCode.includes(needle))) {
        return true;
    }
    if (errorType.includes("permission")) {
        return true;
    }
    if (!text.includes("computer-use-preview")) {
        return false;
    }
    return ["access", "permission", "not found", "unsupported"].some((needle) => text.includes(needle));
}
|
|
50
|
+
/**
 * Translate a raw API failure into a more actionable error when appropriate.
 *
 * When the preview model is selected and `shouldMapPreviewAccessError`
 * classifies the failure as an access problem, a new Error with a
 * user-facing hint is returned and the original error is attached as
 * `cause`. In every other case the original error is returned untouched.
 *
 * @param {*} err - The error caught from the API call.
 * @param {string} cuaModel - The model the failed request targeted.
 * @returns {*} Either the mapped Error or `err` itself.
 */
function mapCuaError(err, cuaModel) {
    const isPreviewAccessFailure = cuaModel === "computer-use-preview" && shouldMapPreviewAccessError(err);
    if (!isPreviewAccessFailure) {
        return err;
    }
    const friendly = new Error("OpenAI API key does not have access to computer-use-preview. Switch to gpt-5.4 in Settings > CUA Model.");
    // Keep the underlying failure reachable for logging/debugging.
    return Object.assign(friendly, { cause: err });
}
|
|
58
|
+
/**
 * Return the shared OpenAI client, constructing it on first use.
 *
 * The client is created lazily so that OPENAI_API_KEY is read from the
 * environment at the time of the first call rather than at module load.
 * Later calls reuse the cached instance held in the module-level `openai`
 * variable.
 *
 * @returns {OpenAI} The cached client instance.
 */
function getOpenAI() {
    if (openai) {
        return openai;
    }
    const apiKey = process.env.OPENAI_API_KEY;
    openai = new OpenAI({ apiKey });
    return openai;
}
|
|
8
66
|
/**
|
|
9
67
|
* Revise a test script based on user feedback using simple chat completion
|
|
10
68
|
* @param {string} originalScript - The original test script
|
|
@@ -12,7 +70,7 @@ const openai = new OpenAI({
|
|
|
12
70
|
* @returns {Promise<string>} - The revised test script
|
|
13
71
|
*/
|
|
14
72
|
export async function reviseTestScript(originalScript, revisionRequest) {
|
|
15
|
-
const response = await
|
|
73
|
+
const response = await getOpenAI().chat.completions.create({
|
|
16
74
|
model: "gpt-4o",
|
|
17
75
|
messages: [{
|
|
18
76
|
role: "system",
|
|
@@ -37,7 +95,8 @@ Output only the revised test script, nothing else.`
|
|
|
37
95
|
});
|
|
38
96
|
return response.choices[0].message.content.trim();
|
|
39
97
|
}
|
|
40
|
-
export async function sendCUARequest({ messages, screenshotBase64, previousResponseId, callId, deviceInfo, }) {
|
|
98
|
+
export async function sendCUARequest({ messages, screenshotBase64, previousResponseId, callId, deviceInfo, debugContext, }) {
|
|
99
|
+
const cuaModel = getSelectedCuaModel();
|
|
41
100
|
const input = [...messages];
|
|
42
101
|
if (callId && screenshotBase64) {
|
|
43
102
|
input.push({
|
|
@@ -49,76 +108,28 @@ export async function sendCUARequest({ messages, screenshotBase64, previousRespo
|
|
|
49
108
|
},
|
|
50
109
|
});
|
|
51
110
|
}
|
|
52
|
-
const requestParams = {
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
type: "computer_use_preview",
|
|
57
|
-
display_width: deviceInfo.scaled_width,
|
|
58
|
-
display_height: deviceInfo.scaled_height,
|
|
59
|
-
environment: "browser",
|
|
60
|
-
}],
|
|
111
|
+
const requestParams = buildCuaRequestParams({
|
|
112
|
+
cuaModel,
|
|
113
|
+
previousResponseId,
|
|
114
|
+
deviceInfo,
|
|
61
115
|
input,
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
const imageUrl = item.output.image_url;
|
|
73
|
-
const base64Data = imageUrl.replace('data:image/png;base64,', '');
|
|
74
|
-
return {
|
|
75
|
-
...item,
|
|
76
|
-
output: {
|
|
77
|
-
...item.output,
|
|
78
|
-
image_url: `data:image/png;base64,[${base64Data.length} chars]`
|
|
79
|
-
},
|
|
80
|
-
current_url: item.current_url,
|
|
81
|
-
acknowledged_safety_checks: item.acknowledged_safety_checks
|
|
82
|
-
};
|
|
83
|
-
}
|
|
84
|
-
return item;
|
|
85
|
-
})
|
|
86
|
-
};
|
|
87
|
-
logger.debug('CUA Request:', requestLog);
|
|
116
|
+
});
|
|
117
|
+
const trace = cuaDebugTracer.startTurn({
|
|
118
|
+
requestParams,
|
|
119
|
+
input,
|
|
120
|
+
screenshotBase64,
|
|
121
|
+
deviceInfo,
|
|
122
|
+
debugContext,
|
|
123
|
+
previousResponseId
|
|
124
|
+
});
|
|
125
|
+
logger.debug("CUA Request:", trace.requestLog);
|
|
88
126
|
try {
|
|
89
|
-
const response = await
|
|
90
|
-
|
|
91
|
-
const outputTypes = (response.output || []).map(item => item.type);
|
|
92
|
-
const toolCalls = (response.output || [])
|
|
93
|
-
.filter(item => item.type === 'computer_call')
|
|
94
|
-
.map(item => ({
|
|
95
|
-
call_id: item.call_id,
|
|
96
|
-
action_type: item.action?.type
|
|
97
|
-
}));
|
|
98
|
-
const safetyChecks = (response.output || [])
|
|
99
|
-
.filter(item => item.type === 'pending_safety_check')
|
|
100
|
-
.map(item => ({
|
|
101
|
-
id: item.id,
|
|
102
|
-
code: item.code
|
|
103
|
-
}));
|
|
104
|
-
// Log full output array if there are unaccounted items
|
|
105
|
-
const accountedItems = toolCalls.length + safetyChecks.length;
|
|
106
|
-
const totalItems = response.output?.length || 0;
|
|
107
|
-
logger.debug('CUA Response:', {
|
|
108
|
-
id: response.id,
|
|
109
|
-
output_length: totalItems,
|
|
110
|
-
output_types: outputTypes,
|
|
111
|
-
tool_calls: toolCalls.length > 0 ? toolCalls : 'none',
|
|
112
|
-
pending_safety_checks: safetyChecks.length > 0 ? safetyChecks : 'none'
|
|
113
|
-
});
|
|
114
|
-
// If we're missing items in our logging, log the full output for investigation
|
|
115
|
-
if (accountedItems < totalItems) {
|
|
116
|
-
logger.debug('UNACCOUNTED OUTPUT ITEMS - Full output array:', response.output);
|
|
117
|
-
}
|
|
127
|
+
const response = await getOpenAI().responses.create(requestParams);
|
|
128
|
+
cuaDebugTracer.onResponse(trace, response);
|
|
118
129
|
return response;
|
|
119
130
|
}
|
|
120
131
|
catch (err) {
|
|
121
|
-
|
|
122
|
-
throw err;
|
|
132
|
+
cuaDebugTracer.onError(trace, err);
|
|
133
|
+
throw mapCuaError(err, cuaModel);
|
|
123
134
|
}
|
|
124
135
|
}
|
|
@@ -1,9 +1,24 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Loadmill API client for interacting with test flows
|
|
3
3
|
*/
|
|
4
|
-
import dotenv from "dotenv";
|
|
5
|
-
dotenv.config();
|
|
6
4
|
const DEFAULT_BASE_URL = "https://app.loadmill.com/api";
|
|
5
|
+
/**
 * Normalise a user-supplied Loadmill base URL into a canonical API root.
 *
 * The result always ends in "/api" with no trailing slash, query string or
 * fragment. DEFAULT_BASE_URL is returned when the input is missing, blank,
 * not a string, not parseable as a URL, or not an http(s) URL.
 *
 * The protocol check matters because scheme-less values such as
 * "localhost:3000" are accepted by the URL parser (as protocol "localhost:"
 * with an opaque path); for such URLs the pathname rewrite below has no
 * effect, so without the check an unusable base URL would be returned.
 *
 * @param {string|undefined|null} rawBaseUrl - Value to normalise (typically from the environment).
 * @returns {string} An absolute http(s) URL ending in "/api".
 */
function normalizeApiBaseUrl(rawBaseUrl) {
    const candidate = typeof rawBaseUrl === "string" ? rawBaseUrl.trim() : "";
    if (!candidate) {
        return DEFAULT_BASE_URL;
    }
    let parsed;
    try {
        parsed = new URL(candidate);
    }
    catch {
        return DEFAULT_BASE_URL;
    }
    // Only http(s) endpoints can serve the API; anything else is a misconfiguration.
    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
        return DEFAULT_BASE_URL;
    }
    // Drop trailing slashes first so "/api/" and "/api" are treated alike.
    const pathname = parsed.pathname.replace(/\/+$/, "");
    parsed.pathname = pathname.endsWith("/api") ? pathname : `${pathname}/api`;
    parsed.search = "";
    parsed.hash = "";
    return parsed.toString().replace(/\/$/, "");
}
|
|
7
22
|
/**
|
|
8
23
|
* Get Loadmill API token from environment
|
|
9
24
|
* @returns {string|null}
|
|
@@ -16,7 +31,7 @@ export function getApiToken() {
|
|
|
16
31
|
* @returns {string}
|
|
17
32
|
*/
|
|
18
33
|
export function getBaseUrl() {
|
|
19
|
-
return process.env.LOADMILL_BASE_URL
|
|
34
|
+
return normalizeApiBaseUrl(process.env.LOADMILL_BASE_URL);
|
|
20
35
|
}
|
|
21
36
|
/**
|
|
22
37
|
* Make an authenticated request to Loadmill API
|
|
@@ -39,6 +54,12 @@ async function apiRequest(endpoint, options = {}) {
|
|
|
39
54
|
...options.headers,
|
|
40
55
|
},
|
|
41
56
|
});
|
|
57
|
+
if (response.status === 401 || response.status === 403) {
|
|
58
|
+
const unauthorizedHandler = globalThis.__DROID_CUA_HANDLE_LOADMILL_UNAUTHORIZED__;
|
|
59
|
+
if (typeof unauthorizedHandler === "function") {
|
|
60
|
+
await unauthorizedHandler();
|
|
61
|
+
}
|
|
62
|
+
}
|
|
42
63
|
if (!response.ok) {
|
|
43
64
|
const errorText = await response.text();
|
|
44
65
|
throw new Error(`Loadmill API error (${response.status}): ${errorText}`);
|
|
@@ -130,9 +130,9 @@ export async function executeLoadmillCommand(userInput, options = {}) {
|
|
|
130
130
|
};
|
|
131
131
|
}
|
|
132
132
|
// Step 5: Poll for completion
|
|
133
|
-
onProgress({ step: "polling", message: `Test started (ID: ${runId}). Waiting for completion
|
|
133
|
+
onProgress({ step: "polling", message: `Test started (ID: ${runId}). Waiting for completion...`, runId });
|
|
134
134
|
const finalResult = await pollForCompletion(runId, (status) => {
|
|
135
|
-
onProgress({ step: "polling", message: `Status: ${status.status}
|
|
135
|
+
onProgress({ step: "polling", message: `Status: ${status.status}...`, runId });
|
|
136
136
|
});
|
|
137
137
|
return {
|
|
138
138
|
...finalResult,
|
|
@@ -2,18 +2,22 @@
|
|
|
2
2
|
* AI-powered text interpretation for Loadmill commands
|
|
3
3
|
*/
|
|
4
4
|
import OpenAI from "openai";
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
5
|
+
let openai = null;
|
|
6
|
+
/**
 * Lazily create and cache the OpenAI client used by the interpreter.
 *
 * The first call builds the client with the OPENAI_API_KEY currently in the
 * environment and stores it in the module-level `openai` variable; later
 * calls return that same instance.
 *
 * @returns {OpenAI} The cached client instance.
 */
function getOpenAI() {
    const needsClient = !openai;
    if (needsClient) {
        const clientOptions = { apiKey: process.env.OPENAI_API_KEY };
        openai = new OpenAI(clientOptions);
    }
    return openai;
}
|
|
10
14
|
/**
|
|
11
15
|
* Interpret a natural language Loadmill command into structured data
|
|
12
16
|
* @param {string} userInput - Natural language command
|
|
13
17
|
* @returns {Promise<{searchQuery: string, parameters: Object, action: 'run'|'search'}>}
|
|
14
18
|
*/
|
|
15
19
|
export async function interpretLoadmillCommand(userInput) {
|
|
16
|
-
const response = await
|
|
20
|
+
const response = await getOpenAI().chat.completions.create({
|
|
17
21
|
model: "gpt-4o-mini",
|
|
18
22
|
messages: [
|
|
19
23
|
{
|
|
@@ -78,7 +82,7 @@ export async function selectBestFlow(flows, originalQuery) {
|
|
|
78
82
|
const suite = f.testSuiteDescription || "";
|
|
79
83
|
return `${i + 1}. ID: ${f.id}, Name: "${name}"${suite ? `, Suite: "${suite}"` : ""}`;
|
|
80
84
|
}).join("\n");
|
|
81
|
-
const response = await
|
|
85
|
+
const response = await getOpenAI().chat.completions.create({
|
|
82
86
|
model: "gpt-4o-mini",
|
|
83
87
|
messages: [
|
|
84
88
|
{
|
|
@@ -20,6 +20,8 @@ export class DesignModeInk {
|
|
|
20
20
|
this.waitingForInput = false; // Flag to indicate we're explicitly waiting for input
|
|
21
21
|
this.inputResolver = null; // Promise resolver for input
|
|
22
22
|
this.initialUserPrompt = null; // Store initial prompt for error recovery
|
|
23
|
+
this.consecutiveErrorCount = 0;
|
|
24
|
+
this.maxConsecutiveErrors = 3;
|
|
23
25
|
}
|
|
24
26
|
/**
|
|
25
27
|
* Start design mode conversation
|
|
@@ -202,6 +204,7 @@ export class DesignModeInk {
|
|
|
202
204
|
return false; // Continue execution
|
|
203
205
|
}, this.context);
|
|
204
206
|
this.session.updateResponseId(newResponseId);
|
|
207
|
+
this.consecutiveErrorCount = 0;
|
|
205
208
|
// Clear agent working status
|
|
206
209
|
if (this.context.setAgentWorking) {
|
|
207
210
|
this.context.setAgentWorking(false);
|
|
@@ -315,6 +318,16 @@ export class DesignModeInk {
|
|
|
315
318
|
});
|
|
316
319
|
// Show user-friendly error message
|
|
317
320
|
addOutput({ type: 'error', text: `⚠️ Error in design mode: ${err.message}` });
|
|
321
|
+
this.consecutiveErrorCount += 1;
|
|
322
|
+
if (this.consecutiveErrorCount > this.maxConsecutiveErrors) {
|
|
323
|
+
addOutput({
|
|
324
|
+
type: 'error',
|
|
325
|
+
text: `Design mode could not recover after ${this.maxConsecutiveErrors} consecutive errors and stopped.`
|
|
326
|
+
});
|
|
327
|
+
this.conversationActive = false;
|
|
328
|
+
this.cleanup();
|
|
329
|
+
return;
|
|
330
|
+
}
|
|
318
331
|
// Automatic recovery - continue from where we left off using transcript
|
|
319
332
|
addOutput({ type: 'info', text: 'Recovering from error and continuing...' });
|
|
320
333
|
// Build recovery context with transcript
|
|
@@ -17,6 +17,8 @@ export class DesignMode {
|
|
|
17
17
|
this.escPressed = false;
|
|
18
18
|
this.recentActions = []; // Track recent actions for stuck detection
|
|
19
19
|
this.initialUserPrompt = null; // Store initial prompt for error recovery
|
|
20
|
+
this.consecutiveErrorCount = 0;
|
|
21
|
+
this.maxConsecutiveErrors = 3;
|
|
20
22
|
}
|
|
21
23
|
/**
|
|
22
24
|
* Start design mode conversation
|
|
@@ -167,6 +169,7 @@ export class DesignMode {
|
|
|
167
169
|
return false; // Continue execution
|
|
168
170
|
});
|
|
169
171
|
this.session.updateResponseId(newResponseId);
|
|
172
|
+
this.consecutiveErrorCount = 0;
|
|
170
173
|
// Cleanup ESC detection
|
|
171
174
|
this.cleanupEscDetection(keypressHandler);
|
|
172
175
|
// Check if user pressed ESC
|
|
@@ -303,6 +306,12 @@ export class DesignMode {
|
|
|
303
306
|
});
|
|
304
307
|
// Show user-friendly error message
|
|
305
308
|
console.error("\n⚠️ Error in design mode:", err.message);
|
|
309
|
+
this.consecutiveErrorCount += 1;
|
|
310
|
+
if (this.consecutiveErrorCount > this.maxConsecutiveErrors) {
|
|
311
|
+
console.error(`\nDesign mode could not recover after ${this.maxConsecutiveErrors} consecutive errors and stopped.`);
|
|
312
|
+
this.conversationActive = false;
|
|
313
|
+
return;
|
|
314
|
+
}
|
|
306
315
|
// Automatic recovery - continue from where we left off using transcript
|
|
307
316
|
console.log("\nRecovering from error and continuing...");
|
|
308
317
|
// Build recovery context with transcript
|