@loadmill/droid-cua 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -0
- package/README.md +227 -0
- package/bin/droid-cua +6 -0
- package/build/index.js +58 -0
- package/build/src/cli/app.js +115 -0
- package/build/src/cli/command-parser.js +57 -0
- package/build/src/cli/components/AgentStatus.js +21 -0
- package/build/src/cli/components/CommandSuggestions.js +33 -0
- package/build/src/cli/components/InputPanel.js +21 -0
- package/build/src/cli/components/OutputPanel.js +58 -0
- package/build/src/cli/components/StatusBar.js +22 -0
- package/build/src/cli/ink-shell.js +56 -0
- package/build/src/commands/create.js +42 -0
- package/build/src/commands/edit.js +61 -0
- package/build/src/commands/exit.js +20 -0
- package/build/src/commands/help.js +34 -0
- package/build/src/commands/index.js +49 -0
- package/build/src/commands/list.js +55 -0
- package/build/src/commands/run.js +112 -0
- package/build/src/commands/stop.js +32 -0
- package/build/src/commands/view.js +43 -0
- package/build/src/core/execution-engine.js +114 -0
- package/build/src/core/prompts.js +158 -0
- package/build/src/core/session.js +57 -0
- package/build/src/device/actions.js +81 -0
- package/build/src/device/assertions.js +75 -0
- package/build/src/device/connection.js +123 -0
- package/build/src/device/openai.js +124 -0
- package/build/src/modes/design-mode-ink.js +396 -0
- package/build/src/modes/design-mode.js +366 -0
- package/build/src/modes/execution-mode.js +165 -0
- package/build/src/test-store/test-manager.js +92 -0
- package/build/src/utils/logger.js +86 -0
- package/package.json +68 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* System prompt templates for different modes
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Build the base system prompt that the mode-specific prompts extend.
 *
 * The text is static except for the screen dimensions, which are read from
 * `deviceInfo.scaled_width` and `deviceInfo.scaled_height`.
 *
 * @param {{scaled_width: number, scaled_height: number}} deviceInfo - Device
 *   description; only the two scaled dimensions are used here.
 * @returns {string} The prompt text. It starts and ends with a newline because
 *   the template literal opens and closes on its own lines.
 */
export function buildBaseSystemPrompt(deviceInfo) {
  return `
You are controlling an Android phone in a sandboxed testing environment.
Follow the user's instructions to interact with the device.

The device screen has been scaled down for display.
You can interact with any part of the visible phone screen, including system UI, browser UI, and app content.

The screen you see is ${deviceInfo.scaled_width} x ${deviceInfo.scaled_height} pixels.
Pixel (0,0) is at the top-left corner.

When aiming for visual targets:
- Reason carefully about the approximate pixel position.
- Click precisely based on your visual estimate.

Available actions: click, scroll, type, keypress, wait, screenshot.

CRITICAL - Automatic Timing:
- After EVERY action (click, type, keypress, scroll), there is an automatic 500ms delay
- This 500ms is sufficient for normal UI updates and animations
- DO NOT add 'wait' actions unnecessarily - trust the automatic delay

Use explicit 'wait' action ONLY in these specific cases:
1. After launching apps from home screen or app drawer
2. After pressing ENTER that triggers navigation (search, URL, form submit)
3. After clicking links that open new apps or pages
4. After actions that trigger heavy loading (camera, maps, etc.)

When you MUST wait:
- Click app icon from home → wait → Continue
- Type in search box → Press ENTER → wait → Continue
- Click link that opens new page/app → wait → Continue
- Open camera/maps/heavy feature → wait → Continue

When you should NOT wait (automatic 500ms handles it):
- Clicking UI buttons within a running app (click button - no wait needed)
- Typing in text fields (type text - no wait needed)
- Scrolling (scroll - no wait needed)
- Clicking tabs or menu items within an app (click - no wait needed)

Rule of thumb: Wait for app launches and navigation. Everything else has automatic timing.

Perform the user's requested actions within the current view.

If unsure about visual elements, take a screenshot to improve your reasoning.
If unsure about the user's intent, make the best decision you can based on context and continue automatically.

CRITICAL - Never Ask Questions:
- NEVER ask the user for confirmation, clarification, or next steps
- NEVER ask questions like "Should I...", "Would you like...", "Do you want me to..."
- NEVER wait for user guidance - make autonomous decisions
- If stuck, try alternative approaches (go back, try different UI element, restart app)
- ONLY stop when the task is complete or you've exhausted reasonable approaches

Act decisively to complete the task.

Stop acting once the task appears complete.
Only complete the current instruction. Do not proceed beyond the current step unless asked.

Mobile-Specific Notes:
- ESC key maps to the Home button (return to home screen)
- Use Home button (ESC) to escape from stuck situations and restart
- Back button navigates within apps
`;
}
|
|
69
|
+
/**
 * Build the system prompt for design mode: the base prompt followed by
 * instructions for exploring an app and emitting a test script.
 *
 * NOTE(review): the base prompt contains a "Never Ask Questions" section,
 * while this prompt tells the model to stop and ask the user when stuck.
 * Confirm which instruction is meant to take precedence in design mode.
 *
 * @param {{scaled_width: number, scaled_height: number}} deviceInfo - Passed
 *   through unchanged to buildBaseSystemPrompt.
 * @returns {string} The base prompt with the design-mode section appended.
 */
export function buildDesignModePrompt(deviceInfo) {
  const basePrompt = buildBaseSystemPrompt(deviceInfo);
  // The code fences in the examples are escaped so they do not end the template literal.
  return `${basePrompt}

DESIGN MODE:
You are helping design a test script for an Android app.

Your task:
1. Understand what the user wants to test from their initial instruction
2. Explore the app autonomously to understand the flows
3. Take screenshots and interact as needed to discover the UI and behavior
4. Once you've successfully completed the user's requested flow, immediately generate the test script

CRITICAL - After Completing the Task:
- DO NOT navigate back or away from the final screen
- The final screen state is what matters for verification
- Generate the test script immediately showing the current state
- Use assertions to verify state, not navigation
- "Check that it changed" means verify the current visual state, not navigate elsewhere

CRITICAL - Recognizing When You Are Stuck:
If you find yourself:
- Repeating similar actions multiple times (e.g., opening/closing the same app repeatedly)
- Not reaching a new screen or state after several attempts
- Unsure about a higher-level decision (which tab to use, which mode to enter, where to start)
- Unable to find the UI element or feature the user mentioned

THEN STOP ACTING IMMEDIATELY and ask the user for guidance:
1. Briefly describe what you see on screen now
2. Explain what you were trying to do and why you're stuck
3. Ask a single, concrete question to unblock the next step

Example:
"Chrome is open but I don't see a search bar or new tab button. Should I open a new tab, or is there a specific way you'd like me to navigate?"

DO NOT continue brute-forcing the UI when stuck. The user prefers being asked over watching repeated failed attempts.
DO NOT ask if the user wants a script after successfully completing the flow - just generate it automatically.

CRITICAL - Test Script Format Rules:
- One simple instruction per line (NO numbers, NO bullets)
- Use imperative commands: "Open X", "Click Y", "Type Z"
- Include "assert: <condition>" lines to validate expected behavior
- End with "exit"
- Keep it simple and executable

CORRECT Example:
\`\`\`
Open Calculator app
assert: Calculator app is visible
Type "2"
Click the plus button
Type "3"
Click the equals button
assert: result shows 5
exit
\`\`\`

WRONG Example (DON'T DO THIS):
\`\`\`
1. Open Calculator app
2. Verify the app opened
3. etc...
\`\`\`

Remember: You are autonomous. Explore confidently. Generate simple, executable test scripts.
`;
}
|
|
136
|
+
/**
 * Build the system prompt for execution mode: the base prompt followed by
 * rules telling the model to run each script instruction silently.
 *
 * @param {{scaled_width: number, scaled_height: number}} deviceInfo - Passed
 *   through unchanged to buildBaseSystemPrompt.
 * @returns {string} The base prompt with the execution-mode section appended.
 */
export function buildExecutionModePrompt(deviceInfo) {
  const basePrompt = buildBaseSystemPrompt(deviceInfo);
  return `${basePrompt}

EXECUTION MODE - Critical Behavior:
You are executing test script commands one at a time. This is NOT a conversation.

CRITICAL RULES:
- DO NOT generate conversational text or narration
- DO NOT ask questions like "What should I do next?", "Would you like...", "Can I assist...?"
- DO NOT describe what you see on screen
- DO NOT say "Let me know if you need help" or similar phrases
- Just execute the action silently and stop immediately
- Only generate text if the action FAILED or cannot be completed

Your process:
1. Read the instruction
2. Execute the required actions
3. Stop immediately - no commentary, no questions

Each instruction is independent. Do not reference previous instructions or ask about next steps.
`;
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
 * State container for one CLI session: the target device, the conversation
 * messages, a plain-text transcript, and the id of the last model response
 * (used for response chaining).
 */
export class Session {
  /**
   * @param {string} deviceId - Identifier of the device this session drives.
   * @param {object} deviceInfo - Device description stored as-is.
   */
  constructor(deviceId, deviceInfo) {
    Object.assign(this, {
      deviceId,
      deviceInfo,
      messages: [],
      previousResponseId: null,
      transcript: [],
      systemPrompt: null,
    });
  }

  /** Append one `{ role, content }` entry to the conversation history. */
  addMessage(role, content) {
    const entry = { role, content };
    this.messages.push(entry);
  }

  /**
   * Reset the conversation history (used in execution mode between turns).
   * When a system prompt has been set, the history restarts with that prompt;
   * otherwise it becomes empty.
   */
  clearMessages() {
    this.messages = this.systemPrompt
      ? [{ role: "system", content: this.systemPrompt }]
      : [];
  }

  /** Remember `prompt` and restart the history with it as the system message. */
  setSystemPrompt(prompt) {
    this.messages = [{ role: "system", content: prompt }];
    this.systemPrompt = prompt;
  }

  /** Append one line to the transcript (for error recovery and logging). */
  addToTranscript(line) {
    this.transcript.push(line);
  }

  /** @returns {string} All transcript lines joined with newlines. */
  getTranscriptText() {
    const separator = "\n";
    return this.transcript.join(separator);
  }

  /** Store the id of the latest response so the next request can chain to it. */
  updateResponseId(responseId) {
    this.previousResponseId = responseId;
  }
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { exec, execFile } from "child_process";
import { promisify } from "util";
import { logger } from "../utils/logger.js";
|
|
4
|
+
// Promise-returning execFile: adb is started directly, without a host shell.
const execFileAsync = promisify(execFile);

/**
 * Run a command in the device's shell through `adb -s <deviceId> shell`.
 *
 * The command is handed to adb as a single argument, so it is interpreted
 * only by the shell on the device. Building one host command line instead
 * would let quotes inside `command` (for example `input text "..."`) end the
 * surrounding quoting, exposing characters such as `;`, `'` or `$(...)` to
 * the host shell.
 *
 * @param {string} deviceId - adb serial of the target device.
 * @param {string} command - Command line for the device shell.
 * @returns {Promise<{stdout: string, stderr: string}>} Resolves with the
 *   command output; rejects when adb cannot be started or exits non-zero.
 */
function adbShell(deviceId, command) {
  return execFileAsync("adb", ["-s", String(deviceId), "shell", String(command)]);
}
|
|
8
|
+
/**
 * Execute one model-issued action on the device through adb.
 *
 * Supported `action.type` values: "click", "scroll", "drag", "type",
 * "keypress" and "wait". Anything else is reported as an unknown action.
 * Coordinates arrive in scaled-screenshot space and are divided by `scale`
 * to obtain device pixels.
 *
 * Failures never propagate: they are written to the logger and reported
 * through `addOutput`.
 *
 * @param {string} deviceId - adb serial of the target device.
 * @param {object} action - Action description produced by the model.
 * @param {number} [scale=1.0] - Screenshot-to-device scale factor.
 * @param {{addOutput?: Function}|null} [context=null] - Optional output sink;
 *   without one, messages go to console.log.
 * @returns {Promise<void>}
 */
export async function handleModelAction(deviceId, action, scale = 1.0, context = null) {
  const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
  try {
    const { x, y, text, keys, path } = action;

    // Convert a screenshot-space value to device pixels. Missing or
    // non-numeric values are rejected here so they never reach adb as "NaN".
    const toDevicePixels = (value, label) => {
      const pixels = value == null ? NaN : Math.round(value / scale);
      if (!Number.isFinite(pixels)) {
        throw new Error(`${action.type} action has an invalid ${label}: ${JSON.stringify(value)}`);
      }
      return pixels;
    };

    switch (action.type) {
      case "click": {
        const realX = toDevicePixels(x, "x");
        const realY = toDevicePixels(y, "y");
        addOutput({ type: 'action', text: `Clicking at (${realX}, ${realY})` });
        await adbShell(deviceId, `input tap ${realX} ${realY}`);
        break;
      }
      case "scroll": {
        // A missing delta on one axis means "no movement on that axis".
        const scrollX = toDevicePixels(action.scroll_x ?? 0, "scroll_x");
        const scrollY = toDevicePixels(action.scroll_y ?? 0, "scroll_y");
        addOutput({ type: 'action', text: `Scrolling by (${scrollX}, ${scrollY})` });
        // The swipe always starts from a fixed device point.
        const startX = 500;
        const startY = 500;
        const endX = startX + scrollX;
        // Inverted: a positive scroll_y moves the finger up the screen.
        const endY = startY - scrollY;
        await adbShell(deviceId, `input swipe ${startX} ${startY} ${endX} ${endY} 500`);
        break;
      }
      case "drag": {
        if (path && path.length >= 2) {
          // Only the first and last points are used; intermediate points are ignored.
          const start = path[0];
          const end = path[path.length - 1];
          const realStartX = toDevicePixels(start.x, "path start x");
          const realStartY = toDevicePixels(start.y, "path start y");
          const realEndX = toDevicePixels(end.x, "path end x");
          const realEndY = toDevicePixels(end.y, "path end y");
          addOutput({ type: 'action', text: `Dragging from (${realStartX}, ${realStartY}) to (${realEndX}, ${realEndY})` });
          await adbShell(deviceId, `input swipe ${realStartX} ${realStartY} ${realEndX} ${realEndY} 500`);
        }
        else {
          addOutput({ type: 'info', text: `Drag action missing valid path: ${JSON.stringify(action)}` });
        }
        break;
      }
      case "type": {
        if (typeof text !== "string") {
          throw new Error(`type action has no text: ${JSON.stringify(text)}`);
        }
        addOutput({ type: 'action', text: `Typing text: ${text}` });
        // Escape characters that are special inside double quotes, and encode
        // spaces as %s, which is how `input text` expects them.
        const escapedText = text.replace(/(["\\$`])/g, "\\$1").replace(/ /g, "%s");
        await adbShell(deviceId, `input text "${escapedText}"`);
        break;
      }
      case "keypress": {
        if (!Array.isArray(keys)) {
          throw new Error(`keypress action has no keys array: ${JSON.stringify(keys)}`);
        }
        // Map ESC to Android Home button (since ESC doesn't exist on mobile)
        const mappedKeys = keys.map((key) => {
          const upper = key.toUpperCase();
          return upper === 'ESC' || upper === 'ESCAPE' ? 'KEYCODE_HOME' : key;
        });
        addOutput({ type: 'action', text: `Pressing key: ${mappedKeys.join(', ')}` });
        // Keys are sent one at a time, in order.
        for (const key of mappedKeys) {
          await adbShell(deviceId, `input keyevent ${key}`);
        }
        break;
      }
      case "wait": {
        addOutput({ type: 'action', text: 'Waiting...' });
        await new Promise((res) => setTimeout(res, 1000));
        break;
      }
      default:
        addOutput({ type: 'info', text: `Unknown action: ${JSON.stringify(action)}` });
    }
  }
  catch (error) {
    // Log full error details to file
    logger.error('Action execution error', {
      action,
      message: error.message,
      stack: error.stack
    });
    // Show user-friendly error message
    addOutput({ type: 'error', text: `Error executing action: ${error.message}` });
    addOutput({ type: 'info', text: 'Full error details have been logged to the debug log.' });
  }
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Assertion handling for script validation
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Tell whether a script line is an assertion.
 *
 * A line counts as an assertion when, after trimming and ignoring case, it
 * begins with "assert:" or with "assert" followed by a space.
 *
 * @param {string} userInput - Raw script line.
 * @returns {boolean}
 */
export function isAssertion(userInput) {
  const normalized = userInput.trim().toLowerCase();
  return ["assert:", "assert "].some((prefix) => normalized.startsWith(prefix));
}
|
|
9
|
+
/**
 * Strip the leading "assert:" / "assert " marker from a script line.
 *
 * The marker is matched case-insensitively on the trimmed input. Lines
 * without a marker are returned trimmed but otherwise unchanged.
 *
 * @param {string} userInput - Raw script line.
 * @returns {string} The condition text with surrounding whitespace removed.
 */
export function extractAssertionPrompt(userInput) {
  const statement = userInput.trim();
  const normalized = statement.toLowerCase();
  for (const marker of ["assert:", "assert "]) {
    if (normalized.startsWith(marker)) {
      return statement.slice(marker.length).trim();
    }
  }
  return statement;
}
|
|
22
|
+
/**
 * Build the system prompt used while validating an assertion.
 *
 * The result is `baseSystemPrompt` followed by an "ASSERTION MODE" section
 * that embeds `assertionPrompt` in double quotes and requires the model to
 * answer with "ASSERTION RESULT: PASS" or "ASSERTION RESULT: FAIL". These are
 * the same marker strings that checkAssertionResult searches for.
 *
 * @param {string} baseSystemPrompt - Prompt text to extend.
 * @param {string} assertionPrompt - The condition to verify.
 * @returns {string}
 */
export function buildAssertionSystemPrompt(baseSystemPrompt, assertionPrompt) {
  return `${baseSystemPrompt}

ASSERTION MODE:
You are now validating an assertion. The user has provided an assertion statement that you must verify.

Your task:
1. Take screenshots and perform LIMITED actions if needed to validate the assertion.
2. Determine if the assertion is TRUE or FALSE based on the current state.
3. You MUST respond with a clear verdict in this exact format:
- If the assertion is true, include the text: "ASSERTION RESULT: PASS"
- If the assertion is false or cannot be confidently validated, include: "ASSERTION RESULT: FAIL"
4. After the verdict, provide a brief explanation (1-2 sentences) of why it passed or failed.

The assertion to validate is: "${assertionPrompt}"

Remember:
- If you cannot confidently validate the assertion, treat it as FAIL.
- You must include either "ASSERTION RESULT: PASS" or "ASSERTION RESULT: FAIL" in your response.
- Be thorough but efficient. Only take the actions necessary to validate the assertion.`;
}
|
|
43
|
+
/**
 * Decide an assertion's outcome from the transcript lines.
 *
 * The assertion passes only when the PASS marker is present and the FAIL
 * marker is absent. Every other combination, including no marker at all,
 * counts as a failure.
 *
 * @param {string[]} transcript - Transcript lines to scan.
 * @returns {{passed: boolean, failed: boolean}} Always exact opposites.
 */
export function checkAssertionResult(transcript) {
  const joined = transcript.join("\n");
  const sawPass = joined.includes("ASSERTION RESULT: PASS");
  const sawFail = joined.includes("ASSERTION RESULT: FAIL");
  const passed = sawPass && !sawFail;
  return { passed, failed: !passed };
}
|
|
52
|
+
/**
 * Pull the explanation that follows the FAIL marker out of the transcript.
 *
 * Only the last five transcript entries are searched. The returned text is
 * what sits between the first FAIL marker and the next one (or the end),
 * trimmed. A fixed fallback sentence is returned when that text is missing
 * or empty.
 *
 * @param {string[]} transcript - Transcript lines.
 * @returns {string}
 */
export function extractFailureDetails(transcript) {
  const marker = "ASSERTION RESULT: FAIL";
  const tail = transcript.slice(-5).join("\n");
  const [, afterMarker] = tail.split(marker);
  const details = afterMarker?.trim();
  return details || "Could not confidently validate the assertion.";
}
|
|
57
|
+
/**
 * Report a failed assertion and, in headless mode, terminate the process.
 *
 * Three error items are emitted: a banner, the assertion text, and the
 * failure details extracted from the transcript. In headless mode
 * `context.exit()` is called when available, followed by `process.exit(1)`.
 * In interactive mode the function simply returns; the caller is expected
 * to clear any remaining instructions.
 *
 * @param {string} assertionPrompt - The assertion that failed.
 * @param {string[]} transcript - Transcript lines used to extract details.
 * @param {boolean} isHeadlessMode - Whether to exit the process on failure.
 * @param {{addOutput?: Function, exit?: Function}} [context] - Optional output
 *   sink and exit hook; without `addOutput`, messages go to console.log.
 */
export function handleAssertionFailure(assertionPrompt, transcript, isHeadlessMode, context) {
  const details = extractFailureDetails(transcript);
  const emit = context?.addOutput || ((item) => console.log(item.text || item));

  const messages = [
    '❌ ASSERTION FAILED',
    `Assertion: ${assertionPrompt}`,
    `Details: ${details}`,
  ];
  for (const text of messages) {
    emit({ type: 'error', text });
  }

  if (!isHeadlessMode) {
    // Interactive mode: caller should clear remaining instructions
    return;
  }

  // Headless mode: exit with error code
  if (context?.exit) {
    context.exit();
  }
  process.exit(1);
}
|
|
72
|
+
/**
 * Report a passed assertion as a single success item.
 *
 * @param {string} assertionPrompt - The assertion that passed.
 * @param {{addOutput?: Function}|null} [context=null] - Optional output sink;
 *   without one, the message goes to console.log.
 */
export function handleAssertionSuccess(assertionPrompt, context = null) {
  const emit = context?.addOutput || ((item) => console.log(item.text || item));
  const text = `✓ Assertion passed: ${assertionPrompt}`;
  emit({ type: 'success', text });
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { exec, spawn } from "child_process";
|
|
2
|
+
import { once } from "events";
|
|
3
|
+
import { promisify } from "util";
|
|
4
|
+
import sharp from "sharp";
|
|
5
|
+
import { logger } from "../utils/logger.js";
|
|
6
|
+
// Promise-returning form of child_process.exec; callers in this file read `stdout` from the resolved value.
const execAsync = promisify(exec);
|
|
7
|
+
/**
 * Pause for a given time.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function wait(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
10
|
+
/**
 * List the serials printed by `adb devices`.
 *
 * The first output line is treated as a header and skipped. For every
 * remaining line the text before the first tab is taken as the serial;
 * empty serials are dropped. The device state column is not inspected.
 *
 * @returns {Promise<string[]>}
 */
async function listConnectedDevices() {
  const result = await execAsync("adb devices");
  const [, ...deviceLines] = result.stdout.trim().split("\n");
  const serials = [];
  for (const line of deviceLines) {
    const [serial] = line.split("\t");
    if (serial.length > 0) {
      serials.push(serial);
    }
  }
  return serials;
}
|
|
19
|
+
/**
 * Poll the adb device list until a serial containing `avdName` shows up.
 *
 * Matching is a plain substring test against the serial. The list is
 * re-read every two seconds until the timeout elapses.
 *
 * @param {string} avdName - Substring to look for in device serials.
 * @param {number} [timeoutMs=120000] - Maximum time to keep polling.
 * @returns {Promise<string|null>} The first matching serial, or null on timeout.
 */
async function waitForDeviceConnection(avdName, timeoutMs = 120000) {
  const expiresAt = Date.now() + timeoutMs;
  const containsName = (serial) => serial.includes(avdName);
  while (Date.now() < expiresAt) {
    const serials = await listConnectedDevices();
    const found = serials.find(containsName);
    if (found) {
      return found;
    }
    await wait(2000);
  }
  return null;
}
|
|
30
|
+
/**
 * Poll a device until it reports `sys.boot_completed` as "1".
 *
 * adb failures while polling are ignored on purpose and simply lead to
 * another attempt two seconds later.
 *
 * @param {string} deviceId - adb serial of the device.
 * @param {number} [timeoutMs=60000] - Maximum time to keep polling.
 * @returns {Promise<boolean>} true once booted, false on timeout.
 */
async function waitForDeviceBoot(deviceId, timeoutMs = 60000) {
  const expiresAt = Date.now() + timeoutMs;

  // One probe; any adb error counts as "not booted yet".
  const hasBooted = async () => {
    try {
      const result = await execAsync(`adb -s ${deviceId} shell getprop sys.boot_completed`);
      return result.stdout.trim() === "1";
    }
    catch {
      return false;
    }
  };

  while (Date.now() < expiresAt) {
    if (await hasBooted()) {
      return true;
    }
    await wait(2000);
  }
  return false;
}
|
|
43
|
+
/**
 * Ask a running emulator which AVD it was started from.
 *
 * `adb emu avd name` prints the AVD name on the first line and a status
 * line ("OK") after it, so only the first line is used.
 */
async function readEmulatorAvdName(emulatorId) {
  const { stdout } = await execAsync(`adb -s ${emulatorId} emu avd name`);
  return stdout.split(/\r?\n/)[0].trim();
}

/**
 * Find the serial of a connected emulator running the given AVD, or null.
 * Emulators whose console cannot be queried are skipped.
 */
async function findEmulatorRunningAvd(avdName) {
  const devices = await listConnectedDevices();
  for (const id of devices) {
    if (!id.startsWith("emulator-")) {
      continue;
    }
    try {
      if ((await readEmulatorAvdName(id)) === avdName) {
        return id;
      }
    }
    catch {
      // Best effort: an emulator that does not answer is treated as "not this AVD".
    }
  }
  return null;
}

/**
 * Return the adb serial of an emulator running `avdName`, launching the
 * emulator first when none is running.
 *
 * After launching, the function waits up to two minutes for an emulator
 * that reports the requested AVD name, then waits for it to finish booting.
 * Another emulator that happens to be attached is never returned in its
 * place. Launch failures, launch timeouts and boot timeouts print an error
 * and end the process with exit code 1.
 *
 * @param {string} avdName - Name of the Android Virtual Device.
 * @returns {Promise<string>} Serial such as "emulator-5554".
 */
export async function connectToDevice(avdName) {
  const runningId = await findEmulatorRunningAvd(avdName);
  if (runningId) {
    console.log(`Emulator ${avdName} is already running as ${runningId}`);
    return runningId;
  }

  console.log(`No emulator with AVD "${avdName}" is running. Launching...`);
  const emulatorProcess = spawn("emulator", ["-avd", avdName], { detached: true, stdio: "ignore" });
  // Without a listener, a failed spawn (e.g. emulator binary not on PATH)
  // would surface as an unhandled 'error' event and crash the process.
  let launchError = null;
  emulatorProcess.once("error", (error) => {
    launchError = error;
  });
  emulatorProcess.unref();

  const deadline = Date.now() + 120000;
  let deviceId = null;
  while (!launchError && Date.now() < deadline) {
    deviceId = await findEmulatorRunningAvd(avdName);
    if (deviceId) {
      break;
    }
    await wait(2000);
  }
  if (launchError) {
    console.error(`Failed to launch emulator ${avdName}: ${launchError.message}`);
    process.exit(1);
  }
  if (!deviceId) {
    console.error(`Emulator ${avdName} did not appear in time.`);
    process.exit(1);
  }

  console.log(`Device ${deviceId} detected. Waiting for boot...`);
  const booted = await waitForDeviceBoot(deviceId);
  if (!booted) {
    console.error(`Emulator ${avdName} did not finish booting.`);
    process.exit(1);
  }
  console.log(`Emulator ${avdName} is fully booted.`);
  return deviceId;
}
|
|
74
|
+
/**
 * Read the device's screen size and derive the scaled size shown to the model.
 *
 * `wm size` always prints "Physical size" and additionally prints
 * "Override size" when the resolution has been changed. The override is
 * preferred when present, so the scale factor is based on the resolution
 * currently in effect.
 *
 * Screens wider than 400 px are scaled down to a width of 400; narrower
 * screens keep a scale of 1. If no size can be parsed, an error is printed
 * and the process exits with code 1.
 *
 * @param {string} deviceId - adb serial of the device.
 * @returns {Promise<{device_width: number, device_height: number,
 *   scaled_width: number, scaled_height: number, scale: number}>}
 */
export async function getDeviceInfo(deviceId) {
  const { stdout } = await execAsync(`adb -s ${deviceId} shell wm size`);
  const sizeMatch = stdout.match(/Override size:\s*(\d+)x(\d+)/)
    ?? stdout.match(/Physical size:\s*(\d+)x(\d+)/);
  if (!sizeMatch) {
    console.error("Could not get device screen size.");
    process.exit(1);
  }
  const width = Number(sizeMatch[1]);
  const height = Number(sizeMatch[2]);
  const targetWidth = 400;
  const scale = width > targetWidth ? targetWidth / width : 1.0;
  return {
    device_width: width,
    device_height: height,
    scaled_width: Math.round(width * scale),
    scaled_height: Math.round(height * scale),
    scale,
  };
}
|
|
94
|
+
/**
 * Capture the device screen as a base64-encoded PNG.
 *
 * The image is read from `adb exec-out screencap -p`. When
 * `deviceInfo.scale` is below 1, it is resized to the scaled dimensions
 * before encoding.
 *
 * @param {string} deviceId - adb serial of the device.
 * @param {{scale: number, scaled_width: number, scaled_height: number}} deviceInfo
 * @returns {Promise<string>} Base64 PNG data without a data-URL prefix.
 * @throws {Error} When adb exits with a non-zero code or produces no data.
 */
export async function getScreenshotAsBase64(deviceId, deviceInfo) {
  const screencap = spawn("adb", ["-s", deviceId, "exec-out", "screencap", "-p"]);
  const chunks = [];
  const stderrParts = [];

  // Listeners are attached before awaiting so no output is missed.
  screencap.stdout.on("data", (part) => {
    chunks.push(part);
  });
  screencap.stderr.on("data", (part) => {
    stderrParts.push(part);
    console.error("ADB stderr:", part.toString());
  });

  const [exitCode] = await once(screencap, "close");
  if (exitCode !== 0) {
    const stderrText = Buffer.concat(stderrParts).toString();
    logger.error(`ADB screencap failed with code ${exitCode}`, { stderr: stderrText });
    throw new Error(`adb screencap exited with code ${exitCode}`);
  }

  const rawPng = Buffer.concat(chunks);
  logger.debug(`Screenshot captured: ${rawPng.length} bytes before scaling`);
  if (rawPng.length === 0) {
    logger.error('Screenshot buffer is empty!', { deviceId, chunks: chunks.length });
    throw new Error('Screenshot capture returned empty buffer');
  }

  // No downscaling requested: return the capture as-is.
  if (!(deviceInfo.scale < 1.0)) {
    return rawPng.toString("base64");
  }

  const { scaled_width: width, scaled_height: height } = deviceInfo;
  const scaledPng = await sharp(rawPng)
    .resize({ width, height })
    .png()
    .toBuffer();
  logger.debug(`Screenshot scaled: ${scaledPng.length} bytes after scaling`);
  return scaledPng.toString("base64");
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
2
|
+
import dotenv from "dotenv";
|
|
3
|
+
import { logger } from "../utils/logger.js";
|
|
4
|
+
// Run dotenv first so that environment variables it provides (presumably from a
// local .env file) are available when the client below reads the API key.
dotenv.config();
// Module-wide OpenAI client, authenticated with the OPENAI_API_KEY environment variable.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});
|
|
8
|
+
/**
 * Revise a test script based on user feedback using simple chat completion.
 *
 * The current script and the revision request are embedded in one system
 * message sent to the "gpt-4o" model; the reply text is returned trimmed.
 *
 * @param {string} originalScript - The original test script
 * @param {string} revisionRequest - User's requested changes
 * @returns {Promise<string>} - The revised test script
 * @throws {Error} When the response carries no text content (the API may
 *   return a null `content`). Errors from the API call itself propagate
 *   unchanged.
 */
export async function reviseTestScript(originalScript, revisionRequest) {
  const response = await openai.chat.completions.create({
    model: "gpt-4o",
    messages: [{
      role: "system",
      content: `You are editing a test script based on user feedback.

Current test script:
${originalScript}

User's revision request:
${revisionRequest}

Apply the user's changes and output the revised test script.

FORMAT RULES:
- One simple instruction per line (NO numbers, NO bullets)
- Use imperative commands: "Open X", "Click Y", "Type Z"
- Include "assert: <condition>" lines to validate expected behavior
- End with "exit"

Output only the revised test script, nothing else.`
    }]
  });
  const revisedScript = response.choices?.[0]?.message?.content;
  // Fail with a clear message rather than a TypeError from calling .trim() on null.
  if (typeof revisedScript !== "string") {
    throw new Error("Script revision failed: the model response contained no text content.");
  }
  return revisedScript.trim();
}
|
|
40
|
+
/**
 * Send one request to the computer-use model and return the raw response.
 *
 * When both `callId` and `screenshotBase64` are given, the screenshot is
 * appended to the input as the output of that computer call. The request
 * and a summary of the response are written to the debug log; screenshots
 * are replaced by their length in the logged copy.
 *
 * @param {object} params
 * @param {Array<object>} params.messages - Input items sent before the screenshot.
 * @param {string} [params.screenshotBase64] - Base64 PNG of the current screen.
 * @param {string|null} [params.previousResponseId] - Response id to chain to.
 * @param {string} [params.callId] - Id of the computer call being answered.
 * @param {{scaled_width: number, scaled_height: number}} params.deviceInfo -
 *   Display size advertised to the model.
 * @returns {Promise<object>} The response object from `openai.responses.create`.
 * @throws Re-throws any error from the API call after logging it.
 */
export async function sendCUARequest({ messages, screenshotBase64, previousResponseId, callId, deviceInfo, }) {
  const input = [...messages];
  if (callId && screenshotBase64) {
    input.push({
      type: "computer_call_output",
      call_id: callId,
      output: {
        type: "computer_screenshot",
        image_url: `data:image/png;base64,${screenshotBase64}`,
      },
    });
  }
  const requestParams = {
    model: "computer-use-preview",
    previous_response_id: previousResponseId || undefined,
    tools: [{
      type: "computer_use_preview",
      display_width: deviceInfo.scaled_width,
      display_height: deviceInfo.scaled_height,
      environment: "browser",
    }],
    input,
    store: true,
    reasoning: { generate_summary: "concise" },
    truncation: "auto",
  };
  // Log request details (without full screenshot to avoid clutter)
  const requestLog = {
    ...requestParams,
    input: input.map((item) => {
      if (item.type === "computer_call_output" && item.output?.image_url) {
        // Replace the image payload with its base64 length.
        const base64Data = item.output.image_url.replace('data:image/png;base64,', '');
        return {
          ...item,
          output: {
            ...item.output,
            image_url: `data:image/png;base64,[${base64Data.length} chars]`
          },
        };
      }
      return item;
    })
  };
  logger.debug('CUA Request:', requestLog);
  try {
    const response = await openai.responses.create(requestParams);
    const outputItems = response.output || [];
    // Log ALL output item types to catch everything
    const outputTypes = outputItems.map((item) => item.type);
    const computerCalls = outputItems.filter((item) => item.type === 'computer_call');
    const toolCalls = computerCalls.map((item) => ({
      call_id: item.call_id,
      action_type: item.action?.type
    }));
    // Safety checks are attached to computer_call items as
    // `pending_safety_checks`. Standalone items of type
    // 'pending_safety_check' are still collected as before.
    const standaloneSafetyChecks = outputItems.filter((item) => item.type === 'pending_safety_check');
    const safetyChecks = [
      ...standaloneSafetyChecks,
      ...computerCalls.flatMap((item) => item.pending_safety_checks || []),
    ].map((check) => ({
      id: check.id,
      code: check.code
    }));
    // Only whole output items count as "accounted"; nested checks belong to
    // a computer_call item that is already counted.
    const accountedItems = toolCalls.length + standaloneSafetyChecks.length;
    const totalItems = outputItems.length;
    logger.debug('CUA Response:', {
      id: response.id,
      output_length: totalItems,
      output_types: outputTypes,
      tool_calls: toolCalls.length > 0 ? toolCalls : 'none',
      pending_safety_checks: safetyChecks.length > 0 ? safetyChecks : 'none'
    });
    // If we're missing items in our logging, log the full output for investigation
    if (accountedItems < totalItems) {
      logger.debug('UNACCOUNTED OUTPUT ITEMS - Full output array:', response.output);
    }
    return response;
  }
  catch (err) {
    logger.error('CUA Request failed', { request: requestLog, error: err });
    throw err;
  }
}
|