@loadmill/droid-cua 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34):
  1. package/LICENSE +1 -0
  2. package/README.md +227 -0
  3. package/bin/droid-cua +6 -0
  4. package/build/index.js +58 -0
  5. package/build/src/cli/app.js +115 -0
  6. package/build/src/cli/command-parser.js +57 -0
  7. package/build/src/cli/components/AgentStatus.js +21 -0
  8. package/build/src/cli/components/CommandSuggestions.js +33 -0
  9. package/build/src/cli/components/InputPanel.js +21 -0
  10. package/build/src/cli/components/OutputPanel.js +58 -0
  11. package/build/src/cli/components/StatusBar.js +22 -0
  12. package/build/src/cli/ink-shell.js +56 -0
  13. package/build/src/commands/create.js +42 -0
  14. package/build/src/commands/edit.js +61 -0
  15. package/build/src/commands/exit.js +20 -0
  16. package/build/src/commands/help.js +34 -0
  17. package/build/src/commands/index.js +49 -0
  18. package/build/src/commands/list.js +55 -0
  19. package/build/src/commands/run.js +112 -0
  20. package/build/src/commands/stop.js +32 -0
  21. package/build/src/commands/view.js +43 -0
  22. package/build/src/core/execution-engine.js +114 -0
  23. package/build/src/core/prompts.js +158 -0
  24. package/build/src/core/session.js +57 -0
  25. package/build/src/device/actions.js +81 -0
  26. package/build/src/device/assertions.js +75 -0
  27. package/build/src/device/connection.js +123 -0
  28. package/build/src/device/openai.js +124 -0
  29. package/build/src/modes/design-mode-ink.js +396 -0
  30. package/build/src/modes/design-mode.js +366 -0
  31. package/build/src/modes/execution-mode.js +165 -0
  32. package/build/src/test-store/test-manager.js +92 -0
  33. package/build/src/utils/logger.js +86 -0
  34. package/package.json +68 -0
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Create command handler
3
+ */
4
+ import { testExists } from "../test-store/test-manager.js";
5
+ import { DesignModeInk } from "../modes/design-mode-ink.js";
6
/**
 * /create command: start an interactive design session for a new test.
 *
 * Prints usage when no name is given, refuses names that already exist,
 * and otherwise runs a DesignModeInk conversation until it finishes.
 *
 * @param {string} args - Raw argument string; trimmed and used as the test name
 * @param {Object} session - Current session, passed through to DesignModeInk
 * @param {Object} context - Shell context (engine, addOutput, setActiveDesignMode, ...)
 * @returns {Promise<boolean>} - always true, so the command loop keeps running
 */
export async function handleCreate(args, session, context) {
  const emit = context?.addOutput || ((item) => console.log(item.text || item));
  const info = (text) => emit({ type: 'info', text });
  const requestedName = args.trim();

  // Guard: a test name is mandatory.
  if (requestedName.length === 0) {
    emit({ type: 'error', text: 'Usage: /create <test-name>' });
    for (const text of ['', 'Example:', ' /create login-flow', ' /create calculator-test']) {
      info(text);
    }
    return true;
  }

  // Guard: never overwrite an existing test.
  if (await testExists(requestedName)) {
    emit({ type: 'error', text: `Test already exists: ${requestedName}` });
    info('Choose a different name or delete the existing test first.');
    return true;
  }

  const designSession = new DesignModeInk(session, context.engine, requestedName, context);

  // Publish the active design mode so the shell can route user input to it.
  if (context.setActiveDesignMode) {
    context.setActiveDesignMode(designSession);
  }

  // Resolves once the design conversation is over.
  await designSession.start();

  emit({ type: 'system', text: '=== Exited Design Mode ===' });
  return true;
}
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Edit command handler
3
+ */
4
+ import { spawn } from 'child_process';
5
+ import { testExists } from "../test-store/test-manager.js";
6
+ import path from "path";
7
+ import { fileURLToPath } from "url";
8
// Directory containing this module, and the tests directory resolved relative to it.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const TESTS_DIR = path.join(__dirname, "../../tests");

/**
 * Handle /edit command: open an existing test file in the user's editor.
 *
 * The editor is taken from $EDITOR, then $VISUAL, falling back to `nano`,
 * and is spawned with inherited stdio so it can take over the terminal.
 * The returned promise settles only after the editor has exited or has
 * failed to start.
 *
 * @param {string} args - Test name (with or without the `.dcua` extension)
 * @param {Object} session - Current session (unused)
 * @param {Object} context - Additional context (reads `addOutput`)
 * @returns {Promise<boolean>} - always true, so the command loop keeps running
 */
export async function handleEdit(args, session, context) {
  const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
  const testName = args.trim();

  // Check if test name provided
  if (!testName) {
    addOutput({ type: 'error', text: 'Usage: /edit <test-name>' });
    addOutput({ type: 'info', text: '' });
    addOutput({ type: 'info', text: 'Example:' });
    addOutput({ type: 'info', text: ' /edit example' });
    return true; // Continue loop
  }

  // Check if test exists
  const exists = await testExists(testName);
  if (!exists) {
    addOutput({ type: 'error', text: `Test not found: ${testName}` });
    addOutput({ type: 'info', text: 'Use /list to see available tests.' });
    return true; // Continue loop
  }

  // Determine editor to use
  const editor = process.env.EDITOR || process.env.VISUAL || 'nano';
  const filename = testName.endsWith('.dcua') ? testName : `${testName}.dcua`;
  const filepath = path.join(TESTS_DIR, filename);

  addOutput({ type: 'system', text: `Opening ${testName} in ${editor}...` });
  addOutput({ type: 'info', text: 'Save and exit the editor to return to droid-cua.' });

  // Open editor in foreground (blocking)
  return new Promise((resolve) => {
    // Node may emit both 'error' and 'exit' for the same child; report and
    // resolve only for whichever arrives first.
    let settled = false;
    const finish = (report) => {
      if (settled) {
        return;
      }
      settled = true;
      report();
      resolve(true); // Continue loop
    };

    const reportLaunchFailure = (err) => {
      addOutput({ type: 'error', text: `Failed to open editor: ${err.message}` });
      addOutput({ type: 'info', text: 'Try setting the EDITOR environment variable.' });
    };

    let editorProcess;
    try {
      editorProcess = spawn(editor, [filepath], {
        stdio: 'inherit', // Inherit stdin/stdout/stderr to allow interactive editing
      });
    } catch (err) {
      // spawn() throws synchronously on invalid arguments; keep the CLI loop alive.
      finish(() => reportLaunchFailure(err));
      return;
    }

    editorProcess.on('exit', (code, signal) => {
      finish(() => {
        if (code === 0) {
          addOutput({ type: 'success', text: `Finished editing ${testName}` });
        } else if (code === null) {
          // A null exit code means the child was terminated by a signal.
          addOutput({ type: 'error', text: `Editor was terminated by signal ${signal}` });
        } else {
          addOutput({ type: 'error', text: `Editor exited with code ${code}` });
        }
      });
    });

    editorProcess.on('error', (err) => {
      finish(() => reportLaunchFailure(err));
    });
  });
}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Exit command handler
3
+ */
4
/**
 * /exit command: print a farewell and terminate the process.
 *
 * @param {string} args - Command arguments (unused)
 * @param {Object} session - Current session (unused)
 * @param {Object} context - Additional context (reads `addOutput`)
 * @returns {Promise<boolean>} - false, telling the command loop to stop
 */
export async function handleExit(args, session, context) {
  const EXIT_DELAY_MS = 100;
  const emit = context?.addOutput || ((item) => console.log(item.text || item));

  emit({ type: 'system', text: 'Goodbye!' });

  // Hard-exit so any in-flight execution is cut off as well; the short delay
  // lets the farewell message render before the process goes away.
  const terminate = () => {
    process.exit(0);
  };
  setTimeout(terminate, EXIT_DELAY_MS);

  return false;
}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Help command handler
3
+ */
4
/**
 * Handle /help command: print the command reference.
 *
 * The banner is emitted as a 'system' item and every following line as an
 * 'info' item. The list includes /stop, which is a registered command and is
 * advertised while a test is running.
 *
 * @param {string} args - Command arguments (unused)
 * @param {Object} session - Current session (unused)
 * @param {Object} context - Additional context (reads `addOutput`)
 * @returns {Promise<boolean>} - true to continue loop
 */
export async function handleHelp(args, session, context) {
  const addOutput = context?.addOutput || ((item) => console.log(item.text || item));

  addOutput({ type: 'system', text: 'droid-cua - AI-powered Android testing CLI' });

  const helpLines = [
    '',
    'Available commands:',
    ' /help Show this help message',
    ' /exit Exit the CLI',
    ' /stop Stop the current test creation or execution',
    '',
    'Test Management:',
    ' /create <test-name> Create a new test with autonomous design',
    ' /run <test-name> Execute an existing test',
    ' /list List all available tests',
    ' /view <test-name> View test contents with line numbers',
    ' /edit <test-name> Edit a test in your default editor',
    '',
    'Examples:',
    ' /create login-test (design a new test)',
    ' /list (see all tests)',
    ' /view login-test (view test contents)',
    ' /run login-test (execute test)',
    '',
    'For more info, see README.md',
  ];
  for (const text of helpLines) {
    addOutput({ type: 'info', text });
  }

  return true; // Continue loop
}
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Command registry and router
3
+ */
4
+ import { handleHelp } from './help.js';
5
+ import { handleExit } from './exit.js';
6
+ import { handleRun } from './run.js';
7
+ import { handleCreate } from './create.js';
8
+ import { handleList } from './list.js';
9
+ import { handleView } from './view.js';
10
+ import { handleEdit } from './edit.js';
11
+ import { handleStop } from './stop.js';
12
/**
 * Map of command names to their handlers.
 * Each handler receives (args, session, context).
 *
 * The registry is built on a null prototype so that user-typed names such as
 * "constructor", "toString" or "__proto__" do not resolve to members inherited
 * from Object.prototype. It is frozen because nothing in this module mutates
 * it after creation.
 */
const COMMAND_HANDLERS = Object.freeze(Object.assign(Object.create(null), {
  help: handleHelp,
  exit: handleExit,
  run: handleRun,
  create: handleCreate,
  list: handleList,
  view: handleView,
  edit: handleEdit,
  stop: handleStop,
}));
26
/**
 * Route a command to its handler.
 *
 * Only names registered directly on COMMAND_HANDLERS are dispatched. Anything
 * else, including names that would otherwise resolve through the prototype
 * chain such as "constructor" or "__proto__", is reported as unknown.
 *
 * @param {string} command - Command name (without /)
 * @param {string} args - Command arguments
 * @param {Object} session - Current session
 * @param {Object} context - Additional context (reads `addOutput`; passed on to the handler)
 * @returns {Promise<boolean>} - the handler's result; true for unknown commands so the loop continues
 */
export async function routeCommand(command, args, session, context) {
  const addOutput = context?.addOutput || ((item) => console.log(item.text || item));

  // An own-property check keeps inherited Object.prototype members from
  // being mistaken for handlers.
  const handler = Object.hasOwn(COMMAND_HANDLERS, command)
    ? COMMAND_HANDLERS[command]
    : undefined;

  if (typeof handler !== 'function') {
    addOutput({ type: 'error', text: `Unknown command: /${command}` });
    addOutput({ type: 'info', text: 'Type /help to see available commands.' });
    return true; // Continue loop
  }

  return await handler(args, session, context);
}
44
/**
 * List the names of all registered commands, in registration order.
 *
 * @returns {string[]} command names (without the leading slash)
 */
export function getAvailableCommands() {
  const registered = Object.entries(COMMAND_HANDLERS);
  return registered.map(([commandName]) => commandName);
}
@@ -0,0 +1,55 @@
1
+ /**
2
+ * List command handler
3
+ */
4
+ import { listTests } from "../test-store/test-manager.js";
5
/**
 * Describe how long ago `modified` was, relative to `now`.
 *
 * Uses whole days, then hours, then minutes; anything under a minute
 * (including a timestamp in the future) is reported as 'just now'.
 *
 * @param {Date|number} modified - Modification time
 * @param {Date|number} now - Reference time
 * @returns {string} e.g. '3 days ago', '1 hour ago', 'just now'
 */
function formatRelativeTime(modified, now) {
  const minutes = Math.floor((now - modified) / 60000);
  const hours = Math.floor(minutes / 60);
  const days = Math.floor(hours / 24);
  const phrase = (count, unit) => `${count} ${unit}${count > 1 ? 's' : ''} ago`;

  if (days > 0) {
    return phrase(days, 'day');
  }
  if (hours > 0) {
    return phrase(hours, 'hour');
  }
  if (minutes > 0) {
    return phrase(minutes, 'minute');
  }
  return 'just now';
}

/**
 * Handle /list command: print every stored test with its line count and
 * a relative modification time.
 *
 * @param {string} args - Command arguments (unused)
 * @param {Object} session - Current session (unused)
 * @param {Object} context - Additional context (reads `addOutput`)
 * @returns {Promise<boolean>} - true to continue loop
 */
export async function handleList(args, session, context) {
  const addOutput = context?.addOutput || ((item) => console.log(item.text || item));

  addOutput({ type: 'system', text: 'Available tests:' });
  addOutput({ type: 'info', text: '' });

  const tests = await listTests();

  if (tests.length === 0) {
    addOutput({ type: 'info', text: ' (no tests found)' });
    addOutput({ type: 'info', text: '' });
    addOutput({ type: 'info', text: 'Create a test with: /create <test-name>' });
    return true; // Continue loop
  }

  // One reference time for the whole listing keeps the rows consistent.
  const now = new Date();
  for (const test of tests) {
    const timeAgo = formatRelativeTime(test.modified, now);
    // The name and the metadata are separate fields so the renderer can style them differently.
    addOutput({
      type: 'test-name',
      text: ` ${test.name}`,
      metadata: `(${test.lines} lines, modified ${timeAgo})`,
    });
  }

  addOutput({ type: 'info', text: '' });
  addOutput({ type: 'info', text: 'Run a test with: /run <test-name>' });
  addOutput({ type: 'info', text: 'View a test with: /view <test-name>' });

  return true; // Continue loop
}
@@ -0,0 +1,112 @@
1
+ /**
2
+ * Run command handler
3
+ */
4
+ import { loadTest, listTests, testExists } from "../test-store/test-manager.js";
5
+ import { ExecutionMode } from "../modes/execution-mode.js";
6
+ import { buildExecutionModePrompt } from "../core/prompts.js";
7
/**
 * Handle /run command: load a stored test and execute it.
 *
 * With no test name, prints usage and the available tests. Otherwise it
 * switches the UI into execution mode, resets the session so earlier
 * conversation state does not leak into the run, executes the test and
 * reports the outcome.
 *
 * UI state changed for the run is restored in a `finally` block, so a
 * failure while loading or executing does not leave the shell stuck in
 * execution mode. Such a failure still propagates to the caller.
 *
 * @param {string} args - Test name
 * @param {Object} session - Current session (response id, messages and system prompt are reset)
 * @param {Object} context - Additional context (engine, addOutput and optional UI setters)
 * @returns {Promise<boolean>} - true to continue loop
 */
export async function handleRun(args, session, context) {
  const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
  const testName = args.trim();

  // If no test name provided, list available tests
  if (!testName) {
    addOutput({ type: 'info', text: 'Usage: /run <test-name>' });
    addOutput({ type: 'info', text: '' });
    addOutput({ type: 'info', text: 'Available tests:' });
    const tests = await listTests();
    if (tests.length === 0) {
      addOutput({ type: 'info', text: ' (no tests found)' });
      addOutput({ type: 'info', text: '' });
      addOutput({ type: 'info', text: 'Create a test first with: /create <test-name>' });
    } else {
      for (const test of tests) {
        addOutput({ type: 'info', text: ` ${test.name} (${test.lines} lines)` });
      }
    }
    return true; // Continue loop
  }

  // Check if test exists
  const exists = await testExists(testName);
  if (!exists) {
    addOutput({ type: 'error', text: `Test not found: ${testName}` });
    addOutput({ type: 'info', text: 'Use /list to see available tests.' });
    return true; // Continue loop
  }

  let result;
  try {
    // Set mode and test name in Ink UI
    context.setMode?.('execution');
    context.setTestName?.(testName);

    // Load test instructions
    addOutput({ type: 'system', text: `Loading test: ${testName}` });
    const instructions = await loadTest(testName);
    addOutput({ type: 'info', text: `Loaded ${instructions.length} instructions` });
    addOutput({ type: 'info', text: '' });

    // Input stays enabled so /stop and /exit can still be typed; the
    // execution-mode flag is what signals that a test is running.
    context.setInputDisabled?.(false);
    context.setExecutionMode?.(true);
    context.setInputPlaceholder?.('Type /stop or /exit to stop test execution');

    // Set agent working status
    context.setAgentWorking?.(true, 'Executing test...');

    // Reset session state to prevent context leak from design mode.
    // Each test instruction should execute in isolation.
    session.updateResponseId(undefined);
    session.clearMessages();

    // Set execution mode system prompt (replaces any design mode prompt)
    session.setSystemPrompt(buildExecutionModePrompt(session.deviceInfo));

    const executionMode = new ExecutionMode(session, context.engine, instructions);

    // Store reference in context so /stop command can access it
    context.setActiveExecutionMode?.(executionMode);

    // Execute the test
    result = await executionMode.execute(context);
  } finally {
    // Always undo the UI changes above, even when loading or execution threw.
    context.setAgentWorking?.(false);
    context.setActiveExecutionMode?.(null);
    context.setExecutionMode?.(false);
    context.setInputPlaceholder?.('Type a command or message...');
    context.setMode?.('command');
    context.setTestName?.(null);
  }

  if (result.success) {
    addOutput({ type: 'success', text: '✓ Test passed!' });
  } else {
    addOutput({ type: 'error', text: `✗ Test failed: ${result.error || "Unknown error"}` });
  }

  return true; // Continue loop
}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Stop command handler - stops current test creation or execution
3
+ */
4
/**
 * Handle /stop command: interrupt the active test creation or execution.
 *
 * Design mode takes precedence: if `context.activeDesignMode` is set, the
 * word "cancel" is fed to it as user input. Otherwise, when
 * `context.isExecutionMode` is truthy, `shouldStop` is set on the active
 * execution mode (if one is registered). A missing context is treated as
 * "nothing to stop" rather than an error, matching the fallback already
 * used for `addOutput`.
 *
 * @param {string} args - Command arguments (unused)
 * @param {Object} session - Current session (unused)
 * @param {Object} context - Additional context
 * @returns {Promise<boolean>} - true to continue loop
 */
export async function handleStop(args, session, context) {
  const addOutput = context?.addOutput || ((item) => console.log(item.text || item));

  // Check if we're in design mode
  const designMode = context?.activeDesignMode;
  if (designMode) {
    addOutput({ type: 'info', text: 'Stopping test creation...' });
    // Signal design mode to stop by queuing "cancel"
    designMode.handleUserInput('cancel');
    return true; // Continue loop
  }

  // Check if we're in execution mode
  if (context?.isExecutionMode) {
    addOutput({ type: 'info', text: 'Stopping test execution...' });
    // Set flag to stop execution
    const executionMode = context.activeExecutionMode;
    if (executionMode) {
      executionMode.shouldStop = true;
    }
    return true; // Continue loop
  }

  // Not in any mode that can be stopped
  addOutput({ type: 'info', text: 'No active test creation or execution to stop.' });
  return true; // Continue loop
}
@@ -0,0 +1,43 @@
1
+ /**
2
+ * View command handler
3
+ */
4
+ import { testExists, getTestContent } from "../test-store/test-manager.js";
5
/**
 * Handle /view command: print a test's contents with line numbers.
 *
 * Lines are split on LF or CRLF, and the empty fragment produced by a
 * terminating newline is not counted or printed as a line. The line-number
 * gutter is at least three characters wide and grows for longer files.
 *
 * @param {string} args - Test name
 * @param {Object} session - Current session (unused)
 * @param {Object} context - Additional context (reads `addOutput`)
 * @returns {Promise<boolean>} - true to continue loop
 */
export async function handleView(args, session, context) {
  const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
  const testName = args.trim();

  // Check if test name provided
  if (!testName) {
    addOutput({ type: 'error', text: 'Usage: /view <test-name>' });
    addOutput({ type: 'info', text: '' });
    addOutput({ type: 'info', text: 'Example:' });
    addOutput({ type: 'info', text: ' /view example' });
    return true; // Continue loop
  }

  // Check if test exists
  const exists = await testExists(testName);
  if (!exists) {
    addOutput({ type: 'error', text: `Test not found: ${testName}` });
    addOutput({ type: 'info', text: 'Use /list to see available tests.' });
    return true; // Continue loop
  }

  // Load and display test content
  const content = await getTestContent(testName);
  const lines = content.split(/\r?\n/);
  // A file ending in a newline splits into a trailing '' that is not a real line.
  if (lines.at(-1) === '') {
    lines.pop();
  }

  const separator = '─'.repeat(60);
  const gutterWidth = Math.max(3, String(lines.length).length);

  addOutput({ type: 'system', text: `Test: ${testName}` });
  addOutput({ type: 'info', text: separator });

  // Display with line numbers
  lines.forEach((line, index) => {
    const lineNum = String(index + 1).padStart(gutterWidth, ' ');
    addOutput({ type: 'info', text: `${lineNum} │ ${line}` });
  });

  addOutput({ type: 'info', text: separator });
  addOutput({ type: 'info', text: `${lines.length} lines total` });

  return true; // Continue loop
}
@@ -0,0 +1,114 @@
1
+ import path from "path";
2
+ import { writeFile } from "fs/promises";
3
+ import { getScreenshotAsBase64 } from "../device/connection.js";
4
+ import { handleModelAction } from "../device/actions.js";
5
+ import { sendCUARequest } from "../device/openai.js";
6
/**
 * Drives the model/device loop: performs the actions a CUA response asks for,
 * sends a fresh screenshot back for each one, and repeats until the model
 * stops requesting actions.
 */
export class ExecutionEngine {
  /**
   * @param {Object} session - Session providing deviceId, deviceInfo and addToTranscript()
   * @param {Object} [options]
   * @param {boolean} [options.recordScreenshots=false] - Write each captured frame to disk
   * @param {string|null} [options.screenshotDir=null] - Directory for recorded frames
   */
  constructor(session, options = {}) {
    this.session = session;
    this.recordScreenshots = options.recordScreenshots || false;
    this.screenshotDir = options.screenshotDir || null;
  }

  /**
   * Run a full turn with the CUA model.
   * Executes actions until the model stops requesting actions.
   *
   * @param {Object} response - The CUA response to start from (reads `id` and `output`)
   * @param {Function} [trackAction] - Optional callback. It is called with `null` before
   *   each batch and with the action after each non-screenshot action; a truthy return
   *   value stops the turn immediately.
   * @param {Object} [context] - Optional Ink context for output (reads `addOutput`)
   * @returns {Promise<string>} the id of the most recent response, for chaining
   */
  async runFullTurn(response, trackAction = null, context = null) {
    const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
    let newResponseId = response.id;

    while (true) {
      // Check for interruption before processing next batch of actions
      if (trackAction) {
        const shouldStop = trackAction(null); // null action = pre-batch check
        if (shouldStop) {
          return newResponseId;
        }
      }

      const items = response.output || [];
      const actions = items.filter((item) => item.type === "computer_call");

      // ── Collect pending safety checks delivered as standalone output items ──
      const pendingSafetyChecks = items
        .filter((item) => item.type === "pending_safety_check")
        .map((item) => ({ id: item.id }));

      // ── Print assistant output ──
      for (const item of items) {
        if (item.type === "reasoning") {
          // `summary` may be missing on a reasoning item; treat that as no summary.
          for (const entry of item.summary ?? []) {
            if (entry.type === "summary_text") {
              addOutput({ type: 'reasoning', text: entry.text });
              this.session.addToTranscript(`[Reasoning] ${entry.text}`);
            }
          }
        } else if (item.type === "message") {
          const textPart = Array.isArray(item.content)
            ? item.content.find((c) => c.type === "output_text")
            : undefined;
          if (textPart) {
            addOutput({ type: 'assistant', text: textPart.text });
            this.session.addToTranscript(`[Assistant] ${textPart.text}`);
          }
        } else if (item.type === "pending_safety_check") {
          addOutput({ type: 'warning', text: `⚠️ Safety check: ${item.code} - ${item.message}` });
        } else if (item.type === "computer_call") {
          // NOTE(review): the OpenAI computer-use docs attach safety checks to the
          // computer_call item itself; surface those as well. Confirm against
          // what sendCUARequest actually returns.
          for (const check of item.pending_safety_checks ?? []) {
            addOutput({ type: 'warning', text: `⚠️ Safety check: ${check.code} - ${check.message}` });
          }
        }
      }

      if (actions.length === 0) {
        // No actions = turn complete
        break;
      }

      // ── Process model actions ──
      for (const { action, call_id, pending_safety_checks: callSafetyChecks } of actions) {
        if (action.type === "screenshot") {
          addOutput({ type: 'info', text: '📸 Capturing screen' });
        } else {
          await handleModelAction(this.session.deviceId, action, this.session.deviceInfo.scale, context);

          // Track action and check for interruption
          if (trackAction) {
            const shouldStop = trackAction(action);
            if (shouldStop) {
              // User interrupted - stop execution immediately
              return newResponseId;
            }
          }

          // Add delay after UI-changing actions to let the interface update
          // before taking the screenshot (except for explicit wait actions which have their own delay)
          if (action.type !== "wait") {
            await new Promise((resolve) => setTimeout(resolve, 500));
          }
        }

        const screenshotBase64 = await getScreenshotAsBase64(this.session.deviceId, this.session.deviceInfo);

        if (this.recordScreenshots && this.screenshotDir) {
          const framePath = path.join(this.screenshotDir, `frame_${String(Date.now())}.png`);
          await writeFile(framePath, Buffer.from(screenshotBase64, "base64"));
        }

        // Acknowledge both the standalone checks and those attached to this call.
        const acknowledgedSafetyChecks = [
          ...pendingSafetyChecks,
          ...(callSafetyChecks ?? []).map(({ id, code, message }) => ({ id, code, message })),
        ];

        // Build next input: the screenshot answering this call
        const input = [{
          type: "computer_call_output",
          call_id,
          output: {
            type: "computer_screenshot",
            image_url: `data:image/png;base64,${screenshotBase64}`,
          },
          current_url: "android://emulator", // Android emulator doesn't have URLs like a browser
          ...(acknowledgedSafetyChecks.length > 0 ? { acknowledged_safety_checks: acknowledgedSafetyChecks } : {}),
        }];

        response = await sendCUARequest({
          messages: input,
          previousResponseId: newResponseId,
          deviceInfo: this.session.deviceInfo,
        });
        newResponseId = response.id;
      }
    }

    // ── At end, warn if the last output item was only reasoning ──
    const finalItems = response.output || [];
    if (finalItems.length > 0 && finalItems.at(-1).type === "reasoning") {
      addOutput({ type: 'info', text: 'Warning: last item was reasoning without follow-up. Dropping to avoid 400 error.' });
    }

    return newResponseId;
  }
}