@loadmill/droid-cua 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,81 +1,22 @@
1
- import { exec } from "child_process";
2
- import { promisify } from "util";
3
- import { logger } from "../utils/logger.js";
4
- const execAsync = promisify(exec);
5
- function adbShell(deviceId, command) {
6
- return execAsync(`adb -s ${deviceId} shell "${command}"`);
7
- }
1
+ /**
2
+ * Device Actions Module
3
+ *
4
+ * Thin wrapper that delegates to the appropriate platform backend.
5
+ * Maintains backwards compatibility with existing code.
6
+ */
7
+ import { getDeviceBackend, getCurrentPlatform } from "./factory.js";
8
+ /**
9
+ * Handle an action from the CUA model
10
+ * @param {string} deviceId - The device/emulator/simulator ID
11
+ * @param {object} action - The action to execute
12
+ * @param {number} scale - Scale factor for coordinates
13
+ * @param {object} context - Context with addOutput function
14
+ */
8
15
  export async function handleModelAction(deviceId, action, scale = 1.0, context = null) {
9
- const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
10
- try {
11
- const { x, y, x1, y1, x2, y2, text, keys, path } = action;
12
- switch (action.type) {
13
- case "click":
14
- const realX = Math.round(x / scale);
15
- const realY = Math.round(y / scale);
16
- addOutput({ type: 'action', text: `Clicking at (${realX}, ${realY})` });
17
- await adbShell(deviceId, `input tap ${realX} ${realY}`);
18
- break;
19
- case "scroll":
20
- const scrollX = Math.round(action.scroll_x / scale);
21
- const scrollY = Math.round(action.scroll_y / scale);
22
- addOutput({ type: 'action', text: `Scrolling by (${scrollX}, ${scrollY})` });
23
- const startX = 500;
24
- const startY = 500;
25
- const endX = startX + scrollX;
26
- const endY = startY - scrollY; // <--- INVERT Y
27
- await adbShell(deviceId, `input swipe ${startX} ${startY} ${endX} ${endY} 500`);
28
- break;
29
- case "drag":
30
- if (path && path.length >= 2) {
31
- const start = path[0];
32
- const end = path[path.length - 1];
33
- const realStartX = Math.round(start.x / scale);
34
- const realStartY = Math.round(start.y / scale);
35
- const realEndX = Math.round(end.x / scale);
36
- const realEndY = Math.round(end.y / scale);
37
- addOutput({ type: 'action', text: `Dragging from (${realStartX}, ${realStartY}) to (${realEndX}, ${realEndY})` });
38
- await adbShell(deviceId, `input swipe ${realStartX} ${realStartY} ${realEndX} ${realEndY} 500`);
39
- }
40
- else {
41
- addOutput({ type: 'info', text: `Drag action missing valid path: ${JSON.stringify(action)}` });
42
- }
43
- break;
44
- case "type":
45
- addOutput({ type: 'action', text: `Typing text: ${text}` });
46
- const escapedText = text.replace(/(["\\$`])/g, "\\$1").replace(/ /g, "%s");
47
- await adbShell(deviceId, `input text "${escapedText}"`);
48
- break;
49
- case "keypress":
50
- // Map ESC to Android Home button (since ESC doesn't exist on mobile)
51
- const mappedKeys = keys.map(key => {
52
- if (key.toUpperCase() === 'ESC' || key.toUpperCase() === 'ESCAPE') {
53
- return 'KEYCODE_HOME';
54
- }
55
- return key;
56
- });
57
- addOutput({ type: 'action', text: `Pressing key: ${mappedKeys.join(', ')}` });
58
- for (const key of mappedKeys) {
59
- await adbShell(deviceId, `input keyevent ${key}`);
60
- }
61
- break;
62
- case "wait":
63
- addOutput({ type: 'action', text: 'Waiting...' });
64
- await new Promise(res => setTimeout(res, 1000));
65
- break;
66
- default:
67
- addOutput({ type: 'info', text: `Unknown action: ${JSON.stringify(action)}` });
68
- }
69
- }
70
- catch (error) {
71
- // Log full error details to file
72
- logger.error('Action execution error', {
73
- action,
74
- message: error.message,
75
- stack: error.stack
76
- });
77
- // Show user-friendly error message
78
- addOutput({ type: 'error', text: `Error executing action: ${error.message}` });
79
- addOutput({ type: 'info', text: 'Full error details have been logged to the debug log.' });
16
+ const platform = getCurrentPlatform();
17
+ if (!platform) {
18
+ throw new Error("No platform set. Call connectToDevice first.");
80
19
  }
20
+ const backend = getDeviceBackend(platform);
21
+ return backend.handleModelAction(deviceId, action, scale, context);
81
22
  }
@@ -0,0 +1,81 @@
1
+ import { exec } from "child_process";
2
+ import { promisify } from "util";
3
+ import { logger } from "../../utils/logger.js";
4
+ const execAsync = promisify(exec);
5
+ function adbShell(deviceId, command) {
6
+ return execAsync(`adb -s ${deviceId} shell "${command}"`);
7
+ }
8
+ export async function handleModelAction(deviceId, action, scale = 1.0, context = null) {
9
+ const addOutput = context?.addOutput || ((item) => console.log(item.text || item));
10
+ try {
11
+ const { x, y, x1, y1, x2, y2, text, keys, path } = action;
12
+ switch (action.type) {
13
+ case "click":
14
+ const realX = Math.round(x / scale);
15
+ const realY = Math.round(y / scale);
16
+ addOutput({ type: 'action', text: `Clicking at (${realX}, ${realY})` });
17
+ await adbShell(deviceId, `input tap ${realX} ${realY}`);
18
+ break;
19
+ case "scroll":
20
+ const scrollX = Math.round(action.scroll_x / scale);
21
+ const scrollY = Math.round(action.scroll_y / scale);
22
+ addOutput({ type: 'action', text: `Scrolling by (${scrollX}, ${scrollY})` });
23
+ const startX = 500;
24
+ const startY = 500;
25
+ const endX = startX + scrollX;
26
+ const endY = startY - scrollY; // <--- INVERT Y
27
+ await adbShell(deviceId, `input swipe ${startX} ${startY} ${endX} ${endY} 500`);
28
+ break;
29
+ case "drag":
30
+ if (path && path.length >= 2) {
31
+ const start = path[0];
32
+ const end = path[path.length - 1];
33
+ const realStartX = Math.round(start.x / scale);
34
+ const realStartY = Math.round(start.y / scale);
35
+ const realEndX = Math.round(end.x / scale);
36
+ const realEndY = Math.round(end.y / scale);
37
+ addOutput({ type: 'action', text: `Dragging from (${realStartX}, ${realStartY}) to (${realEndX}, ${realEndY})` });
38
+ await adbShell(deviceId, `input swipe ${realStartX} ${realStartY} ${realEndX} ${realEndY} 500`);
39
+ }
40
+ else {
41
+ addOutput({ type: 'info', text: `Drag action missing valid path: ${JSON.stringify(action)}` });
42
+ }
43
+ break;
44
+ case "type":
45
+ addOutput({ type: 'action', text: `Typing text: ${text}` });
46
+ const escapedText = text.replace(/(["\\$`])/g, "\\$1").replace(/ /g, "%s");
47
+ await adbShell(deviceId, `input text "${escapedText}"`);
48
+ break;
49
+ case "keypress":
50
+ // Map ESC to Android Home button (since ESC doesn't exist on mobile)
51
+ const mappedKeys = keys.map(key => {
52
+ if (key.toUpperCase() === 'ESC' || key.toUpperCase() === 'ESCAPE') {
53
+ return 'KEYCODE_HOME';
54
+ }
55
+ return key;
56
+ });
57
+ addOutput({ type: 'action', text: `Pressing key: ${mappedKeys.join(', ')}` });
58
+ for (const key of mappedKeys) {
59
+ await adbShell(deviceId, `input keyevent ${key}`);
60
+ }
61
+ break;
62
+ case "wait":
63
+ addOutput({ type: 'action', text: 'Waiting...' });
64
+ await new Promise(res => setTimeout(res, 1000));
65
+ break;
66
+ default:
67
+ addOutput({ type: 'info', text: `Unknown action: ${JSON.stringify(action)}` });
68
+ }
69
+ }
70
+ catch (error) {
71
+ // Log full error details to file
72
+ logger.error('Action execution error', {
73
+ action,
74
+ message: error.message,
75
+ stack: error.stack
76
+ });
77
+ // Show user-friendly error message
78
+ addOutput({ type: 'error', text: `Error executing action: ${error.message}` });
79
+ addOutput({ type: 'info', text: 'Full error details have been logged to the debug log.' });
80
+ }
81
+ }
@@ -0,0 +1,154 @@
1
+ import { exec, spawn } from "child_process";
2
+ import { once } from "events";
3
+ import { promisify } from "util";
4
+ import sharp from "sharp";
5
+ import { logger } from "../../utils/logger.js";
6
+ const execAsync = promisify(exec);
7
+ function wait(ms) {
8
+ return new Promise(resolve => setTimeout(resolve, ms));
9
+ }
10
+ async function listConnectedDevices() {
11
+ const { stdout } = await execAsync("adb devices");
12
+ return stdout
13
+ .trim()
14
+ .split("\n")
15
+ .slice(1)
16
+ .map(line => line.split("\t")[0])
17
+ .filter(id => id.length > 0);
18
+ }
19
+ async function waitForDeviceConnection(avdName, timeoutMs = 120000) {
20
+ const deadline = Date.now() + timeoutMs;
21
+ while (Date.now() < deadline) {
22
+ const devices = await listConnectedDevices();
23
+ const match = devices.find(id => id.includes(avdName));
24
+ if (match)
25
+ return match;
26
+ await wait(2000);
27
+ }
28
+ return null;
29
+ }
30
+ async function waitForDeviceBoot(deviceId, timeoutMs = 60000) {
31
+ const deadline = Date.now() + timeoutMs;
32
+ while (Date.now() < deadline) {
33
+ try {
34
+ const { stdout } = await execAsync(`adb -s ${deviceId} shell getprop sys.boot_completed`);
35
+ if (stdout.trim() === "1")
36
+ return true;
37
+ }
38
+ catch { }
39
+ await wait(2000);
40
+ }
41
+ return false;
42
+ }
43
+ /**
44
+ * Get list of available AVDs
45
+ */
46
+ async function listAvailableAVDs() {
47
+ try {
48
+ const { stdout } = await execAsync("emulator -list-avds");
49
+ return stdout.trim().split("\n").filter(name => name.length > 0);
50
+ }
51
+ catch {
52
+ return [];
53
+ }
54
+ }
55
+ export async function connectToDevice(avdName) {
56
+ const devices = await listConnectedDevices();
57
+ // If no AVD specified, try to use an already-running emulator or pick the first available
58
+ if (!avdName) {
59
+ // Check for already-running emulator
60
+ for (const id of devices) {
61
+ if (id.startsWith("emulator-")) {
62
+ console.log(`Using already-running emulator: ${id}`);
63
+ return id;
64
+ }
65
+ }
66
+ // No running emulator, pick first available AVD
67
+ const avds = await listAvailableAVDs();
68
+ if (avds.length === 0) {
69
+ console.error("No Android AVDs found. Create one with Android Studio or run:");
70
+ console.error(" avdmanager create avd -n Pixel_8 -k 'system-images;android-35;google_apis;arm64-v8a'");
71
+ process.exit(1);
72
+ }
73
+ avdName = avds[0];
74
+ console.log(`No AVD specified, using first available: ${avdName}`);
75
+ }
76
+ for (const id of devices) {
77
+ if (id.startsWith("emulator-")) {
78
+ try {
79
+ const { stdout } = await execAsync(`adb -s ${id} emu avd name`);
80
+ if (stdout.trim() === avdName) {
81
+ console.log(`Emulator ${avdName} is already running as ${id}`);
82
+ return id;
83
+ }
84
+ }
85
+ catch { }
86
+ }
87
+ }
88
+ console.log(`No emulator with AVD "${avdName}" is running. Launching...`);
89
+ const emulatorProcess = spawn("emulator", ["-avd", avdName], { detached: true, stdio: "ignore" });
90
+ emulatorProcess.unref();
91
+ const deviceId = await waitForDeviceConnection("emulator-", 120000);
92
+ if (!deviceId) {
93
+ console.error(`Emulator ${avdName} did not appear in time.`);
94
+ process.exit(1);
95
+ }
96
+ console.log(`Device ${deviceId} detected. Waiting for boot...`);
97
+ const booted = await waitForDeviceBoot(deviceId);
98
+ if (!booted) {
99
+ console.error(`Emulator ${avdName} did not finish booting.`);
100
+ process.exit(1);
101
+ }
102
+ console.log(`Emulator ${avdName} is fully booted.`);
103
+ return deviceId;
104
+ }
105
+ export async function getDeviceInfo(deviceId) {
106
+ const { stdout } = await execAsync(`adb -s ${deviceId} shell wm size`);
107
+ const match = stdout.match(/Physical size:\s*(\d+)x(\d+)/);
108
+ if (!match) {
109
+ console.error("Could not get device screen size.");
110
+ process.exit(1);
111
+ }
112
+ const [_, width, height] = match.map(Number);
113
+ const targetWidth = 400;
114
+ const scale = width > targetWidth ? targetWidth / width : 1.0;
115
+ const scaledWidth = Math.round(width * scale);
116
+ const scaledHeight = Math.round(height * scale);
117
+ return {
118
+ device_width: width,
119
+ device_height: height,
120
+ scaled_width: scaledWidth,
121
+ scaled_height: scaledHeight,
122
+ scale,
123
+ };
124
+ }
125
+ export async function getScreenshotAsBase64(deviceId, deviceInfo) {
126
+ const adb = spawn("adb", ["-s", deviceId, "exec-out", "screencap", "-p"]);
127
+ const chunks = [];
128
+ const stderrChunks = [];
129
+ adb.stdout.on("data", chunk => chunks.push(chunk));
130
+ adb.stderr.on("data", err => {
131
+ stderrChunks.push(err);
132
+ console.error("ADB stderr:", err.toString());
133
+ });
134
+ const [code] = await once(adb, "close");
135
+ if (code !== 0) {
136
+ const stderrOutput = Buffer.concat(stderrChunks).toString();
137
+ logger.error(`ADB screencap failed with code ${code}`, { stderr: stderrOutput });
138
+ throw new Error(`adb screencap exited with code ${code}`);
139
+ }
140
+ let buffer = Buffer.concat(chunks);
141
+ logger.debug(`Screenshot captured: ${buffer.length} bytes before scaling`);
142
+ if (buffer.length === 0) {
143
+ logger.error('Screenshot buffer is empty!', { deviceId, chunks: chunks.length });
144
+ throw new Error('Screenshot capture returned empty buffer');
145
+ }
146
+ if (deviceInfo.scale < 1.0) {
147
+ buffer = await sharp(buffer)
148
+ .resize({ width: deviceInfo.scaled_width, height: deviceInfo.scaled_height })
149
+ .png()
150
+ .toBuffer();
151
+ logger.debug(`Screenshot scaled: ${buffer.length} bytes after scaling`);
152
+ }
153
+ return buffer.toString("base64");
154
+ }
@@ -1,123 +1,58 @@
1
- import { exec, spawn } from "child_process";
2
- import { once } from "events";
3
- import { promisify } from "util";
4
- import sharp from "sharp";
5
- import { logger } from "../utils/logger.js";
6
- const execAsync = promisify(exec);
7
- function wait(ms) {
8
- return new Promise(resolve => setTimeout(resolve, ms));
9
- }
10
- async function listConnectedDevices() {
11
- const { stdout } = await execAsync("adb devices");
12
- return stdout
13
- .trim()
14
- .split("\n")
15
- .slice(1)
16
- .map(line => line.split("\t")[0])
17
- .filter(id => id.length > 0);
18
- }
19
- async function waitForDeviceConnection(avdName, timeoutMs = 120000) {
20
- const deadline = Date.now() + timeoutMs;
21
- while (Date.now() < deadline) {
22
- const devices = await listConnectedDevices();
23
- const match = devices.find(id => id.includes(avdName));
24
- if (match)
25
- return match;
26
- await wait(2000);
27
- }
28
- return null;
29
- }
30
- async function waitForDeviceBoot(deviceId, timeoutMs = 60000) {
31
- const deadline = Date.now() + timeoutMs;
32
- while (Date.now() < deadline) {
33
- try {
34
- const { stdout } = await execAsync(`adb -s ${deviceId} shell getprop sys.boot_completed`);
35
- if (stdout.trim() === "1")
36
- return true;
37
- }
38
- catch { }
39
- await wait(2000);
40
- }
41
- return false;
42
- }
43
- export async function connectToDevice(avdName) {
44
- const devices = await listConnectedDevices();
45
- for (const id of devices) {
46
- if (id.startsWith("emulator-")) {
47
- try {
48
- const { stdout } = await execAsync(`adb -s ${id} emu avd name`);
49
- if (stdout.trim() === avdName) {
50
- console.log(`Emulator ${avdName} is already running as ${id}`);
51
- return id;
52
- }
53
- }
54
- catch { }
55
- }
56
- }
57
- console.log(`No emulator with AVD "${avdName}" is running. Launching...`);
58
- const emulatorProcess = spawn("emulator", ["-avd", avdName], { detached: true, stdio: "ignore" });
59
- emulatorProcess.unref();
60
- const deviceId = await waitForDeviceConnection("emulator-", 120000);
61
- if (!deviceId) {
62
- console.error(`Emulator ${avdName} did not appear in time.`);
63
- process.exit(1);
64
- }
65
- console.log(`Device ${deviceId} detected. Waiting for boot...`);
66
- const booted = await waitForDeviceBoot(deviceId);
67
- if (!booted) {
68
- console.error(`Emulator ${avdName} did not finish booting.`);
69
- process.exit(1);
70
- }
71
- console.log(`Emulator ${avdName} is fully booted.`);
72
- return deviceId;
1
+ /**
2
+ * Device Connection Module
3
+ *
4
+ * Thin wrapper that delegates to the appropriate platform backend.
5
+ * Maintains backwards compatibility with existing code.
6
+ */
7
+ import { getDeviceBackend, detectPlatform, setCurrentPlatform, getCurrentPlatform } from "./factory.js";
8
+ let currentBackend = null;
9
+ /**
10
+ * Connect to a device (Android emulator or iOS simulator)
11
+ * @param {string} deviceName - AVD name (Android) or Simulator name (iOS)
12
+ * @param {string} platform - Optional platform override ('android' or 'ios')
13
+ * @returns {Promise<string>} Device ID
14
+ */
15
+ export async function connectToDevice(deviceName, platform = null) {
16
+ const detectedPlatform = platform || detectPlatform(deviceName);
17
+ setCurrentPlatform(detectedPlatform);
18
+ currentBackend = getDeviceBackend(detectedPlatform);
19
+ console.log(`Platform: ${detectedPlatform}`);
20
+ return currentBackend.connectToDevice(deviceName);
73
21
  }
22
+ /**
23
+ * Get device info (screen dimensions and scale factor)
24
+ * @param {string} deviceId
25
+ * @returns {Promise<object>}
26
+ */
74
27
  export async function getDeviceInfo(deviceId) {
75
- const { stdout } = await execAsync(`adb -s ${deviceId} shell wm size`);
76
- const match = stdout.match(/Physical size:\s*(\d+)x(\d+)/);
77
- if (!match) {
78
- console.error("Could not get device screen size.");
79
- process.exit(1);
28
+ if (!currentBackend) {
29
+ throw new Error("Not connected to a device. Call connectToDevice first.");
80
30
  }
81
- const [_, width, height] = match.map(Number);
82
- const targetWidth = 400;
83
- const scale = width > targetWidth ? targetWidth / width : 1.0;
84
- const scaledWidth = Math.round(width * scale);
85
- const scaledHeight = Math.round(height * scale);
86
- return {
87
- device_width: width,
88
- device_height: height,
89
- scaled_width: scaledWidth,
90
- scaled_height: scaledHeight,
91
- scale,
92
- };
31
+ return currentBackend.getDeviceInfo(deviceId);
93
32
  }
33
+ /**
34
+ * Get screenshot as base64 string
35
+ * @param {string} deviceId
36
+ * @param {object} deviceInfo
37
+ * @returns {Promise<string>}
38
+ */
94
39
  export async function getScreenshotAsBase64(deviceId, deviceInfo) {
95
- const adb = spawn("adb", ["-s", deviceId, "exec-out", "screencap", "-p"]);
96
- const chunks = [];
97
- const stderrChunks = [];
98
- adb.stdout.on("data", chunk => chunks.push(chunk));
99
- adb.stderr.on("data", err => {
100
- stderrChunks.push(err);
101
- console.error("ADB stderr:", err.toString());
102
- });
103
- const [code] = await once(adb, "close");
104
- if (code !== 0) {
105
- const stderrOutput = Buffer.concat(stderrChunks).toString();
106
- logger.error(`ADB screencap failed with code ${code}`, { stderr: stderrOutput });
107
- throw new Error(`adb screencap exited with code ${code}`);
40
+ if (!currentBackend) {
41
+ throw new Error("Not connected to a device. Call connectToDevice first.");
108
42
  }
109
- let buffer = Buffer.concat(chunks);
110
- logger.debug(`Screenshot captured: ${buffer.length} bytes before scaling`);
111
- if (buffer.length === 0) {
112
- logger.error('Screenshot buffer is empty!', { deviceId, chunks: chunks.length });
113
- throw new Error('Screenshot capture returned empty buffer');
114
- }
115
- if (deviceInfo.scale < 1.0) {
116
- buffer = await sharp(buffer)
117
- .resize({ width: deviceInfo.scaled_width, height: deviceInfo.scaled_height })
118
- .png()
119
- .toBuffer();
120
- logger.debug(`Screenshot scaled: ${buffer.length} bytes after scaling`);
121
- }
122
- return buffer.toString("base64");
43
+ return currentBackend.getScreenshotAsBase64(deviceId, deviceInfo);
44
+ }
45
+ /**
46
+ * Get the current platform
47
+ * @returns {string|null}
48
+ */
49
+ export { getCurrentPlatform } from "./factory.js";
50
+ /**
51
+ * Disconnect from the device
52
+ */
53
+ export async function disconnect() {
54
+ if (currentBackend?.disconnect) {
55
+ await currentBackend.disconnect();
56
+ }
57
+ currentBackend = null;
123
58
  }
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Device Factory
3
+ *
4
+ * Provides platform detection and returns the appropriate device backend.
5
+ */
6
+ import * as androidConnection from "./android/connection.js";
7
+ import * as androidActions from "./android/actions.js";
8
+ import * as iosConnection from "./ios/connection.js";
9
+ import * as iosActions from "./ios/actions.js";
10
+ // Current platform state
11
+ let currentPlatform = null;
12
+ /**
13
+ * Detect platform from device name or environment variable
14
+ * @param {string} deviceName - The device/AVD/simulator name
15
+ * @returns {string} 'ios' or 'android'
16
+ */
17
+ export function detectPlatform(deviceName) {
18
+ // Environment variable takes precedence
19
+ if (process.env.DROID_CUA_PLATFORM === "ios") {
20
+ return "ios";
21
+ }
22
+ if (process.env.DROID_CUA_PLATFORM === "android") {
23
+ return "android";
24
+ }
25
+ // Auto-detect from device name
26
+ if (deviceName) {
27
+ const lower = deviceName.toLowerCase();
28
+ if (lower.includes("iphone") || lower.includes("ipad") || lower.includes("ios")) {
29
+ return "ios";
30
+ }
31
+ }
32
+ // Default to Android
33
+ return "android";
34
+ }
35
+ /**
36
+ * Get the device backend for a platform
37
+ * @param {string} platform - 'ios' or 'android'
38
+ * @returns {object} Backend with connection and action functions
39
+ */
40
+ export function getDeviceBackend(platform) {
41
+ if (platform === "ios") {
42
+ return {
43
+ connectToDevice: iosConnection.connectToDevice,
44
+ getDeviceInfo: iosConnection.getDeviceInfo,
45
+ getScreenshotAsBase64: iosConnection.getScreenshotAsBase64,
46
+ handleModelAction: iosActions.handleModelAction,
47
+ disconnect: iosConnection.disconnect,
48
+ };
49
+ }
50
+ // Default: Android
51
+ return {
52
+ connectToDevice: androidConnection.connectToDevice,
53
+ getDeviceInfo: androidConnection.getDeviceInfo,
54
+ getScreenshotAsBase64: androidConnection.getScreenshotAsBase64,
55
+ handleModelAction: androidActions.handleModelAction,
56
+ disconnect: async () => { }, // Android doesn't need explicit disconnect
57
+ };
58
+ }
59
+ /**
60
+ * Set the current platform
61
+ * @param {string} platform
62
+ */
63
+ export function setCurrentPlatform(platform) {
64
+ currentPlatform = platform;
65
+ }
66
+ /**
67
+ * Get the current platform
68
+ * @returns {string|null}
69
+ */
70
+ export function getCurrentPlatform() {
71
+ return currentPlatform;
72
+ }
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Device interface - contract that both Android and iOS backends must implement
3
+ *
4
+ * This file documents the expected API for device backends.
5
+ * Each backend (android/, ios/) implements these functions.
6
+ */
7
+ /**
8
+ * @typedef {Object} DeviceInfo
9
+ * @property {number} device_width - Actual device screen width in pixels
10
+ * @property {number} device_height - Actual device screen height in pixels
11
+ * @property {number} scaled_width - Width as seen by the model (after scaling)
12
+ * @property {number} scaled_height - Height as seen by the model (after scaling)
13
+ * @property {number} scale - Scale factor (scaled_width / device_width)
14
+ */
15
+ /**
16
+ * @typedef {Object} ActionContext
17
+ * @property {Function} addOutput - Function to output messages to the user
18
+ */
19
+ /**
20
+ * Device Backend Interface
21
+ *
22
+ * Required exports for a device backend:
23
+ *
24
+ * connectToDevice(deviceName: string): Promise<string>
25
+ * - Connects to or launches the device/emulator/simulator
26
+ * - Returns a device ID for subsequent operations
27
+ *
28
+ * getDeviceInfo(deviceId: string): Promise<DeviceInfo>
29
+ * - Gets screen dimensions and calculates scale factor
30
+ * - Target width for scaling is 400px
31
+ *
32
+ * getScreenshotAsBase64(deviceId: string, deviceInfo: DeviceInfo): Promise<string>
33
+ * - Captures current screen state
34
+ * - Scales image if needed
35
+ * - Returns base64-encoded PNG
36
+ *
37
+ * handleModelAction(deviceId: string, action: object, scale: number, context: ActionContext): Promise<void>
38
+ * - Executes an action from the CUA model
39
+ * - Supported action types: click, type, scroll, drag, keypress, wait
40
+ */
41
+ export const SUPPORTED_ACTIONS = [
42
+ 'click', // Tap at (x, y) coordinates
43
+ 'type', // Enter text
44
+ 'scroll', // Scroll by (scroll_x, scroll_y)
45
+ 'drag', // Drag from start to end via path
46
+ 'keypress', // Press hardware keys (ESC/ESCAPE maps to home)
47
+ 'wait', // Wait for UI to settle
48
+ 'screenshot' // Capture screen (handled by engine, not backend)
49
+ ];
50
+ export const TARGET_SCALED_WIDTH = 400;