cua-computer 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
computer/computer.py CHANGED
@@ -11,6 +11,7 @@ import json
11
11
  import logging
12
12
  from .telemetry import record_computer_initialization
13
13
  import os
14
+ from . import helpers
14
15
 
15
16
  # Import provider related modules
16
17
  from .providers.base import VMProviderType
@@ -21,6 +22,20 @@ OSType = Literal["macos", "linux", "windows"]
21
22
  class Computer:
22
23
  """Computer is the main class for interacting with the computer."""
23
24
 
25
def create_desktop_from_apps(self, apps):
    """Create a virtual desktop limited to the given applications.

    Returns a DioramaComputer that proxies the Diorama interface but routes
    every call through this computer's interface as diorama commands.

    Args:
        apps (list[str]): List of application names to include in the desktop.

    Returns:
        DioramaComputer: A proxy object with the Diorama interface, but using diorama_cmds.

    Raises:
        AssertionError: If the "app-use" experiment was not enabled on this
            Computer instance.
    """
    # Raise explicitly instead of using an `assert` statement: asserts are
    # stripped under `python -O`, which would silently disable this gate.
    # AssertionError is kept so existing callers see the same exception type.
    if "app-use" not in self.experiments:
        raise AssertionError(
            "App Usage is an experimental feature. Enable it by passing experiments=['app-use'] to Computer()"
        )

    # Imported lazily so the module is only loaded when the experiment is used.
    from .diorama_computer import DioramaComputer

    return DioramaComputer(self, apps)
38
+
24
39
  def __init__(
25
40
  self,
26
41
  display: Union[Display, Dict[str, int], str] = "1024x768",
@@ -39,7 +54,8 @@ class Computer:
39
54
  host: str = os.environ.get("PYLUME_HOST", "localhost"),
40
55
  storage: Optional[str] = None,
41
56
  ephemeral: bool = False,
42
- api_key: Optional[str] = None
57
+ api_key: Optional[str] = None,
58
+ experiments: Optional[List[str]] = None
43
59
  ):
44
60
  """Initialize a new Computer instance.
45
61
 
@@ -65,6 +81,8 @@ class Computer:
65
81
  host: Host to use for VM provider connections (e.g. "localhost", "host.docker.internal")
66
82
  storage: Optional path for persistent VM storage (Lumier provider)
67
83
  ephemeral: Whether to use ephemeral storage
84
+ api_key: Optional API key for cloud providers
85
+ experiments: Optional list of experimental features to enable (e.g. ["app-use"])
68
86
  """
69
87
 
70
88
  self.logger = Logger("cua.computer", verbosity)
@@ -80,6 +98,10 @@ class Computer:
80
98
  self.ephemeral = ephemeral
81
99
 
82
100
  self.api_key = api_key
101
+ self.experiments = experiments or []
102
+
103
+ if "app-use" in self.experiments:
104
+ assert self.os_type == "macos", "App use experiment is only supported on macOS"
83
105
 
84
106
  # The default is currently to use non-ephemeral storage
85
107
  if storage and ephemeral and storage != "ephemeral":
@@ -439,6 +461,10 @@ class Computer:
439
461
 
440
462
  # Set the initialization flag and clear the initializing flag
441
463
  self._initialized = True
464
+
465
+ # Set this instance as the default computer for remote decorators
466
+ helpers.set_default_computer(self)
467
+
442
468
  self.logger.info("Computer successfully initialized")
443
469
  except Exception as e:
444
470
  raise
@@ -701,3 +727,177 @@ class Computer:
701
727
  tuple[float, float]: (x, y) coordinates in screenshot space
702
728
  """
703
729
  return await self.interface.to_screenshot_coordinates(x, y)
730
+
731
+
732
+ # Add virtual environment management functions to computer interface
733
async def venv_install(self, venv_name: str, requirements: list[str]) -> tuple[str, str]:
    """Install packages in a virtual environment, creating it if needed.

    The environment lives at ``~/.venvs/<venv_name>`` on the target computer
    and is created with ``python3 -m venv`` when the directory is missing.

    Args:
        venv_name: Name of the virtual environment
        requirements: List of package requirements to install. ``None`` or an
            empty list only ensures that the environment exists.

    Returns:
        Tuple of (stdout, stderr) from the installation command, or from the
        create-if-missing command when there is nothing to install.
    """
    import shlex

    requirements = requirements or []

    # Quote the name so spaces or shell metacharacters cannot split or alter
    # the command. The leading "~/" stays unquoted so the shell still expands
    # it; plain names are left unchanged by shlex.quote.
    venv_path = f"~/.venvs/{shlex.quote(venv_name)}"
    create_cmd = f"mkdir -p ~/.venvs && python3 -m venv {venv_path}"

    # Create the environment only when its directory does not exist yet.
    check_cmd = f"test -d {venv_path} || ({create_cmd})"
    create_result = await self.interface.run_command(check_cmd)

    if not requirements:
        # A bare `pip install` always fails; report the creation step instead.
        return create_result

    # Quote each requirement: specifiers such as "numpy>=1.0" or "pkg[extra]"
    # would otherwise be parsed by the shell as redirections or globs.
    requirements_str = " ".join(shlex.quote(req) for req in requirements)
    install_cmd = f". {venv_path}/bin/activate && pip install {requirements_str}"
    return await self.interface.run_command(install_cmd)
757
+
758
async def venv_cmd(self, venv_name: str, command: str) -> tuple[str, str]:
    """Execute a shell command in a virtual environment.

    Args:
        venv_name: Name of the virtual environment
        command: Shell command to execute in the virtual environment

    Returns:
        Tuple of (stdout, stderr) from the command execution. If the
        environment directory is missing, returns an empty stdout and an
        explanatory message as stderr without running the command.
    """
    import shlex

    # The leading "~/" stays unquoted so the shell expands it; plain names are
    # left unchanged by shlex.quote.
    venv_path = f"~/.venvs/{shlex.quote(venv_name)}"

    # `test -d` reports its result only through the exit status and prints
    # nothing, so echo a marker on success and look for it in stdout.
    exists_marker = "__VENV_EXISTS__"
    check_cmd = f"test -d {venv_path} && echo {exists_marker}"
    stdout, stderr = await self.interface.run_command(check_cmd)

    if stderr or exists_marker not in stdout:  # venv doesn't exist
        return "", f"Virtual environment '{venv_name}' does not exist. Create it first using venv_install."

    # Activate virtual environment and run command
    full_command = f". {venv_path}/bin/activate && {command}"
    return await self.interface.run_command(full_command)
780
+
781
async def venv_exec(self, venv_name: str, python_func, *args, **kwargs):
    """Execute a Python function in a virtual environment using source code extraction.

    The function's source is read with ``inspect.getsource``, wrapped in a
    small driver script, base64-encoded and run through ``venv_cmd``. The
    driver prints a JSON payload between two markers; that payload is parsed
    here to produce the return value or re-raise the error.

    Arguments and the result are transported as JSON using ``default=str``,
    so values that are not JSON-serializable arrive as their string form.

    Args:
        venv_name: Name of the virtual environment
        python_func: A callable function to execute
        *args: Positional arguments to pass to the function
        **kwargs: Keyword arguments to pass to the function

    Returns:
        The result of the function execution, or raises any exception that occurred

    Raises:
        Exception: If the source cannot be retrieved, the output contains no
            valid payload, or the reported error type is not a built-in
            exception class. Built-in exception types are re-raised as such.
    """
    import base64
    import builtins
    import inspect
    import json
    import textwrap

    try:
        # Get function source code using inspect.getsource
        source = inspect.getsource(python_func)
        # Remove common leading whitespace (dedent)
        func_source = textwrap.dedent(source).strip()

        # Remove decorators by starting at the first top-level def line. This
        # also handles decorators whose arguments span several lines.
        source_lines = func_source.splitlines()
        for index, line in enumerate(source_lines):
            if line.startswith(("def ", "async def ")):
                func_source = "\n".join(source_lines[index:])
                break

        # Get function name for execution
        func_name = python_func.__name__

        # Serialize args and kwargs as JSON (safer than dill for cross-version compatibility)
        args_json = json.dumps(args, default=str)
        kwargs_json = json.dumps(kwargs, default=str)

    except OSError as e:
        raise Exception(f"Cannot retrieve source code for function {python_func.__name__}: {e}") from e
    except Exception as e:
        raise Exception(f"Failed to reconstruct function source: {e}") from e

    # Build the driver script line by line. The JSON strings are embedded with
    # repr() so quotes, backslashes and newlines inside arguments survive as
    # valid Python string literals.
    script_lines = [
        "import json",
        "import traceback",
        "",
        "try:",
        "    # Define the function from source",
        textwrap.indent(func_source, "    "),
        "",
        "    # Deserialize args and kwargs from JSON",
        f"    args_json = {args_json!r}",
        f"    kwargs_json = {kwargs_json!r}",
        "    args = json.loads(args_json)",
        "    kwargs = json.loads(kwargs_json)",
        "",
        "    # Execute the function",
        f"    result = {func_name}(*args, **kwargs)",
        "",
        "    # Create success output payload",
        "    output_payload = {",
        '        "success": True,',
        '        "result": result,',
        '        "error": None',
        "    }",
        "",
        "except Exception as e:",
        "    # Create error output payload",
        "    output_payload = {",
        '        "success": False,',
        '        "result": None,',
        '        "error": {',
        '            "type": type(e).__name__,',
        '            "message": str(e),',
        '            "traceback": traceback.format_exc()',
        "        }",
        "    }",
        "",
        "# Serialize the output payload as JSON",
        "output_json = json.dumps(output_payload, default=str)",
        "",
        "# Print the JSON output with markers",
        'print(f"<<<VENV_EXEC_START>>>{output_json}<<<VENV_EXEC_END>>>")',
        "",
    ]
    python_code = "\n".join(script_lines)

    # Encode the Python code in base64 to avoid shell escaping issues
    encoded_code = base64.b64encode(python_code.encode("utf-8")).decode("ascii")

    # Execute the Python code in the virtual environment
    python_command = f"python -c \"import base64; exec(base64.b64decode('{encoded_code}').decode('utf-8'))\""
    stdout, stderr = await self.venv_cmd(venv_name, python_command)

    # Parse the output to extract the payload
    start_marker = "<<<VENV_EXEC_START>>>"
    end_marker = "<<<VENV_EXEC_END>>>"
    marker_pos = stdout.find(start_marker)

    # Echo whatever was printed before the payload. Without a marker, find()
    # returns -1, so print the whole output rather than slicing with -1.
    print(stdout if marker_pos == -1 else stdout[:marker_pos])

    if marker_pos == -1 or end_marker not in stdout:
        raise Exception(f"No output payload found. stdout: {stdout}, stderr: {stderr}")

    start_idx = marker_pos + len(start_marker)
    end_idx = stdout.find(end_marker)
    if start_idx >= end_idx:
        raise Exception("Invalid output format: markers found but no content between them")

    try:
        # Decode and deserialize the output payload from JSON
        output_payload = json.loads(stdout[start_idx:end_idx])
    except Exception as e:
        raise Exception(f"Failed to decode output payload: {e}") from e

    if output_payload["success"]:
        return output_payload["result"]

    # Recreate the reported exception. The type name comes from the remote
    # side, so it is looked up among built-in exception classes instead of
    # being passed to eval().
    error_info = output_payload["error"]
    error_class = getattr(builtins, error_info["type"], None)
    if not (isinstance(error_class, type) and issubclass(error_class, Exception)):
        raise Exception(f"{error_info['type']}: {error_info['message']}")
    try:
        error = error_class(error_info["message"])
    except TypeError:
        # Some built-in exceptions need more than a message to construct.
        error = Exception(f"{error_info['type']}: {error_info['message']}")
    raise error
@@ -0,0 +1,104 @@
1
+ import asyncio
2
+ from .interface.models import KeyType, Key
3
+
4
class DioramaComputer:
    """
    A Computer-compatible proxy for Diorama that sends commands over the ComputerInterface.

    The proxy holds the wrapped computer and the list of app names, and exposes
    an ``interface`` attribute (a DioramaComputerInterface) for issuing commands.
    It can be started with ``await run()`` or used with ``async with``.
    """

    def __init__(self, computer, apps):
        """Store the wrapped computer and app list and build the command interface.

        Args:
            computer: The computer object whose interface carries the commands.
            apps: Application names forwarded with every Diorama command.
        """
        self.computer = computer
        self.apps = apps
        self.interface = DioramaComputerInterface(computer, apps)
        self._initialized = False

    async def __aenter__(self):
        """Mark the proxy as initialized and return it."""
        self._initialized = True
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        """Leave the ``async with`` block without suppressing exceptions.

        The proxy sets up nothing of its own in ``__aenter__``, so there is
        nothing to tear down here; the wrapped computer is left untouched.
        """
        return None

    async def run(self):
        """Initialize the proxy if that has not happened yet and return it."""
        if not self._initialized:
            await self.__aenter__()
        return self
22
+
23
class DioramaComputerInterface:
    """
    Diorama Interface proxy that sends diorama_cmds via the Computer's interface.

    Every public coroutine forwards one named action, together with the
    configured app list, to ``computer._interface.diorama_cmd``.
    """

    def __init__(self, computer, apps):
        """Remember the wrapped computer and the app names to scope commands to."""
        self.computer = computer
        self.apps = apps
        # Size of the last decoded screenshot; filled in by screenshot().
        self._scene_size = None

    async def _send_cmd(self, action, arguments=None):
        """Send one Diorama action and return its ``result`` field.

        Raises:
            RuntimeError: If the computer has no ``_interface`` yet, or the
                response does not report success.
        """
        # Caller-supplied arguments are merged after the app list.
        payload = {"app_list": self.apps, **(arguments or {})}

        # Use the computer's interface (must be initialized)
        iface = getattr(self.computer, "_interface", None)
        if iface is None:
            raise RuntimeError("Computer interface not initialized. Call run() first.")

        response = await iface.diorama_cmd(action, payload)
        if response.get("success"):
            return response.get("result")
        raise RuntimeError(f"Diorama command failed: {response.get('error')}\n{response.get('trace')}")

    async def screenshot(self, as_bytes=True):
        """Capture the scene; return raw image bytes, or a PIL image if ``as_bytes`` is false."""
        import base64
        import io

        from PIL import Image

        # assume result is a b64 string of an image
        encoded = await self._send_cmd("screenshot")
        img_bytes = base64.b64decode(encoded)
        img = Image.open(io.BytesIO(img_bytes))
        self._scene_size = img.size
        if as_bytes:
            return img_bytes
        return img

    async def get_screen_size(self):
        """Return ``{"width", "height"}`` of the scene, taking a screenshot first if the size is unknown."""
        if not self._scene_size:
            await self.screenshot(as_bytes=False)
        width, height = self._scene_size[0], self._scene_size[1]
        return {"width": width, "height": height}

    async def move_cursor(self, x, y):
        """Forward a ``move_cursor`` action with the given coordinates."""
        await self._send_cmd("move_cursor", {"x": x, "y": y})

    async def left_click(self, x=None, y=None):
        """Forward a ``left_click`` action with the given coordinates."""
        await self._send_cmd("left_click", {"x": x, "y": y})

    async def right_click(self, x=None, y=None):
        """Forward a ``right_click`` action with the given coordinates."""
        await self._send_cmd("right_click", {"x": x, "y": y})

    async def double_click(self, x=None, y=None):
        """Forward a ``double_click`` action with the given coordinates."""
        await self._send_cmd("double_click", {"x": x, "y": y})

    async def scroll_up(self, clicks=1):
        """Forward a ``scroll_up`` action with the given click count."""
        await self._send_cmd("scroll_up", {"clicks": clicks})

    async def scroll_down(self, clicks=1):
        """Forward a ``scroll_down`` action with the given click count."""
        await self._send_cmd("scroll_down", {"clicks": clicks})

    async def drag_to(self, x, y, duration=0.5):
        """Forward a ``drag_to`` action with the target coordinates and duration."""
        await self._send_cmd("drag_to", {"x": x, "y": y, "duration": duration})

    async def get_cursor_position(self):
        """Return the result of the ``get_cursor_position`` action."""
        return await self._send_cmd("get_cursor_position")

    async def type_text(self, text):
        """Forward a ``type_text`` action with the given text."""
        await self._send_cmd("type_text", {"text": text})

    async def press_key(self, key):
        """Forward a ``press_key`` action with the given key."""
        await self._send_cmd("press_key", {"key": key})

    async def hotkey(self, *keys):
        """Forward a ``hotkey`` action for a combination of keys.

        Each key may be a ``Key`` enum member or a string. Strings are passed
        through ``Key.from_string`` and sent as the enum value when that
        returns a ``Key``, otherwise as returned.

        Raises:
            ValueError: If a key is neither a ``Key`` nor a string.
        """
        resolved = []
        for key in keys:
            if not isinstance(key, (Key, str)):
                raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
            # Try to convert to enum if it matches a known key
            candidate = key if isinstance(key, Key) else Key.from_string(key)
            resolved.append(candidate.value if isinstance(candidate, Key) else candidate)
        await self._send_cmd("hotkey", {"keys": resolved})

    async def to_screen_coordinates(self, x, y):
        """Return the result of the ``to_screen_coordinates`` action for the given point."""
        return await self._send_cmd("to_screen_coordinates", {"x": x, "y": y})
computer/helpers.py ADDED
@@ -0,0 +1,49 @@
1
+ """
2
+ Helper functions and decorators for the Computer module.
3
+ """
4
+ import asyncio
5
+ from functools import wraps
6
+ from typing import Any, Callable, Optional, TypeVar, cast
7
+
8
+ # Global reference to the default computer instance
9
+ _default_computer = None
10
+
11
def set_default_computer(computer: Any) -> None:
    """
    Register a computer instance as the module-wide default.

    The instance is stored in the module-level ``_default_computer`` variable,
    replacing any previously stored value.

    Args:
        computer: The computer instance to use as default
    """
    global _default_computer
    _default_computer = computer
20
+
21
+
22
def sandboxed(venv_name: str = "default", computer: Any = "default", max_retries: int = 3):
    """
    Decorator that wraps a function to be executed remotely via computer.venv_exec

    The decorated function becomes a coroutine function: calling it forwards
    the original function and its arguments to ``venv_exec`` on the chosen
    computer and returns that result.

    Args:
        venv_name: Name of the virtual environment to execute in
        computer: The computer instance to use, or "default" to use the globally set default
        max_retries: Maximum number of attempts for the remote execution.
            Values below 1 are treated as 1 so the function always runs at
            least once.

    Raises:
        RuntimeError: When called with no explicit computer and no default set.
        Exception: The error from the last attempt, once all attempts failed.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Determine which computer instance to use. The default is looked
            # up at call time, so it may be set after decoration.
            comp = computer if computer != "default" else _default_computer

            if comp is None:
                raise RuntimeError("No computer instance available. Either specify a computer instance or call set_default_computer() first.")

            attempts = max(1, max_retries)
            for attempt in range(1, attempts + 1):
                try:
                    return await comp.venv_exec(venv_name, func, *args, **kwargs)
                except Exception as e:
                    print(f"Attempt {attempt} failed: {e}")
                    if attempt == attempts:
                        # Last attempt: re-raise at once with the original
                        # traceback instead of sleeping first.
                        raise
                    await asyncio.sleep(1)
        return wrapper
    return decorator
@@ -177,7 +177,7 @@ class BaseComputerInterface(ABC):
177
177
  async def get_accessibility_tree(self) -> Dict:
178
178
  """Get the accessibility tree of the current screen."""
179
179
  pass
180
-
180
+
181
181
  @abstractmethod
182
182
  async def to_screen_coordinates(self, x: float, y: float) -> tuple[float, float]:
183
183
  """Convert screenshot coordinates to screen coordinates.
@@ -27,6 +27,7 @@ class LinuxComputerInterface(BaseComputerInterface):
27
27
  self._max_reconnect_delay = 30 # Maximum delay between reconnection attempts
28
28
  self._log_connection_attempts = True # Flag to control connection attempt logging
29
29
  self._authenticated = False # Track authentication status
30
+ self._command_lock = asyncio.Lock() # Lock to ensure only one command at a time
30
31
 
31
32
  # Set logger name for Linux interface
32
33
  self.logger = Logger("cua.interface.linux", LogLevel.NORMAL)
@@ -193,58 +194,62 @@ class LinuxComputerInterface(BaseComputerInterface):
193
194
  retry_count = 0
194
195
  last_error = None
195
196
 
196
- while retry_count < max_retries:
197
- try:
198
- await self._ensure_connection()
199
- if not self._ws:
200
- raise ConnectionError("WebSocket connection is not established")
201
-
202
- # Handle authentication if needed
203
- if self.api_key and self.vm_name and not self._authenticated:
204
- self.logger.info("Performing authentication handshake...")
205
- auth_message = {
206
- "command": "authenticate",
207
- "params": {
208
- "api_key": self.api_key,
209
- "container_name": self.vm_name
197
+ # Acquire lock to ensure only one command is processed at a time
198
+ async with self._command_lock:
199
+ self.logger.debug(f"Acquired lock for command: {command}")
200
+ while retry_count < max_retries:
201
+ try:
202
+ await self._ensure_connection()
203
+ if not self._ws:
204
+ raise ConnectionError("WebSocket connection is not established")
205
+
206
+ # Handle authentication if needed
207
+ if self.api_key and self.vm_name and not self._authenticated:
208
+ self.logger.info("Performing authentication handshake...")
209
+ auth_message = {
210
+ "command": "authenticate",
211
+ "params": {
212
+ "api_key": self.api_key,
213
+ "container_name": self.vm_name
214
+ }
210
215
  }
211
- }
212
- await self._ws.send(json.dumps(auth_message))
213
-
214
- # Wait for authentication response
215
- auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
216
- auth_result = json.loads(auth_response)
217
-
218
- if not auth_result.get("success"):
219
- error_msg = auth_result.get("error", "Authentication failed")
220
- self.logger.error(f"Authentication failed: {error_msg}")
221
- self._authenticated = False
222
- raise ConnectionError(f"Authentication failed: {error_msg}")
223
-
224
- self.logger.info("Authentication successful")
225
- self._authenticated = True
226
-
227
- message = {"command": command, "params": params or {}}
228
- await self._ws.send(json.dumps(message))
229
- response = await asyncio.wait_for(self._ws.recv(), timeout=30)
230
- return json.loads(response)
231
- except Exception as e:
232
- last_error = e
233
- retry_count += 1
234
- if retry_count < max_retries:
235
- # Only log at debug level for intermediate retries
236
- self.logger.debug(
237
- f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
238
- )
239
- await asyncio.sleep(1)
240
- continue
241
- else:
242
- # Only log at error level for the final failure
243
- self.logger.error(
244
- f"Failed to send command '{command}' after {max_retries} retries"
245
- )
246
- self.logger.debug(f"Command failure details: {e}")
247
- raise last_error if last_error else RuntimeError("Failed to send command")
216
+ await self._ws.send(json.dumps(auth_message))
217
+
218
+ # Wait for authentication response
219
+ auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
220
+ auth_result = json.loads(auth_response)
221
+
222
+ if not auth_result.get("success"):
223
+ error_msg = auth_result.get("error", "Authentication failed")
224
+ self.logger.error(f"Authentication failed: {error_msg}")
225
+ self._authenticated = False
226
+ raise ConnectionError(f"Authentication failed: {error_msg}")
227
+
228
+ self.logger.info("Authentication successful")
229
+ self._authenticated = True
230
+
231
+ message = {"command": command, "params": params or {}}
232
+ await self._ws.send(json.dumps(message))
233
+ response = await asyncio.wait_for(self._ws.recv(), timeout=30)
234
+ self.logger.debug(f"Completed command: {command}")
235
+ return json.loads(response)
236
+ except Exception as e:
237
+ last_error = e
238
+ retry_count += 1
239
+ if retry_count < max_retries:
240
+ # Only log at debug level for intermediate retries
241
+ self.logger.debug(
242
+ f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
243
+ )
244
+ await asyncio.sleep(1)
245
+ continue
246
+ else:
247
+ # Only log at error level for the final failure
248
+ self.logger.error(
249
+ f"Failed to send command '{command}' after {max_retries} retries"
250
+ )
251
+ self.logger.debug(f"Command failure details: {e}")
252
+ raise last_error if last_error else RuntimeError("Failed to send command")
248
253
 
249
254
  async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
250
255
  """Wait for WebSocket connection to become available."""
@@ -26,6 +26,7 @@ class MacOSComputerInterface(BaseComputerInterface):
26
26
  self._reconnect_delay = 1 # Start with 1 second delay
27
27
  self._max_reconnect_delay = 30 # Maximum delay between reconnection attempts
28
28
  self._log_connection_attempts = True # Flag to control connection attempt logging
29
+ self._command_lock = asyncio.Lock() # Lock to ensure only one command at a time
29
30
 
30
31
  # Set logger name for macOS interface
31
32
  self.logger = Logger("cua.interface.macos", LogLevel.NORMAL)
@@ -219,35 +220,39 @@ class MacOSComputerInterface(BaseComputerInterface):
219
220
  retry_count = 0
220
221
  last_error = None
221
222
 
222
- while retry_count < max_retries:
223
- try:
224
- await self._ensure_connection()
225
- if not self._ws:
226
- raise ConnectionError("WebSocket connection is not established")
227
-
228
- message = {"command": command, "params": params or {}}
229
- await self._ws.send(json.dumps(message))
230
- response = await asyncio.wait_for(self._ws.recv(), timeout=30)
231
- return json.loads(response)
232
- except Exception as e:
233
- last_error = e
234
- retry_count += 1
235
- if retry_count < max_retries:
236
- # Only log at debug level for intermediate retries
237
- self.logger.debug(
238
- f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
239
- )
240
- await asyncio.sleep(1)
241
- continue
242
- else:
243
- # Only log at error level for the final failure
244
- self.logger.error(
245
- f"Failed to send command '{command}' after {max_retries} retries"
246
- )
247
- self.logger.debug(f"Command failure details: {e}")
248
- raise
223
+ # Acquire lock to ensure only one command is processed at a time
224
+ async with self._command_lock:
225
+ self.logger.debug(f"Acquired lock for command: {command}")
226
+ while retry_count < max_retries:
227
+ try:
228
+ await self._ensure_connection()
229
+ if not self._ws:
230
+ raise ConnectionError("WebSocket connection is not established")
231
+
232
+ message = {"command": command, "params": params or {}}
233
+ await self._ws.send(json.dumps(message))
234
+ response = await asyncio.wait_for(self._ws.recv(), timeout=30)
235
+ self.logger.debug(f"Completed command: {command}")
236
+ return json.loads(response)
237
+ except Exception as e:
238
+ last_error = e
239
+ retry_count += 1
240
+ if retry_count < max_retries:
241
+ # Only log at debug level for intermediate retries
242
+ self.logger.debug(
243
+ f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
244
+ )
245
+ await asyncio.sleep(1)
246
+ continue
247
+ else:
248
+ # Only log at error level for the final failure
249
+ self.logger.error(
250
+ f"Failed to send command '{command}' after {max_retries} retries"
251
+ )
252
+ self.logger.debug(f"Command failure details: {e}")
253
+ raise
249
254
 
250
- raise last_error if last_error else RuntimeError("Failed to send command")
255
+ raise last_error if last_error else RuntimeError("Failed to send command")
251
256
 
252
257
  async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
253
258
  """Wait for WebSocket connection to become available."""
@@ -346,6 +351,10 @@ class MacOSComputerInterface(BaseComputerInterface):
346
351
  asyncio.create_task(self._ws.close())
347
352
  self._ws = None
348
353
 
354
async def diorama_cmd(self, action: str, arguments: Optional[dict] = None) -> dict:
    """Send a diorama command to the server (macOS only).

    Args:
        action: Name of the Diorama action to perform.
        arguments: Optional arguments for the action; an empty dict is sent
            when omitted.

    Returns:
        The response returned by ``_send_command``.
    """
    params = {"action": action, "arguments": arguments or {}}
    return await self._send_command("diorama_cmd", params)
357
+
349
358
  # Mouse Actions
350
359
  async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
351
360
  await self._send_command("left_click", {"x": x, "y": y})
@@ -568,7 +577,7 @@ class MacOSComputerInterface(BaseComputerInterface):
568
577
  if not result.get("success", False):
569
578
  raise RuntimeError(result.get("error", "Failed to get accessibility tree"))
570
579
  return result
571
-
580
+
572
581
  async def get_active_window_bounds(self) -> Dict[str, int]:
573
582
  """Get the bounds of the currently active window."""
574
583
  result = await self._send_command("get_active_window_bounds")
computer/ui/gradio/app.py CHANGED
@@ -463,7 +463,7 @@ async def execute(name, action, arguments):
463
463
  elif action == "left_click":
464
464
  if "x" in arguments and "y" in arguments:
465
465
  await computer.interface.move_cursor(arguments["x"], arguments["y"])
466
- await computer.interface.left_click()
466
+ await computer.interface.left_click(arguments["x"], arguments["y"])
467
467
  await asyncio.sleep(0.5)
468
468
  elif action == "right_click":
469
469
  if "x" in arguments and "y" in arguments:
@@ -528,43 +528,75 @@ async def execute(name, action, arguments):
528
528
 
529
529
  return results
530
530
 
531
- async def handle_init_computer(os_choice: str):
532
- """Initialize the computer instance and tools for macOS or Ubuntu"""
531
+ async def handle_init_computer(os_choice: str, app_list=None, provider="lume"):
532
+ """Initialize the computer instance and tools for macOS or Ubuntu
533
+
534
+ Args:
535
+ os_choice: The OS to use ("macOS" or "Ubuntu")
536
+ app_list: Optional list of apps to focus on using the app-use experiment
537
+ provider: The provider to use ("lume" or "self")
538
+ """
533
539
  global computer, tool_call_logs, tools
534
-
540
+
541
+ # Check if we should enable app-use experiment
542
+ use_app_experiment = app_list and len(app_list) > 0
543
+ experiments = ["app-use"] if use_app_experiment else None
544
+
545
+ # Determine if we should use host computer server
546
+ use_host_computer_server = provider == "self"
547
+
535
548
  if os_choice == "Ubuntu":
536
- computer = Computer(
537
- image="ubuntu-noble-vanilla:latest",
538
- os_type="linux",
539
- provider_type=VMProviderType.LUME,
540
- display="1024x768",
541
- memory="8GB",
542
- cpu="4"
543
- )
544
549
  os_type_str = "linux"
545
550
  image_str = "ubuntu-noble-vanilla:latest"
551
+ else:
552
+ os_type_str = "macos"
553
+ image_str = "macos-sequoia-cua:latest"
554
+
555
+ # Create computer instance with appropriate configuration
556
+ if use_host_computer_server:
557
+ computer = Computer(
558
+ os_type=os_type_str,
559
+ use_host_computer_server=True,
560
+ experiments=experiments
561
+ )
546
562
  else:
547
563
  computer = Computer(
548
- image="macos-sequoia-cua:latest",
549
- os_type="macos",
564
+ image=image_str,
565
+ os_type=os_type_str,
550
566
  provider_type=VMProviderType.LUME,
551
567
  display="1024x768",
552
568
  memory="8GB",
553
- cpu="4"
569
+ cpu="4",
570
+ experiments=experiments
554
571
  )
555
- os_type_str = "macos"
556
- image_str = "macos-sequoia-cua:latest"
557
572
 
558
573
  await computer.run()
574
+
575
+ # If app list is provided, create desktop from apps
576
+ if use_app_experiment:
577
+ computer = computer.create_desktop_from_apps(app_list)
559
578
 
560
579
  # Log computer initialization as a tool call
561
- result = await execute("computer", "initialize", {
580
+ init_params = {
562
581
  "os": os_type_str,
563
- "image": image_str,
564
- "display": "1024x768",
565
- "memory": "8GB",
566
- "cpu": "4"
567
- })
582
+ "provider": provider
583
+ }
584
+
585
+ # Add VM-specific parameters if not using host computer server
586
+ if not use_host_computer_server:
587
+ init_params.update({
588
+ "image": image_str,
589
+ "display": "1024x768",
590
+ "memory": "8GB",
591
+ "cpu": "4"
592
+ })
593
+
594
+ # Add app list to the log if provided
595
+ if use_app_experiment:
596
+ init_params["apps"] = app_list
597
+ init_params["experiments"] = ["app-use"]
598
+
599
+ result = await execute("computer", "initialize", init_params)
568
600
 
569
601
  return result["screenshot"], json.dumps(tool_call_logs, indent=2)
570
602
 
@@ -1029,12 +1061,31 @@ def create_gradio_ui():
1029
1061
  setup_status = gr.Textbox(label="Setup Status", value="")
1030
1062
 
1031
1063
  with gr.Group():
1032
- os_choice = gr.Radio(
1033
- label="OS",
1034
- choices=["macOS", "Ubuntu"],
1035
- value="macOS",
1036
- interactive=False # disable until the ubuntu image is ready
1037
- )
1064
+ with gr.Accordion("Computer Configuration", open=False):
1065
+ with gr.Row():
1066
+ os_choice = gr.Radio(
1067
+ label="OS",
1068
+ choices=["macOS", "Ubuntu"],
1069
+ value="macOS",
1070
+ interactive=False # disable until the ubuntu image is ready
1071
+ )
1072
+
1073
+ # Provider selection radio
1074
+ provider_choice = gr.Radio(
1075
+ label="Provider",
1076
+ choices=["lume", "self"],
1077
+ value="lume",
1078
+ info="'lume' uses a VM, 'self' uses the host computer server"
1079
+ )
1080
+
1081
+ # App filtering dropdown for app-use experiment
1082
+ app_filter = gr.Dropdown(
1083
+ label="Filter by apps (App-Use)",
1084
+ multiselect=True,
1085
+ allow_custom_value=True,
1086
+ info="When apps are selected, the computer will focus on those apps using the app-use experiment"
1087
+ )
1088
+
1038
1089
  start_btn = gr.Button("Initialize Computer")
1039
1090
 
1040
1091
  with gr.Group():
@@ -1199,7 +1250,7 @@ def create_gradio_ui():
1199
1250
  )
1200
1251
 
1201
1252
  img.select(handle_click, inputs=[img, click_type], outputs=[img, action_log])
1202
- start_btn.click(handle_init_computer, inputs=[os_choice], outputs=[img, action_log])
1253
+ start_btn.click(handle_init_computer, inputs=[os_choice, app_filter, provider_choice], outputs=[img, action_log])
1203
1254
  wait_btn.click(handle_wait, outputs=[img, action_log])
1204
1255
 
1205
1256
  # DONE and FAIL buttons just do a placeholder action
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-computer
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: Computer-Use Interface (CUI) framework powering Cua
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.11
@@ -1,10 +1,12 @@
1
1
  computer/__init__.py,sha256=QOxNrrJAuLRnsUC2zIFgRfzVSuDSXiYHlEF-9vkhV0o,1241
2
- computer/computer.py,sha256=Rc32XFZdKr7XZKO0zhbEom-REvYYPPlvmvjDbw5gP9k,32218
2
+ computer/computer.py,sha256=zmx_jsQdWy8nsbeb1u9rptJRv5ChR0l0JeyQJcrhiSc,40246
3
+ computer/diorama_computer.py,sha256=jOP7_eXxxU6SMIoE25ni0YXPK0E7p5sZeLKmkYLh6G8,3871
4
+ computer/helpers.py,sha256=0ob9d9ynVGi0JRxhHCgXTuHPHFpa8AVKldn6k0hvxOo,1766
3
5
  computer/interface/__init__.py,sha256=xQvYjq5PMn9ZJOmRR5mWtONTl_0HVd8ACvW6AQnzDdw,262
4
- computer/interface/base.py,sha256=CD9WpDp-6qP-ID5MjhXA8qpYs0XhJ4TPkR917l2FFSo,6021
6
+ computer/interface/base.py,sha256=wmLBiX7rB8cG2Q4fmchdKpjralktzicuYhAh6fDIeqw,6025
5
7
  computer/interface/factory.py,sha256=RjAZAB_jFuS8JierYjLbapRX6RqFE0qE3BiIyP5UDOE,1441
6
- computer/interface/linux.py,sha256=CT1N0QA52TNKBbFG2LXdN6yAGWWJ12_2hTMEI8yNoM4,26865
7
- computer/interface/macos.py,sha256=_8R_IroxbcVmh1WagrjDQOitaT6tVkCHVzGgA_lwTrM,27077
8
+ computer/interface/linux.py,sha256=WA-jpjNHId3blaT__ftd_X7qhKi_50vwy97-jq2yd6g,27412
9
+ computer/interface/macos.py,sha256=B-siwSKNOwNNBVvUQh9TO0nWIaMRvvCUbIQ3oziIF5A,27791
8
10
  computer/interface/models.py,sha256=RZKVUdwKrKUoFqwlx2Dk8Egkmq_AInlIu_d0xg7SZzw,3238
9
11
  computer/logger.py,sha256=UVvnmZGOWVF9TCsixEbeQnDZ3wBPAJ2anW3Zp-MoJ8Y,2896
10
12
  computer/models.py,sha256=iFNM1QfZArD8uf66XJXb2EDIREsfrxqqA5_liLBMfrE,1188
@@ -21,9 +23,9 @@ computer/providers/lumier/provider.py,sha256=CXwAKwJfR9ALFGM5u7UIZ-YrFwPvew_01wT
21
23
  computer/telemetry.py,sha256=FvNFpxgeRuCMdNpREuSL7bOMZy9gSzY4J0rLeNDw0CU,3746
22
24
  computer/ui/__init__.py,sha256=pmo05ek9qiB_x7DPeE6Vf_8RsIOqTD0w1dBLMHfoOnY,45
23
25
  computer/ui/gradio/__init__.py,sha256=5_KimixM48-X74FCsLw7LbSt39MQfUMEL8-M9amK3Cw,117
24
- computer/ui/gradio/app.py,sha256=o31nphBcb6zM5OKPuODTjuOzSJ3lt61kQHpUeMBBs70,65077
26
+ computer/ui/gradio/app.py,sha256=pLMoMpxyKsGhg9wlsiqyKiRujd-lzubs0nGWAtkleL0,67316
25
27
  computer/utils.py,sha256=zY50NXB7r51GNLQ6l7lhG_qv0_ufpQ8n0-SDhCei8m4,2838
26
- cua_computer-0.2.7.dist-info/METADATA,sha256=NM_bxqQbNmvZR4bHfHHu0tXCc9GEF6kNgVEyjR6XYc8,5844
27
- cua_computer-0.2.7.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
28
- cua_computer-0.2.7.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
29
- cua_computer-0.2.7.dist-info/RECORD,,
28
+ cua_computer-0.2.9.dist-info/METADATA,sha256=wbPfNftTjYS0NSsy77kUlDdle8a3c8TBQ07ncdq65zM,5844
29
+ cua_computer-0.2.9.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
30
+ cua_computer-0.2.9.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
31
+ cua_computer-0.2.9.dist-info/RECORD,,