cua-computer 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- computer/computer.py +196 -10
- computer/diorama_computer.py +13 -2
- computer/helpers.py +49 -0
- computer/interface/base.py +67 -1
- computer/interface/linux.py +121 -57
- computer/interface/macos.py +96 -32
- computer/interface/models.py +3 -0
- computer/providers/cloud/provider.py +3 -3
- computer/ui/gradio/app.py +81 -30
- {cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/METADATA +1 -1
- {cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/RECORD +13 -12
- {cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/WHEEL +0 -0
- {cua_computer-0.2.8.dist-info → cua_computer-0.2.10.dist-info}/entry_points.txt +0 -0
computer/computer.py
CHANGED
@@ -11,6 +11,7 @@ import json
|
|
11
11
|
import logging
|
12
12
|
from .telemetry import record_computer_initialization
|
13
13
|
import os
|
14
|
+
from . import helpers
|
14
15
|
|
15
16
|
# Import provider related modules
|
16
17
|
from .providers.base import VMProviderType
|
@@ -180,24 +181,25 @@ class Computer:
|
|
180
181
|
self.logger.debug("Telemetry disabled - skipping initialization tracking")
|
181
182
|
|
182
183
|
async def __aenter__(self):
|
183
|
-
"""
|
184
|
+
"""Start the computer."""
|
185
|
+
await self.run()
|
184
186
|
return self
|
185
187
|
|
186
188
|
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
187
|
-
"""
|
188
|
-
|
189
|
+
"""Stop the computer."""
|
190
|
+
await self.disconnect()
|
189
191
|
|
190
192
|
def __enter__(self):
|
191
|
-
"""
|
192
|
-
# Run the event loop to call the async
|
193
|
+
"""Start the computer."""
|
194
|
+
# Run the event loop to call the async enter method
|
193
195
|
loop = asyncio.get_event_loop()
|
194
|
-
loop.run_until_complete(self.
|
196
|
+
loop.run_until_complete(self.__aenter__())
|
195
197
|
return self
|
196
198
|
|
197
199
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
198
|
-
"""
|
199
|
-
|
200
|
-
|
200
|
+
"""Stop the computer."""
|
201
|
+
loop = asyncio.get_event_loop()
|
202
|
+
loop.run_until_complete(self.__aexit__(exc_type, exc_val, exc_tb))
|
201
203
|
|
202
204
|
async def run(self) -> Optional[str]:
|
203
205
|
"""Initialize the VM and computer interface."""
|
@@ -460,6 +462,10 @@ class Computer:
|
|
460
462
|
|
461
463
|
# Set the initialization flag and clear the initializing flag
|
462
464
|
self._initialized = True
|
465
|
+
|
466
|
+
# Set this instance as the default computer for remote decorators
|
467
|
+
helpers.set_default_computer(self)
|
468
|
+
|
463
469
|
self.logger.info("Computer successfully initialized")
|
464
470
|
except Exception as e:
|
465
471
|
raise
|
@@ -468,9 +474,14 @@ class Computer:
|
|
468
474
|
duration_ms = (time.time() - start_time) * 1000
|
469
475
|
self.logger.debug(f"Computer initialization took {duration_ms:.2f}ms")
|
470
476
|
return
|
477
|
+
|
478
|
+
async def disconnect(self) -> None:
|
479
|
+
"""Disconnect from the computer's WebSocket interface."""
|
480
|
+
if self._interface:
|
481
|
+
self._interface.close()
|
471
482
|
|
472
483
|
async def stop(self) -> None:
|
473
|
-
"""
|
484
|
+
"""Disconnect from the computer's WebSocket interface and stop the computer."""
|
474
485
|
start_time = time.time()
|
475
486
|
|
476
487
|
try:
|
@@ -491,6 +502,7 @@ class Computer:
|
|
491
502
|
await self.config.vm_provider.__aexit__(None, None, None)
|
492
503
|
self._provider_context = None
|
493
504
|
|
505
|
+
await self.disconnect()
|
494
506
|
self.logger.info("Computer stopped")
|
495
507
|
except Exception as e:
|
496
508
|
self.logger.debug(f"Error during cleanup: {e}") # Log as debug since this might be expected
|
@@ -722,3 +734,177 @@ class Computer:
|
|
722
734
|
tuple[float, float]: (x, y) coordinates in screenshot space
|
723
735
|
"""
|
724
736
|
return await self.interface.to_screenshot_coordinates(x, y)
|
737
|
+
|
738
|
+
|
739
|
+
# Add virtual environment management functions to computer interface
|
740
|
+
async def venv_install(self, venv_name: str, requirements: list[str]) -> tuple[str, str]:
    """Ensure a virtual environment exists and install packages into it.

    The environment lives under ``~/.venvs/<venv_name>`` on the target
    machine and is created with ``python3 -m venv`` when the directory is
    not there yet.

    Args:
        venv_name: Name of the virtual environment.
        requirements: Package requirement specifiers. Each entry is passed
            to ``pip install`` as one shell-quoted argument, so specifiers
            such as ``"requests>=2.0"`` are not treated as shell redirects.
            ``None`` or an empty list only ensures the environment exists.

    Returns:
        Tuple of (stdout, stderr) from the ``pip install`` command, or from
        the check/create command when there is nothing to install.
    """
    # Function-scope import, matching the style already used by venv_exec.
    import shlex

    requirements = requirements or []

    # Quote only the name: the leading "~/" has to stay unquoted so the
    # remote shell still performs tilde expansion.
    venv_path = f"~/.venvs/{shlex.quote(venv_name)}"
    create_cmd = f"mkdir -p ~/.venvs && python3 -m venv {venv_path}"

    # Create the environment only when its directory is missing.
    check_cmd = f"test -d {venv_path} || ({create_cmd})"
    ensure_result = await self.interface.run_command(check_cmd)

    if not requirements:
        # A bare "pip install" exits with an error; there is nothing to do.
        return ensure_result

    requirements_str = " ".join(shlex.quote(req) for req in requirements)
    install_cmd = f". {venv_path}/bin/activate && pip install {requirements_str}"
    return await self.interface.run_command(install_cmd)
|
764
|
+
|
765
|
+
async def venv_cmd(self, venv_name: str, command: str) -> tuple[str, str]:
    """Execute a shell command inside a virtual environment.

    Args:
        venv_name: Name of the virtual environment under ``~/.venvs``.
        command: Shell command to run after activating the environment. It
            is passed to the shell verbatim.

    Returns:
        Tuple of (stdout, stderr) from the command. When the environment
        does not exist, returns an empty stdout and an explanatory message
        as stderr without running ``command``.
    """
    # Function-scope import, matching the style already used by venv_exec.
    import shlex

    # Quote only the name so the leading "~/" is still tilde-expanded.
    venv_path = f"~/.venvs/{shlex.quote(venv_name)}"

    # "test -d" reports its result only through the exit status and prints
    # nothing, so make the shell echo an explicit marker that can be read
    # back from stdout.
    exists_marker = "__VENV_EXISTS__"
    missing_marker = "__VENV_MISSING__"
    check_cmd = (
        f"if test -d {venv_path}; then echo {exists_marker}; "
        f"else echo {missing_marker}; fi"
    )
    stdout, _ = await self.interface.run_command(check_cmd)

    if missing_marker in stdout:
        return "", f"Virtual environment '{venv_name}' does not exist. Create it first using venv_install."

    # Activate virtual environment and run command
    full_command = f". {venv_path}/bin/activate && {command}"
    return await self.interface.run_command(full_command)
|
787
|
+
|
788
|
+
async def venv_exec(self, venv_name: str, python_func, *args, **kwargs):
    """Execute a Python function in a virtual environment via its source code.

    The function's source is extracted with ``inspect.getsource``, shipped
    to the target machine inside a generated script, and run there through
    ``venv_cmd``. The function must therefore be self-contained: anything
    it needs has to be imported inside its own body.

    Args:
        venv_name: Name of the virtual environment.
        python_func: A callable whose source code can be retrieved.
        *args: Positional arguments, serialized as JSON (values that are
            not JSON-serializable are converted with ``str``).
        **kwargs: Keyword arguments, serialized the same way.

    Returns:
        The JSON-decoded result of the function call.

    Raises:
        Exception: If the source cannot be retrieved, the output payload is
            missing or malformed, or the remote error type is not a builtin
            exception. Builtin exception types raised by the function are
            re-raised locally with the remote message.
    """
    import ast
    import base64
    import builtins
    import inspect
    import json
    import textwrap

    try:
        # Get function source code and remove common leading whitespace.
        func_source = textwrap.dedent(inspect.getsource(python_func)).strip()

        # Remove decorators. The AST gives the line of the "def" keyword,
        # which also handles decorators that span several lines.
        try:
            tree = ast.parse(func_source)
        except SyntaxError:
            tree = None
        definition = tree.body[0] if tree is not None and tree.body else None
        if isinstance(definition, (ast.FunctionDef, ast.AsyncFunctionDef)):
            func_source = "\n".join(func_source.splitlines()[definition.lineno - 1:])
        else:
            # Fallback for sources that do not parse as a plain definition.
            while func_source.lstrip().startswith("@"):
                func_source = func_source.split("\n", 1)[1].strip()

        # Get function name for execution
        func_name = python_func.__name__

        # Serialize args and kwargs as JSON (safer than dill for cross-version compatibility)
        args_json = json.dumps(args, default=str)
        kwargs_json = json.dumps(kwargs, default=str)

    except OSError as e:
        name = getattr(python_func, "__name__", python_func)
        raise Exception(f"Cannot retrieve source code for function {name}: {e}") from e
    except Exception as e:
        raise Exception(f"Failed to reconstruct function source: {e}") from e

    # Build the remote script. Source and JSON are embedded with repr() so
    # backslashes, quotes and newlines survive as valid Python literals, and
    # the function source is exec'd rather than re-indented so multi-line
    # strings inside it keep their content.
    python_code = f'''
import json
import traceback

try:
    # Define the function from source
    exec(compile({func_source!r}, "<venv_exec>", "exec"), globals())

    # Deserialize args and kwargs from JSON
    args = json.loads({args_json!r})
    kwargs = json.loads({kwargs_json!r})

    # Execute the function
    result = globals()[{func_name!r}](*args, **kwargs)

    # Create success output payload
    output_payload = {{
        "success": True,
        "result": result,
        "error": None
    }}

except Exception as e:
    # Create error output payload
    output_payload = {{
        "success": False,
        "result": None,
        "error": {{
            "type": type(e).__name__,
            "message": str(e),
            "traceback": traceback.format_exc()
        }}
    }}

# Serialize the output payload as JSON and print it between markers
output_json = json.dumps(output_payload, default=str)
print("<<<VENV_EXEC_START>>>" + output_json + "<<<VENV_EXEC_END>>>")
'''

    # Encode the Python code in base64 to avoid shell escaping issues
    encoded_code = base64.b64encode(python_code.encode('utf-8')).decode('ascii')

    # Execute the Python code in the virtual environment
    python_command = f"python -c \"import base64; exec(base64.b64decode('{encoded_code}').decode('utf-8'))\""
    stdout, stderr = await self.venv_cmd(venv_name, python_command)

    # Parse the output to extract the payload
    start_marker = "<<<VENV_EXEC_START>>>"
    end_marker = "<<<VENV_EXEC_END>>>"
    marker_pos = stdout.find(start_marker)

    # Forward whatever the function itself printed before the payload.
    if marker_pos > 0:
        print(stdout[:marker_pos], end="")

    if marker_pos == -1 or end_marker not in stdout:
        raise Exception(f"No output payload found. stdout: {stdout}, stderr: {stderr}")

    start_idx = marker_pos + len(start_marker)
    end_idx = stdout.find(end_marker)
    if start_idx >= end_idx:
        raise Exception("Invalid output format: markers found but no content between them")

    try:
        output_payload = json.loads(stdout[start_idx:end_idx])
    except Exception as e:
        raise Exception(f"Failed to decode output payload: {e}") from e

    if output_payload["success"]:
        return output_payload["result"]

    # Recreate the remote exception. The type name comes from the remote
    # machine, so it is only looked up among builtin exception classes and
    # never evaluated as code.
    error_info = output_payload["error"]
    error_type = error_info["type"]
    message = error_info["message"]
    error_class = getattr(builtins, error_type, None) if isinstance(error_type, str) else None
    if isinstance(error_class, type) and issubclass(error_class, Exception):
        try:
            remote_error = error_class(message)
        except TypeError:
            # Some builtin exceptions need more than a message to construct.
            remote_error = None
        if remote_error is not None:
            raise remote_error
    raise Exception(f"{error_type}: {message}")
|
computer/diorama_computer.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import asyncio
|
2
|
+
from .interface.models import KeyType, Key
|
2
3
|
|
3
4
|
class DioramaComputer:
|
4
5
|
"""
|
@@ -37,7 +38,7 @@ class DioramaComputerInterface:
|
|
37
38
|
raise RuntimeError("Computer interface not initialized. Call run() first.")
|
38
39
|
result = await iface.diorama_cmd(action, arguments)
|
39
40
|
if not result.get("success"):
|
40
|
-
raise RuntimeError(f"Diorama command failed: {result.get('error')}")
|
41
|
+
raise RuntimeError(f"Diorama command failed: {result.get('error')}\n{result.get('trace')}")
|
41
42
|
return result.get("result")
|
42
43
|
|
43
44
|
async def screenshot(self, as_bytes=True):
|
@@ -87,7 +88,17 @@ class DioramaComputerInterface:
|
|
87
88
|
await self._send_cmd("press_key", {"key": key})
|
88
89
|
|
89
90
|
async def hotkey(self, *keys):
|
90
|
-
|
91
|
+
actual_keys = []
|
92
|
+
for key in keys:
|
93
|
+
if isinstance(key, Key):
|
94
|
+
actual_keys.append(key.value)
|
95
|
+
elif isinstance(key, str):
|
96
|
+
# Try to convert to enum if it matches a known key
|
97
|
+
key_or_enum = Key.from_string(key)
|
98
|
+
actual_keys.append(key_or_enum.value if isinstance(key_or_enum, Key) else key_or_enum)
|
99
|
+
else:
|
100
|
+
raise ValueError(f"Invalid key type: {type(key)}. Must be Key enum or string.")
|
101
|
+
await self._send_cmd("hotkey", {"keys": actual_keys})
|
91
102
|
|
92
103
|
async def to_screen_coordinates(self, x, y):
|
93
104
|
return await self._send_cmd("to_screen_coordinates", {"x": x, "y": y})
|
computer/helpers.py
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
"""
|
2
|
+
Helper functions and decorators for the Computer module.
|
3
|
+
"""
|
4
|
+
import asyncio
|
5
|
+
from functools import wraps
|
6
|
+
from typing import Any, Callable, Optional, TypeVar, cast
|
7
|
+
|
8
|
+
# Global reference to the default computer instance
|
9
|
+
_default_computer = None
|
10
|
+
|
11
|
+
def set_default_computer(computer):
    """
    Set the default computer instance to be used by the remote decorator.

    Args:
        computer: The computer instance to use as default
    """
    global _default_computer
    _default_computer = computer


def sandboxed(venv_name: str = "default", computer: Any = "default", max_retries: int = 3):
    """
    Decorator that wraps a function to be executed remotely via computer.venv_exec

    The decorated function becomes a coroutine function: calling it returns
    an awaitable that resolves to the remote result.

    Args:
        venv_name: Name of the virtual environment to execute in
        computer: The computer instance to use, or "default" to use the globally set default
        max_retries: Maximum number of attempts for the remote execution.
            Values below 1 are treated as 1 so the call is always tried.

    Raises:
        RuntimeError: When the wrapped function is called and no computer
            instance is available.
        Exception: The error of the last attempt, once all attempts failed.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Resolve the default at call time so a computer registered
            # after decoration is still picked up.
            comp = computer if computer != "default" else _default_computer

            if comp is None:
                raise RuntimeError("No computer instance available. Either specify a computer instance or call set_default_computer() first.")

            attempts = max(1, max_retries)
            for attempt in range(1, attempts + 1):
                try:
                    return await comp.venv_exec(venv_name, func, *args, **kwargs)
                except Exception as e:
                    print(f"Attempt {attempt} failed: {e}")
                    if attempt == attempts:
                        # Out of attempts: re-raise right away instead of
                        # sleeping first, keeping the original traceback.
                        raise
                    await asyncio.sleep(1)
        return wrapper
    return decorator
|
computer/interface/base.py
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
from abc import ABC, abstractmethod
|
4
4
|
from typing import Optional, Dict, Any, Tuple, List
|
5
5
|
from ..logger import Logger, LogLevel
|
6
|
+
from .models import MouseButton
|
6
7
|
|
7
8
|
|
8
9
|
class BaseComputerInterface(ABC):
|
@@ -51,6 +52,16 @@ class BaseComputerInterface(ABC):
|
|
51
52
|
self.close()
|
52
53
|
|
53
54
|
# Mouse Actions
|
55
|
+
@abstractmethod
|
56
|
+
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
|
57
|
+
"""Press and hold a mouse button."""
|
58
|
+
pass
|
59
|
+
|
60
|
+
@abstractmethod
|
61
|
+
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
|
62
|
+
"""Release a mouse button."""
|
63
|
+
pass
|
64
|
+
|
54
65
|
@abstractmethod
|
55
66
|
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
56
67
|
"""Perform a left click."""
|
@@ -95,6 +106,16 @@ class BaseComputerInterface(ABC):
|
|
95
106
|
pass
|
96
107
|
|
97
108
|
# Keyboard Actions
|
109
|
+
@abstractmethod
|
110
|
+
async def key_down(self, key: str) -> None:
|
111
|
+
"""Press and hold a key."""
|
112
|
+
pass
|
113
|
+
|
114
|
+
@abstractmethod
|
115
|
+
async def key_up(self, key: str) -> None:
|
116
|
+
"""Release a key."""
|
117
|
+
pass
|
118
|
+
|
98
119
|
@abstractmethod
|
99
120
|
async def type_text(self, text: str) -> None:
|
100
121
|
"""Type the specified text."""
|
@@ -111,6 +132,11 @@ class BaseComputerInterface(ABC):
|
|
111
132
|
pass
|
112
133
|
|
113
134
|
# Scrolling Actions
|
135
|
+
@abstractmethod
|
136
|
+
async def scroll(self, x: int, y: int) -> None:
|
137
|
+
"""Scroll the mouse wheel."""
|
138
|
+
pass
|
139
|
+
|
114
140
|
@abstractmethod
|
115
141
|
async def scroll_down(self, clicks: int = 1) -> None:
|
116
142
|
"""Scroll down."""
|
@@ -166,7 +192,47 @@ class BaseComputerInterface(ABC):
|
|
166
192
|
async def directory_exists(self, path: str) -> bool:
|
167
193
|
"""Check if directory exists."""
|
168
194
|
pass
|
169
|
-
|
195
|
+
|
196
|
+
@abstractmethod
|
197
|
+
async def list_dir(self, path: str) -> List[str]:
|
198
|
+
"""List directory contents."""
|
199
|
+
pass
|
200
|
+
|
201
|
+
@abstractmethod
|
202
|
+
async def read_text(self, path: str) -> str:
|
203
|
+
"""Read file text contents."""
|
204
|
+
pass
|
205
|
+
|
206
|
+
@abstractmethod
|
207
|
+
async def write_text(self, path: str, content: str) -> None:
|
208
|
+
"""Write file text contents."""
|
209
|
+
pass
|
210
|
+
|
211
|
+
@abstractmethod
|
212
|
+
async def read_bytes(self, path: str) -> bytes:
|
213
|
+
"""Read file binary contents."""
|
214
|
+
pass
|
215
|
+
|
216
|
+
@abstractmethod
|
217
|
+
async def write_bytes(self, path: str, content: bytes) -> None:
|
218
|
+
"""Write file binary contents."""
|
219
|
+
pass
|
220
|
+
|
221
|
+
@abstractmethod
|
222
|
+
async def delete_file(self, path: str) -> None:
|
223
|
+
"""Delete file."""
|
224
|
+
pass
|
225
|
+
|
226
|
+
@abstractmethod
|
227
|
+
async def create_dir(self, path: str) -> None:
|
228
|
+
"""Create directory."""
|
229
|
+
pass
|
230
|
+
|
231
|
+
@abstractmethod
|
232
|
+
async def delete_dir(self, path: str) -> None:
|
233
|
+
"""Delete directory."""
|
234
|
+
pass
|
235
|
+
|
170
236
|
@abstractmethod
|
171
237
|
async def run_command(self, command: str) -> Tuple[str, str]:
|
172
238
|
"""Run shell command."""
|
computer/interface/linux.py
CHANGED
@@ -8,8 +8,8 @@ import websockets
|
|
8
8
|
|
9
9
|
from ..logger import Logger, LogLevel
|
10
10
|
from .base import BaseComputerInterface
|
11
|
-
from ..utils import decode_base64_image, bytes_to_image, draw_box, resize_image
|
12
|
-
from .models import Key, KeyType
|
11
|
+
from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image
|
12
|
+
from .models import Key, KeyType, MouseButton
|
13
13
|
|
14
14
|
|
15
15
|
class LinuxComputerInterface(BaseComputerInterface):
|
@@ -22,11 +22,12 @@ class LinuxComputerInterface(BaseComputerInterface):
|
|
22
22
|
self._closed = False
|
23
23
|
self._last_ping = 0
|
24
24
|
self._ping_interval = 5 # Send ping every 5 seconds
|
25
|
-
self._ping_timeout =
|
25
|
+
self._ping_timeout = 120 # Wait 120 seconds for pong response
|
26
26
|
self._reconnect_delay = 1 # Start with 1 second delay
|
27
27
|
self._max_reconnect_delay = 30 # Maximum delay between reconnection attempts
|
28
28
|
self._log_connection_attempts = True # Flag to control connection attempt logging
|
29
29
|
self._authenticated = False # Track authentication status
|
30
|
+
self._command_lock = asyncio.Lock() # Lock to ensure only one command at a time
|
30
31
|
|
31
32
|
# Set logger name for Linux interface
|
32
33
|
self.logger = Logger("cua.interface.linux", LogLevel.NORMAL)
|
@@ -86,7 +87,7 @@ class LinuxComputerInterface(BaseComputerInterface):
|
|
86
87
|
close_timeout=5,
|
87
88
|
compression=None, # Disable compression to reduce overhead
|
88
89
|
),
|
89
|
-
timeout=
|
90
|
+
timeout=120,
|
90
91
|
)
|
91
92
|
self.logger.info("WebSocket connection established")
|
92
93
|
|
@@ -193,58 +194,62 @@ class LinuxComputerInterface(BaseComputerInterface):
|
|
193
194
|
retry_count = 0
|
194
195
|
last_error = None
|
195
196
|
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
"
|
208
|
-
|
209
|
-
"
|
197
|
+
# Acquire lock to ensure only one command is processed at a time
|
198
|
+
async with self._command_lock:
|
199
|
+
self.logger.debug(f"Acquired lock for command: {command}")
|
200
|
+
while retry_count < max_retries:
|
201
|
+
try:
|
202
|
+
await self._ensure_connection()
|
203
|
+
if not self._ws:
|
204
|
+
raise ConnectionError("WebSocket connection is not established")
|
205
|
+
|
206
|
+
# Handle authentication if needed
|
207
|
+
if self.api_key and self.vm_name and not self._authenticated:
|
208
|
+
self.logger.info("Performing authentication handshake...")
|
209
|
+
auth_message = {
|
210
|
+
"command": "authenticate",
|
211
|
+
"params": {
|
212
|
+
"api_key": self.api_key,
|
213
|
+
"container_name": self.vm_name
|
214
|
+
}
|
210
215
|
}
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
216
|
+
await self._ws.send(json.dumps(auth_message))
|
217
|
+
|
218
|
+
# Wait for authentication response
|
219
|
+
auth_response = await asyncio.wait_for(self._ws.recv(), timeout=10)
|
220
|
+
auth_result = json.loads(auth_response)
|
221
|
+
|
222
|
+
if not auth_result.get("success"):
|
223
|
+
error_msg = auth_result.get("error", "Authentication failed")
|
224
|
+
self.logger.error(f"Authentication failed: {error_msg}")
|
225
|
+
self._authenticated = False
|
226
|
+
raise ConnectionError(f"Authentication failed: {error_msg}")
|
227
|
+
|
228
|
+
self.logger.info("Authentication successful")
|
229
|
+
self._authenticated = True
|
230
|
+
|
231
|
+
message = {"command": command, "params": params or {}}
|
232
|
+
await self._ws.send(json.dumps(message))
|
233
|
+
response = await asyncio.wait_for(self._ws.recv(), timeout=30)
|
234
|
+
self.logger.debug(f"Completed command: {command}")
|
235
|
+
return json.loads(response)
|
236
|
+
except Exception as e:
|
237
|
+
last_error = e
|
238
|
+
retry_count += 1
|
239
|
+
if retry_count < max_retries:
|
240
|
+
# Only log at debug level for intermediate retries
|
241
|
+
self.logger.debug(
|
242
|
+
f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
|
243
|
+
)
|
244
|
+
await asyncio.sleep(1)
|
245
|
+
continue
|
246
|
+
else:
|
247
|
+
# Only log at error level for the final failure
|
248
|
+
self.logger.error(
|
249
|
+
f"Failed to send command '{command}' after {max_retries} retries"
|
250
|
+
)
|
251
|
+
self.logger.debug(f"Command failure details: {e}")
|
252
|
+
raise last_error if last_error else RuntimeError("Failed to send command")
|
248
253
|
|
249
254
|
async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
|
250
255
|
"""Wait for WebSocket connection to become available."""
|
@@ -344,6 +349,12 @@ class LinuxComputerInterface(BaseComputerInterface):
|
|
344
349
|
self._ws = None
|
345
350
|
|
346
351
|
# Mouse Actions
|
352
|
+
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
    """Press and hold a mouse button.

    Sends a ``mouse_down`` command carrying the given coordinates and
    button. ``button`` is annotated as ``MouseButton`` to match the
    abstract base class and the macOS implementation.
    """
    await self._send_command("mouse_down", {"x": x, "y": y, "button": button})
|
354
|
+
|
355
|
+
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
    """Release a mouse button.

    Sends a ``mouse_up`` command carrying the given coordinates and
    button. ``button`` is annotated as ``MouseButton`` to match the
    abstract base class and the macOS implementation.
    """
    await self._send_command("mouse_up", {"x": x, "y": y, "button": button})
|
357
|
+
|
347
358
|
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
348
359
|
await self._send_command("left_click", {"x": x, "y": y})
|
349
360
|
|
@@ -356,17 +367,23 @@ class LinuxComputerInterface(BaseComputerInterface):
|
|
356
367
|
async def move_cursor(self, x: int, y: int) -> None:
|
357
368
|
await self._send_command("move_cursor", {"x": x, "y": y})
|
358
369
|
|
359
|
-
async def drag_to(self, x: int, y: int, button:
|
370
|
+
async def drag_to(self, x: int, y: int, button: "MouseButton" = "left", duration: float = 0.5) -> None:
|
360
371
|
await self._send_command(
|
361
372
|
"drag_to", {"x": x, "y": y, "button": button, "duration": duration}
|
362
373
|
)
|
363
374
|
|
364
|
-
async def drag(self, path: List[Tuple[int, int]], button:
|
375
|
+
async def drag(self, path: List[Tuple[int, int]], button: "MouseButton" = "left", duration: float = 0.5) -> None:
|
365
376
|
await self._send_command(
|
366
377
|
"drag", {"path": path, "button": button, "duration": duration}
|
367
378
|
)
|
368
379
|
|
369
380
|
# Keyboard Actions
|
381
|
+
async def key_down(self, key: "KeyType") -> None:
|
382
|
+
await self._send_command("key_down", {"key": key})
|
383
|
+
|
384
|
+
async def key_up(self, key: "KeyType") -> None:
|
385
|
+
await self._send_command("key_up", {"key": key})
|
386
|
+
|
370
387
|
async def type_text(self, text: str) -> None:
|
371
388
|
# Temporary fix for https://github.com/trycua/cua/issues/165
|
372
389
|
# Check if text contains Unicode characters
|
@@ -459,6 +476,9 @@ class LinuxComputerInterface(BaseComputerInterface):
|
|
459
476
|
await self._send_command("hotkey", {"keys": actual_keys})
|
460
477
|
|
461
478
|
# Scrolling Actions
|
479
|
+
async def scroll(self, x: int, y: int) -> None:
|
480
|
+
await self._send_command("scroll", {"x": x, "y": y})
|
481
|
+
|
462
482
|
async def scroll_down(self, clicks: int = 1) -> None:
|
463
483
|
await self._send_command("scroll_down", {"clicks": clicks})
|
464
484
|
|
@@ -552,6 +572,50 @@ class LinuxComputerInterface(BaseComputerInterface):
|
|
552
572
|
result = await self._send_command("directory_exists", {"path": path})
|
553
573
|
return result.get("exists", False)
|
554
574
|
|
575
|
+
async def list_dir(self, path: str) -> list[str]:
|
576
|
+
result = await self._send_command("list_dir", {"path": path})
|
577
|
+
if not result.get("success", False):
|
578
|
+
raise RuntimeError(result.get("error", "Failed to list directory"))
|
579
|
+
return result.get("files", [])
|
580
|
+
|
581
|
+
async def read_text(self, path: str) -> str:
|
582
|
+
result = await self._send_command("read_text", {"path": path})
|
583
|
+
if not result.get("success", False):
|
584
|
+
raise RuntimeError(result.get("error", "Failed to read file"))
|
585
|
+
return result.get("content", "")
|
586
|
+
|
587
|
+
async def write_text(self, path: str, content: str) -> None:
|
588
|
+
result = await self._send_command("write_text", {"path": path, "content": content})
|
589
|
+
if not result.get("success", False):
|
590
|
+
raise RuntimeError(result.get("error", "Failed to write file"))
|
591
|
+
|
592
|
+
async def read_bytes(self, path: str) -> bytes:
|
593
|
+
result = await self._send_command("read_bytes", {"path": path})
|
594
|
+
if not result.get("success", False):
|
595
|
+
raise RuntimeError(result.get("error", "Failed to read file"))
|
596
|
+
content_b64 = result.get("content_b64", "")
|
597
|
+
return decode_base64_image(content_b64)
|
598
|
+
|
599
|
+
async def write_bytes(self, path: str, content: bytes) -> None:
|
600
|
+
result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content)})
|
601
|
+
if not result.get("success", False):
|
602
|
+
raise RuntimeError(result.get("error", "Failed to write file"))
|
603
|
+
|
604
|
+
async def delete_file(self, path: str) -> None:
|
605
|
+
result = await self._send_command("delete_file", {"path": path})
|
606
|
+
if not result.get("success", False):
|
607
|
+
raise RuntimeError(result.get("error", "Failed to delete file"))
|
608
|
+
|
609
|
+
async def create_dir(self, path: str) -> None:
|
610
|
+
result = await self._send_command("create_dir", {"path": path})
|
611
|
+
if not result.get("success", False):
|
612
|
+
raise RuntimeError(result.get("error", "Failed to create directory"))
|
613
|
+
|
614
|
+
async def delete_dir(self, path: str) -> None:
|
615
|
+
result = await self._send_command("delete_dir", {"path": path})
|
616
|
+
if not result.get("success", False):
|
617
|
+
raise RuntimeError(result.get("error", "Failed to delete directory"))
|
618
|
+
|
555
619
|
async def run_command(self, command: str) -> Tuple[str, str]:
|
556
620
|
result = await self._send_command("run_command", {"command": command})
|
557
621
|
if not result.get("success", False):
|
computer/interface/macos.py
CHANGED
@@ -8,8 +8,8 @@ import websockets
|
|
8
8
|
|
9
9
|
from ..logger import Logger, LogLevel
|
10
10
|
from .base import BaseComputerInterface
|
11
|
-
from ..utils import decode_base64_image, bytes_to_image, draw_box, resize_image
|
12
|
-
from .models import Key, KeyType
|
11
|
+
from ..utils import decode_base64_image, encode_base64_image, bytes_to_image, draw_box, resize_image
|
12
|
+
from .models import Key, KeyType, MouseButton
|
13
13
|
|
14
14
|
|
15
15
|
class MacOSComputerInterface(BaseComputerInterface):
|
@@ -22,10 +22,11 @@ class MacOSComputerInterface(BaseComputerInterface):
|
|
22
22
|
self._closed = False
|
23
23
|
self._last_ping = 0
|
24
24
|
self._ping_interval = 5 # Send ping every 5 seconds
|
25
|
-
self._ping_timeout =
|
25
|
+
self._ping_timeout = 120 # Wait 120 seconds for pong response
|
26
26
|
self._reconnect_delay = 1 # Start with 1 second delay
|
27
27
|
self._max_reconnect_delay = 30 # Maximum delay between reconnection attempts
|
28
28
|
self._log_connection_attempts = True # Flag to control connection attempt logging
|
29
|
+
self._command_lock = asyncio.Lock() # Lock to ensure only one command at a time
|
29
30
|
|
30
31
|
# Set logger name for macOS interface
|
31
32
|
self.logger = Logger("cua.interface.macos", LogLevel.NORMAL)
|
@@ -85,7 +86,7 @@ class MacOSComputerInterface(BaseComputerInterface):
|
|
85
86
|
close_timeout=5,
|
86
87
|
compression=None, # Disable compression to reduce overhead
|
87
88
|
),
|
88
|
-
timeout=
|
89
|
+
timeout=120,
|
89
90
|
)
|
90
91
|
self.logger.info("WebSocket connection established")
|
91
92
|
|
@@ -219,35 +220,39 @@ class MacOSComputerInterface(BaseComputerInterface):
|
|
219
220
|
retry_count = 0
|
220
221
|
last_error = None
|
221
222
|
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
223
|
+
# Acquire lock to ensure only one command is processed at a time
|
224
|
+
async with self._command_lock:
|
225
|
+
self.logger.debug(f"Acquired lock for command: {command}")
|
226
|
+
while retry_count < max_retries:
|
227
|
+
try:
|
228
|
+
await self._ensure_connection()
|
229
|
+
if not self._ws:
|
230
|
+
raise ConnectionError("WebSocket connection is not established")
|
231
|
+
|
232
|
+
message = {"command": command, "params": params or {}}
|
233
|
+
await self._ws.send(json.dumps(message))
|
234
|
+
response = await asyncio.wait_for(self._ws.recv(), timeout=120)
|
235
|
+
self.logger.debug(f"Completed command: {command}")
|
236
|
+
return json.loads(response)
|
237
|
+
except Exception as e:
|
238
|
+
last_error = e
|
239
|
+
retry_count += 1
|
240
|
+
if retry_count < max_retries:
|
241
|
+
# Only log at debug level for intermediate retries
|
242
|
+
self.logger.debug(
|
243
|
+
f"Command '{command}' failed (attempt {retry_count}/{max_retries}): {e}"
|
244
|
+
)
|
245
|
+
await asyncio.sleep(1)
|
246
|
+
continue
|
247
|
+
else:
|
248
|
+
# Only log at error level for the final failure
|
249
|
+
self.logger.error(
|
250
|
+
f"Failed to send command '{command}' after {max_retries} retries"
|
251
|
+
)
|
252
|
+
self.logger.debug(f"Command failure details: {e}")
|
253
|
+
raise
|
249
254
|
|
250
|
-
|
255
|
+
raise last_error if last_error else RuntimeError("Failed to send command")
|
251
256
|
|
252
257
|
async def wait_for_ready(self, timeout: int = 60, interval: float = 1.0):
|
253
258
|
"""Wait for WebSocket connection to become available."""
|
@@ -351,6 +356,12 @@ class MacOSComputerInterface(BaseComputerInterface):
|
|
351
356
|
return await self._send_command("diorama_cmd", {"action": action, "arguments": arguments or {}})
|
352
357
|
|
353
358
|
# Mouse Actions
|
359
|
+
async def mouse_down(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
|
360
|
+
await self._send_command("mouse_down", {"x": x, "y": y, "button": button})
|
361
|
+
|
362
|
+
async def mouse_up(self, x: Optional[int] = None, y: Optional[int] = None, button: "MouseButton" = "left") -> None:
|
363
|
+
await self._send_command("mouse_up", {"x": x, "y": y, "button": button})
|
364
|
+
|
354
365
|
async def left_click(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
|
355
366
|
await self._send_command("left_click", {"x": x, "y": y})
|
356
367
|
|
@@ -374,6 +385,12 @@ class MacOSComputerInterface(BaseComputerInterface):
|
|
374
385
|
)
|
375
386
|
|
376
387
|
# Keyboard Actions
|
388
|
+
async def key_down(self, key: "KeyType") -> None:
|
389
|
+
await self._send_command("key_down", {"key": key})
|
390
|
+
|
391
|
+
async def key_up(self, key: "KeyType") -> None:
|
392
|
+
await self._send_command("key_up", {"key": key})
|
393
|
+
|
377
394
|
async def type_text(self, text: str) -> None:
|
378
395
|
# Temporary fix for https://github.com/trycua/cua/issues/165
|
379
396
|
# Check if text contains Unicode characters
|
@@ -466,6 +483,9 @@ class MacOSComputerInterface(BaseComputerInterface):
|
|
466
483
|
await self._send_command("hotkey", {"keys": actual_keys})
|
467
484
|
|
468
485
|
# Scrolling Actions
|
486
|
+
async def scroll(self, x: int, y: int) -> None:
|
487
|
+
await self._send_command("scroll", {"x": x, "y": y})
|
488
|
+
|
469
489
|
async def scroll_down(self, clicks: int = 1) -> None:
|
470
490
|
await self._send_command("scroll_down", {"clicks": clicks})
|
471
491
|
|
@@ -559,6 +579,50 @@ class MacOSComputerInterface(BaseComputerInterface):
|
|
559
579
|
result = await self._send_command("directory_exists", {"path": path})
|
560
580
|
return result.get("exists", False)
|
561
581
|
|
582
|
+
async def list_dir(self, path: str) -> list[str]:
|
583
|
+
result = await self._send_command("list_dir", {"path": path})
|
584
|
+
if not result.get("success", False):
|
585
|
+
raise RuntimeError(result.get("error", "Failed to list directory"))
|
586
|
+
return result.get("files", [])
|
587
|
+
|
588
|
+
async def read_text(self, path: str) -> str:
|
589
|
+
result = await self._send_command("read_text", {"path": path})
|
590
|
+
if not result.get("success", False):
|
591
|
+
raise RuntimeError(result.get("error", "Failed to read file"))
|
592
|
+
return result.get("content", "")
|
593
|
+
|
594
|
+
async def write_text(self, path: str, content: str) -> None:
|
595
|
+
result = await self._send_command("write_text", {"path": path, "content": content})
|
596
|
+
if not result.get("success", False):
|
597
|
+
raise RuntimeError(result.get("error", "Failed to write file"))
|
598
|
+
|
599
|
+
async def read_bytes(self, path: str) -> bytes:
|
600
|
+
result = await self._send_command("read_bytes", {"path": path})
|
601
|
+
if not result.get("success", False):
|
602
|
+
raise RuntimeError(result.get("error", "Failed to read file"))
|
603
|
+
content_b64 = result.get("content_b64", "")
|
604
|
+
return decode_base64_image(content_b64)
|
605
|
+
|
606
|
+
async def write_bytes(self, path: str, content: bytes) -> None:
|
607
|
+
result = await self._send_command("write_bytes", {"path": path, "content_b64": encode_base64_image(content)})
|
608
|
+
if not result.get("success", False):
|
609
|
+
raise RuntimeError(result.get("error", "Failed to write file"))
|
610
|
+
|
611
|
+
async def delete_file(self, path: str) -> None:
|
612
|
+
result = await self._send_command("delete_file", {"path": path})
|
613
|
+
if not result.get("success", False):
|
614
|
+
raise RuntimeError(result.get("error", "Failed to delete file"))
|
615
|
+
|
616
|
+
async def create_dir(self, path: str) -> None:
|
617
|
+
result = await self._send_command("create_dir", {"path": path})
|
618
|
+
if not result.get("success", False):
|
619
|
+
raise RuntimeError(result.get("error", "Failed to create directory"))
|
620
|
+
|
621
|
+
async def delete_dir(self, path: str) -> None:
|
622
|
+
result = await self._send_command("delete_dir", {"path": path})
|
623
|
+
if not result.get("success", False):
|
624
|
+
raise RuntimeError(result.get("error", "Failed to delete directory"))
|
625
|
+
|
562
626
|
async def run_command(self, command: str) -> Tuple[str, str]:
|
563
627
|
result = await self._send_command("run_command", {"command": command})
|
564
628
|
if not result.get("success", False):
|
computer/interface/models.py
CHANGED
@@ -106,6 +106,9 @@ class Key(Enum):
|
|
106
106
|
# Combined key type
|
107
107
|
KeyType = Union[Key, NavigationKey, SpecialKey, ModifierKey, FunctionKey, str]
|
108
108
|
|
109
|
+
# Key type for mouse actions
|
110
|
+
MouseButton = Literal['left', 'right', 'middle']
|
111
|
+
|
109
112
|
class AccessibilityWindow(TypedDict):
|
110
113
|
"""Information about a window in the accessibility tree."""
|
111
114
|
app_name: str
|
@@ -52,11 +52,11 @@ class CloudProvider(BaseVMProvider):
|
|
52
52
|
return []
|
53
53
|
|
54
54
|
async def run_vm(self, image: str, name: str, run_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
|
55
|
-
logger.warning("CloudProvider.run_vm is not implemented")
|
56
|
-
return {"name": name, "status": "unavailable", "message": "CloudProvider is not implemented"}
|
55
|
+
# logger.warning("CloudProvider.run_vm is not implemented")
|
56
|
+
return {"name": name, "status": "unavailable", "message": "CloudProvider.run_vm is not implemented"}
|
57
57
|
|
58
58
|
async def stop_vm(self, name: str, storage: Optional[str] = None) -> Dict[str, Any]:
|
59
|
-
logger.warning("CloudProvider.stop_vm is not implemented")
|
59
|
+
logger.warning("CloudProvider.stop_vm is not implemented. To clean up resources, please use Computer.disconnect()")
|
60
60
|
return {"name": name, "status": "stopped", "message": "CloudProvider is not implemented"}
|
61
61
|
|
62
62
|
async def update_vm(self, name: str, update_opts: Dict[str, Any], storage: Optional[str] = None) -> Dict[str, Any]:
|
computer/ui/gradio/app.py
CHANGED
@@ -463,7 +463,7 @@ async def execute(name, action, arguments):
|
|
463
463
|
elif action == "left_click":
|
464
464
|
if "x" in arguments and "y" in arguments:
|
465
465
|
await computer.interface.move_cursor(arguments["x"], arguments["y"])
|
466
|
-
await computer.interface.left_click()
|
466
|
+
await computer.interface.left_click(arguments["x"], arguments["y"])
|
467
467
|
await asyncio.sleep(0.5)
|
468
468
|
elif action == "right_click":
|
469
469
|
if "x" in arguments and "y" in arguments:
|
@@ -528,43 +528,75 @@ async def execute(name, action, arguments):
|
|
528
528
|
|
529
529
|
return results
|
530
530
|
|
531
|
-
async def handle_init_computer(os_choice: str):
|
532
|
-
"""Initialize the computer instance and tools for macOS or Ubuntu
|
531
|
+
async def handle_init_computer(os_choice: str, app_list=None, provider="lume"):
|
532
|
+
"""Initialize the computer instance and tools for macOS or Ubuntu
|
533
|
+
|
534
|
+
Args:
|
535
|
+
os_choice: The OS to use ("macOS" or "Ubuntu")
|
536
|
+
app_list: Optional list of apps to focus on using the app-use experiment
|
537
|
+
provider: The provider to use ("lume" or "self")
|
538
|
+
"""
|
533
539
|
global computer, tool_call_logs, tools
|
534
|
-
|
540
|
+
|
541
|
+
# Check if we should enable app-use experiment
|
542
|
+
use_app_experiment = app_list and len(app_list) > 0
|
543
|
+
experiments = ["app-use"] if use_app_experiment else None
|
544
|
+
|
545
|
+
# Determine if we should use host computer server
|
546
|
+
use_host_computer_server = provider == "self"
|
547
|
+
|
535
548
|
if os_choice == "Ubuntu":
|
536
|
-
computer = Computer(
|
537
|
-
image="ubuntu-noble-vanilla:latest",
|
538
|
-
os_type="linux",
|
539
|
-
provider_type=VMProviderType.LUME,
|
540
|
-
display="1024x768",
|
541
|
-
memory="8GB",
|
542
|
-
cpu="4"
|
543
|
-
)
|
544
549
|
os_type_str = "linux"
|
545
550
|
image_str = "ubuntu-noble-vanilla:latest"
|
551
|
+
else:
|
552
|
+
os_type_str = "macos"
|
553
|
+
image_str = "macos-sequoia-cua:latest"
|
554
|
+
|
555
|
+
# Create computer instance with appropriate configuration
|
556
|
+
if use_host_computer_server:
|
557
|
+
computer = Computer(
|
558
|
+
os_type=os_type_str,
|
559
|
+
use_host_computer_server=True,
|
560
|
+
experiments=experiments
|
561
|
+
)
|
546
562
|
else:
|
547
563
|
computer = Computer(
|
548
|
-
image=
|
549
|
-
os_type=
|
564
|
+
image=image_str,
|
565
|
+
os_type=os_type_str,
|
550
566
|
provider_type=VMProviderType.LUME,
|
551
567
|
display="1024x768",
|
552
568
|
memory="8GB",
|
553
|
-
cpu="4"
|
569
|
+
cpu="4",
|
570
|
+
experiments=experiments
|
554
571
|
)
|
555
|
-
os_type_str = "macos"
|
556
|
-
image_str = "macos-sequoia-cua:latest"
|
557
572
|
|
558
573
|
await computer.run()
|
574
|
+
|
575
|
+
# If app list is provided, create desktop from apps
|
576
|
+
if use_app_experiment:
|
577
|
+
computer = computer.create_desktop_from_apps(app_list)
|
559
578
|
|
560
579
|
# Log computer initialization as a tool call
|
561
|
-
|
580
|
+
init_params = {
|
562
581
|
"os": os_type_str,
|
563
|
-
"
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
582
|
+
"provider": provider
|
583
|
+
}
|
584
|
+
|
585
|
+
# Add VM-specific parameters if not using host computer server
|
586
|
+
if not use_host_computer_server:
|
587
|
+
init_params.update({
|
588
|
+
"image": image_str,
|
589
|
+
"display": "1024x768",
|
590
|
+
"memory": "8GB",
|
591
|
+
"cpu": "4"
|
592
|
+
})
|
593
|
+
|
594
|
+
# Add app list to the log if provided
|
595
|
+
if use_app_experiment:
|
596
|
+
init_params["apps"] = app_list
|
597
|
+
init_params["experiments"] = ["app-use"]
|
598
|
+
|
599
|
+
result = await execute("computer", "initialize", init_params)
|
568
600
|
|
569
601
|
return result["screenshot"], json.dumps(tool_call_logs, indent=2)
|
570
602
|
|
@@ -1029,12 +1061,31 @@ def create_gradio_ui():
|
|
1029
1061
|
setup_status = gr.Textbox(label="Setup Status", value="")
|
1030
1062
|
|
1031
1063
|
with gr.Group():
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
|
1064
|
+
with gr.Accordion("Computer Configuration", open=False):
|
1065
|
+
with gr.Row():
|
1066
|
+
os_choice = gr.Radio(
|
1067
|
+
label="OS",
|
1068
|
+
choices=["macOS", "Ubuntu"],
|
1069
|
+
value="macOS",
|
1070
|
+
interactive=False # disable until the ubuntu image is ready
|
1071
|
+
)
|
1072
|
+
|
1073
|
+
# Provider selection radio
|
1074
|
+
provider_choice = gr.Radio(
|
1075
|
+
label="Provider",
|
1076
|
+
choices=["lume", "self"],
|
1077
|
+
value="lume",
|
1078
|
+
info="'lume' uses a VM, 'self' uses the host computer server"
|
1079
|
+
)
|
1080
|
+
|
1081
|
+
# App filtering dropdown for app-use experiment
|
1082
|
+
app_filter = gr.Dropdown(
|
1083
|
+
label="Filter by apps (App-Use)",
|
1084
|
+
multiselect=True,
|
1085
|
+
allow_custom_value=True,
|
1086
|
+
info="When apps are selected, the computer will focus on those apps using the app-use experiment"
|
1087
|
+
)
|
1088
|
+
|
1038
1089
|
start_btn = gr.Button("Initialize Computer")
|
1039
1090
|
|
1040
1091
|
with gr.Group():
|
@@ -1199,7 +1250,7 @@ def create_gradio_ui():
|
|
1199
1250
|
)
|
1200
1251
|
|
1201
1252
|
img.select(handle_click, inputs=[img, click_type], outputs=[img, action_log])
|
1202
|
-
start_btn.click(handle_init_computer, inputs=[os_choice], outputs=[img, action_log])
|
1253
|
+
start_btn.click(handle_init_computer, inputs=[os_choice, app_filter, provider_choice], outputs=[img, action_log])
|
1203
1254
|
wait_btn.click(handle_wait, outputs=[img, action_log])
|
1204
1255
|
|
1205
1256
|
# DONE and FAIL buttons just do a placeholder action
|
@@ -1,18 +1,19 @@
|
|
1
1
|
computer/__init__.py,sha256=QOxNrrJAuLRnsUC2zIFgRfzVSuDSXiYHlEF-9vkhV0o,1241
|
2
|
-
computer/computer.py,sha256=
|
3
|
-
computer/diorama_computer.py,sha256=
|
2
|
+
computer/computer.py,sha256=vFJEyMkvTlT49SEO1QgLe8yMX6DbvdI9eDWjSd3CwCQ,40555
|
3
|
+
computer/diorama_computer.py,sha256=jOP7_eXxxU6SMIoE25ni0YXPK0E7p5sZeLKmkYLh6G8,3871
|
4
|
+
computer/helpers.py,sha256=0ob9d9ynVGi0JRxhHCgXTuHPHFpa8AVKldn6k0hvxOo,1766
|
4
5
|
computer/interface/__init__.py,sha256=xQvYjq5PMn9ZJOmRR5mWtONTl_0HVd8ACvW6AQnzDdw,262
|
5
|
-
computer/interface/base.py,sha256=
|
6
|
+
computer/interface/base.py,sha256=Uc3pp-8_9YJpawAwt1ixaVN3N0_MtY6nAOSvuKy0Mpc,7863
|
6
7
|
computer/interface/factory.py,sha256=RjAZAB_jFuS8JierYjLbapRX6RqFE0qE3BiIyP5UDOE,1441
|
7
|
-
computer/interface/linux.py,sha256=
|
8
|
-
computer/interface/macos.py,sha256=
|
9
|
-
computer/interface/models.py,sha256=
|
8
|
+
computer/interface/linux.py,sha256=40SXd-xqYWFUaTnx3Tf7lIDEtluNwYoDkCZaESkIvRE,30468
|
9
|
+
computer/interface/macos.py,sha256=uFU9bmPJqPPxlUBw9u1TG3ksqXqB4azJ0pYYx9cRM6w,30848
|
10
|
+
computer/interface/models.py,sha256=CYbX3PLlWqjFuDiLWMiBzPmmXB8_g9VNLfBFBC6RtvI,3317
|
10
11
|
computer/logger.py,sha256=UVvnmZGOWVF9TCsixEbeQnDZ3wBPAJ2anW3Zp-MoJ8Y,2896
|
11
12
|
computer/models.py,sha256=iFNM1QfZArD8uf66XJXb2EDIREsfrxqqA5_liLBMfrE,1188
|
12
13
|
computer/providers/__init__.py,sha256=hS9lLxmmHa1u82XJJ_xuqSKipClsYUEPx-8OK9ogtVg,194
|
13
14
|
computer/providers/base.py,sha256=J_9r6pJsvGAFDRl56jog_atN7e8uzrvlCQEdRRqye_U,3624
|
14
15
|
computer/providers/cloud/__init__.py,sha256=SDAcfhI2BlmVBrBZOHxQd3i1bJZjMIfl7QgmqjXa4z8,144
|
15
|
-
computer/providers/cloud/provider.py,sha256=
|
16
|
+
computer/providers/cloud/provider.py,sha256=XEdCrnZzRwvvkPHIwfhfJl3xB6W7tZKdBI0duKEXLw4,2930
|
16
17
|
computer/providers/factory.py,sha256=9qVdt-fIovSNOokGMZ_2B1VPCLSZeDky4edcXyelZy4,4616
|
17
18
|
computer/providers/lume/__init__.py,sha256=E6hTbVQF5lLZD8JyG4rTwUnCBO4q9K8UkYNQ31R0h7c,193
|
18
19
|
computer/providers/lume/provider.py,sha256=grLZeXd4Y8iYsNq2gfNGcQq1bnTcNYNepEv-mxmROG4,20562
|
@@ -22,9 +23,9 @@ computer/providers/lumier/provider.py,sha256=CXwAKwJfR9ALFGM5u7UIZ-YrFwPvew_01wT
|
|
22
23
|
computer/telemetry.py,sha256=FvNFpxgeRuCMdNpREuSL7bOMZy9gSzY4J0rLeNDw0CU,3746
|
23
24
|
computer/ui/__init__.py,sha256=pmo05ek9qiB_x7DPeE6Vf_8RsIOqTD0w1dBLMHfoOnY,45
|
24
25
|
computer/ui/gradio/__init__.py,sha256=5_KimixM48-X74FCsLw7LbSt39MQfUMEL8-M9amK3Cw,117
|
25
|
-
computer/ui/gradio/app.py,sha256=
|
26
|
+
computer/ui/gradio/app.py,sha256=pLMoMpxyKsGhg9wlsiqyKiRujd-lzubs0nGWAtkleL0,67316
|
26
27
|
computer/utils.py,sha256=zY50NXB7r51GNLQ6l7lhG_qv0_ufpQ8n0-SDhCei8m4,2838
|
27
|
-
cua_computer-0.2.
|
28
|
-
cua_computer-0.2.
|
29
|
-
cua_computer-0.2.
|
30
|
-
cua_computer-0.2.
|
28
|
+
cua_computer-0.2.10.dist-info/METADATA,sha256=dC5av4YtGJH20X77m7FPpn8J1bUFmU1p1J7qJX32HGs,5845
|
29
|
+
cua_computer-0.2.10.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
|
30
|
+
cua_computer-0.2.10.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
31
|
+
cua_computer-0.2.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|