cua-agent 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +21 -12
- agent/__main__.py +21 -0
- agent/adapters/__init__.py +9 -0
- agent/adapters/huggingfacelocal_adapter.py +229 -0
- agent/agent.py +594 -0
- agent/callbacks/__init__.py +19 -0
- agent/callbacks/base.py +153 -0
- agent/callbacks/budget_manager.py +44 -0
- agent/callbacks/image_retention.py +139 -0
- agent/callbacks/logging.py +247 -0
- agent/callbacks/pii_anonymization.py +259 -0
- agent/callbacks/telemetry.py +210 -0
- agent/callbacks/trajectory_saver.py +305 -0
- agent/cli.py +297 -0
- agent/computer_handler.py +107 -0
- agent/decorators.py +90 -0
- agent/loops/__init__.py +11 -0
- agent/loops/anthropic.py +728 -0
- agent/loops/omniparser.py +339 -0
- agent/loops/openai.py +95 -0
- agent/loops/uitars.py +688 -0
- agent/responses.py +207 -0
- agent/telemetry.py +135 -14
- agent/types.py +79 -0
- agent/ui/__init__.py +7 -1
- agent/ui/__main__.py +2 -13
- agent/ui/gradio/__init__.py +6 -19
- agent/ui/gradio/app.py +94 -1313
- agent/ui/gradio/ui_components.py +721 -0
- cua_agent-0.4.0.dist-info/METADATA +424 -0
- cua_agent-0.4.0.dist-info/RECORD +33 -0
- {cua_agent-0.3.1.dist-info → cua_agent-0.4.0.dist-info}/WHEEL +1 -1
- agent/core/__init__.py +0 -27
- agent/core/agent.py +0 -210
- agent/core/base.py +0 -217
- agent/core/callbacks.py +0 -200
- agent/core/experiment.py +0 -249
- agent/core/factory.py +0 -122
- agent/core/messages.py +0 -332
- agent/core/provider_config.py +0 -21
- agent/core/telemetry.py +0 -142
- agent/core/tools/__init__.py +0 -21
- agent/core/tools/base.py +0 -74
- agent/core/tools/bash.py +0 -52
- agent/core/tools/collection.py +0 -46
- agent/core/tools/computer.py +0 -113
- agent/core/tools/edit.py +0 -67
- agent/core/tools/manager.py +0 -56
- agent/core/tools.py +0 -32
- agent/core/types.py +0 -88
- agent/core/visualization.py +0 -197
- agent/providers/__init__.py +0 -4
- agent/providers/anthropic/__init__.py +0 -6
- agent/providers/anthropic/api/client.py +0 -360
- agent/providers/anthropic/api/logging.py +0 -150
- agent/providers/anthropic/api_handler.py +0 -140
- agent/providers/anthropic/callbacks/__init__.py +0 -5
- agent/providers/anthropic/callbacks/manager.py +0 -65
- agent/providers/anthropic/loop.py +0 -568
- agent/providers/anthropic/prompts.py +0 -23
- agent/providers/anthropic/response_handler.py +0 -226
- agent/providers/anthropic/tools/__init__.py +0 -33
- agent/providers/anthropic/tools/base.py +0 -88
- agent/providers/anthropic/tools/bash.py +0 -66
- agent/providers/anthropic/tools/collection.py +0 -34
- agent/providers/anthropic/tools/computer.py +0 -396
- agent/providers/anthropic/tools/edit.py +0 -326
- agent/providers/anthropic/tools/manager.py +0 -54
- agent/providers/anthropic/tools/run.py +0 -42
- agent/providers/anthropic/types.py +0 -16
- agent/providers/anthropic/utils.py +0 -367
- agent/providers/omni/__init__.py +0 -8
- agent/providers/omni/api_handler.py +0 -42
- agent/providers/omni/clients/anthropic.py +0 -103
- agent/providers/omni/clients/base.py +0 -35
- agent/providers/omni/clients/oaicompat.py +0 -195
- agent/providers/omni/clients/ollama.py +0 -122
- agent/providers/omni/clients/openai.py +0 -155
- agent/providers/omni/clients/utils.py +0 -25
- agent/providers/omni/image_utils.py +0 -34
- agent/providers/omni/loop.py +0 -990
- agent/providers/omni/parser.py +0 -307
- agent/providers/omni/prompts.py +0 -64
- agent/providers/omni/tools/__init__.py +0 -30
- agent/providers/omni/tools/base.py +0 -29
- agent/providers/omni/tools/bash.py +0 -74
- agent/providers/omni/tools/computer.py +0 -179
- agent/providers/omni/tools/manager.py +0 -61
- agent/providers/omni/utils.py +0 -236
- agent/providers/openai/__init__.py +0 -6
- agent/providers/openai/api_handler.py +0 -456
- agent/providers/openai/loop.py +0 -472
- agent/providers/openai/response_handler.py +0 -205
- agent/providers/openai/tools/__init__.py +0 -15
- agent/providers/openai/tools/base.py +0 -79
- agent/providers/openai/tools/computer.py +0 -326
- agent/providers/openai/tools/manager.py +0 -106
- agent/providers/openai/types.py +0 -36
- agent/providers/openai/utils.py +0 -98
- agent/providers/uitars/__init__.py +0 -1
- agent/providers/uitars/clients/base.py +0 -35
- agent/providers/uitars/clients/mlxvlm.py +0 -263
- agent/providers/uitars/clients/oaicompat.py +0 -214
- agent/providers/uitars/loop.py +0 -660
- agent/providers/uitars/prompts.py +0 -63
- agent/providers/uitars/tools/__init__.py +0 -1
- agent/providers/uitars/tools/computer.py +0 -283
- agent/providers/uitars/tools/manager.py +0 -60
- agent/providers/uitars/utils.py +0 -264
- cua_agent-0.3.1.dist-info/METADATA +0 -295
- cua_agent-0.3.1.dist-info/RECORD +0 -87
- {cua_agent-0.3.1.dist-info → cua_agent-0.4.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Computer handler implementation for OpenAI computer-use-preview protocol.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
from typing import Dict, List, Any, Literal
|
|
7
|
+
from .types import Computer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class OpenAIComputerHandler:
    """Computer handler that implements the Computer protocol using the computer interface.

    Every action is an async method that forwards to the wrapped
    ``computer_interface`` object supplied at construction time.
    """

    def __init__(self, computer_interface):
        """Initialize with a computer interface (from tool schema).

        Args:
            computer_interface: Object whose async methods (``left_click``,
                ``screenshot``, ``move_cursor``, ...) perform the real actions.
        """
        self.interface = computer_interface

    async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
        """Get the current environment type.

        Returns:
            Always ``"windows"``; no detection of the actual environment is done yet.
        """
        # For now, return a default - this could be enhanced to detect actual environment
        return "windows"

    async def get_dimensions(self) -> tuple[int, int]:
        """Get screen dimensions as ``(width, height)``.

        Returns:
            The ``"width"`` and ``"height"`` entries of the interface's screen size.
        """
        screen_size = await self.interface.get_screen_size()
        return screen_size["width"], screen_size["height"]

    async def screenshot(self) -> str:
        """Take a screenshot and return it as a base64-encoded string."""
        screenshot_bytes = await self.interface.screenshot()
        return base64.b64encode(screenshot_bytes).decode('utf-8')

    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Click at coordinates with the specified button.

        Args:
            x: Horizontal coordinate of the click.
            y: Vertical coordinate of the click.
            button: ``"left"`` or ``"right"``; any other value is treated as
                a left click.
        """
        if button == "right":
            await self.interface.right_click(x, y)
        else:
            # "left" and every unknown button name both resolve to a left click.
            await self.interface.left_click(x, y)

    async def double_click(self, x: int, y: int) -> None:
        """Double click at coordinates."""
        await self.interface.double_click(x, y)

    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Scroll at coordinates with the specified scroll amounts.

        The cursor is first moved to ``(x, y)``, then the interface is asked
        to scroll by ``(scroll_x, scroll_y)``.
        """
        await self.interface.move_cursor(x, y)
        await self.interface.scroll(scroll_x, scroll_y)

    async def type(self, text: str) -> None:
        """Type text."""
        await self.interface.type_text(text)

    async def wait(self, ms: int = 1000) -> None:
        """Wait for the specified number of milliseconds."""
        # Imported locally: asyncio is not among this module's top-level imports.
        import asyncio
        await asyncio.sleep(ms / 1000.0)

    async def move(self, x: int, y: int) -> None:
        """Move cursor to coordinates."""
        await self.interface.move_cursor(x, y)

    async def keypress(self, keys: List[str]) -> None:
        """Press a key or key combination.

        Args:
            keys: Key names. One key is sent with ``press_key``; several keys
                are sent together with ``hotkey``. An empty list does nothing.
        """
        if not keys:
            # Nothing to press; avoid calling hotkey() without any keys.
            return
        if len(keys) == 1:
            await self.interface.press_key(keys[0])
        else:
            # Handle key combinations
            await self.interface.hotkey(*keys)

    async def drag(self, path: List[Dict[str, int]]) -> None:
        """Drag along the specified path.

        Args:
            path: Points as dicts with ``"x"`` and ``"y"`` keys. The button is
                pressed at the first point and released at the last one. An
                empty path does nothing.

        Raises:
            Whatever the interface raises while moving; the button is still
            released before the error propagates.
        """
        if not path:
            return

        # Start drag from first point
        start = path[0]
        await self.interface.mouse_down(start["x"], start["y"])

        # Remember the last point actually reached so that a failure part-way
        # through releases the button there instead of leaving it held down.
        reached = start
        try:
            # Move through path
            for point in path[1:]:
                await self.interface.move_cursor(point["x"], point["y"])
                reached = point
        finally:
            # On success this is the last point of the path.
            await self.interface.mouse_up(reached["x"], reached["y"])

    async def get_current_url(self) -> str:
        """Get current URL (for browser environments).

        Returns:
            Always an empty string; no browser interface is queried yet.
        """
        # This would need to be implemented based on the specific browser interface
        # For now, return empty string
        return ""
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def acknowledge_safety_check_callback(message: str) -> bool:
    """Safety check callback for user acknowledgment.

    Prompts on standard input and reports whether the user agreed to proceed.

    Args:
        message: Warning text shown to the user in the prompt.

    Returns:
        True only when the reply, ignoring case and surrounding whitespace,
        is ``"y"``. False for any other reply, and also when no reply can be
        read because standard input is at end-of-file.
    """
    prompt = f"Safety Check Warning: {message}\nDo you want to acknowledge and proceed? (y/n): "
    try:
        response = input(prompt)
    except EOFError:
        # No answer is available (closed or exhausted stdin), so the warning
        # cannot have been acknowledged; decline rather than crash.
        return False
    return response.strip().lower() == "y"
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def check_blocklisted_url(url: str) -> None:
    """Placeholder URL blocklist check.

    No checking logic exists yet: every URL is accepted and nothing is raised.

    Args:
        url: The URL that would be checked.
    """
    # Real URL checking logic would go here.
    return None
|
agent/decorators.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Decorators for agent - agent_loop decorator
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import inspect
|
|
7
|
+
from typing import Dict, List, Any, Callable, Optional
|
|
8
|
+
from functools import wraps
|
|
9
|
+
|
|
10
|
+
from .types import AgentLoopInfo
|
|
11
|
+
|
|
12
|
+
# Module-wide registry of agent loops; entries are added by the agent_loop
# decorator, which keeps the list ordered by descending priority.
_agent_loops: List[AgentLoopInfo] = []
|
|
14
|
+
|
|
15
|
+
def agent_loop(models: str, priority: int = 0):
    """
    Decorator to register an agent loop function.

    The undecorated function is recorded in the module registry, and an async
    wrapper is returned that runs the function in its own task.

    Args:
        models: Regex pattern to match supported models
        priority: Priority for loop selection (higher = more priority)

    Returns:
        A decorator. Applying it raises ValueError when the function does not
        declare both a ``messages`` and a ``model`` parameter.
    """
    def decorator(func: Callable):
        # Validate function signature
        sig = inspect.signature(func)
        required_params = {'messages', 'model'}
        func_params = set(sig.parameters.keys())

        if not required_params.issubset(func_params):
            missing = required_params - func_params
            raise ValueError(f"Agent loop function must have parameters: {missing}")

        # Register the loop
        loop_info = AgentLoopInfo(
            func=func,
            models_regex=models,
            priority=priority
        )
        _agent_loops.append(loop_info)

        # Sort by priority (highest first)
        _agent_loops.sort(key=lambda x: x.priority, reverse=True)

        @wraps(func)
        async def wrapper(*args, **kwargs):
            async def run_loop():
                return await func(*args, **kwargs)

            # Run the loop in its own task so it can be cancelled explicitly.
            task = asyncio.create_task(run_loop())

            try:
                # Awaiting the task delivers its result or re-raises whatever
                # it raised -- including CancelledError and other
                # BaseException subclasses, which would otherwise leave this
                # wrapper waiting forever. shield() keeps a cancellation of
                # the caller from reaching the task implicitly; the handler
                # below cancels it and waits for it to finish.
                return await asyncio.shield(task)
            except asyncio.CancelledError:
                task.cancel()  # no-op when the task has already finished
                try:
                    await task
                except (asyncio.CancelledError, Exception):
                    # The cancellation takes precedence over anything the
                    # loop raised while shutting down.
                    pass
                raise

        return wrapper

    return decorator
|
|
80
|
+
|
|
81
|
+
def get_agent_loops() -> List[AgentLoopInfo]:
    """Return a new list holding every registered agent loop.

    The result is a shallow copy, so changing the returned list does not
    affect the registry itself.
    """
    return list(_agent_loops)
|
|
84
|
+
|
|
85
|
+
def find_agent_loop(model: str) -> Optional[AgentLoopInfo]:
    """Return the first registered agent loop that matches ``model``.

    Loops are examined in registry order, and the first one whose
    ``matches_model(model)`` is truthy wins.

    Args:
        model: Model name to look up.

    Returns:
        The matching loop info, or None when no registered loop matches.
    """
    matching = (info for info in _agent_loops if info.matches_model(model))
    return next(matching, None)
|
agent/loops/__init__.py
ADDED