cua-agent 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/adapters/__init__.py +2 -0
- agent/adapters/mlxvlm_adapter.py +359 -0
- agent/agent.py +14 -3
- agent/callbacks/__init__.py +2 -0
- agent/callbacks/operator_validator.py +138 -0
- agent/callbacks/trajectory_saver.py +87 -5
- agent/integrations/hud/__init__.py +223 -72
- agent/integrations/hud/proxy.py +183 -0
- agent/loops/anthropic.py +12 -1
- agent/loops/composed_grounded.py +26 -14
- agent/loops/openai.py +15 -7
- agent/loops/uitars.py +17 -8
- agent/proxy/examples.py +192 -0
- agent/proxy/handlers.py +248 -0
- {cua_agent-0.4.17.dist-info → cua_agent-0.4.19.dist-info}/METADATA +3 -3
- {cua_agent-0.4.17.dist-info → cua_agent-0.4.19.dist-info}/RECORD +18 -16
- agent/integrations/hud/adapter.py +0 -121
- agent/integrations/hud/agent.py +0 -373
- agent/integrations/hud/computer_handler.py +0 -187
- {cua_agent-0.4.17.dist-info → cua_agent-0.4.19.dist-info}/WHEEL +0 -0
- {cua_agent-0.4.17.dist-info → cua_agent-0.4.19.dist-info}/entry_points.txt +0 -0
|
@@ -1,187 +0,0 @@
|
|
|
1
|
-
"""HUD Computer Handler for ComputerAgent integration."""
|
|
2
|
-
|
|
3
|
-
import base64
|
|
4
|
-
from io import BytesIO
|
|
5
|
-
from typing import Literal, Optional, Any, Dict, Callable
|
|
6
|
-
from PIL import Image
|
|
7
|
-
|
|
8
|
-
from agent.computers import AsyncComputerHandler
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class HUDComputerHandler(AsyncComputerHandler):
    """Computer handler that interfaces with HUD environment.

    The handler does not drive a machine itself. Screenshots are obtained
    from an optional screenshot callback and every input action is forwarded
    as a plain ``dict`` to an optional action callback. When a callback is
    missing, the corresponding operation is a no-op (or, for screenshots,
    falls back to the last known / a blank image).
    """

    def __init__(
        self,
        environment: Literal["windows", "mac", "linux", "browser"] = "linux",
        dimensions: tuple[int, int] = (1024, 768),
        screenshot_callback: Optional[Callable] = None,
        action_callback: Optional[Callable] = None,
    ):
        """
        Initialize HUD computer handler.

        Args:
            environment: The environment type for HUD
            dimensions: Screen dimensions as (width, height)
            screenshot_callback: Optional callback to get screenshots from HUD environment
            action_callback: Optional callback to execute actions in HUD environment
        """
        super().__init__()
        self._environment = environment
        self._dimensions = dimensions
        self._screenshot_callback = screenshot_callback
        self._action_callback = action_callback

        # Store the last screenshot (base64 string) for reuse when no fresh
        # capture is available.
        self._last_screenshot: Optional[str] = None

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    async def _run_callback(callback: Callable, *args: Any) -> Any:
        """Call ``callback`` and await its result only if it is awaitable.

        The callbacks are typed as plain ``Callable``; awaiting the result
        conditionally lets both coroutine functions and ordinary functions
        be used without a ``TypeError``.
        """
        import inspect  # local import: keeps the block self-contained

        outcome = callback(*args)
        if inspect.isawaitable(outcome):
            outcome = await outcome
        return outcome

    async def _send_action(self, action: Dict[str, Any]) -> Any:
        """Forward ``action`` to the action callback, if one is set.

        Returns:
            The callback's result, or ``None`` when no callback is configured.
        """
        if not self._action_callback:
            return None
        return await self._run_callback(self._action_callback, action)

    @staticmethod
    def _png_base64(image: Any) -> str:
        """Encode a PIL image as a base64 PNG string."""
        buffer = BytesIO()
        image.save(buffer, format="PNG")
        return base64.b64encode(buffer.getvalue()).decode()

    def _normalize_screenshot(self, captured: Any) -> Optional[str]:
        """Convert a callback result to a base64 string.

        Accepts a ``str`` (returned unchanged), a PIL ``Image`` (PNG-encoded)
        or raw ``bytes`` (base64-encoded). Any other value yields ``None`` so
        the caller can fall back.
        """
        if isinstance(captured, str):
            return captured
        if isinstance(captured, Image.Image):
            return self._png_base64(captured)
        if isinstance(captured, bytes):
            return base64.b64encode(captured).decode()
        return None

    # ------------------------------------------------------------------
    # Configuration
    # ------------------------------------------------------------------

    def set_screenshot_callback(self, callback: Callable) -> None:
        """Set the screenshot callback."""
        self._screenshot_callback = callback

    def set_action_callback(self, callback: Callable) -> None:
        """Set the action callback."""
        self._action_callback = callback

    def update_screenshot(self, screenshot: str) -> None:
        """Update the stored screenshot (base64 string)."""
        self._last_screenshot = screenshot

    # ------------------------------------------------------------------
    # Environment queries
    # ------------------------------------------------------------------

    async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
        """Get the current environment type."""
        return self._environment  # type: ignore

    async def get_dimensions(self) -> tuple[int, int]:
        """Get screen dimensions as (width, height)."""
        return self._dimensions

    async def screenshot(self) -> str:
        """Take a screenshot and return as base64 string.

        Resolution order:
            1. The screenshot callback's result, if it is a ``str``, PIL
               image or ``bytes``.
            2. The last stored screenshot, if any.
            3. A blank white PNG of the configured dimensions.

        Whatever is returned from steps 1 or 3 is also stored as the last
        screenshot.
        """
        if self._screenshot_callback:
            captured = await self._run_callback(self._screenshot_callback)
            encoded = self._normalize_screenshot(captured)
            if encoded is not None:
                self._last_screenshot = encoded
                return encoded

        # Return last screenshot if available, otherwise create a blank one
        if self._last_screenshot:
            return self._last_screenshot

        # Create a blank screenshot as fallback
        blank_image = Image.new('RGB', self._dimensions, color='white')
        blank_b64 = self._png_base64(blank_image)
        self._last_screenshot = blank_b64
        return blank_b64

    # ------------------------------------------------------------------
    # Input actions (each is forwarded to the action callback as a dict)
    # ------------------------------------------------------------------

    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Click at coordinates with specified button."""
        await self._send_action({
            "type": "click",
            "x": x,
            "y": y,
            "button": button
        })

    async def double_click(self, x: int, y: int) -> None:
        """Double click at coordinates."""
        await self._send_action({
            "type": "double_click",
            "x": x,
            "y": y
        })

    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Scroll at coordinates with specified scroll amounts."""
        await self._send_action({
            "type": "scroll",
            "x": x,
            "y": y,
            "scroll_x": scroll_x,
            "scroll_y": scroll_y
        })

    async def type(self, text: str) -> None:
        """Type text."""
        await self._send_action({
            "type": "type",
            "text": text
        })

    async def wait(self, ms: int = 1000) -> None:
        """Forward a wait request of ``ms`` milliseconds to the action callback.

        This method does not sleep itself; the delay is left to the callback.
        """
        await self._send_action({
            "type": "wait",
            "ms": ms
        })

    async def move(self, x: int, y: int) -> None:
        """Move cursor to coordinates."""
        await self._send_action({
            "type": "move",
            "x": x,
            "y": y
        })

    async def keypress(self, keys: list[str] | str) -> None:
        """Press key combination.

        Args:
            keys: A single key name or a list of key names; a single string
                is wrapped in a list before being forwarded.
        """
        if isinstance(keys, str):
            keys = [keys]
        await self._send_action({
            "type": "keypress",
            "keys": keys
        })

    async def drag(self, path: list[dict[str, int]]) -> None:
        """Drag along a path of points."""
        await self._send_action({
            "type": "drag",
            "path": path
        })

    async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse down at coordinates."""
        await self._send_action({
            "type": "left_mouse_down",
            "x": x,
            "y": y
        })

    async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse up at coordinates."""
        await self._send_action({
            "type": "left_mouse_up",
            "x": x,
            "y": y
        })

    async def get_current_url(self) -> str:
        """Get the current URL.

        Returns:
            The action callback's response to a ``get_current_url`` action,
            or ``""`` when no callback is set or the callback returns ``None``.
        """
        url = await self._send_action({
            "type": "get_current_url"
        })
        # Honour the ``-> str`` contract when the callback yields nothing.
        return "" if url is None else url
|
|
File without changes
|
|
File without changes
|