cua-agent 0.4.17__py3-none-any.whl → 0.4.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

@@ -1,187 +0,0 @@
1
- """HUD Computer Handler for ComputerAgent integration."""
2
-
3
- import base64
4
- from io import BytesIO
5
- from typing import Literal, Optional, Any, Dict, Callable
6
- from PIL import Image
7
-
8
- from agent.computers import AsyncComputerHandler
9
-
10
-
11
class HUDComputerHandler(AsyncComputerHandler):
    """Computer handler that interfaces with HUD environment.

    The handler does not drive a computer itself. Every action is packaged
    as a plain ``dict`` (always carrying a ``"type"`` key) and handed to the
    configured *action callback*; screenshots are obtained from the
    configured *screenshot callback*. Both callbacks may be either regular
    functions or coroutine functions.

    When no action callback is configured, every action method is a silent
    no-op. When no screenshot callback is configured (or it yields an
    unsupported value), the most recently stored screenshot is returned,
    falling back to a blank white image of the configured dimensions.
    """

    def __init__(
        self,
        environment: Literal["windows", "mac", "linux", "browser"] = "linux",
        dimensions: tuple[int, int] = (1024, 768),
        screenshot_callback: Optional[Callable] = None,
        action_callback: Optional[Callable] = None,
    ):
        """
        Initialize HUD computer handler.

        Args:
            environment: The environment type for HUD
            dimensions: Screen dimensions as (width, height)
            screenshot_callback: Optional zero-argument callback (sync or
                async) producing a screenshot as a base64 string, a PIL
                image, or a bytes-like object
            action_callback: Optional one-argument callback (sync or async)
                receiving the action dict to execute in the HUD environment
        """
        super().__init__()
        self._environment = environment
        self._dimensions = dimensions
        self._screenshot_callback = screenshot_callback
        self._action_callback = action_callback

        # Store the last screenshot (base64 string) for reuse
        self._last_screenshot: Optional[str] = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    async def _resolve(result: Any) -> Any:
        """Await *result* if it is awaitable; otherwise return it unchanged.

        This lets callers register either plain functions or coroutine
        functions as callbacks.
        """
        # Local import: keeps this class self-contained without touching the
        # module-level import block.
        import inspect

        if inspect.isawaitable(result):
            return await result
        return result

    @staticmethod
    def _encode_png(image: Image.Image) -> str:
        """Serialize a PIL image to PNG and return it base64-encoded."""
        buffer = BytesIO()
        image.save(buffer, format="PNG")
        return base64.b64encode(buffer.getvalue()).decode()

    @classmethod
    def _to_base64(cls, screenshot: Any) -> Optional[str]:
        """Normalize a callback's screenshot value to a base64 string.

        Returns ``None`` when the value is of an unsupported type so the
        caller can fall back to a cached or blank screenshot.
        """
        if isinstance(screenshot, str):
            # Strings are passed through untouched (not validated here).
            return screenshot
        if isinstance(screenshot, Image.Image):
            return cls._encode_png(screenshot)
        if isinstance(screenshot, (bytes, bytearray, memoryview)):
            return base64.b64encode(screenshot).decode()
        return None

    async def _dispatch(self, action: Dict[str, Any]) -> Any:
        """Send *action* to the action callback and return its result.

        Returns ``None`` without doing anything when no action callback is
        configured.
        """
        if not self._action_callback:
            return None
        return await self._resolve(self._action_callback(action))

    # ------------------------------------------------------------------
    # Configuration
    # ------------------------------------------------------------------

    def set_screenshot_callback(self, callback: Callable) -> None:
        """Set the screenshot callback."""
        self._screenshot_callback = callback

    def set_action_callback(self, callback: Callable) -> None:
        """Set the action callback."""
        self._action_callback = callback

    def update_screenshot(self, screenshot: str) -> None:
        """Update the stored screenshot (base64 string)."""
        self._last_screenshot = screenshot

    # ------------------------------------------------------------------
    # Environment queries
    # ------------------------------------------------------------------

    async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
        """Get the current environment type."""
        return self._environment  # type: ignore

    async def get_dimensions(self) -> tuple[int, int]:
        """Get screen dimensions as (width, height)."""
        return self._dimensions

    async def screenshot(self) -> str:
        """Take a screenshot and return as base64 string.

        Resolution order:
            1. The screenshot callback's result, if it is a string, a PIL
               image, or a bytes-like object (the result is also cached).
            2. The last cached screenshot, if any.
            3. A freshly generated blank white PNG of the configured
               dimensions (also cached).
        """
        if self._screenshot_callback:
            captured = await self._resolve(self._screenshot_callback())
            encoded = self._to_base64(captured)
            # ``is not None`` so that an empty string from the callback is
            # still returned as-is rather than triggering the fallback.
            if encoded is not None:
                self._last_screenshot = encoded
                return encoded

        # Return last screenshot if available, otherwise create a blank one
        if self._last_screenshot:
            return self._last_screenshot

        # Create a blank screenshot as fallback
        blank_image = Image.new("RGB", self._dimensions, color="white")
        self._last_screenshot = self._encode_png(blank_image)
        return self._last_screenshot

    # ------------------------------------------------------------------
    # Actions
    # ------------------------------------------------------------------

    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Click at coordinates with specified button."""
        await self._dispatch({
            "type": "click",
            "x": x,
            "y": y,
            "button": button,
        })

    async def double_click(self, x: int, y: int) -> None:
        """Double click at coordinates."""
        await self._dispatch({
            "type": "double_click",
            "x": x,
            "y": y,
        })

    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Scroll at coordinates with specified scroll amounts."""
        await self._dispatch({
            "type": "scroll",
            "x": x,
            "y": y,
            "scroll_x": scroll_x,
            "scroll_y": scroll_y,
        })

    async def type(self, text: str) -> None:
        """Type text."""
        await self._dispatch({
            "type": "type",
            "text": text,
        })

    async def wait(self, ms: int = 1000) -> None:
        """Request a wait of the specified milliseconds.

        The wait is delegated to the action callback; this method does not
        sleep on its own.
        """
        await self._dispatch({
            "type": "wait",
            "ms": ms,
        })

    async def move(self, x: int, y: int) -> None:
        """Move cursor to coordinates."""
        await self._dispatch({
            "type": "move",
            "x": x,
            "y": y,
        })

    async def keypress(self, keys: list[str] | str) -> None:
        """Press key combination.

        A single key given as a string is wrapped in a one-element list so
        the callback always receives a list under ``"keys"``.
        """
        if isinstance(keys, str):
            keys = [keys]
        await self._dispatch({
            "type": "keypress",
            "keys": keys,
        })

    async def drag(self, path: list[dict[str, int]]) -> None:
        """Drag along a path of points."""
        await self._dispatch({
            "type": "drag",
            "path": path,
        })

    async def left_mouse_down(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse down at coordinates."""
        await self._dispatch({
            "type": "left_mouse_down",
            "x": x,
            "y": y,
        })

    async def left_mouse_up(self, x: Optional[int] = None, y: Optional[int] = None) -> None:
        """Left mouse up at coordinates."""
        await self._dispatch({
            "type": "left_mouse_up",
            "x": x,
            "y": y,
        })

    async def get_current_url(self) -> str:
        """Get the current URL.

        Returns the action callback's result for a ``"get_current_url"``
        action, or an empty string when no callback is configured or the
        callback yields ``None``.
        """
        url = await self._dispatch({
            "type": "get_current_url",
        })
        # Honor the declared ``str`` return type when nothing is reported.
        return "" if url is None else url