cua-agent 0.4.17__py3-none-any.whl → 0.4.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

@@ -0,0 +1,192 @@
1
+ """
2
+ Example usage of the proxy server and client requests.
3
+ """
4
+ import dotenv
5
+ dotenv.load_dotenv()
6
+
7
+ import asyncio
8
+ import json
9
+ import os
10
+ import aiohttp
11
+ from typing import Dict, Any
12
+
13
+
14
async def test_http_endpoint():
    """Send example requests to the proxy's HTTP ``/responses`` endpoint.

    Reads ``ANTHROPIC_API_KEY`` and ``CUA_API_KEY`` from the environment, POSTs each
    example payload to ``{base_url}/responses`` and prints the HTTP status and the
    JSON body of every response. A failure on one request is printed and does not
    prevent the remaining requests from being sent.

    Raises:
        RuntimeError: If a required environment variable is not set.
    """

    def _require_env(name: str) -> str:
        """Return the value of environment variable *name* or raise if it is unset."""
        value = os.getenv(name)
        if value is None:
            # Explicit raise instead of ``assert`` so the check survives ``python -O``.
            raise RuntimeError(f"{name} environment variable must be set")
        return value

    def _printable(payload: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of *payload* with every ``env`` value masked for display."""
        env = payload.get("env")
        if not env:
            return payload
        return {**payload, "env": {name: "***" for name in env}}

    anthropic_api_key = _require_env("ANTHROPIC_API_KEY")

    # Example 1: Simple text request
    simple_request = {
        "model": "anthropic/claude-3-5-sonnet-20241022",
        "input": "Tell me a three sentence bedtime story about a unicorn.",
        "env": {
            "ANTHROPIC_API_KEY": anthropic_api_key
        }
    }

    # Example 2: Multi-modal request with image.
    # Kept as a reference payload; it is not part of the default run below.
    multimodal_request = {
        "model": "anthropic/claude-3-5-sonnet-20241022",
        "input": [
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": "what is in this image?"},
                    {
                        "type": "input_image",
                        "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
                    }
                ]
            }
        ],
        "env": {
            "ANTHROPIC_API_KEY": anthropic_api_key
        }
    }

    # Example 3: Computer-use request. No agent_kwargs/computer_kwargs are sent,
    # so the server-side defaults apply.
    custom_request = {
        "model": "anthropic/claude-3-5-sonnet-20241022",
        "input": "Take a screenshot and tell me what you see",
        "env": {
            "ANTHROPIC_API_KEY": anthropic_api_key
        }
    }

    # Test requests (point base_url at "http://localhost:8000" for a local proxy)
    base_url = "https://m-linux-96lcxd2c2k.containers.cloud.trycua.com:8443"
    api_key = _require_env("CUA_API_KEY")

    # Add multimodal_request to this list to exercise the image example as well.
    requests_to_send = [simple_request, custom_request]

    async with aiohttp.ClientSession() as session:
        for i, request_data in enumerate(requests_to_send, 1):
            print(f"\n--- Test {i} ---")
            # Never echo the API keys carried in "env" to the terminal.
            print(f"Request: {json.dumps(_printable(request_data), indent=2)}")

            try:
                print(f"Sending request to {base_url}/responses")
                async with session.post(
                    f"{base_url}/responses",
                    json=request_data,
                    headers={"Content-Type": "application/json", "X-API-Key": api_key}
                ) as response:
                    # Report the status first so it is visible even if the body
                    # turns out not to be JSON.
                    print(f"Status: {response.status}")
                    result = await response.json()
                    print(f"Response: {json.dumps(result, indent=2)}")

            except Exception as e:
                print(f"Error: {e}")
86
+
87
+
88
+ def curl_examples():
89
+ """Print three example curl commands that POST to http://localhost:8000/responses: a plain-text request, a multi-modal (text + image) request, and a request passing agent_kwargs and computer_kwargs."""
90
+
91
+ print("=== CURL Examples ===\n")
92
+
93
+ print("1. Simple text request:")
94
+ print("""curl http://localhost:8000/responses \\
95
+ -H "Content-Type: application/json" \\
96
+ -d '{
97
+ "model": "anthropic/claude-3-5-sonnet-20241022",
98
+ "input": "Tell me a three sentence bedtime story about a unicorn."
99
+ }'""")
100
+
101
+ print("\n2. Multi-modal request with image:")
102
+ print("""curl http://localhost:8000/responses \\
103
+ -H "Content-Type: application/json" \\
104
+ -d '{
105
+ "model": "anthropic/claude-3-5-sonnet-20241022",
106
+ "input": [
107
+ {
108
+ "role": "user",
109
+ "content": [
110
+ {"type": "input_text", "text": "what is in this image?"},
111
+ {
112
+ "type": "input_image",
113
+ "image_url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
114
+ }
115
+ ]
116
+ }
117
+ ]
118
+ }'""")
119
+
120
+ print("\n3. Request with custom configuration:")
121
+ print("""curl http://localhost:8000/responses \\
122
+ -H "Content-Type: application/json" \\
123
+ -d '{
124
+ "model": "anthropic/claude-3-5-sonnet-20241022",
125
+ "input": "Take a screenshot and tell me what you see",
126
+ "agent_kwargs": {
127
+ "save_trajectory": true,
128
+ "verbosity": 20
129
+ },
130
+ "computer_kwargs": {
131
+ "os_type": "linux",
132
+ "provider_type": "cloud"
133
+ }
134
+ }'""")
135
+
136
+
137
async def test_p2p_client():
    """Example P2P client using peerjs-python.

    Starts a peer with id ``"test-client"``, connects to the peer
    ``"computer-agent-proxy"``, sends one JSON request once the connection opens and
    prints the first data message received. Waits at most 10 seconds for that
    message, then always destroys the peer.

    Missing optional dependencies and any runtime failure are reported on stdout
    rather than raised.
    """
    client_peer = None
    try:
        from peerjs import Peer, PeerOptions, ConnectionEventType
        from aiortc import RTCConfiguration, RTCIceServer

        # Set up client peer
        options = PeerOptions(
            host="0.peerjs.com",
            port=443,
            secure=True,
            config=RTCConfiguration(
                iceServers=[RTCIceServer(urls="stun:stun.l.google.com:19302")]
            )
        )

        client_peer = Peer(id="test-client", peer_options=options)
        await client_peer.start()

        # Connect to proxy server
        connection = client_peer.connect("computer-agent-proxy")

        # Set by the data handler so the wait below can end as soon as a reply arrives.
        response_received = asyncio.Event()

        @connection.on(ConnectionEventType.Open)
        async def connection_open():
            print("Connected to proxy server")

            # Send a test request
            request = {
                "model": "anthropic/claude-3-5-sonnet-20241022",
                "input": "Hello from P2P client!"
            }
            await connection.send(json.dumps(request))

        @connection.on(ConnectionEventType.Data)
        async def connection_data(data):
            print(f"Received response: {data}")
            response_received.set()

        # Wait for the response, but never longer than 10 seconds
        try:
            await asyncio.wait_for(response_received.wait(), timeout=10)
        except asyncio.TimeoutError:
            print("No response received within 10 seconds")

    except ImportError:
        print("P2P dependencies not available. Install peerjs-python for P2P testing.")
    except Exception as e:
        print(f"P2P test error: {e}")
    finally:
        # Tear the peer down on every path (success, timeout or error) so it is
        # not left running when no data message ever arrives.
        if client_peer is not None:
            try:
                await client_peer.destroy()
            except Exception as e:
                print(f"P2P cleanup error: {e}")
182
+
183
+
184
if __name__ == "__main__":
    import sys

    # The first command-line argument picks the demo to run; with no argument
    # (or an unrecognised one) the HTTP endpoint test is executed.
    mode = sys.argv[1] if len(sys.argv) > 1 else None

    if mode == "curl":
        curl_examples()
    elif mode == "p2p":
        asyncio.run(test_p2p_client())
    else:
        asyncio.run(test_http_endpoint())
@@ -0,0 +1,248 @@
1
+ """
2
+ Request handlers for the proxy endpoints.
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ import os
9
+ from contextlib import contextmanager
10
+ from typing import Dict, Any, List, Union, Optional
11
+
12
+ from ..agent import ComputerAgent
13
+ from computer import Computer
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class ResponsesHandler:
19
+ """Handler for /responses endpoint that processes agent requests."""
20
+
21
+ def __init__(self):
22
+ self.computer = None
23
+ self.agent = None
24
+ # Simple in-memory caches
25
+ self._computer_cache: Dict[str, Any] = {}
26
+ self._agent_cache: Dict[str, Any] = {}
27
+
28
+ async def setup_computer_agent(
29
+ self,
30
+ model: str,
31
+ agent_kwargs: Optional[Dict[str, Any]] = None,
32
+ computer_kwargs: Optional[Dict[str, Any]] = None,
33
+ ):
34
+ """Set up (and cache) computer and agent instances.
35
+
36
+ Caching keys:
37
+ - Computer cache key: computer_kwargs
38
+ - Agent cache key: {"model": model, **agent_kwargs}
39
+ """
40
+ agent_kwargs = agent_kwargs or {}
41
+ computer_kwargs = computer_kwargs or {}
42
+
43
+ def _stable_key(obj: Dict[str, Any]) -> str:
44
+ try:
45
+ return json.dumps(obj, sort_keys=True, separators=(",", ":"))
46
+ except Exception:
47
+ # Fallback: stringify non-serializable values
48
+ safe_obj = {}
49
+ for k, v in obj.items():
50
+ try:
51
+ json.dumps(v)
52
+ safe_obj[k] = v
53
+ except Exception:
54
+ safe_obj[k] = str(v)
55
+ return json.dumps(safe_obj, sort_keys=True, separators=(",", ":"))
56
+
57
+ # Determine if custom tools are supplied; if so, skip computer setup entirely
58
+ has_custom_tools = bool(agent_kwargs.get("tools"))
59
+
60
+ computer = None
61
+ if not has_custom_tools:
62
+ # ---------- Computer setup (with cache) ----------
63
+ comp_key = _stable_key(computer_kwargs)
64
+
65
+ computer = self._computer_cache.get(comp_key)
66
+ if computer is None:
67
+ # Default computer configuration
68
+ default_c_config = {
69
+ "os_type": "linux",
70
+ "provider_type": "cloud",
71
+ "name": os.getenv("CUA_CONTAINER_NAME"),
72
+ "api_key": os.getenv("CUA_API_KEY"),
73
+ }
74
+ default_c_config.update(computer_kwargs)
75
+ computer = Computer(**default_c_config)
76
+ await computer.__aenter__()
77
+ self._computer_cache[comp_key] = computer
78
+ logger.info(f"Computer created and cached with key={comp_key} config={default_c_config}")
79
+ else:
80
+ logger.info(f"Reusing cached computer for key={comp_key}")
81
+
82
+ # Bind current computer reference (None if custom tools supplied)
83
+ self.computer = computer
84
+
85
+ # ---------- Agent setup (with cache) ----------
86
+ # Build agent cache key from {model} + agent_kwargs (excluding tools unless explicitly passed)
87
+ agent_kwargs_for_key = dict(agent_kwargs)
88
+ agent_key_payload = {"model": model, **agent_kwargs_for_key}
89
+ agent_key = _stable_key(agent_key_payload)
90
+
91
+ agent = self._agent_cache.get(agent_key)
92
+ if agent is None:
93
+ # Default agent configuration
94
+ default_a_config: Dict[str, Any] = {"model": model}
95
+ if not has_custom_tools:
96
+ default_a_config["tools"] = [computer]
97
+ # Apply user overrides, but keep tools unless user explicitly sets
98
+ if agent_kwargs:
99
+ if not has_custom_tools:
100
+ agent_kwargs.setdefault("tools", [computer])
101
+ default_a_config.update(agent_kwargs)
102
+ # JSON-derived kwargs may have loose types; ignore static arg typing here
103
+ agent = ComputerAgent(**default_a_config) # type: ignore[arg-type]
104
+ self._agent_cache[agent_key] = agent
105
+ logger.info(f"Agent created and cached with key={agent_key} model={model}")
106
+ else:
107
+ # Ensure cached agent uses the current computer tool (in case object differs)
108
+ # Only update if tools not explicitly provided in agent_kwargs
109
+ if not has_custom_tools:
110
+ try:
111
+ agent.tools = [computer]
112
+ except Exception:
113
+ pass
114
+ logger.info(f"Reusing cached agent for key={agent_key}")
115
+
116
+ # Bind current agent reference
117
+ self.agent = agent
118
+
119
+ async def process_request(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
120
+ """
121
+ Process a /responses request and return the result.
122
+
123
+ Args:
124
+ request_data: Dictionary containing model, input, and optional kwargs
125
+
126
+ Returns:
127
+ Dictionary with the agent's response
128
+ """
129
+ try:
130
+ # Extract request parameters
131
+ model = request_data.get("model")
132
+ input_data = request_data.get("input")
133
+ agent_kwargs = request_data.get("agent_kwargs", {})
134
+ computer_kwargs = request_data.get("computer_kwargs", {})
135
+ env_overrides = request_data.get("env", {}) or {}
136
+
137
+ if not model:
138
+ raise ValueError("Model is required")
139
+ if not input_data:
140
+ raise ValueError("Input is required")
141
+
142
+ # Apply env overrides for the duration of this request
143
+ with self._env_overrides(env_overrides):
144
+ # Set up (and possibly reuse) computer and agent via caches
145
+ await self.setup_computer_agent(model, agent_kwargs, computer_kwargs)
146
+
147
+ # Defensive: ensure agent is initialized for type checkers
148
+ agent = self.agent
149
+ if agent is None:
150
+ raise RuntimeError("Agent failed to initialize")
151
+
152
+ # Convert input to messages format
153
+ messages = self._convert_input_to_messages(input_data)
154
+
155
+ # Run agent and get first result
156
+ async for result in agent.run(messages):
157
+ # Return the first result and break
158
+ return {
159
+ "success": True,
160
+ "result": result,
161
+ "model": model
162
+ }
163
+
164
+ # If no results were yielded
165
+ return {
166
+ "success": False,
167
+ "error": "No results from agent",
168
+ "model": model
169
+ }
170
+
171
+ except Exception as e:
172
+ logger.error(f"Error processing request: {e}")
173
+ return {
174
+ "success": False,
175
+ "error": str(e),
176
+ "model": request_data.get("model", "unknown")
177
+ }
178
+
179
+ def _convert_input_to_messages(self, input_data: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
180
+ """Convert input data to messages format."""
181
+ if isinstance(input_data, str):
182
+ # Simple string input
183
+ return [{"role": "user", "content": input_data}]
184
+ elif isinstance(input_data, list):
185
+ # Already in messages format
186
+ messages = []
187
+ for msg in input_data:
188
+ # Convert content array format if needed
189
+ if isinstance(msg.get("content"), list):
190
+ content_parts = []
191
+ for part in msg["content"]:
192
+ if part.get("type") == "input_text":
193
+ content_parts.append({"type": "text", "text": part["text"]})
194
+ elif part.get("type") == "input_image":
195
+ content_parts.append({
196
+ "type": "image_url",
197
+ "image_url": {"url": part["image_url"]}
198
+ })
199
+ else:
200
+ content_parts.append(part)
201
+ messages.append({
202
+ "role": msg["role"],
203
+ "content": content_parts
204
+ })
205
+ else:
206
+ messages.append(msg)
207
+ return messages
208
+ else:
209
+ raise ValueError("Input must be string or list of messages")
210
+
211
+ async def cleanup(self):
212
+ """Clean up resources."""
213
+ if self.computer:
214
+ try:
215
+ await self.computer.__aexit__(None, None, None)
216
+ except Exception as e:
217
+ logger.error(f"Error cleaning up computer: {e}")
218
+ finally:
219
+ self.computer = None
220
+ self.agent = None
221
+
222
+ @staticmethod
223
+ @contextmanager
224
+ def _env_overrides(env: Dict[str, str]):
225
+ """Temporarily apply environment variable overrides for the current process.
226
+ Restores previous values after the context exits.
227
+
228
+ Args:
229
+ env: Mapping of env var names to override for this request.
230
+ """
231
+ if not env:
232
+ # No-op context
233
+ yield
234
+ return
235
+
236
+ original: Dict[str, Optional[str]] = {}
237
+ try:
238
+ for k, v in env.items():
239
+ original[k] = os.environ.get(k)
240
+ os.environ[k] = str(v)
241
+ yield
242
+ finally:
243
+ for k, old in original.items():
244
+ if old is None:
245
+ # Was not set before
246
+ os.environ.pop(k, None)
247
+ else:
248
+ os.environ[k] = old
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.4.17
3
+ Version: 0.4.18
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.12
@@ -38,7 +38,7 @@ Requires-Dist: python-dotenv>=1.0.1; extra == "ui"
38
38
  Provides-Extra: cli
39
39
  Requires-Dist: yaspin>=3.1.0; extra == "cli"
40
40
  Provides-Extra: hud
41
- Requires-Dist: hud-python==0.2.10; extra == "hud"
41
+ Requires-Dist: hud-python<0.5.0,>=0.4.12; extra == "hud"
42
42
  Provides-Extra: all
43
43
  Requires-Dist: ultralytics>=8.0.0; extra == "all"
44
44
  Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "all"
@@ -49,7 +49,7 @@ Requires-Dist: transformers>=4.54.0; extra == "all"
49
49
  Requires-Dist: gradio>=5.23.3; extra == "all"
50
50
  Requires-Dist: python-dotenv>=1.0.1; extra == "all"
51
51
  Requires-Dist: yaspin>=3.1.0; extra == "all"
52
- Requires-Dist: hud-python==0.2.10; extra == "all"
52
+ Requires-Dist: hud-python<0.5.0,>=0.4.12; extra == "all"
53
53
  Description-Content-Type: text/markdown
54
54
 
55
55
  <div align="center">
@@ -1,17 +1,19 @@
1
1
  agent/__init__.py,sha256=MaW-BczJ-lCACPYH39DvFhE7ZWiSo7sBO6pBfyO7Nxc,1269
2
2
  agent/__main__.py,sha256=lBUe8Niqa5XoCjwFfXyX7GtnUwjjZXC1-j4V9mvUYSc,538
3
- agent/adapters/__init__.py,sha256=lNH6srgIMmZOI7dgicJs3LCk_1MeqLF0lou9n7b23Ts,238
3
+ agent/adapters/__init__.py,sha256=Q_OxxwXBcBIetQ_DtHS5bwZWXrvCKPX2grCg8R0UKek,301
4
4
  agent/adapters/huggingfacelocal_adapter.py,sha256=Uqjtcohhzd33VFh38Ra2y4Uv_lTghMswoqS1t-KKFkw,8480
5
5
  agent/adapters/human_adapter.py,sha256=xT4nnfNXb1z-vnGFlLmFEZN7TMcoMBGS40MtR1Zwv4o,13079
6
- agent/agent.py,sha256=XBZu_iNSWzyBk7Qf9Q-FkyHoqdikdldK6T1LAM3lLWY,29102
7
- agent/callbacks/__init__.py,sha256=yxxBXUqpXQ-jRi_ixJMtmQPxoNRy5Vz1PUBzNNa1Dwg,538
6
+ agent/adapters/mlxvlm_adapter.py,sha256=J-LsRsbMFEpgyC3V-Poqi9kr_dzEGfAn5vpaiwSsA_k,14855
7
+ agent/agent.py,sha256=JjyEgj3jTnhVcZAX7ANZtePRMkbKD0kzkukSlgwaQNk,29365
8
+ agent/callbacks/__init__.py,sha256=et6pNfX_AiJqhVzUfCvcjzFbDhfLoHazKCXN5sqwxaM,631
8
9
  agent/callbacks/base.py,sha256=UnnnYlh6XCm6HKZZsAPaT_Eyo9LUYLyjyNwF-QRm6Ns,4691
9
10
  agent/callbacks/budget_manager.py,sha256=RyKM-7iXQcDotYvrw3eURzeEHEXvQjID-NobtvQWE7k,1832
10
11
  agent/callbacks/image_retention.py,sha256=tiuRT5ke9xXTb2eP8Gz-2ITyAMY29LURUH6AbjX3RP8,6165
11
12
  agent/callbacks/logging.py,sha256=OOxU97EzrxlnUAtiEnvy9FB7SwCUK90-rdpDFA2Ae4E,10921
13
+ agent/callbacks/operator_validator.py,sha256=dLvR749glMPiGt8UP-XMLm_LcaTUUhWvRZJN_qkHV7Y,6430
12
14
  agent/callbacks/pii_anonymization.py,sha256=NEkUTUjQBi82nqus7kT-1E4RaeQ2hQrY7YCnKndLhP8,3272
13
15
  agent/callbacks/telemetry.py,sha256=RbUDhE41mTi8g9hNre0EpltK_NUZkLj8buJLWBzs0Ek,7363
14
- agent/callbacks/trajectory_saver.py,sha256=VHbiDQzI_XludkWhZIVqIMrsxgwKfFWwVtqaRot_D4U,12231
16
+ agent/callbacks/trajectory_saver.py,sha256=GZBU4EV_pkp3hSin08kwgC08QtNCvx0y8ZMFz3bulLs,12389
15
17
  agent/cli.py,sha256=AgaXwywHd3nGQWuqMRj6SbPyFaCPjfo5980Y1ApQOTQ,12413
16
18
  agent/computers/__init__.py,sha256=39ISJsaREaQIZckpzxSuLhuR763wUU3TxUux78EKjAg,1477
17
19
  agent/computers/base.py,sha256=hZntX4vgc1ahD3EnFeb9lUjtBmgka1vb27hndPl9tKQ,2187
@@ -22,20 +24,20 @@ agent/human_tool/__init__.py,sha256=3m5_g-Fo_0yX5vi7eg-A92oTqO0N3aY929Ajp78HKsE,
22
24
  agent/human_tool/__main__.py,sha256=VsW2BAghlonOuqZbP_xuCsaec9bemA1I_ibnDcED9D4,1068
23
25
  agent/human_tool/server.py,sha256=ceuL5kw_RjgAi8fueLU3nTjyzOLE25Shv1oTJnSHsoQ,7964
24
26
  agent/human_tool/ui.py,sha256=2Jk3Bh-Jctya8GUG-qtYbdi-1qDdwOtcAlUeiIqsoIE,26584
25
- agent/integrations/hud/__init__.py,sha256=1lqeM6vJAekr38l7yteLNa-Hn3R2eXCusT2FAaY8VPE,2943
26
- agent/integrations/hud/adapter.py,sha256=M7J71q29Ndr4xXIW7Y6H_HIlJmnp-JlKG_4zKZTuyps,4088
27
- agent/integrations/hud/agent.py,sha256=vXmI7OBez5lokQ9dCcgWeT8N68xfWpsWT3S36MLhdas,17264
28
- agent/integrations/hud/computer_handler.py,sha256=N5pVKeKW9bJ-oceYrE7IIHbx6ZrQRQnHItTGrytoHRM,6788
27
+ agent/integrations/hud/__init__.py,sha256=0FWeAlVqe9TZX7U26Wu8O6Zsg9UIvNNN5A-8YB1Bb_k,8339
28
+ agent/integrations/hud/proxy.py,sha256=yA7C2jeXnrpI5HS0VgCvn0BflVbAORZynIfyE27rvBg,7782
29
29
  agent/loops/__init__.py,sha256=Ef8aj07l3osibwDk-DTo80PrpL4_GdKRTP1ikl_b-BQ,328
30
- agent/loops/anthropic.py,sha256=lvDscOaOcESBWZvnjKntQRWJZ4cEaFJhSsmmFc7J1ow,69562
30
+ agent/loops/anthropic.py,sha256=Th3dNv8FULvyDXx7aPVGSzbrqBiDNFXRso3DSa88d_w,70301
31
31
  agent/loops/base.py,sha256=LK7kSTnc2CB88LI7qr2VP7LMq0eS5r2bSEnrxO6IN5U,2345
32
- agent/loops/composed_grounded.py,sha256=BgxufIyJCkWnJpp29PE1V2ce4iB9ictGjuVqFDx17B8,12122
32
+ agent/loops/composed_grounded.py,sha256=8oJoqaRzKWbI9I4VoFuAoUzQ11_CFnYT-EdPOy-NVEQ,12349
33
33
  agent/loops/glm45v.py,sha256=V1f-5vAifbYcY-qTc7fW2KXVRkAfApQI_EjavH3X2ak,35110
34
34
  agent/loops/gta1.py,sha256=ha5TaUWqUzTffx_ow1WiBU8i3VNP-6FL5XC66ajPFjg,5829
35
35
  agent/loops/model_types.csv,sha256=GmFn4x80yoUpQZuQ-GXtJkPVlOLYWZ5u_5A73HRyeNE,112
36
36
  agent/loops/omniparser.py,sha256=-db8JUL2Orn47ERIaLbuNShAXn4LeIgYzRWphn_9Dg4,15071
37
- agent/loops/openai.py,sha256=8Ad_XufpENmLq1nEnhzF3oswPrPK1EPz-C5NU8UOEs0,8035
38
- agent/loops/uitars.py,sha256=PVNOdwcn2K6RgaxoU-9I4HjBTsEH073M11LTqTrN7C4,31849
37
+ agent/loops/openai.py,sha256=3UEXdecqGkyknhTgp6zxr_cNCVg5vM-61I6SKMNl6m8,8692
38
+ agent/loops/uitars.py,sha256=QyEWyrhkI5MCksTunntY-5jtETd4pwcQB-DyzmiWezo,32350
39
+ agent/proxy/examples.py,sha256=GYFJ-sfDsSNZr9n_qpvDx_0rShqoKE5JW0ibbljWfoo,6192
40
+ agent/proxy/handlers.py,sha256=48mMNyZOU3dJQ6oI5r2kDDe29rcU49MConlB0MZeCsU,9602
39
41
  agent/responses.py,sha256=_SoN4BkaTxMHMB21EOtDc_aDBIJlfDwsCzszMBnIkH0,30764
40
42
  agent/types.py,sha256=h6SnmTAEAaryVCjwVZFAuCbio9UW13OqgQEV7HKmZVM,1060
41
43
  agent/ui/__init__.py,sha256=DTZpK85QXscXK2nM9HtpAhVBF13yAamUrtwrQSuV-kM,126
@@ -43,7 +45,7 @@ agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
43
45
  agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
44
46
  agent/ui/gradio/app.py,sha256=Ol97YEbwREZZQ9_PMjVHlfOcu9BGsawxgAGAm79hT80,9117
45
47
  agent/ui/gradio/ui_components.py,sha256=dJUvKDmc1oSejtoR_gU_oWWYwxaOOQyPloSYRGMrUCQ,36068
46
- cua_agent-0.4.17.dist-info/METADATA,sha256=ngs59u9_Ec6SfwAdvr8UytvNFLt9DV0pMIQAb3ElbA0,12698
47
- cua_agent-0.4.17.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
48
- cua_agent-0.4.17.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
49
- cua_agent-0.4.17.dist-info/RECORD,,
48
+ cua_agent-0.4.18.dist-info/METADATA,sha256=SuqfpH4uM1eqOehMaWA_rMgOknsiU6kvt23DMD-7_5Q,12712
49
+ cua_agent-0.4.18.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
50
+ cua_agent-0.4.18.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
51
+ cua_agent-0.4.18.dist-info/RECORD,,
@@ -1,121 +0,0 @@
1
- """HUD Adapter for ComputerAgent integration."""
2
-
3
- from __future__ import annotations
4
-
5
- from typing import Any, ClassVar
6
-
7
- from hud.adapters.common import CLA, Adapter
8
- from hud.adapters.common.types import (
9
- CLAButton,
10
- CLAKey,
11
- ClickAction,
12
- CustomAction,
13
- DragAction,
14
- MoveAction,
15
- Point,
16
- PressAction,
17
- ResponseAction,
18
- ScreenshotFetch,
19
- ScrollAction,
20
- TypeAction,
21
- WaitAction,
22
- )
23
-
24
-
25
- class ComputerAgentAdapter(Adapter):
26
- """Adapter for ComputerAgent to work with HUD."""
27
-
28
- KEY_MAP: ClassVar[dict[str, CLAKey]] = {
29
- "return": "enter",
30
- "arrowup": "up",
31
- "arrowdown": "down",
32
- "arrowleft": "left",
33
- "arrowright": "right",
34
- "cmd": "ctrl",
35
- "super": "win",
36
- "meta": "win",
37
- }
38
-
39
- BUTTON_MAP: ClassVar[dict[str, CLAButton]] = {
40
- "wheel": "middle",
41
- "middle": "middle",
42
- }
43
-
44
- def __init__(self) -> None:
45
- super().__init__()
46
- # ComputerAgent default dimensions (can be overridden)
47
- self.agent_width = 1024
48
- self.agent_height = 768
49
-
50
- def _map_key(self, key: str) -> CLAKey:
51
- """Map a key to its standardized form."""
52
- return self.KEY_MAP.get(key.lower(), key.lower()) # type: ignore
53
-
54
- def convert(self, data: Any) -> CLA:
55
- """Convert a ComputerAgent action to a HUD action."""
56
- try:
57
- action_type = data.get("type")
58
-
59
- if action_type == "click":
60
- x, y = data.get("x", 0), data.get("y", 0)
61
- button = data.get("button", "left")
62
- button = self.BUTTON_MAP.get(button, button)
63
- if button is None:
64
- button = "left"
65
- converted_action = ClickAction(point=Point(x=x, y=y), button=button)
66
-
67
- elif action_type == "double_click":
68
- x, y = data.get("x", 0), data.get("y", 0)
69
- converted_action = ClickAction(point=Point(x=x, y=y), button="left", pattern=[100])
70
-
71
- elif action_type == "scroll":
72
- x, y = int(data.get("x", 0)), int(data.get("y", 0))
73
- scroll_x = int(data.get("scroll_x", 0))
74
- scroll_y = int(data.get("scroll_y", 0))
75
- converted_action = ScrollAction(
76
- point=Point(x=x, y=y), scroll=Point(x=scroll_x, y=scroll_y)
77
- )
78
-
79
- elif action_type == "type":
80
- text = data.get("text", "")
81
- converted_action = TypeAction(text=text, enter_after=False)
82
-
83
- elif action_type == "wait":
84
- ms = data.get("ms", 1000)
85
- converted_action = WaitAction(time=ms)
86
-
87
- elif action_type == "move":
88
- x, y = data.get("x", 0), data.get("y", 0)
89
- converted_action = MoveAction(point=Point(x=x, y=y))
90
-
91
- elif action_type == "keypress":
92
- keys = data.get("keys", [])
93
- if isinstance(keys, str):
94
- keys = [keys]
95
- converted_action = PressAction(keys=[self._map_key(k) for k in keys])
96
-
97
- elif action_type == "drag":
98
- path = data.get("path", [])
99
- points = [Point(x=p.get("x", 0), y=p.get("y", 0)) for p in path]
100
- converted_action = DragAction(path=points)
101
-
102
- elif action_type == "screenshot":
103
- converted_action = ScreenshotFetch()
104
-
105
- elif action_type == "response":
106
- converted_action = ResponseAction(text=data.get("text", ""))
107
-
108
- elif action_type == "custom":
109
- converted_action = CustomAction(action=data.get("action", ""))
110
-
111
- else:
112
- raise ValueError(f"Unsupported action type: {action_type}")
113
-
114
- # Add reasoning and logs if available
115
- converted_action.reasoning = data.get("reasoning", "")
116
- converted_action.logs = data.get("logs", "")
117
-
118
- return converted_action
119
-
120
- except Exception as e:
121
- raise ValueError(f"Invalid action: {data}. Error: {e!s}") from e