cua-agent 0.4.32__py3-none-any.whl → 0.4.33__py3-none-any.whl

This diff compares the contents of two package versions that were publicly released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Potentially problematic release: this version of cua-agent might be problematic.

agent/cli.py CHANGED
@@ -167,7 +167,7 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str

         # Process and display the output
         for item in result.get("output", []):
-            if item.get("type") == "message":
+            if item.get("type") == "message" and item.get("role") == "assistant":
                 # Display agent text response
                 content = item.get("content", [])
                 for content_part in content:
@@ -226,6 +226,13 @@ Examples:
         help="Model string (e.g., 'openai/computer-use-preview', 'anthropic/claude-3-5-sonnet-20241022')"
     )

+    parser.add_argument(
+        "--provider",
+        choices=["cloud", "lume", "winsandbox", "docker"],
+        default="cloud",
+        help="Computer provider to use: cloud (default), lume, winsandbox, or docker"
+    )
+
     parser.add_argument(
         "--images",
         type=int,
@@ -257,6 +264,12 @@ Examples:
         help="Initial prompt to send to the agent. Leave blank for interactive mode."
     )

+    parser.add_argument(
+        "--prompt-file",
+        type=Path,
+        help="Path to a UTF-8 text file whose contents will be used as the initial prompt. If provided, overrides --prompt."
+    )
+
     parser.add_argument(
         "--predict-click",
         dest="predict_click",
@@ -289,33 +302,35 @@ Examples:
     container_name = os.getenv("CUA_CONTAINER_NAME")
     cua_api_key = os.getenv("CUA_API_KEY")

-    # Prompt for missing environment variables
+    # Prompt for missing environment variables (container name always required)
     if not container_name:
-        print_colored("CUA_CONTAINER_NAME not set.", dim=True)
-        print_colored("You can get a CUA container at https://www.trycua.com/", dim=True)
-        container_name = input("Enter your CUA container name: ").strip()
-        if not container_name:
-            print_colored("❌ Container name is required.")
-            sys.exit(1)
-
-    if not cua_api_key:
+        if args.provider == "cloud":
+            print_colored("CUA_CONTAINER_NAME not set.", dim=True)
+            print_colored("You can get a CUA container at https://www.trycua.com/", dim=True)
+            container_name = input("Enter your CUA container name: ").strip()
+            if not container_name:
+                print_colored("❌ Container name is required.")
+                sys.exit(1)
+        else:
+            container_name = "cli-sandbox"
+
+    # Only require API key for cloud provider
+    if args.provider == "cloud" and not cua_api_key:
         print_colored("CUA_API_KEY not set.", dim=True)
         cua_api_key = input("Enter your CUA API key: ").strip()
         if not cua_api_key:
-            print_colored("❌ API key is required.")
+            print_colored("❌ API key is required for cloud provider.")
             sys.exit(1)

     # Check for provider-specific API keys based on model
     provider_api_keys = {
         "openai/": "OPENAI_API_KEY",
         "anthropic/": "ANTHROPIC_API_KEY",
-        "omniparser+": "OPENAI_API_KEY",
-        "omniparser+": "ANTHROPIC_API_KEY",
     }

     # Find matching provider and check for API key
     for prefix, env_var in provider_api_keys.items():
-        if args.model.startswith(prefix):
+        if prefix in args.model:
             if not os.getenv(env_var):
                 print_colored(f"{env_var} not set.", dim=True)
                 api_key = input(f"Enter your {env_var.replace('_', ' ').title()}: ").strip()
@@ -335,13 +350,25 @@ Examples:
         print_colored("Make sure agent and computer libraries are installed.", Colors.YELLOW)
         sys.exit(1)

+    # Resolve provider -> os_type, provider_type, api key requirement
+    provider_map = {
+        "cloud": ("linux", "cloud", True),
+        "lume": ("macos", "lume", False),
+        "winsandbox": ("windows", "winsandbox", False),
+        "docker": ("linux", "docker", False),
+    }
+    os_type, provider_type, needs_api_key = provider_map[args.provider]
+
+    computer_kwargs = {
+        "os_type": os_type,
+        "provider_type": provider_type,
+        "name": container_name,
+    }
+    if needs_api_key:
+        computer_kwargs["api_key"] = cua_api_key  # type: ignore
+
     # Create computer instance
-    async with Computer(
-        os_type="linux",
-        provider_type="cloud",
-        name=container_name,
-        api_key=cua_api_key
-    ) as computer:
+    async with Computer(**computer_kwargs) as computer:  # type: ignore

         # Create agent
         agent_kwargs = {
@@ -442,8 +469,17 @@ Examples:
         # Done
         sys.exit(0)

+    # Resolve initial prompt from --prompt-file or --prompt
+    initial_prompt = args.prompt or ""
+    if args.prompt_file:
+        try:
+            initial_prompt = args.prompt_file.read_text(encoding="utf-8")
+        except Exception as e:
+            print_colored(f"❌ Failed to read --prompt-file: {e}", Colors.RED, bold=True)
+            sys.exit(1)
+
     # Start chat loop (default interactive mode)
-    await chat_loop(agent, args.model, container_name, args.prompt, args.usage)
+    await chat_loop(agent, args.model, container_name, initial_prompt, args.usage)


agent/loops/__init__.py CHANGED
@@ -13,6 +13,8 @@ from . import glm45v
 from . import opencua
 from . import internvl
 from . import holo
+from . import moondream3
+from . import gemini

 __all__ = [
     "anthropic",
@@ -25,4 +27,6 @@ __all__ = [
     "opencua",
     "internvl",
     "holo",
+    "moondream3",
+    "gemini"
 ]
agent/loops/anthropic.py CHANGED
@@ -33,7 +33,7 @@ from ..responses import (
 MODEL_TOOL_MAPPING = [
     # Claude 4 models
     {
-        "pattern": r"claude-4|claude-opus-4|claude-sonnet-4",
+        "pattern": r"claude-4|claude-opus-4|claude-sonnet-4|claude-haiku-4",
         "tool_version": "computer_20250124",
         "beta_flag": "computer-use-2025-01-24"
     },
agent/loops/gemini.py ADDED
@@ -0,0 +1,391 @@
+"""
+Gemini 2.5 Computer Use agent loop
+
+Maps internal Agent SDK message format to Google's Gemini Computer Use API and back.
+
+Key features:
+- Lazy import of google.genai
+- Configure Computer Use tool with excluded browser-specific predefined functions
+- Optional custom function declarations hook for computer-call specific functions
+- Convert Gemini function_call parts into internal computer_call actions
+"""
+
+from __future__ import annotations
+
+import base64
+import io
+import uuid
+from typing import Any, Dict, List, Optional, Tuple
+
+from PIL import Image
+
+from ..decorators import register_agent
+from ..loops.base import AsyncAgentConfig
+from ..types import AgentCapability
+
+
+def _lazy_import_genai():
+    """Import google.genai lazily to avoid hard dependency unless used."""
+    try:
+        from google import genai  # type: ignore
+        from google.genai import types  # type: ignore
+        return genai, types
+    except Exception as e:  # pragma: no cover
+        raise RuntimeError(
+            "google.genai is required for the Gemini Computer Use loop. Install the Google Gemini SDK."
+        ) from e
+
+
+def _data_url_to_bytes(data_url: str) -> Tuple[bytes, str]:
+    """Convert a data URL to raw bytes and mime type."""
+    if not data_url.startswith("data:"):
+        # Assume it's a base64 png payload
+        try:
+            return base64.b64decode(data_url), "image/png"
+        except Exception:
+            return b"", "application/octet-stream"
+    header, b64 = data_url.split(",", 1)
+    mime = "image/png"
+    if ";" in header:
+        mime = header.split(";")[0].split(":", 1)[1] or "image/png"
+    return base64.b64decode(b64), mime
+
+
+def _bytes_image_size(img_bytes: bytes) -> Tuple[int, int]:
+    try:
+        img = Image.open(io.BytesIO(img_bytes))
+        return img.size
+    except Exception:
+        return (1024, 768)
+
+
+def _find_last_user_text(messages: List[Dict[str, Any]]) -> List[str]:
+    texts: List[str] = []
+    for msg in reversed(messages):
+        if msg.get("type") in (None, "message") and msg.get("role") == "user":
+            content = msg.get("content")
+            if isinstance(content, str):
+                return [content]
+            elif isinstance(content, list):
+                for c in content:
+                    if c.get("type") in ("input_text", "output_text") and c.get("text"):
+                        texts.append(c["text"])  # newest first
+            if texts:
+                return list(reversed(texts))
+    return []
+
+
+def _find_last_screenshot(messages: List[Dict[str, Any]]) -> Optional[bytes]:
+    for msg in reversed(messages):
+        if msg.get("type") == "computer_call_output":
+            out = msg.get("output", {})
+            if isinstance(out, dict) and out.get("type") in ("input_image", "computer_screenshot"):
+                image_url = out.get("image_url", "")
+                if image_url:
+                    data, _ = _data_url_to_bytes(image_url)
+                    return data
+    return None
+
+
+def _denormalize(v: int, size: int) -> int:
+    # Gemini returns 0-999 normalized
+    try:
+        return max(0, min(size - 1, int(round(v / 1000 * size))))
+    except Exception:
+        return 0
+
+
+def _map_gemini_fc_to_computer_call(
+    fc: Dict[str, Any],
+    screen_w: int,
+    screen_h: int,
+) -> Optional[Dict[str, Any]]:
+    name = fc.get("name")
+    args = fc.get("args", {}) or {}
+
+    action: Dict[str, Any] = {}
+    if name == "click_at":
+        x = _denormalize(int(args.get("x", 0)), screen_w)
+        y = _denormalize(int(args.get("y", 0)), screen_h)
+        action = {"type": "click", "x": x, "y": y, "button": "left"}
+    elif name == "type_text_at":
+        x = _denormalize(int(args.get("x", 0)), screen_w)
+        y = _denormalize(int(args.get("y", 0)), screen_h)
+        text = args.get("text", "")
+        if args.get("press_enter") == True:
+            text += "\n"
+        action = {"type": "type", "x": x, "y": y, "text": text}
+    elif name == "hover_at":
+        x = _denormalize(int(args.get("x", 0)), screen_w)
+        y = _denormalize(int(args.get("y", 0)), screen_h)
+        action = {"type": "move", "x": x, "y": y}
+    elif name == "key_combination":
+        keys = str(args.get("keys", ""))
+        action = {"type": "keypress", "keys": keys}
+    elif name == "scroll_document":
+        direction = args.get("direction", "down")
+        magnitude = 800
+        dx, dy = 0, 0
+        if direction == "down":
+            dy = magnitude
+        elif direction == "up":
+            dy = -magnitude
+        elif direction == "right":
+            dx = magnitude
+        elif direction == "left":
+            dx = -magnitude
+        action = {"type": "scroll", "scroll_x": dx, "scroll_y": dy, "x": int(screen_w / 2), "y": int(screen_h / 2)}
+    elif name == "scroll_at":
+        x = _denormalize(int(args.get("x", 500)), screen_w)
+        y = _denormalize(int(args.get("y", 500)), screen_h)
+        direction = args.get("direction", "down")
+        magnitude = int(args.get("magnitude", 800))
+        dx, dy = 0, 0
+        if direction == "down":
+            dy = magnitude
+        elif direction == "up":
+            dy = -magnitude
+        elif direction == "right":
+            dx = magnitude
+        elif direction == "left":
+            dx = -magnitude
+        action = {"type": "scroll", "scroll_x": dx, "scroll_y": dy, "x": x, "y": y}
+    elif name == "drag_and_drop":
+        x = _denormalize(int(args.get("x", 0)), screen_w)
+        y = _denormalize(int(args.get("y", 0)), screen_h)
+        dx = _denormalize(int(args.get("destination_x", x)), screen_w)
+        dy = _denormalize(int(args.get("destination_y", y)), screen_h)
+        action = {"type": "drag", "start_x": x, "start_y": y, "end_x": dx, "end_y": dy, "button": "left"}
+    elif name == "wait_5_seconds":
+        action = {"type": "wait"}
+    else:
+        # Unsupported / excluded browser-specific or custom function; ignore
+        return None
+
+    return {
+        "type": "computer_call",
+        "call_id": uuid.uuid4().hex,
+        "status": "completed",
+        "action": action,
+    }
+
+
+@register_agent(models=r"^gemini-2\.5-computer-use-preview-10-2025$")
+class GeminiComputerUseConfig(AsyncAgentConfig):
+    async def predict_step(
+        self,
+        messages: List[Dict[str, Any]],
+        model: str,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        max_retries: Optional[int] = None,
+        stream: bool = False,
+        computer_handler=None,
+        use_prompt_caching: Optional[bool] = False,
+        _on_api_start=None,
+        _on_api_end=None,
+        _on_usage=None,
+        _on_screenshot=None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        genai, types = _lazy_import_genai()
+
+        client = genai.Client()
+
+        # Build excluded predefined functions for browser-specific behavior
+        excluded = [
+            "open_web_browser",
+            "search",
+            "navigate",
+            "go_forward",
+            "go_back",
+            "scroll_document",
+        ]
+        # Optional custom functions: can be extended by host code via `tools` parameter later if desired
+        CUSTOM_FUNCTION_DECLARATIONS: List[Any] = []
+
+        # Compose tools config
+        generate_content_config = types.GenerateContentConfig(
+            tools=[
+                types.Tool(
+                    computer_use=types.ComputerUse(
+                        environment=types.Environment.ENVIRONMENT_BROWSER,
+                        excluded_predefined_functions=excluded,
+                    )
+                ),
+                # types.Tool(function_declarations=CUSTOM_FUNCTION_DECLARATIONS),  # enable when custom functions needed
+            ]
+        )
+
+        # Prepare contents: last user text + latest screenshot
+        user_texts = _find_last_user_text(messages)
+        screenshot_bytes = _find_last_screenshot(messages)
+
+        parts: List[Any] = []
+        for t in user_texts:
+            parts.append(types.Part(text=t))
+
+        screen_w, screen_h = 1024, 768
+        if screenshot_bytes:
+            screen_w, screen_h = _bytes_image_size(screenshot_bytes)
+            parts.append(types.Part.from_bytes(data=screenshot_bytes, mime_type="image/png"))
+
+        # If we don't have any content, at least pass an empty user part to prompt reasoning
+        if not parts:
+            parts = [types.Part(text="Proceed to the next action.")]
+
+        contents = [types.Content(role="user", parts=parts)]
+
+        api_kwargs = {
+            "model": model,
+            "contents": contents,
+            "config": generate_content_config,
+        }
+
+        if _on_api_start:
+            await _on_api_start({
+                "model": api_kwargs["model"],
+                # "contents": api_kwargs["contents"],  # Disabled for now
+                "config": api_kwargs["config"],
+            })
+
+        response = client.models.generate_content(**api_kwargs)
+
+        if _on_api_end:
+            await _on_api_end({
+                "model": api_kwargs["model"],
+                # "contents": api_kwargs["contents"],  # Disabled for now
+                "config": api_kwargs["config"],
+            }, response)
+
+        # Usage (Gemini SDK may not always provide token usage; populate when available)
+        usage: Dict[str, Any] = {}
+        try:
+            # Some SDKs expose response.usage; if available, copy
+            if getattr(response, "usage_metadata", None):
+                md = response.usage_metadata
+                usage = {
+                    "prompt_tokens": getattr(md, "prompt_token_count", None) or 0,
+                    "completion_tokens": getattr(md, "candidates_token_count", None) or 0,
+                    "total_tokens": getattr(md, "total_token_count", None) or 0,
+                }
+        except Exception:
+            pass
+
+        if _on_usage and usage:
+            await _on_usage(usage)
+
+        # Parse output into internal items
+        output_items: List[Dict[str, Any]] = []
+
+        candidate = response.candidates[0]
+        # Text parts from the model (assistant message)
+        text_parts: List[str] = []
+        function_calls: List[Dict[str, Any]] = []
+        for p in candidate.content.parts:
+            if getattr(p, "text", None):
+                text_parts.append(p.text)
+            if getattr(p, "function_call", None):
+                # p.function_call has name and args
+                fc = {
+                    "name": getattr(p.function_call, "name", None),
+                    "args": dict(getattr(p.function_call, "args", {}) or {}),
+                }
+                function_calls.append(fc)
+
+        if text_parts:
+            output_items.append(
+                {
+                    "type": "message",
+                    "role": "assistant",
+                    "content": [{"type": "output_text", "text": "\n".join(text_parts)}],
+                }
+            )
+
+        # Map function calls to internal computer_call actions
+        for fc in function_calls:
+            item = _map_gemini_fc_to_computer_call(fc, screen_w, screen_h)
+            if item is not None:
+                output_items.append(item)
+
+        return {"output": output_items, "usage": usage}
+
+    async def predict_click(
+        self,
+        model: str,
+        image_b64: str,
+        instruction: str,
+        **kwargs,
+    ) -> Optional[Tuple[float, float]]:
+        """Ask Gemini CUA to output a single click action for the given instruction.
+
+        Excludes all predefined tools except `click_at` and sends the screenshot.
+        Returns pixel (x, y) if a click is proposed, else None.
+        """
+        genai, types = _lazy_import_genai()
+
+        client = genai.Client()
+
+        # Exclude all but click_at
+        exclude_all_but_click = [
+            "open_web_browser",
+            "wait_5_seconds",
+            "go_back",
+            "go_forward",
+            "search",
+            "navigate",
+            "hover_at",
+            "type_text_at",
+            "key_combination",
+            "scroll_document",
+            "scroll_at",
+            "drag_and_drop",
+        ]
+
+        config = types.GenerateContentConfig(
+            tools=[
+                types.Tool(
+                    computer_use=types.ComputerUse(
+                        environment=types.Environment.ENVIRONMENT_BROWSER,
+                        excluded_predefined_functions=exclude_all_but_click,
+                    )
+                )
+            ]
+        )
+
+        # Prepare prompt parts
+        try:
+            img_bytes = base64.b64decode(image_b64)
+        except Exception:
+            img_bytes = b""
+
+        w, h = _bytes_image_size(img_bytes) if img_bytes else (1024, 768)
+
+        parts: List[Any] = [types.Part(text=f"Click {instruction}.")]
+        if img_bytes:
+            parts.append(types.Part.from_bytes(data=img_bytes, mime_type="image/png"))
+
+        contents = [types.Content(role="user", parts=parts)]
+
+        response = client.models.generate_content(
+            model=model,
+            contents=contents,
+            config=config,
+        )
+
+        # Parse first click_at
+        try:
+            candidate = response.candidates[0]
+            for p in candidate.content.parts:
+                fc = getattr(p, "function_call", None)
+                if fc and getattr(fc, "name", None) == "click_at":
+                    args = dict(getattr(fc, "args", {}) or {})
+                    x = _denormalize(int(args.get("x", 0)), w)
+                    y = _denormalize(int(args.get("y", 0)), h)
+                    return float(x), float(y)
+        except Exception:
+            return None
+
+        return None
+
+    def get_capabilities(self) -> List[AgentCapability]:
+        return ["click", "step"]
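Gemini's Computer Use function calls report coordinates on a 0-999 normalized grid, and `_denormalize` rescales them to the actual screenshot size before they are emitted as `computer_call` actions. A standalone worked example using the same helper (the screen dimensions below are illustrative):

```python
def _denormalize(v: int, size: int) -> int:
    # Gemini returns 0-999 normalized coordinates; clamp into the screen bounds.
    return max(0, min(size - 1, int(round(v / 1000 * size))))

# A click_at with x=500, y=250 on a 1280x800 screenshot maps to pixel (640, 200):
print(_denormalize(500, 1280), _denormalize(250, 800))  # 640 200
```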
agent/loops/moondream3.py ADDED
@@ -0,0 +1,464 @@
+"""
+Moondream3+ composed-grounded agent loop implementation.
+Grounding is handled by a local Moondream3 preview model via Transformers.
+Thinking is delegated to the trailing LLM in the composed model string: "moondream3+<thinking_model>".
+
+Differences from composed_grounded:
+- Provides a singleton Moondream3 client outside the class.
+- predict_click uses model.point(image, instruction, settings={"max_objects": 1}) and returns pixel coordinates.
+- If the last image was a screenshot (or we take one), run model.detect(image, "all form ui") to get bboxes, then
+  run model.caption on each cropped bbox to label it. Overlay labels on the screenshot and emit via _on_screenshot.
+- Add a user message listing all detected form UI names so the thinker can reference them.
+- If the thinking model doesn't support vision, filter out image content before calling litellm.
+"""
+
+from __future__ import annotations
+
+import uuid
+import base64
+import io
+from typing import Dict, List, Any, Optional, Tuple, Any
+
+from PIL import Image, ImageDraw, ImageFont
+import torch
+from transformers import AutoModelForCausalLM
+import litellm
+
+from ..decorators import register_agent
+from ..types import AgentCapability
+from ..loops.base import AsyncAgentConfig
+from ..responses import (
+    convert_computer_calls_xy2desc,
+    convert_responses_items_to_completion_messages,
+    convert_completion_messages_to_responses_items,
+    convert_computer_calls_desc2xy,
+    get_all_element_descriptions,
+)
+
+_MOONDREAM_SINGLETON = None
+
+def get_moondream_model() -> Any:
+    """Get a singleton instance of the Moondream3 preview model."""
+    global _MOONDREAM_SINGLETON
+    if _MOONDREAM_SINGLETON is None:
+        _MOONDREAM_SINGLETON = AutoModelForCausalLM.from_pretrained(
+            "moondream/moondream3-preview",
+            trust_remote_code=True,
+            torch_dtype=torch.bfloat16,
+            device_map="cuda",
+        )
+    return _MOONDREAM_SINGLETON
+
+
+def _decode_image_b64(image_b64: str) -> Image.Image:
+    data = base64.b64decode(image_b64)
+    return Image.open(io.BytesIO(data)).convert("RGB")
+
+
+def _image_to_b64(img: Image.Image) -> str:
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return base64.b64encode(buf.getvalue()).decode("utf-8")
+
+
+def _supports_vision(model: str) -> bool:
+    """Heuristic vision support detection for thinking model."""
+    m = model.lower()
+    vision_markers = [
+        "gpt-4o",
+        "gpt-4.1",
+        "o1",
+        "o3",
+        "claude-3",
+        "claude-3.5",
+        "sonnet",
+        "haiku",
+        "opus",
+        "gemini-1.5",
+        "llava",
+    ]
+    return any(v in m for v in vision_markers)
+
+
+def _filter_images_from_completion_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    filtered: List[Dict[str, Any]] = []
+    for msg in messages:
+        msg_copy = {**msg}
+        content = msg_copy.get("content")
+        if isinstance(content, list):
+            msg_copy["content"] = [c for c in content if c.get("type") != "image_url"]
+        filtered.append(msg_copy)
+    return filtered
+
+def _annotate_detect_and_label_ui(base_img: Image.Image, model_md) -> Tuple[str, List[str]]:
+    """Detect UI elements with Moondream, caption each, draw labels with backgrounds.
+
+    Args:
+        base_img: PIL image of the screenshot (RGB or RGBA). Will be copied/converted internally.
+        model_md: Moondream model instance with .detect() and .query() methods.
+
+    Returns:
+        A tuple of (annotated_image_base64_png, detected_names)
+    """
+    # Ensure RGBA for semi-transparent fills
+    if base_img.mode != "RGBA":
+        base_img = base_img.convert("RGBA")
+    W, H = base_img.width, base_img.height
+
+    # Detect objects
+    try:
+        detect_result = model_md.detect(base_img, "all ui elements")
+        objects = detect_result.get("objects", []) if isinstance(detect_result, dict) else []
+    except Exception:
+        objects = []
+
+    draw = ImageDraw.Draw(base_img)
+    try:
+        font = ImageFont.load_default()
+    except Exception:
+        font = None
+
+    detected_names: List[str] = []
+
+    for i, obj in enumerate(objects):
+        try:
+            # Clamp normalized coords and crop
+            x_min = max(0.0, min(1.0, float(obj.get("x_min", 0.0))))
+            y_min = max(0.0, min(1.0, float(obj.get("y_min", 0.0))))
+            x_max = max(0.0, min(1.0, float(obj.get("x_max", 0.0))))
+            y_max = max(0.0, min(1.0, float(obj.get("y_max", 0.0))))
+            left, top, right, bottom = int(x_min * W), int(y_min * H), int(x_max * W), int(y_max * H)
+            left, top = max(0, left), max(0, top)
+            right, bottom = min(W - 1, right), min(H - 1, bottom)
+            crop = base_img.crop((left, top, right, bottom))
+
+            # Prompted short caption
+            try:
+                result = model_md.query(crop, "Caption this UI element in few words.")
+                caption_text = (result or {}).get("answer", "")
+            except Exception:
+                caption_text = ""
+
+            name = (caption_text or "").strip() or f"element_{i+1}"
+            detected_names.append(name)
+
+            # Draw bbox
+            draw.rectangle([left, top, right, bottom], outline=(255, 215, 0, 255), width=2)
+
+            # Label background with padding and rounded corners
+            label = f"{i+1}. {name}"
+            padding = 3
+            if font:
+                text_bbox = draw.textbbox((0, 0), label, font=font)
+            else:
+                text_bbox = draw.textbbox((0, 0), label)
+            text_w = text_bbox[2] - text_bbox[0]
+            text_h = text_bbox[3] - text_bbox[1]
+
+            tx = left + 3
+            ty = top - (text_h + 2 * padding + 4)
+            if ty < 0:
+                ty = top + 3
+
+            bg_left = tx - padding
+            bg_top = ty - padding
+            bg_right = tx + text_w + padding
+            bg_bottom = ty + text_h + padding
+            try:
+                draw.rounded_rectangle(
+                    [bg_left, bg_top, bg_right, bg_bottom],
+                    radius=4,
+                    fill=(0, 0, 0, 160),
+                    outline=(255, 215, 0, 200),
+                    width=1,
+                )
+            except Exception:
+                draw.rectangle(
+                    [bg_left, bg_top, bg_right, bg_bottom],
+                    fill=(0, 0, 0, 160),
+                    outline=(255, 215, 0, 200),
+                    width=1,
+                )
+
+            text_fill = (255, 255, 255, 255)
+            if font:
+                draw.text((tx, ty), label, fill=text_fill, font=font)
+            else:
+                draw.text((tx, ty), label, fill=text_fill)
+        except Exception:
+            continue
+
+    # Encode PNG base64
+    annotated = base_img
+    if annotated.mode not in ("RGBA", "RGB"):
+        annotated = annotated.convert("RGBA")
+    annotated_b64 = _image_to_b64(annotated)
+    return annotated_b64, detected_names
+
+GROUNDED_COMPUTER_TOOL_SCHEMA = {
+    "type": "function",
+    "function": {
+        "name": "computer",
+        "description": (
+            "Control a computer by taking screenshots and interacting with UI elements. "
+            "The screenshot action will include a list of detected form UI element names when available. "
+            "Use element descriptions to locate and interact with UI elements on the screen."
+        ),
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "action": {
+                    "type": "string",
+                    "enum": [
+                        "screenshot",
+                        "click",
+                        "double_click",
+                        "drag",
+                        "type",
+                        "keypress",
+                        "scroll",
+                        "move",
+                        "wait",
+                        "get_current_url",
+                        "get_dimensions",
+                        "get_environment",
+                    ],
+                    "description": "The action to perform (required for all actions)",
+                },
+                "element_description": {
+                    "type": "string",
+                    "description": "Description of the element to interact with (required for click/double_click/move/scroll)",
+                },
+                "start_element_description": {
+                    "type": "string",
+                    "description": "Description of the element to start dragging from (required for drag)",
+                },
+                "end_element_description": {
+                    "type": "string",
+                    "description": "Description of the element to drag to (required for drag)",
+                },
+                "text": {
+                    "type": "string",
+                    "description": "The text to type (required for type)",
+                },
+                "keys": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "Key(s) to press (required for keypress)",
+                },
+                "button": {
+                    "type": "string",
+                    "enum": ["left", "right", "wheel", "back", "forward"],
+                    "description": "The mouse button to use for click/double_click",
+                },
+                "scroll_x": {
+                    "type": "integer",
+                    "description": "Horizontal scroll amount (required for scroll)",
+                },
+                "scroll_y": {
+                    "type": "integer",
+                    "description": "Vertical scroll amount (required for scroll)",
+                },
+            },
+            "required": ["action"],
+        },
+    },
+}
+
+@register_agent(r"moondream3\+.*", priority=2)
+class Moondream3PlusConfig(AsyncAgentConfig):
+    def __init__(self):
+        self.desc2xy: Dict[str, Tuple[float, float]] = {}
+
+    async def predict_step(
+        self,
+        messages: List[Dict[str, Any]],
+        model: str,
+        tools: Optional[List[Dict[str, Any]]] = None,
+        max_retries: Optional[int] = None,
+        stream: bool = False,
+        computer_handler=None,
+        use_prompt_caching: Optional[bool] = False,
+        _on_api_start=None,
+        _on_api_end=None,
+        _on_usage=None,
+        _on_screenshot=None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        # Parse composed model: moondream3+<thinking_model>
+        if "+" not in model:
+            raise ValueError(f"Composed model must be 'moondream3+<thinking_model>', got: {model}")
+        _, thinking_model = model.split("+", 1)
+
+        pre_output_items: List[Dict[str, Any]] = []
+
+        # Acquire last screenshot; if missing, take one
+        last_image_b64: Optional[str] = None
+        for message in reversed(messages):
+            if (
+                isinstance(message, dict)
+                and message.get("type") == "computer_call_output"
+                and isinstance(message.get("output"), dict)
+                and message["output"].get("type") == "input_image"
+            ):
+                image_url = message["output"].get("image_url", "")
+                if image_url.startswith("data:image/png;base64,"):
+                    last_image_b64 = image_url.split(",", 1)[1]
+                    break
+
+        if last_image_b64 is None and computer_handler is not None:
+            # Take a screenshot
+            screenshot_b64 = await computer_handler.screenshot()  # type: ignore
+            if screenshot_b64:
+                call_id = uuid.uuid4().hex
+                pre_output_items += [
+                    {
+                        "type": "message",
+                        "role": "assistant",
+                        "content": [
+                            {"type": "output_text", "text": "Taking a screenshot to analyze the current screen."}
+                        ],
+                    },
+                    {"type": "computer_call", "call_id": call_id, "status": "completed", "action": {"type": "screenshot"}},
+                    {
+                        "type": "computer_call_output",
+                        "call_id": call_id,
+                        "output": {"type": "input_image", "image_url": f"data:image/png;base64,{screenshot_b64}"},
+                    },
+                ]
+                last_image_b64 = screenshot_b64
+                if _on_screenshot:
+                    await _on_screenshot(screenshot_b64)
+
+        # If we have a last screenshot, run Moondream detection and labeling
+        detected_names: List[str] = []
+        if last_image_b64 is not None:
+            base_img = _decode_image_b64(last_image_b64)
+            model_md = get_moondream_model()
+            annotated_b64, detected_names = _annotate_detect_and_label_ui(base_img, model_md)
+            if _on_screenshot:
+                await _on_screenshot(annotated_b64, "annotated_form_ui")
+
+            # Also push a user message listing all detected names
+            if detected_names:
+                names_text = "\n".join(f"- {n}" for n in detected_names)
+                pre_output_items.append(
+                    {
+                        "type": "message",
+                        "role": "user",
+                        "content": [
+                            {"type": "input_text", "text": "Detected form UI elements on screen:"},
+                            {"type": "input_text", "text": names_text},
+                            {"type": "input_text", "text": "Please continue with the next action needed to perform your task."}
+                        ],
+                    }
+                )
+
+        tool_schemas = []
+        for schema in (tools or []):
+            if schema.get("type") == "computer":
+                tool_schemas.append(GROUNDED_COMPUTER_TOOL_SCHEMA)
+            else:
+                tool_schemas.append(schema)
+
+        # Step 1: Convert computer calls from xy to descriptions
+        input_messages = messages + pre_output_items
+        messages_with_descriptions = convert_computer_calls_xy2desc(input_messages, self.desc2xy)
+
+        # Step 2: Convert responses items to completion messages
+        completion_messages = convert_responses_items_to_completion_messages(
+            messages_with_descriptions,
+            allow_images_in_tool_results=False,
+        )
+
+        # Optionally filter images if model lacks vision
+        if not _supports_vision(thinking_model):
+            completion_messages = _filter_images_from_completion_messages(completion_messages)
+
+        # Step 3: Call thinking model with litellm.acompletion
+        api_kwargs = {
+            "model": thinking_model,
+            "messages": completion_messages,
+            "tools": tool_schemas,
+            "max_retries": max_retries,
+            "stream": stream,
+            **kwargs,
+        }
+        if use_prompt_caching:
+            api_kwargs["use_prompt_caching"] = use_prompt_caching
+
+        if _on_api_start:
+            await _on_api_start(api_kwargs)
+
+        response = await litellm.acompletion(**api_kwargs)
+
+        if _on_api_end:
+            await _on_api_end(api_kwargs, response)
+
+        usage = {
+            **response.usage.model_dump(),  # type: ignore
+            "response_cost": response._hidden_params.get("response_cost", 0.0),
+        }
+        if _on_usage:
+            await _on_usage(usage)
+
+        # Step 4: Convert completion messages back to responses items format
+        response_dict = response.model_dump()  # type: ignore
+        choice_messages = [choice["message"] for choice in response_dict["choices"]]
+        thinking_output_items: List[Dict[str, Any]] = []
+        for choice_message in choice_messages:
+            thinking_output_items.extend(
+                convert_completion_messages_to_responses_items([choice_message])
+            )
+
+        # Step 5: Use Moondream to get coordinates for each description
+        element_descriptions = get_all_element_descriptions(thinking_output_items)
+        if element_descriptions and last_image_b64:
+            for desc in element_descriptions:
+                for _ in range(3):  # try 3 times
+                    coords = await self.predict_click(
+                        model=model,
+                        image_b64=last_image_b64,
+                        instruction=desc,
+                    )
+                    if coords:
+                        self.desc2xy[desc] = coords
+                        break
+
+        # Step 6: Convert computer calls from descriptions back to xy coordinates
+        final_output_items = convert_computer_calls_desc2xy(thinking_output_items, self.desc2xy)
+
+        # Step 7: Return output and usage
+        return {"output": pre_output_items + final_output_items, "usage": usage}
+
+    async def predict_click(
+        self,
+        model: str,
+        image_b64: str,
+        instruction: str,
+        **kwargs,
+    ) -> Optional[Tuple[float, float]]:
+        """Predict click coordinates using Moondream3's point API.
+
+        Returns pixel coordinates (x, y) as floats.
+        """
+        img = _decode_image_b64(image_b64)
+        W, H = img.width, img.height
+        model_md = get_moondream_model()
+        try:
+            result = model_md.point(img, instruction, settings={"max_objects": 1})
+        except Exception:
+            return None
+
+        try:
+            pt = (result or {}).get("points", [])[0]
+            x_norm = float(pt.get("x", 0.0))
+            y_norm = float(pt.get("y", 0.0))
+            x_px = max(0.0, min(float(W - 1), x_norm * W))
+            y_px = max(0.0, min(float(H - 1), y_norm * H))
+            return (x_px, y_px)
+        except Exception:
+            return None
+
+    def get_capabilities(self) -> List[AgentCapability]:
+        return ["click", "step"]
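Unlike Gemini's 0-999 grid, Moondream3's `point` API returns coordinates normalized to the 0-1 range, which `predict_click` scales by the screenshot's pixel dimensions and clamps to the image bounds. A standalone sketch of that conversion (the point value is made up for illustration):

```python
def point_to_pixels(pt: dict, width: int, height: int) -> tuple[float, float]:
    # Moondream3 points are normalized to [0, 1]; scale and clamp into the image.
    x_px = max(0.0, min(float(width - 1), float(pt.get("x", 0.0)) * width))
    y_px = max(0.0, min(float(height - 1), float(pt.get("y", 0.0)) * height))
    return (x_px, y_px)

print(point_to_pixels({"x": 0.5, "y": 0.25}, 1920, 1080))  # (960.0, 270.0)
```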
agent/loops/openai.py CHANGED
@@ -53,8 +53,7 @@ async def _prepare_tools_for_openai(tool_schemas: List[Dict[str, Any]]) -> Tools

     return openai_tools

-
-@register_agent(models=r".*computer-use-preview.*")
+@register_agent(models=r".*(^|/)computer-use-preview")
 class OpenAIComputerUseConfig:
     """
     OpenAI computer-use-preview agent configuration using liteLLM responses.
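The tightened registration pattern matters now that a dedicated Gemini Computer Use loop exists: the old wildcard would also have claimed `gemini-2.5-computer-use-preview-10-2025`. A quick comparison of the two patterns, using a plain `re.search` (the registry's exact matching call is not shown in this diff, so treat this as an approximation):

```python
import re

OLD = r".*computer-use-preview.*"
NEW = r".*(^|/)computer-use-preview"

for model in (
    "openai/computer-use-preview",
    "computer-use-preview",
    "gemini-2.5-computer-use-preview-10-2025",
):
    print(model, bool(re.search(OLD, model)), bool(re.search(NEW, model)))
# openai/computer-use-preview: True True
# computer-use-preview: True True
# gemini-2.5-computer-use-preview-10-2025: True False  (no longer claimed by the OpenAI loop)
```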
cua_agent-0.4.32.dist-info/METADATA → cua_agent-0.4.33.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cua-agent
-Version: 0.4.32
+Version: 0.4.33
 Summary: CUA (Computer Use) Agent for AI-driven computer interaction
 Author-Email: TryCua <gh@trycua.com>
 Requires-Python: >=3.12
@@ -49,7 +49,9 @@ Requires-Dist: python-dotenv>=1.0.1; extra == "ui"
 Provides-Extra: cli
 Requires-Dist: yaspin>=3.1.0; extra == "cli"
 Provides-Extra: hud
-Requires-Dist: hud-python==0.4.26; extra == "hud"
+Requires-Dist: hud-python==0.4.52; extra == "hud"
+Provides-Extra: gemini
+Requires-Dist: google-genai>=1.41.0; extra == "gemini"
 Provides-Extra: all
 Requires-Dist: mlx-vlm>=0.1.27; sys_platform == "darwin" and extra == "all"
 Requires-Dist: accelerate; extra == "all"
@@ -62,7 +64,8 @@ Requires-Dist: blobfile>=3.0.0; extra == "all"
 Requires-Dist: gradio>=5.23.3; extra == "all"
 Requires-Dist: python-dotenv>=1.0.1; extra == "all"
 Requires-Dist: yaspin>=3.1.0; extra == "all"
-Requires-Dist: hud-python==0.4.26; extra == "all"
+Requires-Dist: hud-python==0.4.52; extra == "all"
+Requires-Dist: google-genai>=1.41.0; extra == "all"
 Description-Content-Type: text/markdown

 <div align="center">
cua_agent-0.4.32.dist-info/RECORD → cua_agent-0.4.33.dist-info/RECORD
@@ -20,7 +20,7 @@ agent/callbacks/pii_anonymization.py,sha256=NEkUTUjQBi82nqus7kT-1E4RaeQ2hQrY7YCn
 agent/callbacks/prompt_instructions.py,sha256=RUqsJhiNiXqaOM_P2AfyBinWUDdgDku46BExLMUJHn4,1517
 agent/callbacks/telemetry.py,sha256=RbUDhE41mTi8g9hNre0EpltK_NUZkLj8buJLWBzs0Ek,7363
 agent/callbacks/trajectory_saver.py,sha256=-XNgiKU6T8Qw_i2AZMQuw0HuUe6MHkU89rjn_T386Rw,16128
-agent/cli.py,sha256=HddU18IvvKdyvQu0ru21nAcNc6k7toYuyjgORIzX_qo,16110
+agent/cli.py,sha256=QsHx1w0THEdaq7YOXPZ_mprWtO_n64sgPAMFUOIJ3og,17603
 agent/computers/__init__.py,sha256=39ISJsaREaQIZckpzxSuLhuR763wUU3TxUux78EKjAg,1477
 agent/computers/base.py,sha256=hZntX4vgc1ahD3EnFeb9lUjtBmgka1vb27hndPl9tKQ,2187
 agent/computers/cua.py,sha256=xp2A34kT2C1NKqSRo2GB6766gkraM-UtpFjRv8LUTSc,4889
@@ -33,17 +33,19 @@ agent/human_tool/ui.py,sha256=wu9eZorhxCkyPTlBSZjYaVzutoHMlucAz8UGNpAT4bM,30644
 agent/integrations/hud/__init__.py,sha256=xir5BVAlG2cFc7rHSx_Ea_2b1kp2TtFuKJk07jny7qY,5969
 agent/integrations/hud/agent.py,sha256=GBikd9MhjDNKMiMG8J7PE3OMSmvmC_JLZ1p5xr2cZoc,14006
 agent/integrations/hud/proxy.py,sha256=8HUoh7uZ8Z3vkhPXK0dskgePGsP8oCqyYij0mE_E7X8,10902
-agent/loops/__init__.py,sha256=c6stEkT15smK8ZIf9j2kyOko84uz1YIvHXx0Mbe2wq8,472
-agent/loops/anthropic.py,sha256=ODrMvmTkyzIOLjGq6HbKzzgBu19TE_Xlsi--7vc5T6o,70196
+agent/loops/__init__.py,sha256=Nefn042YQMMaC6tTHvaQ17m9hNEVSPG4Xh2rpujfSos,549
+agent/loops/anthropic.py,sha256=hGqRcUYaajnOTIlEGCpLeHqUoIzS293M8sqFOC_NTUY,70211
 agent/loops/base.py,sha256=LK7kSTnc2CB88LI7qr2VP7LMq0eS5r2bSEnrxO6IN5U,2345
 agent/loops/composed_grounded.py,sha256=Um_8G0v5DEzF_A9wWIGp_IDPDMvv4IXDTFpEDH92Vto,12367
+agent/loops/gemini.py,sha256=m_bGdxujWBmzYpEnZg84OXDCyh06MYNiDrO3beVstCQ,13718
 agent/loops/glm45v.py,sha256=EKAoh-PWkcCdzBVebjXbdqoDNkXgcmJpIqmTNPiZ8TM,35127
 agent/loops/gta1.py,sha256=uGIcUH5ChzO75eGvoQxuKMBWjX-1J9-xmC7vPetobjU,5831
 agent/loops/holo.py,sha256=peQ0xx4XQDBQ3g2XKRLCgyrU_2PkXe3RaysNBqFyS90,7481
 agent/loops/internvl.py,sha256=iQs6DSoP9JOyUxRAz_HPuv4Hi2Sbv-Jc3022W-oPX5Y,6596
 agent/loops/model_types.csv,sha256=GmFn4x80yoUpQZuQ-GXtJkPVlOLYWZ5u_5A73HRyeNE,112
+agent/loops/moondream3.py,sha256=_h4k6Z7VyBU8bw-Av0RiOt-GanCJqU46ZLF03eyRgug,17828
 agent/loops/omniparser.py,sha256=-db8JUL2Orn47ERIaLbuNShAXn4LeIgYzRWphn_9Dg4,15071
-agent/loops/openai.py,sha256=3UEXdecqGkyknhTgp6zxr_cNCVg5vM-61I6SKMNl6m8,8692
+agent/loops/openai.py,sha256=2typWRS7j2sVm52AzwwigPniCrdw9IVvllypjXN2mKI,8694
 agent/loops/opencua.py,sha256=Chb4UASHDrdcX_fO__Gw2e9ay4Hl6Vq38K5x-IoHyuo,4432
 agent/loops/uitars.py,sha256=mVPt4V-HabX7ZiQnM55BVQt73CuZUjmUAsbm4Tf6TXk,32351
 agent/proxy/examples.py,sha256=GYFJ-sfDsSNZr9n_qpvDx_0rShqoKE5JW0ibbljWfoo,6192
@@ -55,7 +57,7 @@ agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
 agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
 agent/ui/gradio/app.py,sha256=Ol97YEbwREZZQ9_PMjVHlfOcu9BGsawxgAGAm79hT80,9117
 agent/ui/gradio/ui_components.py,sha256=dJUvKDmc1oSejtoR_gU_oWWYwxaOOQyPloSYRGMrUCQ,36068
-cua_agent-0.4.32.dist-info/METADATA,sha256=9DM4yfZ8hH6-JeNvke6WOgzZLEF0i3A8cDeb3aTGpyk,6340
-cua_agent-0.4.32.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
-cua_agent-0.4.32.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-cua_agent-0.4.32.dist-info/RECORD,,
+cua_agent-0.4.33.dist-info/METADATA,sha256=LIl2V_RBTn8mrq69gBc_7fpWfQnxBHEnkInmnToJ8Qw,6470
+cua_agent-0.4.33.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
+cua_agent-0.4.33.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+cua_agent-0.4.33.dist-info/RECORD,,