cua-agent: version 0.4.15 (py3-none-any.whl) → version 0.4.17 (py3-none-any.whl)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

agent/agent.py CHANGED
@@ -7,7 +7,13 @@ from typing import Dict, List, Any, Optional, AsyncGenerator, Union, cast, Calla
7
7
 
8
8
  from litellm.responses.utils import Usage
9
9
 
10
- from .types import Messages, AgentCapability
10
+ from .types import (
11
+ Messages,
12
+ AgentCapability,
13
+ ToolError,
14
+ IllegalArgumentError
15
+ )
16
+ from .responses import make_tool_error_item, replace_failed_computer_calls_with_function_calls
11
17
  from .decorators import find_agent_config
12
18
  import json
13
19
  import litellm
@@ -30,6 +36,15 @@ from .computers import (
30
36
  make_computer_handler
31
37
  )
32
38
 
39
+ def assert_callable_with(f, *args, **kwargs):
40
+ """Check if function can be called with given arguments."""
41
+ try:
42
+ inspect.signature(f).bind(*args, **kwargs)
43
+ return True
44
+ except TypeError as e:
45
+ sig = inspect.signature(f)
46
+ raise IllegalArgumentError(f"Expected {sig}, got args={args} kwargs={kwargs}") from e
47
+
33
48
  def get_json(obj: Any, max_depth: int = 10) -> Any:
34
49
  def custom_serializer(o: Any, depth: int = 0, seen: Optional[Set[int]] = None) -> Any:
35
50
  if seen is None:
@@ -405,7 +420,8 @@ class ComputerAgent:
405
420
 
406
421
  async def _handle_item(self, item: Any, computer: Optional[AsyncComputerHandler] = None, ignore_call_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
407
422
  """Handle each item; may cause a computer action + screenshot."""
408
- if ignore_call_ids and item.get("call_id") and item.get("call_id") in ignore_call_ids:
423
+ call_id = item.get("call_id")
424
+ if ignore_call_ids and call_id and call_id in ignore_call_ids:
409
425
  return []
410
426
 
411
427
  item_type = item.get("type", None)
@@ -419,96 +435,102 @@ class ComputerAgent:
419
435
  # print(content_item.get("text"))
420
436
  return []
421
437
 
422
- if item_type == "computer_call":
423
- await self._on_computer_call_start(item)
424
- if not computer:
425
- raise ValueError("Computer handler is required for computer calls")
426
-
427
- # Perform computer actions
428
- action = item.get("action")
429
- action_type = action.get("type")
430
- if action_type is None:
431
- print(f"Action type cannot be `None`: action={action}, action_type={action_type}")
432
- return []
433
-
434
- # Extract action arguments (all fields except 'type')
435
- action_args = {k: v for k, v in action.items() if k != "type"}
436
-
437
- # print(f"{action_type}({action_args})")
438
-
439
- # Execute the computer action
440
- computer_method = getattr(computer, action_type, None)
441
- if computer_method:
442
- await computer_method(**action_args)
443
- else:
444
- print(f"Unknown computer action: {action_type}")
445
- return []
446
-
447
- # Take screenshot after action
448
- if self.screenshot_delay and self.screenshot_delay > 0:
449
- await asyncio.sleep(self.screenshot_delay)
450
- screenshot_base64 = await computer.screenshot()
451
- await self._on_screenshot(screenshot_base64, "screenshot_after")
438
+ try:
439
+ if item_type == "computer_call":
440
+ await self._on_computer_call_start(item)
441
+ if not computer:
442
+ raise ValueError("Computer handler is required for computer calls")
443
+
444
+ # Perform computer actions
445
+ action = item.get("action")
446
+ action_type = action.get("type")
447
+ if action_type is None:
448
+ print(f"Action type cannot be `None`: action={action}, action_type={action_type}")
449
+ return []
450
+
451
+ # Extract action arguments (all fields except 'type')
452
+ action_args = {k: v for k, v in action.items() if k != "type"}
453
+
454
+ # print(f"{action_type}({action_args})")
455
+
456
+ # Execute the computer action
457
+ computer_method = getattr(computer, action_type, None)
458
+ if computer_method:
459
+ assert_callable_with(computer_method, **action_args)
460
+ await computer_method(**action_args)
461
+ else:
462
+ raise ToolError(f"Unknown computer action: {action_type}")
463
+
464
+ # Take screenshot after action
465
+ if self.screenshot_delay and self.screenshot_delay > 0:
466
+ await asyncio.sleep(self.screenshot_delay)
467
+ screenshot_base64 = await computer.screenshot()
468
+ await self._on_screenshot(screenshot_base64, "screenshot_after")
469
+
470
+ # Handle safety checks
471
+ pending_checks = item.get("pending_safety_checks", [])
472
+ acknowledged_checks = []
473
+ for check in pending_checks:
474
+ check_message = check.get("message", str(check))
475
+ acknowledged_checks.append(check)
476
+ # TODO: implement a callback for safety checks
477
+ # if acknowledge_safety_check_callback(check_message, allow_always=True):
478
+ # acknowledged_checks.append(check)
479
+ # else:
480
+ # raise ValueError(f"Safety check failed: {check_message}")
481
+
482
+ # Create call output
483
+ call_output = {
484
+ "type": "computer_call_output",
485
+ "call_id": item.get("call_id"),
486
+ "acknowledged_safety_checks": acknowledged_checks,
487
+ "output": {
488
+ "type": "input_image",
489
+ "image_url": f"data:image/png;base64,{screenshot_base64}",
490
+ },
491
+ }
492
+
493
+ # # Additional URL safety checks for browser environments
494
+ # if await computer.get_environment() == "browser":
495
+ # current_url = await computer.get_current_url()
496
+ # call_output["output"]["current_url"] = current_url
497
+ # # TODO: implement a callback for URL safety checks
498
+ # # check_blocklisted_url(current_url)
499
+
500
+ result = [call_output]
501
+ await self._on_computer_call_end(item, result)
502
+ return result
452
503
 
453
- # Handle safety checks
454
- pending_checks = item.get("pending_safety_checks", [])
455
- acknowledged_checks = []
456
- for check in pending_checks:
457
- check_message = check.get("message", str(check))
458
- acknowledged_checks.append(check)
459
- # TODO: implement a callback for safety checks
460
- # if acknowledge_safety_check_callback(check_message, allow_always=True):
461
- # acknowledged_checks.append(check)
462
- # else:
463
- # raise ValueError(f"Safety check failed: {check_message}")
504
+ if item_type == "function_call":
505
+ await self._on_function_call_start(item)
506
+ # Perform function call
507
+ function = self._get_tool(item.get("name"))
508
+ if not function:
509
+ raise ToolError(f"Function {item.get("name")} not found")
464
510
 
465
- # Create call output
466
- call_output = {
467
- "type": "computer_call_output",
468
- "call_id": item.get("call_id"),
469
- "acknowledged_safety_checks": acknowledged_checks,
470
- "output": {
471
- "type": "input_image",
472
- "image_url": f"data:image/png;base64,{screenshot_base64}",
473
- },
474
- }
511
+ args = json.loads(item.get("arguments"))
512
+
513
+ # Validate arguments before execution
514
+ assert_callable_with(function, **args)
515
+
516
+ # Execute function - use asyncio.to_thread for non-async functions
517
+ if inspect.iscoroutinefunction(function):
518
+ result = await function(**args)
519
+ else:
520
+ result = await asyncio.to_thread(function, **args)
475
521
 
476
- # # Additional URL safety checks for browser environments
477
- # if await computer.get_environment() == "browser":
478
- # current_url = await computer.get_current_url()
479
- # call_output["output"]["current_url"] = current_url
480
- # # TODO: implement a callback for URL safety checks
481
- # # check_blocklisted_url(current_url)
522
+ # Create function call output
523
+ call_output = {
524
+ "type": "function_call_output",
525
+ "call_id": item.get("call_id"),
526
+ "output": str(result),
527
+ }
482
528
 
483
- result = [call_output]
484
- await self._on_computer_call_end(item, result)
485
- return result
486
-
487
- if item_type == "function_call":
488
- await self._on_function_call_start(item)
489
- # Perform function call
490
- function = self._get_tool(item.get("name"))
491
- if not function:
492
- raise ValueError(f"Function {item.get("name")} not found")
493
-
494
- args = json.loads(item.get("arguments"))
495
-
496
- # Execute function - use asyncio.to_thread for non-async functions
497
- if inspect.iscoroutinefunction(function):
498
- result = await function(**args)
499
- else:
500
- result = await asyncio.to_thread(function, **args)
501
-
502
- # Create function call output
503
- call_output = {
504
- "type": "function_call_output",
505
- "call_id": item.get("call_id"),
506
- "output": str(result),
507
- }
508
-
509
- result = [call_output]
510
- await self._on_function_call_end(item, result)
511
- return result
529
+ result = [call_output]
530
+ await self._on_function_call_end(item, result)
531
+ return result
532
+ except ToolError as e:
533
+ return [make_tool_error_item(repr(e), call_id)]
512
534
 
513
535
  return []
514
536
 
@@ -569,6 +591,7 @@ class ComputerAgent:
569
591
  # - PII anonymization
570
592
  # - Image retention policy
571
593
  combined_messages = old_items + new_items
594
+ combined_messages = replace_failed_computer_calls_with_function_calls(combined_messages)
572
595
  preprocessed_messages = await self._on_llm_start(combined_messages)
573
596
 
574
597
  loop_kwargs = {
agent/responses.py CHANGED
@@ -252,6 +252,53 @@ def make_failed_tool_call_items(tool_name: str, tool_kwargs: Dict[str, Any], err
252
252
  }
253
253
  ]
254
254
 
255
+ def make_tool_error_item(error_message: str, call_id: Optional[str] = None) -> Dict[str, Any]:
256
+ call_id = call_id if call_id else random_id()
257
+ return {
258
+ "type": "function_call_output",
259
+ "call_id": call_id,
260
+ "output": json.dumps({"error": error_message}),
261
+ }
262
+
263
+ def replace_failed_computer_calls_with_function_calls(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
264
+ """
265
+ Replace computer_call items with function_call items if they share a call_id with a function_call_output.
266
+ This indicates the computer call failed and should be treated as a function call instead.
267
+ We do this because the computer_call_output items do not support text output.
268
+
269
+ Args:
270
+ messages: List of message items to process
271
+ """
272
+ messages = messages.copy()
273
+
274
+ # Find all call_ids that have function_call_output items
275
+ failed_call_ids = set()
276
+ for msg in messages:
277
+ if msg.get("type") == "function_call_output":
278
+ call_id = msg.get("call_id")
279
+ if call_id:
280
+ failed_call_ids.add(call_id)
281
+
282
+ # Replace computer_call items that have matching call_ids
283
+ for i, msg in enumerate(messages):
284
+ if (msg.get("type") == "computer_call" and
285
+ msg.get("call_id") in failed_call_ids):
286
+
287
+ # Extract action from computer_call
288
+ action = msg.get("action", {})
289
+ call_id = msg.get("call_id")
290
+
291
+ # Create function_call replacement
292
+ messages[i] = {
293
+ "type": "function_call",
294
+ "id": msg.get("id", random_id()),
295
+ "call_id": call_id,
296
+ "name": "computer",
297
+ "arguments": json.dumps(action),
298
+ }
299
+
300
+ return messages
301
+
255
302
  # Conversion functions between element descriptions and coordinates
256
303
  def convert_computer_calls_desc2xy(responses_items: List[Dict[str, Any]], desc2xy: Dict[str, tuple]) -> List[Dict[str, Any]]:
257
304
  """
agent/types.py CHANGED
@@ -16,6 +16,15 @@ Tools = Optional[Iterable[ToolParam]]
16
16
  AgentResponse = ResponsesAPIResponse
17
17
  AgentCapability = Literal["step", "click"]
18
18
 
19
+ # Exception types
20
+ class ToolError(RuntimeError):
21
+ """Base exception for tool-related errors"""
22
+ pass
23
+
24
+ class IllegalArgumentError(ToolError):
25
+ """Exception raised when function arguments are invalid"""
26
+ pass
27
+
19
28
 
20
29
  # Agent config registration
21
30
  class AgentConfigInfo(BaseModel):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.4.15
3
+ Version: 0.4.17
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.12
@@ -56,8 +56,8 @@ Description-Content-Type: text/markdown
56
56
  <h1>
57
57
  <div class="image-wrapper" style="display: inline-block;">
58
58
  <picture>
59
- <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../../img/logo_white.png" style="display: block; margin: auto;">
60
- <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../../img/logo_black.png" style="display: block; margin: auto;">
59
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;">
60
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;">
61
61
  <img alt="Shows my svg">
62
62
  </picture>
63
63
  </div>
@@ -3,7 +3,7 @@ agent/__main__.py,sha256=lBUe8Niqa5XoCjwFfXyX7GtnUwjjZXC1-j4V9mvUYSc,538
3
3
  agent/adapters/__init__.py,sha256=lNH6srgIMmZOI7dgicJs3LCk_1MeqLF0lou9n7b23Ts,238
4
4
  agent/adapters/huggingfacelocal_adapter.py,sha256=Uqjtcohhzd33VFh38Ra2y4Uv_lTghMswoqS1t-KKFkw,8480
5
5
  agent/adapters/human_adapter.py,sha256=xT4nnfNXb1z-vnGFlLmFEZN7TMcoMBGS40MtR1Zwv4o,13079
6
- agent/agent.py,sha256=mEbmN5G6y8jZ0FrlUnHfJQFE7r_GlXrHxqC93twv54k,27881
6
+ agent/agent.py,sha256=XBZu_iNSWzyBk7Qf9Q-FkyHoqdikdldK6T1LAM3lLWY,29102
7
7
  agent/callbacks/__init__.py,sha256=yxxBXUqpXQ-jRi_ixJMtmQPxoNRy5Vz1PUBzNNa1Dwg,538
8
8
  agent/callbacks/base.py,sha256=UnnnYlh6XCm6HKZZsAPaT_Eyo9LUYLyjyNwF-QRm6Ns,4691
9
9
  agent/callbacks/budget_manager.py,sha256=RyKM-7iXQcDotYvrw3eURzeEHEXvQjID-NobtvQWE7k,1832
@@ -36,14 +36,14 @@ agent/loops/model_types.csv,sha256=GmFn4x80yoUpQZuQ-GXtJkPVlOLYWZ5u_5A73HRyeNE,1
36
36
  agent/loops/omniparser.py,sha256=-db8JUL2Orn47ERIaLbuNShAXn4LeIgYzRWphn_9Dg4,15071
37
37
  agent/loops/openai.py,sha256=8Ad_XufpENmLq1nEnhzF3oswPrPK1EPz-C5NU8UOEs0,8035
38
38
  agent/loops/uitars.py,sha256=PVNOdwcn2K6RgaxoU-9I4HjBTsEH073M11LTqTrN7C4,31849
39
- agent/responses.py,sha256=TTJ3wXN_eb0J26GKhO3cVQngOiZ1AgUPIUadozLUQyE,28991
40
- agent/types.py,sha256=ZoWY8a3GZtB8V0SnOzoI7DQy4nP_GRubxJKbuLPOc8c,840
39
+ agent/responses.py,sha256=_SoN4BkaTxMHMB21EOtDc_aDBIJlfDwsCzszMBnIkH0,30764
40
+ agent/types.py,sha256=h6SnmTAEAaryVCjwVZFAuCbio9UW13OqgQEV7HKmZVM,1060
41
41
  agent/ui/__init__.py,sha256=DTZpK85QXscXK2nM9HtpAhVBF13yAamUrtwrQSuV-kM,126
42
42
  agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
43
43
  agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
44
44
  agent/ui/gradio/app.py,sha256=Ol97YEbwREZZQ9_PMjVHlfOcu9BGsawxgAGAm79hT80,9117
45
45
  agent/ui/gradio/ui_components.py,sha256=dJUvKDmc1oSejtoR_gU_oWWYwxaOOQyPloSYRGMrUCQ,36068
46
- cua_agent-0.4.15.dist-info/METADATA,sha256=NZ_ccvhA-BeLMnCajZAfVhzXvttSRHYfTdJlezFCks8,12616
47
- cua_agent-0.4.15.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
48
- cua_agent-0.4.15.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
49
- cua_agent-0.4.15.dist-info/RECORD,,
46
+ cua_agent-0.4.17.dist-info/METADATA,sha256=ngs59u9_Ec6SfwAdvr8UytvNFLt9DV0pMIQAb3ElbA0,12698
47
+ cua_agent-0.4.17.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
48
+ cua_agent-0.4.17.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
49
+ cua_agent-0.4.17.dist-info/RECORD,,