cua-agent 0.4.15__py3-none-any.whl → 0.4.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/agent.py +111 -88
- agent/responses.py +47 -0
- agent/types.py +9 -0
- {cua_agent-0.4.15.dist-info → cua_agent-0.4.17.dist-info}/METADATA +3 -3
- {cua_agent-0.4.15.dist-info → cua_agent-0.4.17.dist-info}/RECORD +7 -7
- {cua_agent-0.4.15.dist-info → cua_agent-0.4.17.dist-info}/WHEEL +0 -0
- {cua_agent-0.4.15.dist-info → cua_agent-0.4.17.dist-info}/entry_points.txt +0 -0
agent/agent.py
CHANGED
|
@@ -7,7 +7,13 @@ from typing import Dict, List, Any, Optional, AsyncGenerator, Union, cast, Calla
|
|
|
7
7
|
|
|
8
8
|
from litellm.responses.utils import Usage
|
|
9
9
|
|
|
10
|
-
from .types import
|
|
10
|
+
from .types import (
|
|
11
|
+
Messages,
|
|
12
|
+
AgentCapability,
|
|
13
|
+
ToolError,
|
|
14
|
+
IllegalArgumentError
|
|
15
|
+
)
|
|
16
|
+
from .responses import make_tool_error_item, replace_failed_computer_calls_with_function_calls
|
|
11
17
|
from .decorators import find_agent_config
|
|
12
18
|
import json
|
|
13
19
|
import litellm
|
|
@@ -30,6 +36,15 @@ from .computers import (
|
|
|
30
36
|
make_computer_handler
|
|
31
37
|
)
|
|
32
38
|
|
|
39
|
+
def assert_callable_with(f, *args, **kwargs):
|
|
40
|
+
"""Check if function can be called with given arguments."""
|
|
41
|
+
try:
|
|
42
|
+
inspect.signature(f).bind(*args, **kwargs)
|
|
43
|
+
return True
|
|
44
|
+
except TypeError as e:
|
|
45
|
+
sig = inspect.signature(f)
|
|
46
|
+
raise IllegalArgumentError(f"Expected {sig}, got args={args} kwargs={kwargs}") from e
|
|
47
|
+
|
|
33
48
|
def get_json(obj: Any, max_depth: int = 10) -> Any:
|
|
34
49
|
def custom_serializer(o: Any, depth: int = 0, seen: Optional[Set[int]] = None) -> Any:
|
|
35
50
|
if seen is None:
|
|
@@ -405,7 +420,8 @@ class ComputerAgent:
|
|
|
405
420
|
|
|
406
421
|
async def _handle_item(self, item: Any, computer: Optional[AsyncComputerHandler] = None, ignore_call_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
|
|
407
422
|
"""Handle each item; may cause a computer action + screenshot."""
|
|
408
|
-
|
|
423
|
+
call_id = item.get("call_id")
|
|
424
|
+
if ignore_call_ids and call_id and call_id in ignore_call_ids:
|
|
409
425
|
return []
|
|
410
426
|
|
|
411
427
|
item_type = item.get("type", None)
|
|
@@ -419,96 +435,102 @@ class ComputerAgent:
|
|
|
419
435
|
# print(content_item.get("text"))
|
|
420
436
|
return []
|
|
421
437
|
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
438
|
+
try:
|
|
439
|
+
if item_type == "computer_call":
|
|
440
|
+
await self._on_computer_call_start(item)
|
|
441
|
+
if not computer:
|
|
442
|
+
raise ValueError("Computer handler is required for computer calls")
|
|
443
|
+
|
|
444
|
+
# Perform computer actions
|
|
445
|
+
action = item.get("action")
|
|
446
|
+
action_type = action.get("type")
|
|
447
|
+
if action_type is None:
|
|
448
|
+
print(f"Action type cannot be `None`: action={action}, action_type={action_type}")
|
|
449
|
+
return []
|
|
450
|
+
|
|
451
|
+
# Extract action arguments (all fields except 'type')
|
|
452
|
+
action_args = {k: v for k, v in action.items() if k != "type"}
|
|
453
|
+
|
|
454
|
+
# print(f"{action_type}({action_args})")
|
|
455
|
+
|
|
456
|
+
# Execute the computer action
|
|
457
|
+
computer_method = getattr(computer, action_type, None)
|
|
458
|
+
if computer_method:
|
|
459
|
+
assert_callable_with(computer_method, **action_args)
|
|
460
|
+
await computer_method(**action_args)
|
|
461
|
+
else:
|
|
462
|
+
raise ToolError(f"Unknown computer action: {action_type}")
|
|
463
|
+
|
|
464
|
+
# Take screenshot after action
|
|
465
|
+
if self.screenshot_delay and self.screenshot_delay > 0:
|
|
466
|
+
await asyncio.sleep(self.screenshot_delay)
|
|
467
|
+
screenshot_base64 = await computer.screenshot()
|
|
468
|
+
await self._on_screenshot(screenshot_base64, "screenshot_after")
|
|
469
|
+
|
|
470
|
+
# Handle safety checks
|
|
471
|
+
pending_checks = item.get("pending_safety_checks", [])
|
|
472
|
+
acknowledged_checks = []
|
|
473
|
+
for check in pending_checks:
|
|
474
|
+
check_message = check.get("message", str(check))
|
|
475
|
+
acknowledged_checks.append(check)
|
|
476
|
+
# TODO: implement a callback for safety checks
|
|
477
|
+
# if acknowledge_safety_check_callback(check_message, allow_always=True):
|
|
478
|
+
# acknowledged_checks.append(check)
|
|
479
|
+
# else:
|
|
480
|
+
# raise ValueError(f"Safety check failed: {check_message}")
|
|
481
|
+
|
|
482
|
+
# Create call output
|
|
483
|
+
call_output = {
|
|
484
|
+
"type": "computer_call_output",
|
|
485
|
+
"call_id": item.get("call_id"),
|
|
486
|
+
"acknowledged_safety_checks": acknowledged_checks,
|
|
487
|
+
"output": {
|
|
488
|
+
"type": "input_image",
|
|
489
|
+
"image_url": f"data:image/png;base64,{screenshot_base64}",
|
|
490
|
+
},
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
# # Additional URL safety checks for browser environments
|
|
494
|
+
# if await computer.get_environment() == "browser":
|
|
495
|
+
# current_url = await computer.get_current_url()
|
|
496
|
+
# call_output["output"]["current_url"] = current_url
|
|
497
|
+
# # TODO: implement a callback for URL safety checks
|
|
498
|
+
# # check_blocklisted_url(current_url)
|
|
499
|
+
|
|
500
|
+
result = [call_output]
|
|
501
|
+
await self._on_computer_call_end(item, result)
|
|
502
|
+
return result
|
|
452
503
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
# TODO: implement a callback for safety checks
|
|
460
|
-
# if acknowledge_safety_check_callback(check_message, allow_always=True):
|
|
461
|
-
# acknowledged_checks.append(check)
|
|
462
|
-
# else:
|
|
463
|
-
# raise ValueError(f"Safety check failed: {check_message}")
|
|
504
|
+
if item_type == "function_call":
|
|
505
|
+
await self._on_function_call_start(item)
|
|
506
|
+
# Perform function call
|
|
507
|
+
function = self._get_tool(item.get("name"))
|
|
508
|
+
if not function:
|
|
509
|
+
raise ToolError(f"Function {item.get("name")} not found")
|
|
464
510
|
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
511
|
+
args = json.loads(item.get("arguments"))
|
|
512
|
+
|
|
513
|
+
# Validate arguments before execution
|
|
514
|
+
assert_callable_with(function, **args)
|
|
515
|
+
|
|
516
|
+
# Execute function - use asyncio.to_thread for non-async functions
|
|
517
|
+
if inspect.iscoroutinefunction(function):
|
|
518
|
+
result = await function(**args)
|
|
519
|
+
else:
|
|
520
|
+
result = await asyncio.to_thread(function, **args)
|
|
475
521
|
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
522
|
+
# Create function call output
|
|
523
|
+
call_output = {
|
|
524
|
+
"type": "function_call_output",
|
|
525
|
+
"call_id": item.get("call_id"),
|
|
526
|
+
"output": str(result),
|
|
527
|
+
}
|
|
482
528
|
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
await self._on_function_call_start(item)
|
|
489
|
-
# Perform function call
|
|
490
|
-
function = self._get_tool(item.get("name"))
|
|
491
|
-
if not function:
|
|
492
|
-
raise ValueError(f"Function {item.get("name")} not found")
|
|
493
|
-
|
|
494
|
-
args = json.loads(item.get("arguments"))
|
|
495
|
-
|
|
496
|
-
# Execute function - use asyncio.to_thread for non-async functions
|
|
497
|
-
if inspect.iscoroutinefunction(function):
|
|
498
|
-
result = await function(**args)
|
|
499
|
-
else:
|
|
500
|
-
result = await asyncio.to_thread(function, **args)
|
|
501
|
-
|
|
502
|
-
# Create function call output
|
|
503
|
-
call_output = {
|
|
504
|
-
"type": "function_call_output",
|
|
505
|
-
"call_id": item.get("call_id"),
|
|
506
|
-
"output": str(result),
|
|
507
|
-
}
|
|
508
|
-
|
|
509
|
-
result = [call_output]
|
|
510
|
-
await self._on_function_call_end(item, result)
|
|
511
|
-
return result
|
|
529
|
+
result = [call_output]
|
|
530
|
+
await self._on_function_call_end(item, result)
|
|
531
|
+
return result
|
|
532
|
+
except ToolError as e:
|
|
533
|
+
return [make_tool_error_item(repr(e), call_id)]
|
|
512
534
|
|
|
513
535
|
return []
|
|
514
536
|
|
|
@@ -569,6 +591,7 @@ class ComputerAgent:
|
|
|
569
591
|
# - PII anonymization
|
|
570
592
|
# - Image retention policy
|
|
571
593
|
combined_messages = old_items + new_items
|
|
594
|
+
combined_messages = replace_failed_computer_calls_with_function_calls(combined_messages)
|
|
572
595
|
preprocessed_messages = await self._on_llm_start(combined_messages)
|
|
573
596
|
|
|
574
597
|
loop_kwargs = {
|
agent/responses.py
CHANGED
|
@@ -252,6 +252,53 @@ def make_failed_tool_call_items(tool_name: str, tool_kwargs: Dict[str, Any], err
|
|
|
252
252
|
}
|
|
253
253
|
]
|
|
254
254
|
|
|
255
|
+
def make_tool_error_item(error_message: str, call_id: Optional[str] = None) -> Dict[str, Any]:
|
|
256
|
+
call_id = call_id if call_id else random_id()
|
|
257
|
+
return {
|
|
258
|
+
"type": "function_call_output",
|
|
259
|
+
"call_id": call_id,
|
|
260
|
+
"output": json.dumps({"error": error_message}),
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
def replace_failed_computer_calls_with_function_calls(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
264
|
+
"""
|
|
265
|
+
Replace computer_call items with function_call items if they share a call_id with a function_call_output.
|
|
266
|
+
This indicates the computer call failed and should be treated as a function call instead.
|
|
267
|
+
We do this because the computer_call_output items do not support text output.
|
|
268
|
+
|
|
269
|
+
Args:
|
|
270
|
+
messages: List of message items to process
|
|
271
|
+
"""
|
|
272
|
+
messages = messages.copy()
|
|
273
|
+
|
|
274
|
+
# Find all call_ids that have function_call_output items
|
|
275
|
+
failed_call_ids = set()
|
|
276
|
+
for msg in messages:
|
|
277
|
+
if msg.get("type") == "function_call_output":
|
|
278
|
+
call_id = msg.get("call_id")
|
|
279
|
+
if call_id:
|
|
280
|
+
failed_call_ids.add(call_id)
|
|
281
|
+
|
|
282
|
+
# Replace computer_call items that have matching call_ids
|
|
283
|
+
for i, msg in enumerate(messages):
|
|
284
|
+
if (msg.get("type") == "computer_call" and
|
|
285
|
+
msg.get("call_id") in failed_call_ids):
|
|
286
|
+
|
|
287
|
+
# Extract action from computer_call
|
|
288
|
+
action = msg.get("action", {})
|
|
289
|
+
call_id = msg.get("call_id")
|
|
290
|
+
|
|
291
|
+
# Create function_call replacement
|
|
292
|
+
messages[i] = {
|
|
293
|
+
"type": "function_call",
|
|
294
|
+
"id": msg.get("id", random_id()),
|
|
295
|
+
"call_id": call_id,
|
|
296
|
+
"name": "computer",
|
|
297
|
+
"arguments": json.dumps(action),
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
return messages
|
|
301
|
+
|
|
255
302
|
# Conversion functions between element descriptions and coordinates
|
|
256
303
|
def convert_computer_calls_desc2xy(responses_items: List[Dict[str, Any]], desc2xy: Dict[str, tuple]) -> List[Dict[str, Any]]:
|
|
257
304
|
"""
|
agent/types.py
CHANGED
|
@@ -16,6 +16,15 @@ Tools = Optional[Iterable[ToolParam]]
|
|
|
16
16
|
AgentResponse = ResponsesAPIResponse
|
|
17
17
|
AgentCapability = Literal["step", "click"]
|
|
18
18
|
|
|
19
|
+
# Exception types
|
|
20
|
+
class ToolError(RuntimeError):
|
|
21
|
+
"""Base exception for tool-related errors"""
|
|
22
|
+
pass
|
|
23
|
+
|
|
24
|
+
class IllegalArgumentError(ToolError):
|
|
25
|
+
"""Exception raised when function arguments are invalid"""
|
|
26
|
+
pass
|
|
27
|
+
|
|
19
28
|
|
|
20
29
|
# Agent config registration
|
|
21
30
|
class AgentConfigInfo(BaseModel):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cua-agent
|
|
3
|
-
Version: 0.4.15
|
|
3
|
+
Version: 0.4.17
|
|
4
4
|
Summary: CUA (Computer Use) Agent for AI-driven computer interaction
|
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
|
6
6
|
Requires-Python: >=3.12
|
|
@@ -56,8 +56,8 @@ Description-Content-Type: text/markdown
|
|
|
56
56
|
<h1>
|
|
57
57
|
<div class="image-wrapper" style="display: inline-block;">
|
|
58
58
|
<picture>
|
|
59
|
-
<source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="
|
|
60
|
-
<source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="
|
|
59
|
+
<source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_white.png" style="display: block; margin: auto;">
|
|
60
|
+
<source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="https://raw.githubusercontent.com/trycua/cua/main/img/logo_black.png" style="display: block; margin: auto;">
|
|
61
61
|
<img alt="Shows my svg">
|
|
62
62
|
</picture>
|
|
63
63
|
</div>
|
|
@@ -3,7 +3,7 @@ agent/__main__.py,sha256=lBUe8Niqa5XoCjwFfXyX7GtnUwjjZXC1-j4V9mvUYSc,538
|
|
|
3
3
|
agent/adapters/__init__.py,sha256=lNH6srgIMmZOI7dgicJs3LCk_1MeqLF0lou9n7b23Ts,238
|
|
4
4
|
agent/adapters/huggingfacelocal_adapter.py,sha256=Uqjtcohhzd33VFh38Ra2y4Uv_lTghMswoqS1t-KKFkw,8480
|
|
5
5
|
agent/adapters/human_adapter.py,sha256=xT4nnfNXb1z-vnGFlLmFEZN7TMcoMBGS40MtR1Zwv4o,13079
|
|
6
|
-
agent/agent.py,sha256=
|
|
6
|
+
agent/agent.py,sha256=XBZu_iNSWzyBk7Qf9Q-FkyHoqdikdldK6T1LAM3lLWY,29102
|
|
7
7
|
agent/callbacks/__init__.py,sha256=yxxBXUqpXQ-jRi_ixJMtmQPxoNRy5Vz1PUBzNNa1Dwg,538
|
|
8
8
|
agent/callbacks/base.py,sha256=UnnnYlh6XCm6HKZZsAPaT_Eyo9LUYLyjyNwF-QRm6Ns,4691
|
|
9
9
|
agent/callbacks/budget_manager.py,sha256=RyKM-7iXQcDotYvrw3eURzeEHEXvQjID-NobtvQWE7k,1832
|
|
@@ -36,14 +36,14 @@ agent/loops/model_types.csv,sha256=GmFn4x80yoUpQZuQ-GXtJkPVlOLYWZ5u_5A73HRyeNE,1
|
|
|
36
36
|
agent/loops/omniparser.py,sha256=-db8JUL2Orn47ERIaLbuNShAXn4LeIgYzRWphn_9Dg4,15071
|
|
37
37
|
agent/loops/openai.py,sha256=8Ad_XufpENmLq1nEnhzF3oswPrPK1EPz-C5NU8UOEs0,8035
|
|
38
38
|
agent/loops/uitars.py,sha256=PVNOdwcn2K6RgaxoU-9I4HjBTsEH073M11LTqTrN7C4,31849
|
|
39
|
-
agent/responses.py,sha256=
|
|
40
|
-
agent/types.py,sha256=
|
|
39
|
+
agent/responses.py,sha256=_SoN4BkaTxMHMB21EOtDc_aDBIJlfDwsCzszMBnIkH0,30764
|
|
40
|
+
agent/types.py,sha256=h6SnmTAEAaryVCjwVZFAuCbio9UW13OqgQEV7HKmZVM,1060
|
|
41
41
|
agent/ui/__init__.py,sha256=DTZpK85QXscXK2nM9HtpAhVBF13yAamUrtwrQSuV-kM,126
|
|
42
42
|
agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
|
|
43
43
|
agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
|
|
44
44
|
agent/ui/gradio/app.py,sha256=Ol97YEbwREZZQ9_PMjVHlfOcu9BGsawxgAGAm79hT80,9117
|
|
45
45
|
agent/ui/gradio/ui_components.py,sha256=dJUvKDmc1oSejtoR_gU_oWWYwxaOOQyPloSYRGMrUCQ,36068
|
|
46
|
-
cua_agent-0.4.
|
|
47
|
-
cua_agent-0.4.15.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
|
|
48
|
-
cua_agent-0.4.15.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
49
|
-
cua_agent-0.4.15.dist-info/RECORD,,
|
|
46
|
+
cua_agent-0.4.17.dist-info/METADATA,sha256=ngs59u9_Ec6SfwAdvr8UytvNFLt9DV0pMIQAb3ElbA0,12698
|
|
47
|
+
cua_agent-0.4.17.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
|
|
48
|
+
cua_agent-0.4.17.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
49
|
+
cua_agent-0.4.17.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|