cua-agent 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (112)
  1. agent/__init__.py +21 -12
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +229 -0
  5. agent/agent.py +594 -0
  6. agent/callbacks/__init__.py +19 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/telemetry.py +210 -0
  13. agent/callbacks/trajectory_saver.py +305 -0
  14. agent/cli.py +297 -0
  15. agent/computer_handler.py +107 -0
  16. agent/decorators.py +90 -0
  17. agent/loops/__init__.py +11 -0
  18. agent/loops/anthropic.py +728 -0
  19. agent/loops/omniparser.py +339 -0
  20. agent/loops/openai.py +95 -0
  21. agent/loops/uitars.py +688 -0
  22. agent/responses.py +207 -0
  23. agent/telemetry.py +135 -14
  24. agent/types.py +79 -0
  25. agent/ui/__init__.py +7 -1
  26. agent/ui/__main__.py +2 -13
  27. agent/ui/gradio/__init__.py +6 -19
  28. agent/ui/gradio/app.py +94 -1313
  29. agent/ui/gradio/ui_components.py +721 -0
  30. cua_agent-0.4.0.dist-info/METADATA +424 -0
  31. cua_agent-0.4.0.dist-info/RECORD +33 -0
  32. agent/core/__init__.py +0 -27
  33. agent/core/agent.py +0 -210
  34. agent/core/base.py +0 -217
  35. agent/core/callbacks.py +0 -200
  36. agent/core/experiment.py +0 -249
  37. agent/core/factory.py +0 -122
  38. agent/core/messages.py +0 -332
  39. agent/core/provider_config.py +0 -21
  40. agent/core/telemetry.py +0 -142
  41. agent/core/tools/__init__.py +0 -21
  42. agent/core/tools/base.py +0 -74
  43. agent/core/tools/bash.py +0 -52
  44. agent/core/tools/collection.py +0 -46
  45. agent/core/tools/computer.py +0 -113
  46. agent/core/tools/edit.py +0 -67
  47. agent/core/tools/manager.py +0 -56
  48. agent/core/tools.py +0 -32
  49. agent/core/types.py +0 -88
  50. agent/core/visualization.py +0 -197
  51. agent/providers/__init__.py +0 -4
  52. agent/providers/anthropic/__init__.py +0 -6
  53. agent/providers/anthropic/api/client.py +0 -360
  54. agent/providers/anthropic/api/logging.py +0 -150
  55. agent/providers/anthropic/api_handler.py +0 -140
  56. agent/providers/anthropic/callbacks/__init__.py +0 -5
  57. agent/providers/anthropic/callbacks/manager.py +0 -65
  58. agent/providers/anthropic/loop.py +0 -568
  59. agent/providers/anthropic/prompts.py +0 -23
  60. agent/providers/anthropic/response_handler.py +0 -226
  61. agent/providers/anthropic/tools/__init__.py +0 -33
  62. agent/providers/anthropic/tools/base.py +0 -88
  63. agent/providers/anthropic/tools/bash.py +0 -66
  64. agent/providers/anthropic/tools/collection.py +0 -34
  65. agent/providers/anthropic/tools/computer.py +0 -396
  66. agent/providers/anthropic/tools/edit.py +0 -326
  67. agent/providers/anthropic/tools/manager.py +0 -54
  68. agent/providers/anthropic/tools/run.py +0 -42
  69. agent/providers/anthropic/types.py +0 -16
  70. agent/providers/anthropic/utils.py +0 -381
  71. agent/providers/omni/__init__.py +0 -8
  72. agent/providers/omni/api_handler.py +0 -42
  73. agent/providers/omni/clients/anthropic.py +0 -103
  74. agent/providers/omni/clients/base.py +0 -35
  75. agent/providers/omni/clients/oaicompat.py +0 -195
  76. agent/providers/omni/clients/ollama.py +0 -122
  77. agent/providers/omni/clients/openai.py +0 -155
  78. agent/providers/omni/clients/utils.py +0 -25
  79. agent/providers/omni/image_utils.py +0 -34
  80. agent/providers/omni/loop.py +0 -990
  81. agent/providers/omni/parser.py +0 -307
  82. agent/providers/omni/prompts.py +0 -64
  83. agent/providers/omni/tools/__init__.py +0 -30
  84. agent/providers/omni/tools/base.py +0 -29
  85. agent/providers/omni/tools/bash.py +0 -74
  86. agent/providers/omni/tools/computer.py +0 -179
  87. agent/providers/omni/tools/manager.py +0 -61
  88. agent/providers/omni/utils.py +0 -236
  89. agent/providers/openai/__init__.py +0 -6
  90. agent/providers/openai/api_handler.py +0 -456
  91. agent/providers/openai/loop.py +0 -472
  92. agent/providers/openai/response_handler.py +0 -205
  93. agent/providers/openai/tools/__init__.py +0 -15
  94. agent/providers/openai/tools/base.py +0 -79
  95. agent/providers/openai/tools/computer.py +0 -326
  96. agent/providers/openai/tools/manager.py +0 -106
  97. agent/providers/openai/types.py +0 -36
  98. agent/providers/openai/utils.py +0 -98
  99. agent/providers/uitars/__init__.py +0 -1
  100. agent/providers/uitars/clients/base.py +0 -35
  101. agent/providers/uitars/clients/mlxvlm.py +0 -263
  102. agent/providers/uitars/clients/oaicompat.py +0 -214
  103. agent/providers/uitars/loop.py +0 -660
  104. agent/providers/uitars/prompts.py +0 -63
  105. agent/providers/uitars/tools/__init__.py +0 -1
  106. agent/providers/uitars/tools/computer.py +0 -283
  107. agent/providers/uitars/tools/manager.py +0 -60
  108. agent/providers/uitars/utils.py +0 -264
  109. cua_agent-0.3.2.dist-info/METADATA +0 -295
  110. cua_agent-0.3.2.dist-info/RECORD +0 -87
  111. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0.dist-info}/WHEEL +0 -0
  112. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,107 @@
1
+ """
2
+ Computer handler implementation for OpenAI computer-use-preview protocol.
3
+ """
4
+
5
+ import base64
6
+ from typing import Dict, List, Any, Literal
7
+ from .types import Computer
8
+
9
+
10
class OpenAIComputerHandler:
    """Computer handler that implements the Computer protocol using the computer interface.

    Each method is a coroutine that translates one action of the OpenAI
    computer-use-preview protocol into calls on the wrapped
    ``computer_interface`` object passed to the constructor.
    """

    def __init__(self, computer_interface):
        """Initialize with a computer interface (from tool schema).

        Args:
            computer_interface: Object providing the awaitable primitives this
                handler forwards to (``left_click``, ``move_cursor``,
                ``screenshot``, ...).
        """
        self.interface = computer_interface

    async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
        """Get the current environment type.

        Returns:
            Always ``"windows"`` for now; no detection is performed.
        """
        # For now, return a default - this could be enhanced to detect actual environment
        return "windows"

    async def get_dimensions(self) -> tuple[int, int]:
        """Get screen dimensions as (width, height)."""
        screen_size = await self.interface.get_screen_size()
        return screen_size["width"], screen_size["height"]

    async def screenshot(self) -> str:
        """Take a screenshot and return it as a base64-encoded UTF-8 string."""
        screenshot_bytes = await self.interface.screenshot()
        return base64.b64encode(screenshot_bytes).decode('utf-8')

    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Click at coordinates with the specified button.

        Only ``"right"`` is dispatched to a right click; ``"left"`` and any
        unrecognised button name fall back to a left click.
        """
        if button == "right":
            await self.interface.right_click(x, y)
        else:
            # Default to left click for unknown buttons
            await self.interface.left_click(x, y)

    async def double_click(self, x: int, y: int) -> None:
        """Double click at coordinates."""
        await self.interface.double_click(x, y)

    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Move the cursor to (x, y), then scroll by the given amounts."""
        await self.interface.move_cursor(x, y)
        await self.interface.scroll(scroll_x, scroll_y)

    async def type(self, text: str) -> None:
        """Type text."""
        await self.interface.type_text(text)

    async def wait(self, ms: int = 1000) -> None:
        """Wait for the specified number of milliseconds."""
        import asyncio
        await asyncio.sleep(ms / 1000.0)

    async def move(self, x: int, y: int) -> None:
        """Move cursor to coordinates."""
        await self.interface.move_cursor(x, y)

    async def keypress(self, keys: List[str]) -> None:
        """Press a single key or a key combination.

        An empty ``keys`` list is a no-op, a single entry is sent as one key
        press, and several entries are sent together as a hotkey.
        """
        if not keys:
            # Nothing to press; avoid invoking hotkey() with zero keys.
            return

        if len(keys) == 1:
            await self.interface.press_key(keys[0])
        else:
            # Handle key combinations
            await self.interface.hotkey(*keys)

    async def drag(self, path: List[Dict[str, int]]) -> None:
        """Drag along the specified path.

        Args:
            path: Points as ``{"x": ..., "y": ...}`` dicts. The button is
                pressed at the first point, the cursor visits every later
                point in order, and the button is released at the last point.
                An empty path is a no-op.
        """
        if not path:
            return

        start, end = path[0], path[-1]

        # Start drag from first point
        await self.interface.mouse_down(start["x"], start["y"])
        try:
            # Move through path
            for point in path[1:]:
                await self.interface.move_cursor(point["x"], point["y"])
        finally:
            # Always release the button, even when a move fails part-way, so
            # the pointer is never left stuck in a pressed state.
            await self.interface.mouse_up(end["x"], end["y"])

    async def get_current_url(self) -> str:
        """Get current URL (for browser environments).

        Returns:
            Always an empty string; not implemented yet.
        """
        # This would need to be implemented based on the specific browser interface
        # For now, return empty string
        return ""
94
+
95
+
96
def acknowledge_safety_check_callback(message: str) -> bool:
    """Ask the user on the console whether to proceed past a safety check.

    Args:
        message: Warning text shown to the user in the prompt.

    Returns:
        True only when the lower-cased, whitespace-stripped reply is exactly
        ``"y"``; any other reply yields False.
    """
    prompt = f"Safety Check Warning: {message}\nDo you want to acknowledge and proceed? (y/n): "
    answer = input(prompt)
    normalized = answer.lower().strip()
    return normalized == "y"
102
+
103
+
104
def check_blocklisted_url(url: str) -> None:
    """Check if URL is blocklisted (placeholder implementation).

    Currently a no-op: the URL is not inspected and nothing is raised.
    """
    # Actual URL checking logic would go here.
    return None
agent/decorators.py ADDED
@@ -0,0 +1,90 @@
1
+ """
2
+ Decorators for agent - agent_loop decorator
3
+ """
4
+
5
+ import asyncio
6
+ import inspect
7
+ from typing import Dict, List, Any, Callable, Optional
8
+ from functools import wraps
9
+
10
+ from .types import AgentLoopInfo
11
+
12
+ # Global registry
13
+ _agent_loops: List[AgentLoopInfo] = []
14
+
15
def agent_loop(models: str, priority: int = 0):
    """
    Decorator to register an agent loop function.

    The decorated function is recorded in the module-level registry and
    replaced by an async wrapper that runs it in its own task.

    Args:
        models: Regex pattern to match supported models
        priority: Priority for loop selection (higher = more priority)

    Raises:
        ValueError: At decoration time, if the function does not declare both
            a ``messages`` and a ``model`` parameter.
    """
    def decorator(func: Callable):
        # Validate function signature
        sig = inspect.signature(func)
        required_params = {'messages', 'model'}
        missing = required_params - set(sig.parameters)
        if missing:
            raise ValueError(f"Agent loop function must have parameters: {missing}")

        # Register the loop
        loop_info = AgentLoopInfo(
            func=func,
            models_regex=models,
            priority=priority
        )
        _agent_loops.append(loop_info)

        # Sort by priority (highest first)
        _agent_loops.sort(key=lambda x: x.priority, reverse=True)

        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Run the function in its own task and hand the outcome back
            # through a queue so the wrapper can react to cancellation.
            queue = asyncio.Queue()
            task = None

            try:
                # Create a task that can be cancelled
                async def run_loop():
                    try:
                        result = await func(*args, **kwargs)
                    except asyncio.CancelledError as e:
                        # CancelledError derives from BaseException on
                        # Python 3.8+, so `except Exception` would miss it and
                        # the wrapper would wait on the queue forever.
                        # put_nowait cannot block on an unbounded queue.
                        queue.put_nowait(('error', e))
                        raise
                    except Exception as e:
                        await queue.put(('error', e))
                    else:
                        await queue.put(('result', result))

                task = asyncio.create_task(run_loop())

                # Wait for result or cancellation
                event_type, data = await queue.get()

                if event_type == 'error':
                    raise data
                return data

            except asyncio.CancelledError:
                # Stop the inner task and wait for it to unwind before
                # propagating the cancellation to the caller.
                if task:
                    task.cancel()
                    try:
                        await task
                    except asyncio.CancelledError:
                        pass
                raise

        return wrapper

    return decorator
80
+
81
def get_agent_loops() -> List[AgentLoopInfo]:
    """Return a shallow copy of the registered agent loops.

    The copy keeps the registry's current order, and mutating it does not
    affect the registry itself.
    """
    snapshot = list(_agent_loops)
    return snapshot
84
+
85
def find_agent_loop(model: str) -> Optional[AgentLoopInfo]:
    """Return the first registered loop whose ``matches_model`` accepts ``model``.

    The registry is scanned in its stored order. Returns None when no entry
    matches.
    """
    candidates = (info for info in _agent_loops if info.matches_model(model))
    return next(candidates, None)
@@ -0,0 +1,11 @@
1
+ """
2
+ Agent loops for agent
3
+ """
4
+
5
+ # Import the loops to register them
6
+ from . import anthropic
7
+ from . import openai
8
+ from . import uitars
9
+ from . import omniparser
10
+
11
+ __all__ = ["anthropic", "openai", "uitars", "omniparser"]