cua-agent 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (112) hide show
  1. agent/__init__.py +21 -12
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +229 -0
  5. agent/agent.py +594 -0
  6. agent/callbacks/__init__.py +19 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/telemetry.py +210 -0
  13. agent/callbacks/trajectory_saver.py +305 -0
  14. agent/cli.py +297 -0
  15. agent/computer_handler.py +107 -0
  16. agent/decorators.py +90 -0
  17. agent/loops/__init__.py +11 -0
  18. agent/loops/anthropic.py +728 -0
  19. agent/loops/omniparser.py +339 -0
  20. agent/loops/openai.py +95 -0
  21. agent/loops/uitars.py +688 -0
  22. agent/responses.py +207 -0
  23. agent/telemetry.py +135 -14
  24. agent/types.py +79 -0
  25. agent/ui/__init__.py +7 -1
  26. agent/ui/__main__.py +2 -13
  27. agent/ui/gradio/__init__.py +6 -19
  28. agent/ui/gradio/app.py +94 -1313
  29. agent/ui/gradio/ui_components.py +721 -0
  30. cua_agent-0.4.0.dist-info/METADATA +424 -0
  31. cua_agent-0.4.0.dist-info/RECORD +33 -0
  32. {cua_agent-0.3.1.dist-info → cua_agent-0.4.0.dist-info}/WHEEL +1 -1
  33. agent/core/__init__.py +0 -27
  34. agent/core/agent.py +0 -210
  35. agent/core/base.py +0 -217
  36. agent/core/callbacks.py +0 -200
  37. agent/core/experiment.py +0 -249
  38. agent/core/factory.py +0 -122
  39. agent/core/messages.py +0 -332
  40. agent/core/provider_config.py +0 -21
  41. agent/core/telemetry.py +0 -142
  42. agent/core/tools/__init__.py +0 -21
  43. agent/core/tools/base.py +0 -74
  44. agent/core/tools/bash.py +0 -52
  45. agent/core/tools/collection.py +0 -46
  46. agent/core/tools/computer.py +0 -113
  47. agent/core/tools/edit.py +0 -67
  48. agent/core/tools/manager.py +0 -56
  49. agent/core/tools.py +0 -32
  50. agent/core/types.py +0 -88
  51. agent/core/visualization.py +0 -197
  52. agent/providers/__init__.py +0 -4
  53. agent/providers/anthropic/__init__.py +0 -6
  54. agent/providers/anthropic/api/client.py +0 -360
  55. agent/providers/anthropic/api/logging.py +0 -150
  56. agent/providers/anthropic/api_handler.py +0 -140
  57. agent/providers/anthropic/callbacks/__init__.py +0 -5
  58. agent/providers/anthropic/callbacks/manager.py +0 -65
  59. agent/providers/anthropic/loop.py +0 -568
  60. agent/providers/anthropic/prompts.py +0 -23
  61. agent/providers/anthropic/response_handler.py +0 -226
  62. agent/providers/anthropic/tools/__init__.py +0 -33
  63. agent/providers/anthropic/tools/base.py +0 -88
  64. agent/providers/anthropic/tools/bash.py +0 -66
  65. agent/providers/anthropic/tools/collection.py +0 -34
  66. agent/providers/anthropic/tools/computer.py +0 -396
  67. agent/providers/anthropic/tools/edit.py +0 -326
  68. agent/providers/anthropic/tools/manager.py +0 -54
  69. agent/providers/anthropic/tools/run.py +0 -42
  70. agent/providers/anthropic/types.py +0 -16
  71. agent/providers/anthropic/utils.py +0 -367
  72. agent/providers/omni/__init__.py +0 -8
  73. agent/providers/omni/api_handler.py +0 -42
  74. agent/providers/omni/clients/anthropic.py +0 -103
  75. agent/providers/omni/clients/base.py +0 -35
  76. agent/providers/omni/clients/oaicompat.py +0 -195
  77. agent/providers/omni/clients/ollama.py +0 -122
  78. agent/providers/omni/clients/openai.py +0 -155
  79. agent/providers/omni/clients/utils.py +0 -25
  80. agent/providers/omni/image_utils.py +0 -34
  81. agent/providers/omni/loop.py +0 -990
  82. agent/providers/omni/parser.py +0 -307
  83. agent/providers/omni/prompts.py +0 -64
  84. agent/providers/omni/tools/__init__.py +0 -30
  85. agent/providers/omni/tools/base.py +0 -29
  86. agent/providers/omni/tools/bash.py +0 -74
  87. agent/providers/omni/tools/computer.py +0 -179
  88. agent/providers/omni/tools/manager.py +0 -61
  89. agent/providers/omni/utils.py +0 -236
  90. agent/providers/openai/__init__.py +0 -6
  91. agent/providers/openai/api_handler.py +0 -456
  92. agent/providers/openai/loop.py +0 -472
  93. agent/providers/openai/response_handler.py +0 -205
  94. agent/providers/openai/tools/__init__.py +0 -15
  95. agent/providers/openai/tools/base.py +0 -79
  96. agent/providers/openai/tools/computer.py +0 -326
  97. agent/providers/openai/tools/manager.py +0 -106
  98. agent/providers/openai/types.py +0 -36
  99. agent/providers/openai/utils.py +0 -98
  100. agent/providers/uitars/__init__.py +0 -1
  101. agent/providers/uitars/clients/base.py +0 -35
  102. agent/providers/uitars/clients/mlxvlm.py +0 -263
  103. agent/providers/uitars/clients/oaicompat.py +0 -214
  104. agent/providers/uitars/loop.py +0 -660
  105. agent/providers/uitars/prompts.py +0 -63
  106. agent/providers/uitars/tools/__init__.py +0 -1
  107. agent/providers/uitars/tools/computer.py +0 -283
  108. agent/providers/uitars/tools/manager.py +0 -60
  109. agent/providers/uitars/utils.py +0 -264
  110. cua_agent-0.3.1.dist-info/METADATA +0 -295
  111. cua_agent-0.3.1.dist-info/RECORD +0 -87
  112. {cua_agent-0.3.1.dist-info → cua_agent-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,107 @@
1
+ """
2
+ Computer handler implementation for OpenAI computer-use-preview protocol.
3
+ """
4
+
5
+ import base64
6
+ from typing import Dict, List, Any, Literal
7
+ from .types import Computer
8
+
9
+
10
class OpenAIComputerHandler:
    """Computer handler that implements the Computer protocol using the computer interface.

    Every action is a coroutine that forwards to the wrapped
    ``computer_interface`` object supplied at construction time.
    """

    def __init__(self, computer_interface):
        """Initialize with a computer interface (from tool schema).

        Args:
            computer_interface: Object whose async methods (``screenshot``,
                ``left_click``, ``move_cursor``, ...) perform the real actions.
        """
        self.interface = computer_interface

    async def get_environment(self) -> Literal["windows", "mac", "linux", "browser"]:
        """Get the current environment type.

        No detection is performed yet: the value is hard-coded to "windows".
        """
        return "windows"

    async def get_dimensions(self) -> tuple[int, int]:
        """Get screen dimensions as (width, height)."""
        screen_size = await self.interface.get_screen_size()
        return screen_size["width"], screen_size["height"]

    async def screenshot(self) -> str:
        """Take a screenshot and return it as a base64-encoded string."""
        screenshot_bytes = await self.interface.screenshot()
        return base64.b64encode(screenshot_bytes).decode('utf-8')

    async def click(self, x: int, y: int, button: str = "left") -> None:
        """Click at coordinates with the specified button.

        Only "right" is dispatched to a right click; "left" and every
        unrecognised button name fall back to a left click.
        """
        if button == "right":
            await self.interface.right_click(x, y)
        else:
            await self.interface.left_click(x, y)

    async def double_click(self, x: int, y: int) -> None:
        """Double click at coordinates."""
        await self.interface.double_click(x, y)

    async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None:
        """Move the cursor to (x, y), then scroll by (scroll_x, scroll_y)."""
        await self.interface.move_cursor(x, y)
        await self.interface.scroll(scroll_x, scroll_y)

    async def type(self, text: str) -> None:
        """Type text."""
        await self.interface.type_text(text)

    async def wait(self, ms: int = 1000) -> None:
        """Wait for the specified number of milliseconds."""
        import asyncio
        await asyncio.sleep(ms / 1000.0)

    async def move(self, x: int, y: int) -> None:
        """Move cursor to coordinates."""
        await self.interface.move_cursor(x, y)

    async def keypress(self, keys: List[str]) -> None:
        """Press a single key or a key combination.

        Args:
            keys: Keys to press. One key goes to ``press_key``; several keys
                are passed together to ``hotkey``. An empty list is a no-op.
        """
        if not keys:
            # Nothing to press; avoid calling hotkey() with no keys at all.
            return

        if len(keys) == 1:
            await self.interface.press_key(keys[0])
        else:
            await self.interface.hotkey(*keys)

    async def drag(self, path: List[Dict[str, int]]) -> None:
        """Drag along the specified path.

        Args:
            path: Points given as dicts with "x" and "y" keys. The button is
                pressed at the first point, the cursor visits every following
                point, and the button is released at the last point. An empty
                path is a no-op.
        """
        if not path:
            return

        start, end = path[0], path[-1]
        await self.interface.mouse_down(start["x"], start["y"])
        try:
            for point in path[1:]:
                await self.interface.move_cursor(point["x"], point["y"])
        finally:
            # Always release the button, even when a move fails or the
            # coroutine is cancelled mid-drag, so it is never left held down.
            await self.interface.mouse_up(end["x"], end["y"])

    async def get_current_url(self) -> str:
        """Get current URL (for browser environments).

        Not implemented for any browser interface yet; always returns "".
        """
        return ""
94
+
95
+
96
def acknowledge_safety_check_callback(message: str) -> bool:
    """Ask the user on the console whether to proceed past a safety check.

    Args:
        message: Safety warning text shown in the prompt.

    Returns:
        True only when the answer, lower-cased and stripped of surrounding
        whitespace, is exactly "y"; False for anything else.
    """
    prompt = f"Safety Check Warning: {message}\nDo you want to acknowledge and proceed? (y/n): "
    answer = input(prompt)
    return answer.lower().strip() == "y"
102
+
103
+
104
def check_blocklisted_url(url: str) -> None:
    """Check if URL is blocklisted (placeholder implementation).

    No checking logic exists yet: the function accepts any URL, performs no
    work, and returns None.
    """
    # Real URL checking logic would go here.
    return None
agent/decorators.py ADDED
@@ -0,0 +1,90 @@
1
+ """
2
+ Decorators for agent - agent_loop decorator
3
+ """
4
+
5
+ import asyncio
6
+ import inspect
7
+ from typing import Dict, List, Any, Callable, Optional
8
+ from functools import wraps
9
+
10
+ from .types import AgentLoopInfo
11
+
12
+ # Global registry
13
+ _agent_loops: List[AgentLoopInfo] = []
14
+
15
def agent_loop(models: str, priority: int = 0):
    """
    Decorator to register an agent loop function.

    At decoration time the function's signature is validated and the
    undecorated function is recorded in the module-level ``_agent_loops``
    registry, which is re-sorted by descending priority. The name is then
    bound to an async wrapper that runs the function in its own task.

    Args:
        models: Regex pattern to match supported models
        priority: Priority for loop selection (higher = more priority)

    Returns:
        A decorator that takes the loop function and returns its async wrapper.

    Raises:
        ValueError: At decoration time, if the function does not declare both
            a ``messages`` and a ``model`` parameter.
    """
    def decorator(func: Callable):
        # Validate function signature
        required_params = {'messages', 'model'}
        func_params = set(inspect.signature(func).parameters)

        if not required_params.issubset(func_params):
            missing = required_params - func_params
            raise ValueError(f"Agent loop function must have parameters: {missing}")

        # Register the loop, keeping the registry ordered highest priority first
        _agent_loops.append(
            AgentLoopInfo(func=func, models_regex=models, priority=priority)
        )
        _agent_loops.sort(key=lambda x: x.priority, reverse=True)

        @wraps(func)
        async def wrapper(*args, **kwargs):
            """Run the loop in a separate task; return its result or re-raise its error."""
            async def run_loop():
                return await func(*args, **kwargs)

            task = asyncio.create_task(run_loop())
            try:
                # shield() lets this coroutine observe cancellation right away
                # while leaving the inner task for us to cancel and drain
                # below. It also surfaces a CancelledError raised by the loop
                # itself, so the wrapper can never wait forever on a task that
                # has already finished.
                return await asyncio.shield(task)
            except asyncio.CancelledError:
                if not task.done():
                    task.cancel()
                try:
                    # Wait until the loop has actually stopped before
                    # propagating the cancellation.
                    await task
                except (asyncio.CancelledError, Exception):
                    # Cancellation takes precedence over whatever the loop
                    # raised while shutting down.
                    pass
                raise

        return wrapper

    return decorator
80
+
81
def get_agent_loops() -> List[AgentLoopInfo]:
    """Return a shallow copy of the registered agent loops.

    The copy preserves the registry's current order; mutating the returned
    list does not affect the registry itself.
    """
    snapshot = list(_agent_loops)
    return snapshot
84
+
85
def find_agent_loop(model: str) -> Optional[AgentLoopInfo]:
    """Find the best matching agent loop for a model.

    Scans the registry in its stored order and returns the first entry whose
    ``matches_model(model)`` is truthy, or None when no entry matches.
    """
    candidates = (info for info in _agent_loops if info.matches_model(model))
    return next(candidates, None)
@@ -0,0 +1,11 @@
1
+ """
2
+ Agent loops for agent
3
+ """
4
+
5
+ # Import the loops to register them
6
+ from . import anthropic
7
+ from . import openai
8
+ from . import uitars
9
+ from . import omniparser
10
+
11
+ __all__ = ["anthropic", "openai", "uitars", "omniparser"]