cua-agent 0.1.6__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (42)
  1. agent/__init__.py +3 -2
  2. agent/core/__init__.py +0 -5
  3. agent/core/computer_agent.py +21 -28
  4. agent/core/loop.py +78 -124
  5. agent/core/messages.py +279 -125
  6. agent/core/types.py +35 -0
  7. agent/core/visualization.py +197 -0
  8. agent/providers/anthropic/api/client.py +142 -1
  9. agent/providers/anthropic/api_handler.py +140 -0
  10. agent/providers/anthropic/callbacks/__init__.py +5 -0
  11. agent/providers/anthropic/loop.py +206 -220
  12. agent/providers/anthropic/response_handler.py +229 -0
  13. agent/providers/anthropic/tools/bash.py +0 -97
  14. agent/providers/anthropic/utils.py +370 -0
  15. agent/providers/omni/__init__.py +1 -20
  16. agent/providers/omni/api_handler.py +42 -0
  17. agent/providers/omni/clients/anthropic.py +4 -0
  18. agent/providers/omni/image_utils.py +0 -72
  19. agent/providers/omni/loop.py +490 -606
  20. agent/providers/omni/parser.py +58 -4
  21. agent/providers/omni/tools/__init__.py +25 -7
  22. agent/providers/omni/tools/base.py +29 -0
  23. agent/providers/omni/tools/bash.py +43 -38
  24. agent/providers/omni/tools/computer.py +144 -182
  25. agent/providers/omni/tools/manager.py +25 -45
  26. agent/providers/omni/types.py +0 -4
  27. agent/providers/omni/utils.py +224 -145
  28. {cua_agent-0.1.6.dist-info → cua_agent-0.1.17.dist-info}/METADATA +6 -36
  29. cua_agent-0.1.17.dist-info/RECORD +63 -0
  30. agent/providers/omni/callbacks.py +0 -78
  31. agent/providers/omni/clients/groq.py +0 -101
  32. agent/providers/omni/experiment.py +0 -276
  33. agent/providers/omni/messages.py +0 -171
  34. agent/providers/omni/tool_manager.py +0 -91
  35. agent/providers/omni/visualization.py +0 -130
  36. agent/types/__init__.py +0 -23
  37. agent/types/base.py +0 -41
  38. agent/types/messages.py +0 -36
  39. cua_agent-0.1.6.dist-info/RECORD +0 -64
  40. /agent/{types → core}/tools.py +0 -0
  41. {cua_agent-0.1.6.dist-info → cua_agent-0.1.17.dist-info}/WHEEL +0 -0
  42. {cua_agent-0.1.6.dist-info → cua_agent-0.1.17.dist-info}/entry_points.txt +0 -0
agent/providers/omni/tool_manager.py DELETED
@@ -1,91 +0,0 @@
1
- # """Omni tool manager implementation."""
2
-
3
- # from typing import Dict, List, Type, Any
4
-
5
- # from computer import Computer
6
- # from ...core.tools import BaseToolManager, BashTool, EditTool
7
-
8
- # class OmniToolManager(BaseToolManager):
9
- # """Tool manager for multi-provider support."""
10
-
11
- # def __init__(self, computer: Computer):
12
- # """Initialize Omni tool manager.
13
-
14
- # Args:
15
- # computer: Computer instance for tools
16
- # """
17
- # super().__init__(computer)
18
-
19
- # def get_anthropic_tools(self) -> List[Dict[str, Any]]:
20
- # """Get tools formatted for Anthropic API.
21
-
22
- # Returns:
23
- # List of tool parameters in Anthropic format
24
- # """
25
- # tools: List[Dict[str, Any]] = []
26
-
27
- # # Map base tools to Anthropic format
28
- # for tool in self.tools.values():
29
- # if isinstance(tool, BashTool):
30
- # tools.append({
31
- # "type": "bash_20241022",
32
- # "name": tool.name
33
- # })
34
- # elif isinstance(tool, EditTool):
35
- # tools.append({
36
- # "type": "text_editor_20241022",
37
- # "name": "str_replace_editor"
38
- # })
39
-
40
- # return tools
41
-
42
- # def get_openai_tools(self) -> List[Dict]:
43
- # """Get tools formatted for OpenAI API.
44
-
45
- # Returns:
46
- # List of tool parameters in OpenAI format
47
- # """
48
- # tools = []
49
-
50
- # # Map base tools to OpenAI format
51
- # for tool in self.tools.values():
52
- # tools.append({
53
- # "type": "function",
54
- # "function": tool.get_schema()
55
- # })
56
-
57
- # return tools
58
-
59
- # def get_groq_tools(self) -> List[Dict]:
60
- # """Get tools formatted for Groq API.
61
-
62
- # Returns:
63
- # List of tool parameters in Groq format
64
- # """
65
- # tools = []
66
-
67
- # # Map base tools to Groq format
68
- # for tool in self.tools.values():
69
- # tools.append({
70
- # "type": "function",
71
- # "function": tool.get_schema()
72
- # })
73
-
74
- # return tools
75
-
76
- # def get_qwen_tools(self) -> List[Dict]:
77
- # """Get tools formatted for Qwen API.
78
-
79
- # Returns:
80
- # List of tool parameters in Qwen format
81
- # """
82
- # tools = []
83
-
84
- # # Map base tools to Qwen format
85
- # for tool in self.tools.values():
86
- # tools.append({
87
- # "type": "function",
88
- # "function": tool.get_schema()
89
- # })
90
-
91
- # return tools
agent/providers/omni/visualization.py DELETED
@@ -1,130 +0,0 @@
1
- """Visualization utilities for the Cua provider."""
2
-
3
- import base64
4
- import logging
5
- from io import BytesIO
6
- from typing import Tuple
7
- from PIL import Image, ImageDraw
8
-
9
- logger = logging.getLogger(__name__)
10
-
11
-
12
- def visualize_click(x: int, y: int, img_base64: str) -> Image.Image:
13
- """Visualize a click action by drawing on the screenshot.
14
-
15
- Args:
16
- x: X coordinate of the click
17
- y: Y coordinate of the click
18
- img_base64: Base64 encoded image to draw on
19
-
20
- Returns:
21
- PIL Image with visualization
22
- """
23
- try:
24
- # Decode the base64 image
25
- img_data = base64.b64decode(img_base64)
26
- img = Image.open(BytesIO(img_data))
27
-
28
- # Create a drawing context
29
- draw = ImageDraw.Draw(img)
30
-
31
- # Draw concentric circles at the click position
32
- small_radius = 10
33
- large_radius = 30
34
-
35
- # Draw filled inner circle
36
- draw.ellipse(
37
- [(x - small_radius, y - small_radius), (x + small_radius, y + small_radius)],
38
- fill="red",
39
- )
40
-
41
- # Draw outlined outer circle
42
- draw.ellipse(
43
- [(x - large_radius, y - large_radius), (x + large_radius, y + large_radius)],
44
- outline="red",
45
- width=3,
46
- )
47
-
48
- return img
49
-
50
- except Exception as e:
51
- logger.error(f"Error visualizing click: {str(e)}")
52
- # Return a blank image in case of error
53
- return Image.new("RGB", (800, 600), color="white")
54
-
55
-
56
- def visualize_scroll(direction: str, clicks: int, img_base64: str) -> Image.Image:
57
- """Visualize a scroll action by drawing arrows on the screenshot.
58
-
59
- Args:
60
- direction: 'up' or 'down'
61
- clicks: Number of scroll clicks
62
- img_base64: Base64 encoded image to draw on
63
-
64
- Returns:
65
- PIL Image with visualization
66
- """
67
- try:
68
- # Decode the base64 image
69
- img_data = base64.b64decode(img_base64)
70
- img = Image.open(BytesIO(img_data))
71
-
72
- # Get image dimensions
73
- width, height = img.size
74
-
75
- # Create a drawing context
76
- draw = ImageDraw.Draw(img)
77
-
78
- # Determine arrow direction and positions
79
- center_x = width // 2
80
- arrow_width = 100
81
-
82
- if direction.lower() == "up":
83
- # Draw up arrow in the middle of the screen
84
- arrow_y = height // 2
85
- # Arrow points
86
- points = [
87
- (center_x, arrow_y - 50), # Top point
88
- (center_x - arrow_width // 2, arrow_y + 50), # Bottom left
89
- (center_x + arrow_width // 2, arrow_y + 50), # Bottom right
90
- ]
91
- color = "blue"
92
- else: # down
93
- # Draw down arrow in the middle of the screen
94
- arrow_y = height // 2
95
- # Arrow points
96
- points = [
97
- (center_x, arrow_y + 50), # Bottom point
98
- (center_x - arrow_width // 2, arrow_y - 50), # Top left
99
- (center_x + arrow_width // 2, arrow_y - 50), # Top right
100
- ]
101
- color = "green"
102
-
103
- # Draw filled arrow
104
- draw.polygon(points, fill=color)
105
-
106
- # Add text showing number of clicks
107
- text_y = arrow_y + 70 if direction.lower() == "down" else arrow_y - 70
108
- draw.text((center_x - 40, text_y), f"{clicks} clicks", fill="black")
109
-
110
- return img
111
-
112
- except Exception as e:
113
- logger.error(f"Error visualizing scroll: {str(e)}")
114
- # Return a blank image in case of error
115
- return Image.new("RGB", (800, 600), color="white")
116
-
117
-
118
- def calculate_element_center(box: Tuple[int, int, int, int]) -> Tuple[int, int]:
119
- """Calculate the center coordinates of a bounding box.
120
-
121
- Args:
122
- box: Tuple of (left, top, right, bottom) coordinates
123
-
124
- Returns:
125
- Tuple of (center_x, center_y) coordinates
126
- """
127
- left, top, right, bottom = box
128
- center_x = (left + right) // 2
129
- center_y = (top + bottom) // 2
130
- return center_x, center_y
agent/types/__init__.py DELETED
@@ -1,23 +0,0 @@
1
- """Type definitions for the agent package."""
2
-
3
- from .base import HostConfig, TaskResult, Annotation
4
- from .messages import Message, Request, Response, StepMessage, DisengageMessage
5
- from .tools import ToolInvocation, ToolInvocationState, ClientAttachment, ToolResult
6
-
7
- __all__ = [
8
- # Base types
9
- "HostConfig",
10
- "TaskResult",
11
- "Annotation",
12
- # Message types
13
- "Message",
14
- "Request",
15
- "Response",
16
- "StepMessage",
17
- "DisengageMessage",
18
- # Tool types
19
- "ToolInvocation",
20
- "ToolInvocationState",
21
- "ClientAttachment",
22
- "ToolResult",
23
- ]
agent/types/base.py DELETED
@@ -1,41 +0,0 @@
1
- """Base type definitions."""
2
-
3
- from enum import Enum, auto
4
- from typing import Dict, Any
5
- from pydantic import BaseModel, ConfigDict
6
-
7
-
8
- class HostConfig(BaseModel):
9
- """Host configuration."""
10
-
11
- model_config = ConfigDict(extra="forbid")
12
- hostname: str
13
- port: int
14
-
15
- @property
16
- def address(self) -> str:
17
- return f"{self.hostname}:{self.port}"
18
-
19
-
20
- class TaskResult(BaseModel):
21
- """Result of a task execution."""
22
-
23
- model_config = ConfigDict(extra="forbid")
24
- result: str
25
- vnc_password: str
26
-
27
-
28
- class Annotation(BaseModel):
29
- """Annotation metadata."""
30
-
31
- model_config = ConfigDict(extra="forbid")
32
- id: str
33
- vm_url: str
34
-
35
-
36
- class AgentLoop(Enum):
37
- """Enumeration of available loop types."""
38
-
39
- ANTHROPIC = auto() # Anthropic implementation
40
- OMNI = auto() # OmniLoop implementation
41
- # Add more loop types as needed
agent/types/messages.py DELETED
@@ -1,36 +0,0 @@
1
- """Message-related type definitions."""
2
-
3
- from typing import List, Dict, Any, Optional
4
- from pydantic import BaseModel, ConfigDict
5
-
6
- from .tools import ToolInvocation
7
-
8
- class Message(BaseModel):
9
- """Base message type."""
10
- model_config = ConfigDict(extra='forbid')
11
- role: str
12
- content: str
13
- annotations: Optional[List[Dict[str, Any]]] = None
14
- toolInvocations: Optional[List[ToolInvocation]] = None
15
- data: Optional[List[Dict[str, Any]]] = None
16
- errors: Optional[List[str]] = None
17
-
18
- class Request(BaseModel):
19
- """Request type."""
20
- model_config = ConfigDict(extra='forbid')
21
- messages: List[Message]
22
- selectedModel: str
23
-
24
- class Response(BaseModel):
25
- """Response type."""
26
- model_config = ConfigDict(extra='forbid')
27
- messages: List[Message]
28
- vm_url: str
29
-
30
- class StepMessage(Message):
31
- """Message for a single step."""
32
- pass
33
-
34
- class DisengageMessage(BaseModel):
35
- """Message indicating disengagement."""
36
- pass
cua_agent-0.1.6.dist-info/RECORD DELETED
@@ -1,64 +0,0 @@
1
- agent/README.md,sha256=8EFnLrKejthEcL9bZflQSbvA-KwpiPanBz8TEEwRub8,2153
2
- agent/__init__.py,sha256=Pil-INEbTU7iOlZXP3QN-kV_IHtW9uk7PuJCON319Ws,1437
3
- agent/core/README.md,sha256=VOXNVbR0ugxf9gCXYmZtUU2kngZhfi29haT_oSxK0Lk,3559
4
- agent/core/__init__.py,sha256=bds3kSkCILroIxxqU4scCPPCr-pooIkF2S4oU3OgsGY,638
5
- agent/core/callbacks.py,sha256=VbGIf5QkHh3Q0KsLM6wv7hRdIA5WExTVYLm64bckyUA,4306
6
- agent/core/computer_agent.py,sha256=7JOwAEeB1CL8Sw_1WAE3_a9tswZyCaBdXWDCfD3r7M4,9976
7
- agent/core/experiment.py,sha256=Ywj6q3JZFDKicfPuQsDl0vSN55HS7-Cnk3u3EcUCKe8,8866
8
- agent/core/loop.py,sha256=j4zI7h6mifQ5kMn9y0NcjAi1ZpvSBHoPL7Lk2e1OGsQ,9255
9
- agent/core/messages.py,sha256=N8pV8Eh-AJpMuDPRI5OGWUIOU6DRr-pQjK9XU0go9Hk,7637
10
- agent/core/telemetry.py,sha256=HElPd32k_w2SJ6t-Cc3j_2-AKdLbFwh2YlM8QViDgRw,4790
11
- agent/core/tools/__init__.py,sha256=xZen-PqUp2dUaMEHJowXCQm33_5Sxhsx9PSoD0rq6tI,489
12
- agent/core/tools/base.py,sha256=CdzRFNuOjNfzgyTUN4ZoCGkUDR5HI0ECQVpvrUdEij8,2295
13
- agent/core/tools/bash.py,sha256=jnJKVlHn8np8e0gWd8EO0_qqjMkfQzutSugA_Iol4jE,1585
14
- agent/core/tools/collection.py,sha256=NuwTn6dXSyznxWodfmFDQwUlxxaGb4oBPym4AEJABSQ,1338
15
- agent/core/tools/computer.py,sha256=lT_aW3huoYpcM8kffuokELupSz_WZG_qkaW1gITRC58,3892
16
- agent/core/tools/edit.py,sha256=kv4jTKCM0VXrnoNErf7mT-xlr81-7T8v49_VA9y_L4Y,2005
17
- agent/core/tools/manager.py,sha256=IRsCXjGc076nncQuyIjODoafnHTDhrf9sP5B4q5Pcdo,1742
18
- agent/providers/__init__.py,sha256=b4tIBAaIB1V7p8V0BWipHVnMhfHH_OuVgP4OWGSHdD8,194
19
- agent/providers/anthropic/__init__.py,sha256=Mj11IZnVshZ2iHkvg4Z5-jrQIaD1WvzDz2Zk_pMwqIA,149
20
- agent/providers/anthropic/api/client.py,sha256=Y_g4Xg8Ko4tCqjipVm0GBMw-86vw0KQVXS5aWzJinzw,7038
21
- agent/providers/anthropic/api/logging.py,sha256=vHpwkIyOZdkSTVIH4ycbBPd4a_rzhP7Osu1I-Ayouwc,5154
22
- agent/providers/anthropic/callbacks/manager.py,sha256=dRKN7MuBze2dLal0iHDxCKYqMdh_KShSphuwn7zC-c4,1878
23
- agent/providers/anthropic/loop.py,sha256=uPjgXoGRdJb5bsJchUh_0aUuyRBm-HSp7jaM2cKg61I,19466
24
- agent/providers/anthropic/messages/manager.py,sha256=6FobzAHh5-7dxaxbUdG1--1UY4w-mh3MFytX6ONrK3c,4972
25
- agent/providers/anthropic/prompts.py,sha256=nHFfgPrfvnWrEdVP7EUBGUHAI85D2X9HeZirk9EwncU,1941
26
- agent/providers/anthropic/tools/__init__.py,sha256=JyZwuVtPUnZwRSZBSCdQv9yxbLCsygm3l8Ywjjt9qTQ,661
27
- agent/providers/anthropic/tools/base.py,sha256=WnRDbqO25tQzLpS2RU2ZXTLF5wd5IqU7SiyRAglQat4,2752
28
- agent/providers/anthropic/tools/bash.py,sha256=CIh4pO0jEdSZApnjpmFhrQbTTiwxivuOgv1-QLN0Ydw,5740
29
- agent/providers/anthropic/tools/collection.py,sha256=RBK_6hxfHExR-EOxadiLl0OznmFj07nyIUjFgaYZ6Eo,960
30
- agent/providers/anthropic/tools/computer.py,sha256=vYni1jDOOgzSSBOJxHcEKxvKUYRp5_nQ-9dmpGdLwm4,25858
31
- agent/providers/anthropic/tools/edit.py,sha256=EGRP61MDA4Oue1D7Q-_vLpd6LdGbdBA1Z4HSZ66DbmI,13465
32
- agent/providers/anthropic/tools/manager.py,sha256=yNvgTkfEqnOz5isDF0RxvmBMZB0uh2PipFEH-PUXpoY,2020
33
- agent/providers/anthropic/tools/run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595
34
- agent/providers/anthropic/types.py,sha256=SF00kOMC1ui8j9Ah56KaeiR2cL394qCHjFIsBpXxt5w,421
35
- agent/providers/omni/__init__.py,sha256=eTUh4Pmh4zO-RLnP-wAFm8EkJBMImT-G2xnVIYWRti0,744
36
- agent/providers/omni/callbacks.py,sha256=ZG9NCgsHWt6y5jKsfcGLaoLxTpmKnIhCArDdeP4q9sA,2369
37
- agent/providers/omni/clients/anthropic.py,sha256=X_QRVxqwA_ExdUqgBEwo1aHOfZQxVIBDmDugNHF97OM,3554
38
- agent/providers/omni/clients/base.py,sha256=zAAgPi0jl3SWPC730R9l79E8bfYPSo39UtCSE-mrK6I,1076
39
- agent/providers/omni/clients/groq.py,sha256=HEinpE0_Cp_-geMyjJ8qaTPl0regPtETPkem4U13qG4,3599
40
- agent/providers/omni/clients/openai.py,sha256=E4TAXMUFoYTunJETCWCNx5XAc6xutiN4rB6PlVpzC5s,5972
41
- agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
42
- agent/providers/omni/experiment.py,sha256=ZZ45U5NEkpzMNeMO9hJfpRf3iPNFaSZVwChcfqOgbI0,10002
43
- agent/providers/omni/image_utils.py,sha256=qIFuNi5cIMVwrqYBXG1T6PxUlbxz7gIngFFP39bZIlU,2782
44
- agent/providers/omni/loop.py,sha256=10GxyZFG8wAYObaaInWSZDRVwWNnZk_qhqdGr3PIPe0,44022
45
- agent/providers/omni/messages.py,sha256=zdjQCAMH-hOyrQQesHhTiIsQbw43KqVSmVIzS8JOIFA,6134
46
- agent/providers/omni/parser.py,sha256=4n1rzaD-mHi7sMfeqChgOyrJuciwzL95x32BGI6GATM,9194
47
- agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
48
- agent/providers/omni/tool_manager.py,sha256=O6DxyEI-Vg6jt99phh011o4q4me_vNhH2YffIxkO4GM,2585
49
- agent/providers/omni/tools/__init__.py,sha256=RkxsPTow3jpOKuXJ1ZKb-KBi6lbxGWfjC9gaV6hSZIs,278
50
- agent/providers/omni/tools/bash.py,sha256=y_ibfP9iRcbiU_E0faAoa4DCP_BlkMlKOOURdBBIGZE,2030
51
- agent/providers/omni/tools/computer.py,sha256=s8WVA_xGROEfdmCYjEqr563ySp4DRMlsLVuu54nH0Ww,9129
52
- agent/providers/omni/tools/manager.py,sha256=EyNABQeJc8bEcJ4hFsRodKyBNkZpThfOHk1Ku3Pzsg8,2519
53
- agent/providers/omni/types.py,sha256=rpr7-mH9VK1R-nJ6tVu1gKp427j-hw1DpHc197b44nU,1017
54
- agent/providers/omni/utils.py,sha256=X2IBki6yJQFaEz7PDjkx8CqQq2R1v7nldRcOT5j7YcA,6381
55
- agent/providers/omni/visualization.py,sha256=N3qVQLxYmia3iSVC5oCt5YRlMPuVfylCOyB99R33u8U,3924
56
- agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
57
- agent/types/__init__.py,sha256=Xi6nNSsfbsGxs3We8gbdEY0ew0Jf0A0Prs5393Tvveg,568
58
- agent/types/base.py,sha256=_5LNleRTqoL55VHEEZntL8x-OQom2A3FiTf2ttdM_HQ,857
59
- agent/types/messages.py,sha256=4-hwtxeAhto90_EZpHFducddtsHUsHauvXzYrpKG4RE,953
60
- agent/types/tools.py,sha256=Jes2CFCFqC727WWHbO-sG7V03rBHnQe5X7Oi9ZkuScI,877
61
- cua_agent-0.1.6.dist-info/METADATA,sha256=rD_j8q7aC5wkNQtpbgRLyo3-5z_zCDivJE8MwyPpz6I,4528
62
- cua_agent-0.1.6.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
63
- cua_agent-0.1.6.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
64
- cua_agent-0.1.6.dist-info/RECORD,,
File without changes