cua-agent 0.3.2__py3-none-any.whl → 0.4.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (111)
  1. agent/__init__.py +15 -51
  2. agent/__main__.py +21 -0
  3. agent/adapters/__init__.py +9 -0
  4. agent/adapters/huggingfacelocal_adapter.py +229 -0
  5. agent/agent.py +577 -0
  6. agent/callbacks/__init__.py +17 -0
  7. agent/callbacks/base.py +153 -0
  8. agent/callbacks/budget_manager.py +44 -0
  9. agent/callbacks/image_retention.py +139 -0
  10. agent/callbacks/logging.py +247 -0
  11. agent/callbacks/pii_anonymization.py +259 -0
  12. agent/callbacks/trajectory_saver.py +305 -0
  13. agent/cli.py +290 -0
  14. agent/computer_handler.py +107 -0
  15. agent/decorators.py +90 -0
  16. agent/loops/__init__.py +11 -0
  17. agent/loops/anthropic.py +728 -0
  18. agent/loops/omniparser.py +339 -0
  19. agent/loops/openai.py +95 -0
  20. agent/loops/uitars.py +688 -0
  21. agent/responses.py +207 -0
  22. agent/types.py +79 -0
  23. agent/ui/__init__.py +7 -1
  24. agent/ui/gradio/__init__.py +6 -19
  25. agent/ui/gradio/app.py +80 -1299
  26. agent/ui/gradio/ui_components.py +703 -0
  27. cua_agent-0.4.0b2.dist-info/METADATA +424 -0
  28. cua_agent-0.4.0b2.dist-info/RECORD +30 -0
  29. agent/core/__init__.py +0 -27
  30. agent/core/agent.py +0 -210
  31. agent/core/base.py +0 -217
  32. agent/core/callbacks.py +0 -200
  33. agent/core/experiment.py +0 -249
  34. agent/core/factory.py +0 -122
  35. agent/core/messages.py +0 -332
  36. agent/core/provider_config.py +0 -21
  37. agent/core/telemetry.py +0 -142
  38. agent/core/tools/__init__.py +0 -21
  39. agent/core/tools/base.py +0 -74
  40. agent/core/tools/bash.py +0 -52
  41. agent/core/tools/collection.py +0 -46
  42. agent/core/tools/computer.py +0 -113
  43. agent/core/tools/edit.py +0 -67
  44. agent/core/tools/manager.py +0 -56
  45. agent/core/tools.py +0 -32
  46. agent/core/types.py +0 -88
  47. agent/core/visualization.py +0 -197
  48. agent/providers/__init__.py +0 -4
  49. agent/providers/anthropic/__init__.py +0 -6
  50. agent/providers/anthropic/api/client.py +0 -360
  51. agent/providers/anthropic/api/logging.py +0 -150
  52. agent/providers/anthropic/api_handler.py +0 -140
  53. agent/providers/anthropic/callbacks/__init__.py +0 -5
  54. agent/providers/anthropic/callbacks/manager.py +0 -65
  55. agent/providers/anthropic/loop.py +0 -568
  56. agent/providers/anthropic/prompts.py +0 -23
  57. agent/providers/anthropic/response_handler.py +0 -226
  58. agent/providers/anthropic/tools/__init__.py +0 -33
  59. agent/providers/anthropic/tools/base.py +0 -88
  60. agent/providers/anthropic/tools/bash.py +0 -66
  61. agent/providers/anthropic/tools/collection.py +0 -34
  62. agent/providers/anthropic/tools/computer.py +0 -396
  63. agent/providers/anthropic/tools/edit.py +0 -326
  64. agent/providers/anthropic/tools/manager.py +0 -54
  65. agent/providers/anthropic/tools/run.py +0 -42
  66. agent/providers/anthropic/types.py +0 -16
  67. agent/providers/anthropic/utils.py +0 -381
  68. agent/providers/omni/__init__.py +0 -8
  69. agent/providers/omni/api_handler.py +0 -42
  70. agent/providers/omni/clients/anthropic.py +0 -103
  71. agent/providers/omni/clients/base.py +0 -35
  72. agent/providers/omni/clients/oaicompat.py +0 -195
  73. agent/providers/omni/clients/ollama.py +0 -122
  74. agent/providers/omni/clients/openai.py +0 -155
  75. agent/providers/omni/clients/utils.py +0 -25
  76. agent/providers/omni/image_utils.py +0 -34
  77. agent/providers/omni/loop.py +0 -990
  78. agent/providers/omni/parser.py +0 -307
  79. agent/providers/omni/prompts.py +0 -64
  80. agent/providers/omni/tools/__init__.py +0 -30
  81. agent/providers/omni/tools/base.py +0 -29
  82. agent/providers/omni/tools/bash.py +0 -74
  83. agent/providers/omni/tools/computer.py +0 -179
  84. agent/providers/omni/tools/manager.py +0 -61
  85. agent/providers/omni/utils.py +0 -236
  86. agent/providers/openai/__init__.py +0 -6
  87. agent/providers/openai/api_handler.py +0 -456
  88. agent/providers/openai/loop.py +0 -472
  89. agent/providers/openai/response_handler.py +0 -205
  90. agent/providers/openai/tools/__init__.py +0 -15
  91. agent/providers/openai/tools/base.py +0 -79
  92. agent/providers/openai/tools/computer.py +0 -326
  93. agent/providers/openai/tools/manager.py +0 -106
  94. agent/providers/openai/types.py +0 -36
  95. agent/providers/openai/utils.py +0 -98
  96. agent/providers/uitars/__init__.py +0 -1
  97. agent/providers/uitars/clients/base.py +0 -35
  98. agent/providers/uitars/clients/mlxvlm.py +0 -263
  99. agent/providers/uitars/clients/oaicompat.py +0 -214
  100. agent/providers/uitars/loop.py +0 -660
  101. agent/providers/uitars/prompts.py +0 -63
  102. agent/providers/uitars/tools/__init__.py +0 -1
  103. agent/providers/uitars/tools/computer.py +0 -283
  104. agent/providers/uitars/tools/manager.py +0 -60
  105. agent/providers/uitars/utils.py +0 -264
  106. agent/telemetry.py +0 -21
  107. agent/ui/__main__.py +0 -15
  108. cua_agent-0.3.2.dist-info/METADATA +0 -295
  109. cua_agent-0.3.2.dist-info/RECORD +0 -87
  110. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b2.dist-info}/WHEEL +0 -0
  111. {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b2.dist-info}/entry_points.txt +0 -0
agent/core/tools/base.py DELETED
@@ -1,74 +0,0 @@
1
- """Abstract base classes for tools that can be used with any provider."""
2
-
3
- from abc import ABCMeta, abstractmethod
4
- from dataclasses import dataclass, fields, replace
5
- from typing import Any, Dict
6
-
7
-
8
- class BaseTool(metaclass=ABCMeta):
9
- """Abstract base class for provider-agnostic tools."""
10
-
11
- name: str
12
-
13
- @abstractmethod
14
- async def __call__(self, **kwargs) -> Any:
15
- """Executes the tool with the given arguments."""
16
- ...
17
-
18
- @abstractmethod
19
- def to_params(self) -> Dict[str, Any]:
20
- """Convert tool to provider-specific API parameters.
21
-
22
- Returns:
23
- Dictionary with tool parameters specific to the LLM provider
24
- """
25
- raise NotImplementedError
26
-
27
-
28
- @dataclass(kw_only=True, frozen=True)
29
- class ToolResult:
30
- """Represents the result of a tool execution."""
31
-
32
- output: str | None = None
33
- error: str | None = None
34
- base64_image: str | None = None
35
- system: str | None = None
36
- content: list[dict] | None = None
37
-
38
- def __bool__(self):
39
- return any(getattr(self, field.name) for field in fields(self))
40
-
41
- def __add__(self, other: "ToolResult"):
42
- def combine_fields(field: str | None, other_field: str | None, concatenate: bool = True):
43
- if field and other_field:
44
- if concatenate:
45
- return field + other_field
46
- raise ValueError("Cannot combine tool results")
47
- return field or other_field
48
-
49
- return ToolResult(
50
- output=combine_fields(self.output, other.output),
51
- error=combine_fields(self.error, other.error),
52
- base64_image=combine_fields(self.base64_image, other.base64_image, False),
53
- system=combine_fields(self.system, other.system),
54
- content=self.content or other.content, # Use first non-None content
55
- )
56
-
57
- def replace(self, **kwargs):
58
- """Returns a new ToolResult with the given fields replaced."""
59
- return replace(self, **kwargs)
60
-
61
-
62
- class CLIResult(ToolResult):
63
- """A ToolResult that can be rendered as a CLI output."""
64
-
65
-
66
- class ToolFailure(ToolResult):
67
- """A ToolResult that represents a failure."""
68
-
69
-
70
- class ToolError(Exception):
71
- """Raised when a tool encounters an error."""
72
-
73
- def __init__(self, message):
74
- self.message = message
agent/core/tools/bash.py DELETED
@@ -1,52 +0,0 @@
1
- """Abstract base bash/shell tool implementation."""
2
-
3
- import asyncio
4
- import logging
5
- from abc import abstractmethod
6
- from typing import Any, Dict, Tuple
7
-
8
- from computer.computer import Computer
9
-
10
- from .base import BaseTool, ToolResult
11
-
12
-
13
- class BaseBashTool(BaseTool):
14
- """Base class for bash/shell command execution tools across different providers."""
15
-
16
- name = "bash"
17
- logger = logging.getLogger(__name__)
18
- computer: Computer
19
-
20
- def __init__(self, computer: Computer):
21
- """Initialize the BashTool.
22
-
23
- Args:
24
- computer: Computer instance, may be used for related operations
25
- """
26
- self.computer = computer
27
-
28
- async def run_command(self, command: str) -> Tuple[int, str, str]:
29
- """Run a shell command and return exit code, stdout, and stderr.
30
-
31
- Args:
32
- command: Shell command to execute
33
-
34
- Returns:
35
- Tuple containing (exit_code, stdout, stderr)
36
- """
37
- try:
38
- process = await asyncio.create_subprocess_shell(
39
- command,
40
- stdout=asyncio.subprocess.PIPE,
41
- stderr=asyncio.subprocess.PIPE,
42
- )
43
- stdout, stderr = await process.communicate()
44
- return process.returncode or 0, stdout.decode(), stderr.decode()
45
- except Exception as e:
46
- self.logger.error(f"Error running command: {str(e)}")
47
- return 1, "", str(e)
48
-
49
- @abstractmethod
50
- async def __call__(self, **kwargs) -> ToolResult:
51
- """Execute the tool with the provided arguments."""
52
- raise NotImplementedError
agent/core/tools/collection.py DELETED
@@ -1,46 +0,0 @@
1
- """Collection classes for managing multiple tools."""
2
-
3
- from typing import Any, Dict, List, Type
4
-
5
- from .base import (
6
- BaseTool,
7
- ToolError,
8
- ToolFailure,
9
- ToolResult,
10
- )
11
-
12
-
13
- class ToolCollection:
14
- """A collection of tools that can be used with any provider."""
15
-
16
- def __init__(self, *tools: BaseTool):
17
- self.tools = tools
18
- self.tool_map = {tool.name: tool for tool in tools}
19
-
20
- def to_params(self) -> List[Dict[str, Any]]:
21
- """Convert all tools to provider-specific parameters.
22
-
23
- Returns:
24
- List of dictionaries with tool parameters
25
- """
26
- return [tool.to_params() for tool in self.tools]
27
-
28
- async def run(self, *, name: str, tool_input: Dict[str, Any]) -> ToolResult:
29
- """Run a tool with the given input.
30
-
31
- Args:
32
- name: Name of the tool to run
33
- tool_input: Input parameters for the tool
34
-
35
- Returns:
36
- Result of the tool execution
37
- """
38
- tool = self.tool_map.get(name)
39
- if not tool:
40
- return ToolFailure(error=f"Tool {name} is invalid")
41
- try:
42
- return await tool(**tool_input)
43
- except ToolError as e:
44
- return ToolFailure(error=e.message)
45
- except Exception as e:
46
- return ToolFailure(error=f"Unexpected error in tool {name}: {str(e)}")
agent/core/tools/computer.py DELETED
@@ -1,113 +0,0 @@
1
- """Abstract base computer tool implementation."""
2
-
3
- import asyncio
4
- import base64
5
- import io
6
- import logging
7
- from abc import abstractmethod
8
- from typing import Any, Dict, Optional, Tuple
9
-
10
- from PIL import Image
11
- from computer.computer import Computer
12
-
13
- from .base import BaseTool, ToolError, ToolResult
14
-
15
-
16
- class BaseComputerTool(BaseTool):
17
- """Base class for computer interaction tools across different providers."""
18
-
19
- name = "computer"
20
- logger = logging.getLogger(__name__)
21
-
22
- width: Optional[int] = None
23
- height: Optional[int] = None
24
- display_num: Optional[int] = None
25
- computer: Computer
26
-
27
- _screenshot_delay = 1.0 # Default delay for most platforms
28
- _scaling_enabled = True
29
-
30
- def __init__(self, computer: Computer):
31
- """Initialize the ComputerTool.
32
-
33
- Args:
34
- computer: Computer instance for screen interactions
35
- """
36
- self.computer = computer
37
-
38
- async def initialize_dimensions(self):
39
- """Initialize screen dimensions from the computer interface."""
40
- display_size = await self.computer.interface.get_screen_size()
41
- self.width = display_size["width"]
42
- self.height = display_size["height"]
43
- self.logger.info(f"Initialized screen dimensions to {self.width}x{self.height}")
44
-
45
- @property
46
- def options(self) -> Dict[str, Any]:
47
- """Get the options for the tool.
48
-
49
- Returns:
50
- Dictionary with tool options
51
- """
52
- if self.width is None or self.height is None:
53
- raise RuntimeError(
54
- "Screen dimensions not initialized. Call initialize_dimensions() first."
55
- )
56
- return {
57
- "display_width_px": self.width,
58
- "display_height_px": self.height,
59
- "display_number": self.display_num,
60
- }
61
-
62
- async def resize_screenshot_if_needed(self, screenshot: bytes) -> bytes:
63
- """Resize a screenshot to match the expected dimensions.
64
-
65
- Args:
66
- screenshot: Raw screenshot data
67
-
68
- Returns:
69
- Resized screenshot data
70
- """
71
- if self.width is None or self.height is None:
72
- raise ToolError("Screen dimensions not initialized")
73
-
74
- try:
75
- img = Image.open(io.BytesIO(screenshot))
76
- if img.mode in ("RGBA", "LA") or (img.mode == "P" and "transparency" in img.info):
77
- img = img.convert("RGB")
78
-
79
- # Resize if dimensions don't match
80
- if img.size != (self.width, self.height):
81
- self.logger.info(
82
- f"Scaling image from {img.size} to {self.width}x{self.height} to match screen dimensions"
83
- )
84
- img = img.resize((self.width, self.height), Image.Resampling.LANCZOS)
85
-
86
- # Save back to bytes
87
- buffer = io.BytesIO()
88
- img.save(buffer, format="PNG")
89
- return buffer.getvalue()
90
-
91
- return screenshot
92
- except Exception as e:
93
- self.logger.error(f"Error during screenshot resizing: {str(e)}")
94
- raise ToolError(f"Failed to resize screenshot: {str(e)}")
95
-
96
- async def screenshot(self) -> ToolResult:
97
- """Take a screenshot and return it as a ToolResult with base64-encoded image.
98
-
99
- Returns:
100
- ToolResult with the screenshot
101
- """
102
- try:
103
- screenshot = await self.computer.interface.screenshot()
104
- screenshot = await self.resize_screenshot_if_needed(screenshot)
105
- return ToolResult(base64_image=base64.b64encode(screenshot).decode())
106
- except Exception as e:
107
- self.logger.error(f"Error taking screenshot: {str(e)}")
108
- return ToolResult(error=f"Failed to take screenshot: {str(e)}")
109
-
110
- @abstractmethod
111
- async def __call__(self, **kwargs) -> ToolResult:
112
- """Execute the tool with the provided arguments."""
113
- raise NotImplementedError
agent/core/tools/edit.py DELETED
@@ -1,67 +0,0 @@
1
- """Abstract base edit tool implementation."""
2
-
3
- import asyncio
4
- import logging
5
- import os
6
- from abc import abstractmethod
7
- from pathlib import Path
8
- from typing import Any, Dict, Optional
9
-
10
- from computer.computer import Computer
11
-
12
- from .base import BaseTool, ToolError, ToolResult
13
-
14
-
15
- class BaseEditTool(BaseTool):
16
- """Base class for text editor tools across different providers."""
17
-
18
- name = "edit"
19
- logger = logging.getLogger(__name__)
20
- computer: Computer
21
-
22
- def __init__(self, computer: Computer):
23
- """Initialize the EditTool.
24
-
25
- Args:
26
- computer: Computer instance, may be used for related operations
27
- """
28
- self.computer = computer
29
-
30
- async def read_file(self, path: str) -> str:
31
- """Read a file and return its contents.
32
-
33
- Args:
34
- path: Path to the file to read
35
-
36
- Returns:
37
- File contents as a string
38
- """
39
- try:
40
- path_obj = Path(path)
41
- if not path_obj.exists():
42
- raise ToolError(f"File does not exist: {path}")
43
- return path_obj.read_text()
44
- except Exception as e:
45
- self.logger.error(f"Error reading file: {str(e)}")
46
- raise ToolError(f"Failed to read file: {str(e)}")
47
-
48
- async def write_file(self, path: str, content: str) -> None:
49
- """Write content to a file.
50
-
51
- Args:
52
- path: Path to the file to write
53
- content: Content to write to the file
54
- """
55
- try:
56
- path_obj = Path(path)
57
- # Create parent directories if they don't exist
58
- path_obj.parent.mkdir(parents=True, exist_ok=True)
59
- path_obj.write_text(content)
60
- except Exception as e:
61
- self.logger.error(f"Error writing file: {str(e)}")
62
- raise ToolError(f"Failed to write file: {str(e)}")
63
-
64
- @abstractmethod
65
- async def __call__(self, **kwargs) -> ToolResult:
66
- """Execute the tool with the provided arguments."""
67
- raise NotImplementedError
agent/core/tools/manager.py DELETED
@@ -1,56 +0,0 @@
1
- """Tool manager for initializing and running tools."""
2
-
3
- from abc import ABC, abstractmethod
4
- from typing import Any, Dict, List
5
-
6
- from computer.computer import Computer
7
-
8
- from .base import BaseTool, ToolResult
9
- from .collection import ToolCollection
10
-
11
-
12
- class BaseToolManager(ABC):
13
- """Base class for tool managers across different providers."""
14
-
15
- def __init__(self, computer: Computer):
16
- """Initialize the tool manager.
17
-
18
- Args:
19
- computer: Computer instance for computer-related tools
20
- """
21
- self.computer = computer
22
- self.tools: ToolCollection | None = None
23
-
24
- @abstractmethod
25
- def _initialize_tools(self) -> ToolCollection:
26
- """Initialize all available tools."""
27
- ...
28
-
29
- async def initialize(self) -> None:
30
- """Initialize tool-specific requirements and create tool collection."""
31
- await self._initialize_tools_specific()
32
- self.tools = self._initialize_tools()
33
-
34
- @abstractmethod
35
- async def _initialize_tools_specific(self) -> None:
36
- """Initialize provider-specific tool requirements."""
37
- ...
38
-
39
- @abstractmethod
40
- def get_tool_params(self) -> List[Dict[str, Any]]:
41
- """Get tool parameters for API calls."""
42
- ...
43
-
44
- async def execute_tool(self, name: str, tool_input: Dict[str, Any]) -> ToolResult:
45
- """Execute a tool with the given input.
46
-
47
- Args:
48
- name: Name of the tool to execute
49
- tool_input: Input parameters for the tool
50
-
51
- Returns:
52
- Result of the tool execution
53
- """
54
- if self.tools is None:
55
- raise RuntimeError("Tools not initialized. Call initialize() first.")
56
- return await self.tools.run(name=name, tool_input=tool_input)
agent/core/tools.py DELETED
@@ -1,32 +0,0 @@
1
- """Tool-related type definitions."""
2
-
3
- from enum import StrEnum
4
- from typing import Dict, Any, Optional
5
- from pydantic import BaseModel, ConfigDict
6
-
7
- class ToolInvocationState(StrEnum):
8
- """States for tool invocation."""
9
- CALL = 'call'
10
- PARTIAL_CALL = 'partial-call'
11
- RESULT = 'result'
12
-
13
- class ToolInvocation(BaseModel):
14
- """Tool invocation type."""
15
- model_config = ConfigDict(extra='forbid')
16
- state: Optional[str] = None
17
- toolCallId: str
18
- toolName: Optional[str] = None
19
- args: Optional[Dict[str, Any]] = None
20
-
21
- class ClientAttachment(BaseModel):
22
- """Client attachment type."""
23
- name: str
24
- contentType: str
25
- url: str
26
-
27
- class ToolResult(BaseModel):
28
- """Result of a tool execution."""
29
- model_config = ConfigDict(extra='forbid')
30
- output: Optional[str] = None
31
- error: Optional[str] = None
32
- metadata: Optional[Dict[str, Any]] = None
agent/core/types.py DELETED
@@ -1,88 +0,0 @@
1
- """Core type definitions."""
2
-
3
- from typing import Any, Dict, List, Optional, TypedDict, Union
4
- from enum import StrEnum
5
- from dataclasses import dataclass
6
-
7
-
8
- class AgentLoop(StrEnum):
9
- """Enumeration of available loop types."""
10
-
11
- ANTHROPIC = "anthropic" # Anthropic implementation
12
- OMNI = "omni" # OmniLoop implementation
13
- OPENAI = "openai" # OpenAI implementation
14
- OLLAMA = "ollama" # OLLAMA implementation
15
- UITARS = "uitars" # UI-TARS implementation
16
- # Add more loop types as needed
17
-
18
-
19
- class LLMProvider(StrEnum):
20
- """Supported LLM providers."""
21
-
22
- ANTHROPIC = "anthropic"
23
- OPENAI = "openai"
24
- OLLAMA = "ollama"
25
- OAICOMPAT = "oaicompat"
26
- MLXVLM= "mlxvlm"
27
-
28
-
29
- @dataclass
30
- class LLM:
31
- """Configuration for LLM model and provider."""
32
-
33
- provider: LLMProvider
34
- name: Optional[str] = None
35
- provider_base_url: Optional[str] = None
36
-
37
- def __post_init__(self):
38
- """Set default model name if not provided."""
39
- if self.name is None:
40
- from .provider_config import DEFAULT_MODELS
41
-
42
- self.name = DEFAULT_MODELS.get(self.provider)
43
-
44
- # Set default provider URL if none provided
45
- if self.provider_base_url is None and self.provider == LLMProvider.OAICOMPAT:
46
- # Default for vLLM
47
- self.provider_base_url = "http://localhost:8000/v1"
48
- # Common alternatives:
49
- # - LM Studio: "http://localhost:1234/v1"
50
- # - LocalAI: "http://localhost:8080/v1"
51
- # - Ollama with OpenAI compatible API: "http://localhost:11434/v1"
52
-
53
-
54
- # For backward compatibility
55
- LLMModel = LLM
56
- Model = LLM
57
-
58
-
59
- class AgentResponse(TypedDict, total=False):
60
- """Agent response format."""
61
-
62
- id: str
63
- object: str
64
- created_at: int
65
- status: str
66
- error: Optional[str]
67
- incomplete_details: Optional[Any]
68
- instructions: Optional[Any]
69
- max_output_tokens: Optional[int]
70
- model: str
71
- output: List[Dict[str, Any]]
72
- parallel_tool_calls: bool
73
- previous_response_id: Optional[str]
74
- reasoning: Dict[str, str]
75
- store: bool
76
- temperature: float
77
- text: Dict[str, Dict[str, str]]
78
- tool_choice: str
79
- tools: List[Dict[str, Union[str, int]]]
80
- top_p: float
81
- truncation: str
82
- usage: Dict[str, Any]
83
- user: Optional[str]
84
- metadata: Dict[str, Any]
85
- response: Dict[str, List[Dict[str, Any]]]
86
- # Additional fields for error responses
87
- role: str
88
- content: Union[str, List[Dict[str, Any]]]