cua-agent 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (69) hide show
  1. {cua_agent-0.1.0 → cua_agent-0.1.2}/PKG-INFO +1 -1
  2. cua_agent-0.1.2/README.md +126 -0
  3. cua_agent-0.1.2/agent/__init__.py +10 -0
  4. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/README.md +2 -2
  5. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/agent.py +78 -35
  6. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/messages.py +15 -0
  7. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/__init__.py +2 -2
  8. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/api/client.py +43 -46
  9. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/loop.py +2 -2
  10. cua_agent-0.1.2/agent/providers/anthropic/types.py +16 -0
  11. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/__init__.py +2 -2
  12. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/loop.py +17 -13
  13. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/messages.py +3 -0
  14. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/prompts.py +0 -14
  15. cua_agent-0.1.2/agent/providers/omni/types.py +52 -0
  16. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/types/base.py +2 -1
  17. {cua_agent-0.1.0 → cua_agent-0.1.2}/pyproject.toml +3 -3
  18. {cua_agent-0.1.0 → cua_agent-0.1.2}/tests/test_agent.py +3 -3
  19. cua_agent-0.1.0/README.md +0 -213
  20. cua_agent-0.1.0/agent/__init__.py +0 -10
  21. cua_agent-0.1.0/agent/providers/anthropic/types.py +0 -16
  22. cua_agent-0.1.0/agent/providers/omni/types.py +0 -30
  23. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/README.md +0 -0
  24. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/__init__.py +0 -0
  25. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/base_agent.py +0 -0
  26. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/callbacks.py +0 -0
  27. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/computer_agent.py +0 -0
  28. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/experiment.py +0 -0
  29. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/factory.py +0 -0
  30. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/loop.py +0 -0
  31. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/tools/__init__.py +0 -0
  32. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/tools/base.py +0 -0
  33. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/tools/bash.py +0 -0
  34. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/tools/collection.py +0 -0
  35. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/tools/computer.py +0 -0
  36. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/tools/edit.py +0 -0
  37. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/core/tools/manager.py +0 -0
  38. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/__init__.py +0 -0
  39. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/api/logging.py +0 -0
  40. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/callbacks/manager.py +0 -0
  41. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/messages/manager.py +0 -0
  42. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/prompts.py +0 -0
  43. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/tools/__init__.py +0 -0
  44. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/tools/base.py +0 -0
  45. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/tools/bash.py +0 -0
  46. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/tools/collection.py +0 -0
  47. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/tools/computer.py +0 -0
  48. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/tools/edit.py +0 -0
  49. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/tools/manager.py +0 -0
  50. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/anthropic/tools/run.py +0 -0
  51. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/callbacks.py +0 -0
  52. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/clients/anthropic.py +0 -0
  53. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/clients/base.py +0 -0
  54. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/clients/groq.py +0 -0
  55. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/clients/openai.py +0 -0
  56. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/clients/utils.py +0 -0
  57. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/experiment.py +0 -0
  58. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/image_utils.py +0 -0
  59. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/parser.py +0 -0
  60. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/tool_manager.py +0 -0
  61. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/tools/__init__.py +0 -0
  62. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/tools/bash.py +0 -0
  63. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/tools/computer.py +0 -0
  64. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/tools/manager.py +0 -0
  65. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/utils.py +0 -0
  66. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/providers/omni/visualization.py +0 -0
  67. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/types/__init__.py +0 -0
  68. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/types/messages.py +0 -0
  69. {cua_agent-0.1.0 → cua_agent-0.1.2}/agent/types/tools.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: <3.13,>=3.10
@@ -0,0 +1,126 @@
1
+ <div align="center">
2
+ <h1>
3
+ <div class="image-wrapper" style="display: inline-block;">
4
+ <picture>
5
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
6
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
7
+ <img alt="Cua logo">
8
+ </picture>
9
+ </div>
10
+
11
+ [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
12
+ [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
13
+ [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
14
+ [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/)
15
+ </h1>
16
+ </div>
17
+
18
+ **Agent** is a Computer Use (CUA) framework for running multi-app agentic workflows targeting macOS and Linux sandboxes, supporting local (Ollama) and cloud model providers (OpenAI, Anthropic, Groq, DeepSeek, Qwen). The framework integrates with Microsoft's OmniParser for enhanced UI understanding and interaction.
19
+
20
+ ### Get started with Agent
21
+
22
+ ```python
23
+ from agent import ComputerAgent, AgentLoop, LLM, LLMProvider
24
+ from computer import Computer
25
+
26
+ computer = Computer(verbosity=logging.INFO)
27
+
28
+ agent = ComputerAgent(
29
+ computer=computer,
30
+ loop=AgentLoop.ANTHROPIC,
31
+ # loop=AgentLoop.OMNI,
32
+ model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"),
33
+ # model=LLM(provider=LLMProvider.OPENAI, name="gpt-4.5-preview"),
34
+ save_trajectory=True,
35
+ trajectory_dir=str(Path("trajectories")),
36
+ only_n_most_recent_images=3,
37
+ verbosity=logging.INFO,
38
+ )
39
+
40
+ tasks = [
41
+ """
42
+ Please help me with the following task:
43
+ 1. Open Safari browser
44
+ 2. Go to Wikipedia.org
45
+ 3. Search for "Claude AI"
46
+ 4. Summarize the main points you find about Claude AI
47
+ """
48
+ ]
49
+
50
+ async with agent:
51
+ for i, task in enumerate(tasks, 1):
52
+ print(f"\nExecuting task {i}/{len(tasks)}: {task}")
53
+ async for result in agent.run(task):
54
+ print(result)
55
+ print(f"Task {i} completed")
56
+ ```
57
+
58
+ ## Install
59
+
60
+ ### cua-agent
61
+
62
+ ```bash
63
+
64
+ pip install cua-agent[all]
65
+
66
+ # or install specific loop providers
67
+ pip install cua-agent[anthropic]
68
+ pip install cua-agent[omni]
69
+
70
+
71
+ ```
72
+
73
+ ## Features
74
+
75
+ ### OmniParser Integration
76
+ - Enhanced UI understanding with element detection
77
+ - Automatic bounding box detection for UI elements
78
+ - Improved accuracy for complex UI interactions
79
+ - Support for icon and text element recognition
80
+
81
+ ### Basic Computer Control
82
+ - Direct keyboard and mouse control
83
+ - Window and application management
84
+ - Screenshot capabilities
85
+ - Basic UI element detection
86
+
87
+ ### Provider Support
88
+ - OpenAI (GPT-4V) - Recommended for OmniParser integration
89
+ - Anthropic (Claude) - Strong general performance
90
+ - Groq - Fast inference with Llama models
91
+ - DeepSeek - Alternative model provider
92
+ - Qwen - Alibaba's multimodal model
93
+
94
+ ## Run
95
+
96
+ Refer to these notebooks for step-by-step guides on how to use the Computer-Use Agent (CUA):
97
+
98
+ - [Agent Notebook](../../notebooks/agent_nb.ipynb) - Complete examples and workflows
99
+
100
+ ## Components
101
+
102
+ The library consists of several components:
103
+
104
+ - **Core**
105
+ - `ComputerAgent`: Unified agent class supporting multiple loop types
106
+ - `BaseComputerAgent`: Abstract base class for computer agents
107
+
108
+ - **Providers**
109
+ - `Anthropic`: Implementation for Anthropic Claude models
110
+ - `Omni`: Implementation for multiple providers (OpenAI, Groq, etc.)
111
+
112
+ - **Loops**
113
+ - `AnthropicLoop`: Loop implementation for Anthropic
114
+ - `OmniLoop`: Generic loop supporting multiple providers
115
+
116
+ ## Configuration
117
+
118
+ The agent can be configured with various parameters:
119
+
120
+ - **loop_type**: The type of loop to use (ANTHROPIC or OMNI)
121
+ - **provider**: AI provider to use with the loop
122
+ - **model**: The AI model to use
123
+ - **save_trajectory**: Whether to save screenshots and logs
124
+ - **only_n_most_recent_images**: Only keep a specific number of recent images
125
+
126
+ See the [Core README](./agent/core/README.md) for more details on the unified agent.
@@ -0,0 +1,10 @@
1
+ """CUA (Computer Use) Agent for AI-driven computer interaction."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from .core.factory import AgentFactory
6
+ from .core.agent import ComputerAgent
7
+ from .providers.omni.types import LLMProvider, LLM
8
+ from .types.base import Provider, AgentLoop
9
+
10
+ __all__ = ["AgentFactory", "Provider", "ComputerAgent", "AgentLoop", "LLMProvider", "LLM"]
@@ -34,7 +34,7 @@ Here's how to use the unified ComputerAgent:
34
34
  ```python
35
35
  from agent.core.agent import ComputerAgent
36
36
  from agent.types.base import AgenticLoop
37
- from agent.providers.omni.types import APIProvider
37
+ from agent.providers.omni.types import LLMProvider
38
38
  from computer import Computer
39
39
 
40
40
  # Create a Computer instance
@@ -44,7 +44,7 @@ computer = Computer()
44
44
  agent = ComputerAgent(
45
45
  computer=computer,
46
46
  loop_type=AgenticLoop.OMNI,
47
- provider=APIProvider.OPENAI,
47
+ provider=LLMProvider.OPENAI,
48
48
  model="gpt-4o",
49
49
  api_key="your_api_key_here", # Can also use OPENAI_API_KEY environment variable
50
50
  save_trajectory=True,
@@ -3,12 +3,12 @@
3
3
  import os
4
4
  import logging
5
5
  import asyncio
6
- from typing import Any, AsyncGenerator, Dict, List, Optional, TYPE_CHECKING
6
+ from typing import Any, AsyncGenerator, Dict, List, Optional, TYPE_CHECKING, Union, cast
7
7
  from datetime import datetime
8
8
 
9
9
  from computer import Computer
10
10
 
11
- from ..types.base import Provider, AgenticLoop
11
+ from ..types.base import Provider, AgentLoop
12
12
  from .base_agent import BaseComputerAgent
13
13
 
14
14
  # Only import types for type checking to avoid circular imports
@@ -17,23 +17,23 @@ if TYPE_CHECKING:
17
17
  from ..providers.omni.loop import OmniLoop
18
18
  from ..providers.omni.parser import OmniParser
19
19
 
20
- # Import the APIProvider enum without importing the whole module
21
- from ..providers.omni.types import APIProvider
20
+ # Import the provider types
21
+ from ..providers.omni.types import LLMProvider, LLM, Model, LLMModel
22
22
 
23
23
  logger = logging.getLogger(__name__)
24
24
 
25
25
  # Default models for different providers
26
26
  DEFAULT_MODELS = {
27
- APIProvider.OPENAI: "gpt-4o",
28
- APIProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
29
- APIProvider.GROQ: "llama3-70b-8192",
27
+ LLMProvider.OPENAI: "gpt-4o",
28
+ LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
29
+ LLMProvider.GROQ: "llama3-70b-8192",
30
30
  }
31
31
 
32
32
  # Map providers to their environment variable names
33
33
  ENV_VARS = {
34
- APIProvider.OPENAI: "OPENAI_API_KEY",
35
- APIProvider.GROQ: "GROQ_API_KEY",
36
- APIProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
34
+ LLMProvider.OPENAI: "OPENAI_API_KEY",
35
+ LLMProvider.GROQ: "GROQ_API_KEY",
36
+ LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
37
37
  }
38
38
 
39
39
 
@@ -47,10 +47,9 @@ class ComputerAgent(BaseComputerAgent):
47
47
  def __init__(
48
48
  self,
49
49
  computer: Computer,
50
- loop_type: AgenticLoop = AgenticLoop.OMNI,
51
- ai_provider: APIProvider = APIProvider.OPENAI,
50
+ loop: AgentLoop = AgentLoop.OMNI,
51
+ model: Optional[Union[LLM, Dict[str, str], str]] = None,
52
52
  api_key: Optional[str] = None,
53
- model: Optional[str] = None,
54
53
  save_trajectory: bool = True,
55
54
  trajectory_dir: Optional[str] = "trajectories",
56
55
  only_n_most_recent_images: Optional[int] = None,
@@ -62,10 +61,13 @@ class ComputerAgent(BaseComputerAgent):
62
61
 
63
62
  Args:
64
63
  computer: Computer instance to control
65
- loop_type: The type of loop to use (Anthropic or Omni)
66
- ai_provider: AI provider to use (required for Cua loop)
64
+ loop: The type of loop to use (Anthropic or Omni)
65
+ model: LLM configuration. Can be:
66
+ - LLM object with provider and name
67
+ - Dict with 'provider' and 'name' keys
68
+ - String with model name (defaults to OpenAI provider)
69
+ - None (defaults based on loop)
67
70
  api_key: Optional API key (will use environment variable if not provided)
68
- model: Optional model name (will use provider default if not specified)
69
71
  save_trajectory: Whether to save screenshots and logs
70
72
  trajectory_dir: Directory to save trajectories (defaults to "trajectories")
71
73
  only_n_most_recent_images: Limit history to N most recent images
@@ -87,8 +89,7 @@ class ComputerAgent(BaseComputerAgent):
87
89
  **kwargs,
88
90
  )
89
91
 
90
- self.loop_type = loop_type
91
- self.provider = ai_provider
92
+ self.loop_type = loop
92
93
  self.save_trajectory = save_trajectory
93
94
  self.trajectory_dir = trajectory_dir
94
95
  self.only_n_most_recent_images = only_n_most_recent_images
@@ -98,14 +99,19 @@ class ComputerAgent(BaseComputerAgent):
98
99
  # Configure logging based on verbosity
99
100
  self._configure_logging(verbosity)
100
101
 
102
+ # Process model configuration
103
+ self.model_config = self._process_model_config(model, loop)
104
+
101
105
  # Get API key from environment if not provided
102
106
  if api_key is None:
103
107
  env_var = (
104
- ENV_VARS.get(ai_provider) if loop_type == AgenticLoop.OMNI else "ANTHROPIC_API_KEY"
108
+ ENV_VARS.get(self.model_config.provider)
109
+ if loop == AgentLoop.OMNI
110
+ else "ANTHROPIC_API_KEY"
105
111
  )
106
112
  if not env_var:
107
113
  raise ValueError(
108
- f"Unsupported provider: {ai_provider}. Please use one of: {list(ENV_VARS.keys())}"
114
+ f"Unsupported provider: {self.model_config.provider}. Please use one of: {list(ENV_VARS.keys())}"
109
115
  )
110
116
 
111
117
  api_key = os.environ.get(env_var)
@@ -119,18 +125,49 @@ class ComputerAgent(BaseComputerAgent):
119
125
  )
120
126
  self.api_key = api_key
121
127
 
122
- # Set model based on provider if not specified
123
- if model is None:
124
- if loop_type == AgenticLoop.OMNI:
125
- self.model = DEFAULT_MODELS[ai_provider]
126
- else: # Anthropic loop
127
- self.model = DEFAULT_MODELS[APIProvider.ANTHROPIC]
128
- else:
129
- self.model = model
130
-
131
128
  # Initialize the appropriate loop based on loop_type
132
129
  self.loop = self._init_loop()
133
130
 
131
+ def _process_model_config(
132
+ self, model_input: Optional[Union[LLM, Dict[str, str], str]], loop: AgentLoop
133
+ ) -> LLM:
134
+ """Process and normalize model configuration.
135
+
136
+ Args:
137
+ model_input: Input model configuration (LLM, dict, string, or None)
138
+ loop: The loop type being used
139
+
140
+ Returns:
141
+ Normalized LLM instance
142
+ """
143
+ # Handle case where model_input is None
144
+ if model_input is None:
145
+ # Use Anthropic for Anthropic loop, OpenAI for Omni loop
146
+ default_provider = (
147
+ LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
148
+ )
149
+ return LLM(provider=default_provider)
150
+
151
+ # Handle case where model_input is already a LLM or one of its aliases
152
+ if isinstance(model_input, (LLM, Model, LLMModel)):
153
+ return model_input
154
+
155
+ # Handle case where model_input is a dict
156
+ if isinstance(model_input, dict):
157
+ provider = model_input.get("provider", LLMProvider.OPENAI)
158
+ if isinstance(provider, str):
159
+ provider = LLMProvider(provider)
160
+ return LLM(provider=provider, name=model_input.get("name"))
161
+
162
+ # Handle case where model_input is a string (model name)
163
+ if isinstance(model_input, str):
164
+ default_provider = (
165
+ LLMProvider.ANTHROPIC if loop == AgentLoop.ANTHROPIC else LLMProvider.OPENAI
166
+ )
167
+ return LLM(provider=default_provider, name=model_input)
168
+
169
+ raise ValueError(f"Unsupported model configuration: {model_input}")
170
+
134
171
  def _configure_logging(self, verbosity: int):
135
172
  """Configure logging based on verbosity level."""
136
173
  # Use the logging level directly without mapping
@@ -159,12 +196,15 @@ class ComputerAgent(BaseComputerAgent):
159
196
  from ..providers.omni.loop import OmniLoop
160
197
  from ..providers.omni.parser import OmniParser
161
198
 
162
- if self.loop_type == AgenticLoop.ANTHROPIC:
199
+ if self.loop_type == AgentLoop.ANTHROPIC:
163
200
  from ..providers.anthropic.loop import AnthropicLoop
164
201
 
202
+ # Ensure we always have a valid model name
203
+ model_name = self.model_config.name or DEFAULT_MODELS[LLMProvider.ANTHROPIC]
204
+
165
205
  return AnthropicLoop(
166
206
  api_key=self.api_key,
167
- model=self.model,
207
+ model=model_name,
168
208
  computer=self.computer,
169
209
  save_trajectory=self.save_trajectory,
170
210
  base_dir=self.trajectory_dir,
@@ -176,10 +216,13 @@ class ComputerAgent(BaseComputerAgent):
176
216
  if "parser" not in self._kwargs:
177
217
  self._kwargs["parser"] = OmniParser()
178
218
 
219
+ # Ensure we always have a valid model name
220
+ model_name = self.model_config.name or DEFAULT_MODELS[self.model_config.provider]
221
+
179
222
  return OmniLoop(
180
- provider=self.provider,
223
+ provider=self.model_config.provider,
181
224
  api_key=self.api_key,
182
- model=self.model,
225
+ model=model_name,
183
226
  computer=self.computer,
184
227
  save_trajectory=self.save_trajectory,
185
228
  base_dir=self.trajectory_dir,
@@ -198,7 +241,7 @@ class ComputerAgent(BaseComputerAgent):
198
241
  """
199
242
  try:
200
243
  # Format the messages based on loop type
201
- if self.loop_type == AgenticLoop.ANTHROPIC:
244
+ if self.loop_type == AgentLoop.ANTHROPIC:
202
245
  # Anthropic format
203
246
  messages = [{"role": "user", "content": [{"type": "text", "text": task}]}]
204
247
  else:
@@ -221,7 +264,7 @@ class ComputerAgent(BaseComputerAgent):
221
264
  continue
222
265
 
223
266
  # Extract content and metadata based on loop type
224
- if self.loop_type == AgenticLoop.ANTHROPIC:
267
+ if self.loop_type == AgentLoop.ANTHROPIC:
225
268
  # Handle Anthropic format
226
269
  if "content" in result:
227
270
  content_text = ""
@@ -37,6 +37,17 @@ class BaseMessageManager:
37
37
  if self.image_retention_config.min_removal_threshold < 1:
38
38
  raise ValueError("min_removal_threshold must be at least 1")
39
39
 
40
+ # Track provider for message formatting
41
+ self.provider = "openai" # Default provider
42
+
43
+ def set_provider(self, provider: str) -> None:
44
+ """Set the current provider to format messages for.
45
+
46
+ Args:
47
+ provider: Provider name (e.g., 'openai', 'anthropic')
48
+ """
49
+ self.provider = provider.lower()
50
+
40
51
  def prepare_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
41
52
  """Prepare messages by applying image retention and caching as configured.
42
53
 
@@ -96,6 +107,10 @@ class BaseMessageManager:
96
107
  Args:
97
108
  messages: Messages to inject caching into
98
109
  """
110
+ # Only apply cache_control for Anthropic API, not OpenAI
111
+ if self.provider != "anthropic":
112
+ return
113
+
99
114
  # Default to caching last 3 turns
100
115
  turns_to_cache = 3
101
116
  for message in reversed(messages):
@@ -1,6 +1,6 @@
1
1
  """Anthropic provider implementation."""
2
2
 
3
3
  from .loop import AnthropicLoop
4
- from .types import APIProvider
4
+ from .types import LLMProvider
5
5
 
6
- __all__ = ["AnthropicLoop", "APIProvider"]
6
+ __all__ = ["AnthropicLoop", "LLMProvider"]
@@ -3,25 +3,28 @@ import httpx
3
3
  import asyncio
4
4
  from anthropic import Anthropic, AnthropicBedrock, AnthropicVertex
5
5
  from anthropic.types.beta import BetaMessage, BetaMessageParam, BetaToolUnionParam
6
- from ..types import APIProvider
6
+ from ..types import LLMProvider
7
7
  from .logging import log_api_interaction
8
8
  import random
9
9
  import logging
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
13
+
13
14
  class APIConnectionError(Exception):
14
15
  """Error raised when there are connection issues with the API."""
16
+
15
17
  pass
16
18
 
19
+
17
20
  class BaseAnthropicClient:
18
21
  """Base class for Anthropic API clients."""
19
-
22
+
20
23
  MAX_RETRIES = 10
21
24
  INITIAL_RETRY_DELAY = 1.0
22
25
  MAX_RETRY_DELAY = 60.0
23
26
  JITTER_FACTOR = 0.1
24
-
27
+
25
28
  async def create_message(
26
29
  self,
27
30
  *,
@@ -36,79 +39,67 @@ class BaseAnthropicClient:
36
39
 
37
40
  async def _make_api_call_with_retries(self, api_call):
38
41
  """Make an API call with exponential backoff retry logic.
39
-
42
+
40
43
  Args:
41
44
  api_call: Async function that makes the actual API call
42
-
45
+
43
46
  Returns:
44
47
  API response
45
-
48
+
46
49
  Raises:
47
50
  APIConnectionError: If all retries fail
48
51
  """
49
52
  retry_count = 0
50
53
  last_error = None
51
-
54
+
52
55
  while retry_count < self.MAX_RETRIES:
53
56
  try:
54
57
  return await api_call()
55
58
  except Exception as e:
56
59
  last_error = e
57
60
  retry_count += 1
58
-
61
+
59
62
  if retry_count == self.MAX_RETRIES:
60
63
  break
61
-
64
+
62
65
  # Calculate delay with exponential backoff and jitter
63
66
  delay = min(
64
- self.INITIAL_RETRY_DELAY * (2 ** (retry_count - 1)),
65
- self.MAX_RETRY_DELAY
67
+ self.INITIAL_RETRY_DELAY * (2 ** (retry_count - 1)), self.MAX_RETRY_DELAY
66
68
  )
67
69
  # Add jitter to avoid thundering herd
68
70
  jitter = delay * self.JITTER_FACTOR * (2 * random.random() - 1)
69
71
  final_delay = delay + jitter
70
-
72
+
71
73
  logger.info(
72
74
  f"Retrying request (attempt {retry_count}/{self.MAX_RETRIES}) "
73
75
  f"in {final_delay:.2f} seconds after error: {str(e)}"
74
76
  )
75
77
  await asyncio.sleep(final_delay)
76
-
78
+
77
79
  raise APIConnectionError(
78
- f"Failed after {self.MAX_RETRIES} retries. "
79
- f"Last error: {str(last_error)}"
80
+ f"Failed after {self.MAX_RETRIES} retries. " f"Last error: {str(last_error)}"
80
81
  )
81
82
 
83
+
82
84
  class AnthropicDirectClient(BaseAnthropicClient):
83
85
  """Direct Anthropic API client implementation."""
84
-
86
+
85
87
  def __init__(self, api_key: str, model: str):
86
88
  self.model = model
87
- self.client = Anthropic(
88
- api_key=api_key,
89
- http_client=self._create_http_client()
90
- )
91
-
89
+ self.client = Anthropic(api_key=api_key, http_client=self._create_http_client())
90
+
92
91
  def _create_http_client(self) -> httpx.Client:
93
92
  """Create an HTTP client with appropriate settings."""
94
93
  return httpx.Client(
95
94
  verify=True,
96
- timeout=httpx.Timeout(
97
- connect=30.0,
98
- read=300.0,
99
- write=30.0,
100
- pool=30.0
101
- ),
95
+ timeout=httpx.Timeout(connect=30.0, read=300.0, write=30.0, pool=30.0),
102
96
  transport=httpx.HTTPTransport(
103
97
  retries=3,
104
98
  verify=True,
105
- limits=httpx.Limits(
106
- max_keepalive_connections=5,
107
- max_connections=10
108
- )
109
- )
99
+ limits=httpx.Limits(max_keepalive_connections=5, max_connections=10),
100
+ ),
110
101
  )
111
-
102
+
112
103
  async def create_message(
113
104
  self,
114
105
  *,
@@ -119,6 +110,7 @@ class AnthropicDirectClient(BaseAnthropicClient):
119
110
  betas: list[str],
120
111
  ) -> BetaMessage:
121
112
  """Create a message using the direct Anthropic API with retry logic."""
113
+
122
114
  async def api_call():
123
115
  response = self.client.beta.messages.with_raw_response.create(
124
116
  max_tokens=max_tokens,
@@ -130,20 +122,21 @@ class AnthropicDirectClient(BaseAnthropicClient):
130
122
  )
131
123
  log_api_interaction(response.http_response.request, response.http_response, None)
132
124
  return response.parse()
133
-
125
+
134
126
  try:
135
127
  return await self._make_api_call_with_retries(api_call)
136
128
  except Exception as e:
137
129
  log_api_interaction(None, None, e)
138
130
  raise
139
131
 
132
+
140
133
  class AnthropicVertexClient(BaseAnthropicClient):
141
134
  """Google Cloud Vertex AI implementation of Anthropic client."""
142
-
135
+
143
136
  def __init__(self, model: str):
144
137
  self.model = model
145
138
  self.client = AnthropicVertex()
146
-
139
+
147
140
  async def create_message(
148
141
  self,
149
142
  *,
@@ -154,6 +147,7 @@ class AnthropicVertexClient(BaseAnthropicClient):
154
147
  betas: list[str],
155
148
  ) -> BetaMessage:
156
149
  """Create a message using Vertex AI with retry logic."""
150
+
157
151
  async def api_call():
158
152
  response = self.client.beta.messages.with_raw_response.create(
159
153
  max_tokens=max_tokens,
@@ -165,20 +159,21 @@ class AnthropicVertexClient(BaseAnthropicClient):
165
159
  )
166
160
  log_api_interaction(response.http_response.request, response.http_response, None)
167
161
  return response.parse()
168
-
162
+
169
163
  try:
170
164
  return await self._make_api_call_with_retries(api_call)
171
165
  except Exception as e:
172
166
  log_api_interaction(None, None, e)
173
167
  raise
174
168
 
169
+
175
170
  class AnthropicBedrockClient(BaseAnthropicClient):
176
171
  """AWS Bedrock implementation of Anthropic client."""
177
-
172
+
178
173
  def __init__(self, model: str):
179
174
  self.model = model
180
175
  self.client = AnthropicBedrock()
181
-
176
+
182
177
  async def create_message(
183
178
  self,
184
179
  *,
@@ -189,6 +184,7 @@ class AnthropicBedrockClient(BaseAnthropicClient):
189
184
  betas: list[str],
190
185
  ) -> BetaMessage:
191
186
  """Create a message using AWS Bedrock with retry logic."""
187
+
192
188
  async def api_call():
193
189
  response = self.client.beta.messages.with_raw_response.create(
194
190
  max_tokens=max_tokens,
@@ -200,23 +196,24 @@ class AnthropicBedrockClient(BaseAnthropicClient):
200
196
  )
201
197
  log_api_interaction(response.http_response.request, response.http_response, None)
202
198
  return response.parse()
203
-
199
+
204
200
  try:
205
201
  return await self._make_api_call_with_retries(api_call)
206
202
  except Exception as e:
207
203
  log_api_interaction(None, None, e)
208
204
  raise
209
205
 
206
+
210
207
  class AnthropicClientFactory:
211
208
  """Factory for creating appropriate Anthropic client implementations."""
212
-
209
+
213
210
  @staticmethod
214
- def create_client(provider: APIProvider, api_key: str, model: str) -> BaseAnthropicClient:
211
+ def create_client(provider: LLMProvider, api_key: str, model: str) -> BaseAnthropicClient:
215
212
  """Create an appropriate client based on the provider."""
216
- if provider == APIProvider.ANTHROPIC:
213
+ if provider == LLMProvider.ANTHROPIC:
217
214
  return AnthropicDirectClient(api_key, model)
218
- elif provider == APIProvider.VERTEX:
215
+ elif provider == LLMProvider.VERTEX:
219
216
  return AnthropicVertexClient(model)
220
- elif provider == APIProvider.BEDROCK:
217
+ elif provider == LLMProvider.BEDROCK:
221
218
  return AnthropicBedrockClient(model)
222
- raise ValueError(f"Unsupported provider: {provider}")
219
+ raise ValueError(f"Unsupported provider: {provider}")
@@ -32,7 +32,7 @@ from .tools.manager import ToolManager
32
32
  from .messages.manager import MessageManager
33
33
  from .callbacks.manager import CallbackManager
34
34
  from .prompts import SYSTEM_PROMPT
35
- from .types import APIProvider
35
+ from .types import LLMProvider
36
36
  from .tools import ToolResult
37
37
 
38
38
  # Constants
@@ -86,7 +86,7 @@ class AnthropicLoop(BaseLoop):
86
86
  self.model = "claude-3-7-sonnet-20250219"
87
87
 
88
88
  # Anthropic-specific attributes
89
- self.provider = APIProvider.ANTHROPIC
89
+ self.provider = LLMProvider.ANTHROPIC
90
90
  self.client = None
91
91
  self.retry_count = 0
92
92
  self.tool_manager = None