cua-agent 0.1.17__tar.gz → 0.1.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (77) hide show
  1. cua_agent-0.1.18/PKG-INFO +165 -0
  2. cua_agent-0.1.18/README.md +116 -0
  3. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/__init__.py +2 -2
  4. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/__init__.py +1 -1
  5. cua_agent-0.1.17/agent/core/computer_agent.py → cua_agent-0.1.18/agent/core/agent.py +15 -53
  6. cua_agent-0.1.17/agent/core/loop.py → cua_agent-0.1.18/agent/core/base.py +12 -25
  7. cua_agent-0.1.18/agent/core/factory.py +104 -0
  8. cua_agent-0.1.18/agent/core/provider_config.py +15 -0
  9. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/types.py +10 -0
  10. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/loop.py +1 -1
  11. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/response_handler.py +1 -4
  12. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/utils.py +1 -3
  13. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/loop.py +1 -1
  14. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/types.py +2 -0
  15. cua_agent-0.1.18/agent/providers/openai/__init__.py +6 -0
  16. cua_agent-0.1.18/agent/providers/openai/api_handler.py +453 -0
  17. cua_agent-0.1.18/agent/providers/openai/loop.py +440 -0
  18. cua_agent-0.1.18/agent/providers/openai/response_handler.py +205 -0
  19. cua_agent-0.1.18/agent/providers/openai/tools/__init__.py +15 -0
  20. cua_agent-0.1.18/agent/providers/openai/tools/base.py +79 -0
  21. cua_agent-0.1.18/agent/providers/openai/tools/computer.py +319 -0
  22. cua_agent-0.1.18/agent/providers/openai/tools/manager.py +106 -0
  23. cua_agent-0.1.18/agent/providers/openai/types.py +36 -0
  24. cua_agent-0.1.18/agent/providers/openai/utils.py +98 -0
  25. {cua_agent-0.1.17 → cua_agent-0.1.18}/pyproject.toml +7 -3
  26. cua_agent-0.1.17/PKG-INFO +0 -90
  27. cua_agent-0.1.17/README.md +0 -44
  28. cua_agent-0.1.17/agent/README.md +0 -63
  29. cua_agent-0.1.17/agent/providers/anthropic/messages/manager.py +0 -112
  30. cua_agent-0.1.17/tests/test_agent.py +0 -91
  31. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/README.md +0 -0
  32. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/callbacks.py +0 -0
  33. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/experiment.py +0 -0
  34. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/messages.py +0 -0
  35. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/telemetry.py +0 -0
  36. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/tools/__init__.py +0 -0
  37. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/tools/base.py +0 -0
  38. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/tools/bash.py +0 -0
  39. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/tools/collection.py +0 -0
  40. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/tools/computer.py +0 -0
  41. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/tools/edit.py +0 -0
  42. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/tools/manager.py +0 -0
  43. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/tools.py +0 -0
  44. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/core/visualization.py +0 -0
  45. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/__init__.py +0 -0
  46. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/__init__.py +0 -0
  47. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/api/client.py +0 -0
  48. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/api/logging.py +0 -0
  49. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/api_handler.py +0 -0
  50. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/callbacks/__init__.py +0 -0
  51. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/callbacks/manager.py +0 -0
  52. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/prompts.py +0 -0
  53. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/tools/__init__.py +0 -0
  54. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/tools/base.py +0 -0
  55. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/tools/bash.py +0 -0
  56. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/tools/collection.py +0 -0
  57. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/tools/computer.py +0 -0
  58. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/tools/edit.py +0 -0
  59. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/tools/manager.py +0 -0
  60. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/tools/run.py +0 -0
  61. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/anthropic/types.py +0 -0
  62. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/__init__.py +0 -0
  63. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/api_handler.py +0 -0
  64. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/clients/anthropic.py +0 -0
  65. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/clients/base.py +0 -0
  66. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/clients/openai.py +0 -0
  67. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/clients/utils.py +0 -0
  68. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/image_utils.py +0 -0
  69. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/parser.py +0 -0
  70. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/prompts.py +0 -0
  71. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/tools/__init__.py +0 -0
  72. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/tools/base.py +0 -0
  73. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/tools/bash.py +0 -0
  74. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/tools/computer.py +0 -0
  75. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/tools/manager.py +0 -0
  76. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/providers/omni/utils.py +0 -0
  77. {cua_agent-0.1.17 → cua_agent-0.1.18}/agent/telemetry.py +0 -0
@@ -0,0 +1,165 @@
1
+ Metadata-Version: 2.1
2
+ Name: cua-agent
3
+ Version: 0.1.18
4
+ Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
+ Author-Email: TryCua <gh@trycua.com>
6
+ Requires-Python: <3.13,>=3.10
7
+ Requires-Dist: httpx<0.29.0,>=0.27.0
8
+ Requires-Dist: aiohttp<4.0.0,>=3.9.3
9
+ Requires-Dist: asyncio
10
+ Requires-Dist: anyio<5.0.0,>=4.4.1
11
+ Requires-Dist: typing-extensions<5.0.0,>=4.12.2
12
+ Requires-Dist: pydantic<3.0.0,>=2.6.4
13
+ Requires-Dist: rich<14.0.0,>=13.7.1
14
+ Requires-Dist: python-dotenv<2.0.0,>=1.0.1
15
+ Requires-Dist: cua-computer<0.2.0,>=0.1.0
16
+ Requires-Dist: cua-core<0.2.0,>=0.1.0
17
+ Requires-Dist: certifi>=2024.2.2
18
+ Provides-Extra: anthropic
19
+ Requires-Dist: anthropic>=0.49.0; extra == "anthropic"
20
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "anthropic"
21
+ Provides-Extra: openai
22
+ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "openai"
23
+ Requires-Dist: httpx<0.29.0,>=0.27.0; extra == "openai"
24
+ Provides-Extra: som
25
+ Requires-Dist: torch>=2.2.1; extra == "som"
26
+ Requires-Dist: torchvision>=0.17.1; extra == "som"
27
+ Requires-Dist: ultralytics>=8.0.0; extra == "som"
28
+ Requires-Dist: transformers>=4.38.2; extra == "som"
29
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "som"
30
+ Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "som"
31
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "som"
32
+ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "som"
33
+ Requires-Dist: groq<0.5.0,>=0.4.0; extra == "som"
34
+ Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "som"
35
+ Requires-Dist: requests<3.0.0,>=2.31.0; extra == "som"
36
+ Provides-Extra: all
37
+ Requires-Dist: torch>=2.2.1; extra == "all"
38
+ Requires-Dist: torchvision>=0.17.1; extra == "all"
39
+ Requires-Dist: ultralytics>=8.0.0; extra == "all"
40
+ Requires-Dist: transformers>=4.38.2; extra == "all"
41
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "all"
42
+ Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "all"
43
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "all"
44
+ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "all"
45
+ Requires-Dist: groq<0.5.0,>=0.4.0; extra == "all"
46
+ Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "all"
47
+ Requires-Dist: requests<3.0.0,>=2.31.0; extra == "all"
48
+ Description-Content-Type: text/markdown
49
+
50
+ <div align="center">
51
+ <h1>
52
+ <div class="image-wrapper" style="display: inline-block;">
53
+ <picture>
54
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
55
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
56
+ <img alt="Shows my svg">
57
+ </picture>
58
+ </div>
59
+
60
+ [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
61
+ [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
62
+ [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
63
+ [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/)
64
+ </h1>
65
+ </div>
66
+
67
+ **cua-agent** is a general Computer-Use framework for running multi-app agentic workflows targeting macOS and Linux sandboxes created with Cua, supporting local (Ollama) and cloud model providers (OpenAI, Anthropic, Groq, DeepSeek, Qwen).
68
+
69
+ ### Get started with Agent
70
+
71
+ <div align="center">
72
+ <img src="../../img/agent.png"/>
73
+ </div>
74
+
75
+ ## Install
76
+
77
+ ```bash
78
+ pip install "cua-agent[all]"
79
+
80
+ # or install specific loop providers
81
+ pip install "cua-agent[openai]" # OpenAI Cua Loop
82
+ pip install "cua-agent[anthropic]" # Anthropic Cua Loop
83
+ pip install "cua-agent[omni]" # Cua Loop based on OmniParser
84
+ ```
85
+
86
+ ## Run
87
+
88
+ ```python
89
+ async with Computer() as macos_computer:
90
+ # Create agent with loop and provider
91
+ agent = ComputerAgent(
92
+ computer=macos_computer,
93
+ loop=AgentLoop.OPENAI,
94
+ model=LLM(provider=LLMProvider.OPENAI)
95
+ )
96
+
97
+ tasks = [
98
+ "Look for a repository named trycua/cua on GitHub.",
99
+ "Check the open issues, open the most recent one and read it.",
100
+ "Clone the repository in users/lume/projects if it doesn't exist yet.",
101
+ "Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
102
+ "From Cursor, open Composer if not already open.",
103
+ "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
104
+ ]
105
+
106
+ for i, task in enumerate(tasks):
107
+ print(f"\nExecuting task {i}/{len(tasks)}: {task}")
108
+ async for result in agent.run(task):
109
+ print(result)
110
+
111
+ print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")
112
+ ```
113
+
114
+ Refer to this notebook for a step-by-step guide on how to use the Computer-Use Agent (CUA):
115
+
116
+ - [Agent Notebook](../../notebooks/agent_nb.ipynb) - Complete examples and workflows
117
+
118
+ ## Agent Loops
119
+
120
+ The `cua-agent` package provides three agent loop variations, based on different CUA model providers and techniques:
121
+
122
+ | Agent Loop | Supported Models | Description | Set-Of-Marks |
123
+ |:-----------|:-----------------|:------------|:-------------|
124
+ | `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
125
+ | `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
126
+ | `AgentLoop.OMNI` <br>(preview) | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `gpt-3.5-turbo` | Use OmniParser for element pixel-detection (SoM) and any VLMs | OmniParser |
127
+
128
+ ## AgentResponse
129
+ The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
130
+
131
+ ```python
132
+ async for result in agent.run(task):
133
+ print("Response ID: ", result.get("id"))
134
+
135
+ # Print detailed usage information
136
+ usage = result.get("usage")
137
+ if usage:
138
+ print("\nUsage Details:")
139
+ print(f" Input Tokens: {usage.get('input_tokens')}")
140
+ if "input_tokens_details" in usage:
141
+ print(f" Input Tokens Details: {usage.get('input_tokens_details')}")
142
+ print(f" Output Tokens: {usage.get('output_tokens')}")
143
+ if "output_tokens_details" in usage:
144
+ print(f" Output Tokens Details: {usage.get('output_tokens_details')}")
145
+ print(f" Total Tokens: {usage.get('total_tokens')}")
146
+
147
+ print("Response Text: ", result.get("text"))
148
+
149
+ # Print tools information
150
+ tools = result.get("tools")
151
+ if tools:
152
+ print("\nTools:")
153
+ print(tools)
154
+
155
+ # Print reasoning and tool call outputs
156
+ outputs = result.get("output", [])
157
+ for output in outputs:
158
+ output_type = output.get("type")
159
+ if output_type == "reasoning":
160
+ print("\nReasoning Output:")
161
+ print(output)
162
+ elif output_type == "computer_call":
163
+ print("\nTool Call Output:")
164
+ print(output)
165
+ ```
@@ -0,0 +1,116 @@
1
+ <div align="center">
2
+ <h1>
3
+ <div class="image-wrapper" style="display: inline-block;">
4
+ <picture>
5
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
6
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
7
+ <img alt="Shows my svg">
8
+ </picture>
9
+ </div>
10
+
11
+ [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
12
+ [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
13
+ [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
14
+ [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/)
15
+ </h1>
16
+ </div>
17
+
18
+ **cua-agent** is a general Computer-Use framework for running multi-app agentic workflows targeting macOS and Linux sandboxes created with Cua, supporting local (Ollama) and cloud model providers (OpenAI, Anthropic, Groq, DeepSeek, Qwen).
19
+
20
+ ### Get started with Agent
21
+
22
+ <div align="center">
23
+ <img src="../../img/agent.png"/>
24
+ </div>
25
+
26
+ ## Install
27
+
28
+ ```bash
29
+ pip install "cua-agent[all]"
30
+
31
+ # or install specific loop providers
32
+ pip install "cua-agent[openai]" # OpenAI Cua Loop
33
+ pip install "cua-agent[anthropic]" # Anthropic Cua Loop
34
+ pip install "cua-agent[omni]" # Cua Loop based on OmniParser
35
+ ```
36
+
37
+ ## Run
38
+
39
+ ```python
40
+ async with Computer() as macos_computer:
41
+ # Create agent with loop and provider
42
+ agent = ComputerAgent(
43
+ computer=macos_computer,
44
+ loop=AgentLoop.OPENAI,
45
+ model=LLM(provider=LLMProvider.OPENAI)
46
+ )
47
+
48
+ tasks = [
49
+ "Look for a repository named trycua/cua on GitHub.",
50
+ "Check the open issues, open the most recent one and read it.",
51
+ "Clone the repository in users/lume/projects if it doesn't exist yet.",
52
+ "Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
53
+ "From Cursor, open Composer if not already open.",
54
+ "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
55
+ ]
56
+
57
+ for i, task in enumerate(tasks):
58
+ print(f"\nExecuting task {i}/{len(tasks)}: {task}")
59
+ async for result in agent.run(task):
60
+ print(result)
61
+
62
+ print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")
63
+ ```
64
+
65
+ Refer to this notebook for a step-by-step guide on how to use the Computer-Use Agent (CUA):
66
+
67
+ - [Agent Notebook](../../notebooks/agent_nb.ipynb) - Complete examples and workflows
68
+
69
+ ## Agent Loops
70
+
71
+ The `cua-agent` package provides three agent loop variations, based on different CUA model providers and techniques:
72
+
73
+ | Agent Loop | Supported Models | Description | Set-Of-Marks |
74
+ |:-----------|:-----------------|:------------|:-------------|
75
+ | `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
76
+ | `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
77
+ | `AgentLoop.OMNI` <br>(preview) | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `gpt-3.5-turbo` | Use OmniParser for element pixel-detection (SoM) and any VLMs | OmniParser |
78
+
79
+ ## AgentResponse
80
+ The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
81
+
82
+ ```python
83
+ async for result in agent.run(task):
84
+ print("Response ID: ", result.get("id"))
85
+
86
+ # Print detailed usage information
87
+ usage = result.get("usage")
88
+ if usage:
89
+ print("\nUsage Details:")
90
+ print(f" Input Tokens: {usage.get('input_tokens')}")
91
+ if "input_tokens_details" in usage:
92
+ print(f" Input Tokens Details: {usage.get('input_tokens_details')}")
93
+ print(f" Output Tokens: {usage.get('output_tokens')}")
94
+ if "output_tokens_details" in usage:
95
+ print(f" Output Tokens Details: {usage.get('output_tokens_details')}")
96
+ print(f" Total Tokens: {usage.get('total_tokens')}")
97
+
98
+ print("Response Text: ", result.get("text"))
99
+
100
+ # Print tools information
101
+ tools = result.get("tools")
102
+ if tools:
103
+ print("\nTools:")
104
+ print(tools)
105
+
106
+ # Print reasoning and tool call outputs
107
+ outputs = result.get("output", [])
108
+ for output in outputs:
109
+ output_type = output.get("type")
110
+ if output_type == "reasoning":
111
+ print("\nReasoning Output:")
112
+ print(output)
113
+ elif output_type == "computer_call":
114
+ print("\nTool Call Output:")
115
+ print(output)
116
+ ```
@@ -49,7 +49,7 @@ except Exception as e:
49
49
  logger.warning(f"Error initializing telemetry: {e}")
50
50
 
51
51
  from .providers.omni.types import LLMProvider, LLM
52
- from .core.loop import AgentLoop
53
- from .core.computer_agent import ComputerAgent
52
+ from .core.factory import AgentLoop
53
+ from .core.agent import ComputerAgent
54
54
 
55
55
  __all__ = ["AgentLoop", "LLMProvider", "LLM", "ComputerAgent"]
@@ -1,6 +1,6 @@
1
1
  """Core agent components."""
2
2
 
3
- from .loop import BaseLoop
3
+ from .factory import BaseLoop
4
4
  from .messages import (
5
5
  BaseMessageManager,
6
6
  ImageRetentionConfig,
@@ -3,32 +3,18 @@
3
3
  import asyncio
4
4
  import logging
5
5
  import os
6
- from typing import Any, AsyncGenerator, Dict, Optional, cast, List
6
+ from typing import AsyncGenerator, Optional
7
7
 
8
8
  from computer import Computer
9
- from ..providers.anthropic.loop import AnthropicLoop
10
- from ..providers.omni.loop import OmniLoop
11
- from ..providers.omni.parser import OmniParser
12
- from ..providers.omni.types import LLMProvider, LLM
9
+ from ..providers.omni.types import LLM
13
10
  from .. import AgentLoop
14
- from .messages import StandardMessageManager, ImageRetentionConfig
15
11
  from .types import AgentResponse
12
+ from .factory import LoopFactory
13
+ from .provider_config import DEFAULT_MODELS, ENV_VARS
16
14
 
17
15
  logging.basicConfig(level=logging.INFO)
18
16
  logger = logging.getLogger(__name__)
19
17
 
20
- # Default models for different providers
21
- DEFAULT_MODELS = {
22
- LLMProvider.OPENAI: "gpt-4o",
23
- LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
24
- }
25
-
26
- # Map providers to their environment variable names
27
- ENV_VARS = {
28
- LLMProvider.OPENAI: "OPENAI_API_KEY",
29
- LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
30
- }
31
-
32
18
 
33
19
  class ComputerAgent:
34
20
  """A computer agent that can perform automated tasks using natural language instructions."""
@@ -98,35 +84,27 @@ class ComputerAgent:
98
84
  f"No model specified for provider {self.provider} and no default found"
99
85
  )
100
86
 
101
- # Ensure computer is properly cast for typing purposes
102
- computer_instance = self.computer
103
-
104
87
  # Get API key from environment if not provided
105
88
  actual_api_key = api_key or os.environ.get(ENV_VARS[self.provider], "")
106
89
  if not actual_api_key:
107
90
  raise ValueError(f"No API key provided for {self.provider}")
108
91
 
109
- # Initialize the appropriate loop based on the loop parameter
110
- if loop == AgentLoop.ANTHROPIC:
111
- self._loop = AnthropicLoop(
112
- api_key=actual_api_key,
113
- model=actual_model_name,
114
- computer=computer_instance,
115
- save_trajectory=save_trajectory,
116
- base_dir=trajectory_dir,
117
- only_n_most_recent_images=only_n_most_recent_images,
118
- )
119
- else:
120
- self._loop = OmniLoop(
92
+ # Create the appropriate loop using the factory
93
+ try:
94
+ # Let the factory create the appropriate loop with needed components
95
+ self._loop = LoopFactory.create_loop(
96
+ loop_type=loop,
121
97
  provider=self.provider,
98
+ computer=self.computer,
99
+ model_name=actual_model_name,
122
100
  api_key=actual_api_key,
123
- model=actual_model_name,
124
- computer=computer_instance,
125
101
  save_trajectory=save_trajectory,
126
- base_dir=trajectory_dir,
102
+ trajectory_dir=trajectory_dir,
127
103
  only_n_most_recent_images=only_n_most_recent_images,
128
- parser=OmniParser(),
129
104
  )
105
+ except ValueError as e:
106
+ logger.error(f"Failed to create loop: {str(e)}")
107
+ raise
130
108
 
131
109
  # Initialize the message manager from the loop
132
110
  self.message_manager = self._loop.message_manager
@@ -152,21 +130,6 @@ class ComputerAgent:
152
130
  else:
153
131
  logger.info("Computer already initialized, skipping initialization")
154
132
 
155
- # Take a test screenshot to verify the computer is working
156
- logger.info("Testing computer with a screenshot...")
157
- try:
158
- test_screenshot = await self.computer.interface.screenshot()
159
- # Determine the screenshot size based on its type
160
- if isinstance(test_screenshot, (bytes, bytearray, memoryview)):
161
- size = len(test_screenshot)
162
- elif hasattr(test_screenshot, "base64_image"):
163
- size = len(test_screenshot.base64_image)
164
- else:
165
- size = "unknown"
166
- logger.info(f"Screenshot test successful, size: {size}")
167
- except Exception as e:
168
- logger.error(f"Screenshot test failed: {str(e)}")
169
- # Even though screenshot failed, we continue since some tests might not need it
170
133
  except Exception as e:
171
134
  logger.error(f"Error initializing computer in __aenter__: {str(e)}")
172
135
  raise
@@ -232,7 +195,6 @@ class ComputerAgent:
232
195
 
233
196
  # Execute the task and yield results
234
197
  async for result in self._loop.run(self.message_manager.messages):
235
- # Yield the result to the caller
236
198
  yield result
237
199
 
238
200
  except Exception as e:
@@ -1,35 +1,21 @@
1
- """Base agent loop implementation."""
1
+ """Base loop definitions."""
2
2
 
3
3
  import logging
4
4
  import asyncio
5
5
  from abc import ABC, abstractmethod
6
- from enum import Enum, auto
7
- from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
8
- from datetime import datetime
6
+ from typing import Any, AsyncGenerator, Dict, List, Optional
9
7
 
10
8
  from computer import Computer
11
- from .experiment import ExperimentManager
12
9
  from .messages import StandardMessageManager, ImageRetentionConfig
13
10
  from .types import AgentResponse
11
+ from .experiment import ExperimentManager
14
12
 
15
13
  logger = logging.getLogger(__name__)
16
14
 
17
15
 
18
- class AgentLoop(Enum):
19
- """Enumeration of available loop types."""
20
-
21
- ANTHROPIC = auto() # Anthropic implementation
22
- OMNI = auto() # OmniLoop implementation
23
- # Add more loop types as needed
24
-
25
-
26
16
  class BaseLoop(ABC):
27
17
  """Base class for agent loops that handle message processing and tool execution."""
28
18
 
29
- ###########################################
30
- # INITIALIZATION AND CONFIGURATION
31
- ###########################################
32
-
33
19
  def __init__(
34
20
  self,
35
21
  computer: Computer,
@@ -68,6 +54,11 @@ class BaseLoop(ABC):
68
54
  self.only_n_most_recent_images = only_n_most_recent_images
69
55
  self._kwargs = kwargs
70
56
 
57
+ # Initialize message manager
58
+ self.message_manager = StandardMessageManager(
59
+ config=ImageRetentionConfig(num_images_to_keep=only_n_most_recent_images)
60
+ )
61
+
71
62
  # Initialize experiment manager
72
63
  if self.save_trajectory and self.base_dir:
73
64
  self.experiment_manager = ExperimentManager(
@@ -110,8 +101,7 @@ class BaseLoop(ABC):
110
101
  )
111
102
  raise RuntimeError(f"Failed to initialize: {str(e)}")
112
103
 
113
- ###########################################
114
-
104
+ ###########################################
115
105
  # ABSTRACT METHODS TO BE IMPLEMENTED BY SUBCLASSES
116
106
  ###########################################
117
107
 
@@ -125,17 +115,14 @@ class BaseLoop(ABC):
125
115
  raise NotImplementedError
126
116
 
127
117
  @abstractmethod
128
- async def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
118
+ def run(self, messages: List[Dict[str, Any]]) -> AsyncGenerator[AgentResponse, None]:
129
119
  """Run the agent loop with provided messages.
130
120
 
131
- This method handles the main agent loop including message processing,
132
- API calls, response handling, and action execution.
133
-
134
121
  Args:
135
122
  messages: List of message objects
136
123
 
137
- Yields:
138
- Agent response format
124
+ Returns:
125
+ An async generator that yields agent responses
139
126
  """
140
127
  raise NotImplementedError
141
128
 
@@ -0,0 +1,104 @@
1
+ """Base agent loop implementation."""
2
+
3
+ import logging
4
+ import importlib.util
5
+ from typing import Dict, Optional, Type, TYPE_CHECKING, Any, cast, Callable, Awaitable
6
+
7
+ from computer import Computer
8
+ from .types import AgentLoop
9
+ from .base import BaseLoop
10
+
11
+ # For type checking only
12
+ if TYPE_CHECKING:
13
+ from ..providers.omni.types import LLMProvider
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class LoopFactory:
19
+ """Factory class for creating agent loops."""
20
+
21
+ # Registry to store loop implementations
22
+ _loop_registry: Dict[AgentLoop, Type[BaseLoop]] = {}
23
+
24
+ @classmethod
25
+ def create_loop(
26
+ cls,
27
+ loop_type: AgentLoop,
28
+ api_key: str,
29
+ model_name: str,
30
+ computer: Computer,
31
+ provider: Any = None,
32
+ save_trajectory: bool = True,
33
+ trajectory_dir: str = "trajectories",
34
+ only_n_most_recent_images: Optional[int] = None,
35
+ acknowledge_safety_check_callback: Optional[Callable[[str], Awaitable[bool]]] = None,
36
+ ) -> BaseLoop:
37
+ """Create and return an appropriate loop instance based on type."""
38
+ if loop_type == AgentLoop.ANTHROPIC:
39
+ # Lazy import AnthropicLoop only when needed
40
+ try:
41
+ from ..providers.anthropic.loop import AnthropicLoop
42
+ except ImportError:
43
+ raise ImportError(
44
+ "The 'anthropic' provider is not installed. "
45
+ "Install it with 'pip install cua-agent[anthropic]'"
46
+ )
47
+
48
+ return AnthropicLoop(
49
+ api_key=api_key,
50
+ model=model_name,
51
+ computer=computer,
52
+ save_trajectory=save_trajectory,
53
+ base_dir=trajectory_dir,
54
+ only_n_most_recent_images=only_n_most_recent_images,
55
+ )
56
+ elif loop_type == AgentLoop.OPENAI:
57
+ # Lazy import OpenAILoop only when needed
58
+ try:
59
+ from ..providers.openai.loop import OpenAILoop
60
+ except ImportError:
61
+ raise ImportError(
62
+ "The 'openai' provider is not installed. "
63
+ "Install it with 'pip install cua-agent[openai]'"
64
+ )
65
+
66
+ return OpenAILoop(
67
+ api_key=api_key,
68
+ model=model_name,
69
+ computer=computer,
70
+ save_trajectory=save_trajectory,
71
+ base_dir=trajectory_dir,
72
+ only_n_most_recent_images=only_n_most_recent_images,
73
+ acknowledge_safety_check_callback=acknowledge_safety_check_callback,
74
+ )
75
+ elif loop_type == AgentLoop.OMNI:
76
+ # Lazy import OmniLoop and related classes only when needed
77
+ try:
78
+ from ..providers.omni.loop import OmniLoop
79
+ from ..providers.omni.parser import OmniParser
80
+ from ..providers.omni.types import LLMProvider
81
+ except ImportError:
82
+ raise ImportError(
83
+ "The 'omni' provider is not installed. "
84
+ "Install it with 'pip install cua-agent[all]'"
85
+ )
86
+
87
+ if provider is None:
88
+ raise ValueError("Provider is required for OMNI loop type")
89
+
90
+ # We know provider is the correct type at this point, so cast it
91
+ provider_instance = cast(LLMProvider, provider)
92
+
93
+ return OmniLoop(
94
+ provider=provider_instance,
95
+ api_key=api_key,
96
+ model=model_name,
97
+ computer=computer,
98
+ save_trajectory=save_trajectory,
99
+ base_dir=trajectory_dir,
100
+ only_n_most_recent_images=only_n_most_recent_images,
101
+ parser=OmniParser(),
102
+ )
103
+ else:
104
+ raise ValueError(f"Unsupported loop type: {loop_type}")
@@ -0,0 +1,15 @@
1
+ """Provider-specific configurations and constants."""
2
+
3
+ from ..providers.omni.types import LLMProvider
4
+
5
+ # Default models for different providers
6
+ DEFAULT_MODELS = {
7
+ LLMProvider.OPENAI: "gpt-4o",
8
+ LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
9
+ }
10
+
11
+ # Map providers to their environment variable names
12
+ ENV_VARS = {
13
+ LLMProvider.OPENAI: "OPENAI_API_KEY",
14
+ LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
15
+ }
@@ -1,6 +1,16 @@
1
1
  """Core type definitions."""
2
2
 
3
3
  from typing import Any, Dict, List, Optional, TypedDict, Union
4
+ from enum import Enum, auto
5
+
6
+
7
+ class AgentLoop(Enum):
8
+ """Enumeration of available loop types."""
9
+
10
+ ANTHROPIC = auto() # Anthropic implementation
11
+ OMNI = auto() # OmniLoop implementation
12
+ OPENAI = auto() # OpenAI implementation
13
+ # Add more loop types as needed
4
14
 
5
15
 
6
16
  class AgentResponse(TypedDict, total=False):
@@ -16,7 +16,7 @@ from datetime import datetime
16
16
  from computer import Computer
17
17
 
18
18
  # Base imports
19
- from ...core.loop import BaseLoop
19
+ from ...core.base import BaseLoop
20
20
  from ...core.messages import StandardMessageManager, ImageRetentionConfig
21
21
  from ...core.types import AgentResponse
22
22
 
@@ -1,14 +1,11 @@
1
1
  """Response and tool handling for Anthropic provider."""
2
2
 
3
3
  import logging
4
- from typing import Any, Dict, List, Optional, Tuple, cast
4
+ from typing import Any, Dict, List, Tuple, cast
5
5
 
6
6
  from anthropic.types.beta import (
7
7
  BetaMessage,
8
- BetaMessageParam,
9
8
  BetaTextBlock,
10
- BetaTextBlockParam,
11
- BetaToolUseBlockParam,
12
9
  BetaContentBlockParam,
13
10
  )
14
11
 
@@ -1,14 +1,12 @@
1
1
  """Utility functions for Anthropic message handling."""
2
2
 
3
- import time
4
3
  import logging
5
4
  import re
6
5
  from typing import Any, Dict, List, Optional, Tuple, cast
7
- from anthropic.types.beta import BetaMessage, BetaMessageParam, BetaTextBlock
6
+ from anthropic.types.beta import BetaMessage
8
7
  from ..omni.parser import ParseResult
9
8
  from ...core.types import AgentResponse
10
9
  from datetime import datetime
11
- import json
12
10
 
13
11
  # Configure module logger
14
12
  logger = logging.getLogger(__name__)