cua-agent 0.1.6__tar.gz → 0.1.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (96) hide show
  1. cua_agent-0.1.18/PKG-INFO +165 -0
  2. cua_agent-0.1.18/README.md +116 -0
  3. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/__init__.py +3 -2
  4. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/__init__.py +1 -6
  5. cua_agent-0.1.6/agent/core/computer_agent.py → cua_agent-0.1.18/agent/core/agent.py +31 -76
  6. cua_agent-0.1.6/agent/core/loop.py → cua_agent-0.1.18/agent/core/base.py +68 -127
  7. cua_agent-0.1.18/agent/core/factory.py +104 -0
  8. cua_agent-0.1.18/agent/core/messages.py +399 -0
  9. cua_agent-0.1.18/agent/core/provider_config.py +15 -0
  10. cua_agent-0.1.18/agent/core/types.py +45 -0
  11. cua_agent-0.1.18/agent/core/visualization.py +197 -0
  12. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/api/client.py +142 -1
  13. cua_agent-0.1.18/agent/providers/anthropic/api_handler.py +140 -0
  14. cua_agent-0.1.18/agent/providers/anthropic/callbacks/__init__.py +5 -0
  15. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/loop.py +207 -221
  16. cua_agent-0.1.18/agent/providers/anthropic/response_handler.py +226 -0
  17. cua_agent-0.1.18/agent/providers/anthropic/tools/bash.py +66 -0
  18. cua_agent-0.1.18/agent/providers/anthropic/utils.py +368 -0
  19. cua_agent-0.1.18/agent/providers/omni/__init__.py +8 -0
  20. cua_agent-0.1.18/agent/providers/omni/api_handler.py +42 -0
  21. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/omni/clients/anthropic.py +4 -0
  22. cua_agent-0.1.18/agent/providers/omni/image_utils.py +34 -0
  23. cua_agent-0.1.18/agent/providers/omni/loop.py +855 -0
  24. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/omni/parser.py +58 -4
  25. cua_agent-0.1.18/agent/providers/omni/tools/__init__.py +30 -0
  26. cua_agent-0.1.18/agent/providers/omni/tools/base.py +29 -0
  27. cua_agent-0.1.18/agent/providers/omni/tools/bash.py +74 -0
  28. cua_agent-0.1.18/agent/providers/omni/tools/computer.py +179 -0
  29. cua_agent-0.1.18/agent/providers/omni/tools/manager.py +61 -0
  30. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/omni/types.py +1 -3
  31. cua_agent-0.1.18/agent/providers/omni/utils.py +236 -0
  32. cua_agent-0.1.18/agent/providers/openai/__init__.py +6 -0
  33. cua_agent-0.1.18/agent/providers/openai/api_handler.py +453 -0
  34. cua_agent-0.1.18/agent/providers/openai/loop.py +440 -0
  35. cua_agent-0.1.18/agent/providers/openai/response_handler.py +205 -0
  36. cua_agent-0.1.18/agent/providers/openai/tools/__init__.py +15 -0
  37. cua_agent-0.1.18/agent/providers/openai/tools/base.py +79 -0
  38. cua_agent-0.1.18/agent/providers/openai/tools/computer.py +319 -0
  39. cua_agent-0.1.18/agent/providers/openai/tools/manager.py +106 -0
  40. cua_agent-0.1.18/agent/providers/openai/types.py +36 -0
  41. cua_agent-0.1.18/agent/providers/openai/utils.py +98 -0
  42. {cua_agent-0.1.6 → cua_agent-0.1.18}/pyproject.toml +7 -3
  43. cua_agent-0.1.6/PKG-INFO +0 -120
  44. cua_agent-0.1.6/README.md +0 -74
  45. cua_agent-0.1.6/agent/README.md +0 -63
  46. cua_agent-0.1.6/agent/core/messages.py +0 -245
  47. cua_agent-0.1.6/agent/providers/anthropic/messages/manager.py +0 -112
  48. cua_agent-0.1.6/agent/providers/anthropic/tools/bash.py +0 -163
  49. cua_agent-0.1.6/agent/providers/omni/__init__.py +0 -27
  50. cua_agent-0.1.6/agent/providers/omni/callbacks.py +0 -78
  51. cua_agent-0.1.6/agent/providers/omni/clients/groq.py +0 -101
  52. cua_agent-0.1.6/agent/providers/omni/experiment.py +0 -276
  53. cua_agent-0.1.6/agent/providers/omni/image_utils.py +0 -106
  54. cua_agent-0.1.6/agent/providers/omni/loop.py +0 -971
  55. cua_agent-0.1.6/agent/providers/omni/messages.py +0 -171
  56. cua_agent-0.1.6/agent/providers/omni/tool_manager.py +0 -91
  57. cua_agent-0.1.6/agent/providers/omni/tools/__init__.py +0 -12
  58. cua_agent-0.1.6/agent/providers/omni/tools/bash.py +0 -69
  59. cua_agent-0.1.6/agent/providers/omni/tools/computer.py +0 -217
  60. cua_agent-0.1.6/agent/providers/omni/tools/manager.py +0 -81
  61. cua_agent-0.1.6/agent/providers/omni/utils.py +0 -157
  62. cua_agent-0.1.6/agent/providers/omni/visualization.py +0 -130
  63. cua_agent-0.1.6/agent/types/__init__.py +0 -23
  64. cua_agent-0.1.6/agent/types/base.py +0 -41
  65. cua_agent-0.1.6/agent/types/messages.py +0 -36
  66. cua_agent-0.1.6/tests/test_agent.py +0 -91
  67. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/README.md +0 -0
  68. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/callbacks.py +0 -0
  69. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/experiment.py +0 -0
  70. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/telemetry.py +0 -0
  71. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/tools/__init__.py +0 -0
  72. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/tools/base.py +0 -0
  73. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/tools/bash.py +0 -0
  74. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/tools/collection.py +0 -0
  75. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/tools/computer.py +0 -0
  76. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/tools/edit.py +0 -0
  77. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/core/tools/manager.py +0 -0
  78. {cua_agent-0.1.6/agent/types → cua_agent-0.1.18/agent/core}/tools.py +0 -0
  79. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/__init__.py +0 -0
  80. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/__init__.py +0 -0
  81. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/api/logging.py +0 -0
  82. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/callbacks/manager.py +0 -0
  83. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/prompts.py +0 -0
  84. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/tools/__init__.py +0 -0
  85. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/tools/base.py +0 -0
  86. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/tools/collection.py +0 -0
  87. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/tools/computer.py +0 -0
  88. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/tools/edit.py +0 -0
  89. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/tools/manager.py +0 -0
  90. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/tools/run.py +0 -0
  91. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/anthropic/types.py +0 -0
  92. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/omni/clients/base.py +0 -0
  93. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/omni/clients/openai.py +0 -0
  94. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/omni/clients/utils.py +0 -0
  95. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/providers/omni/prompts.py +0 -0
  96. {cua_agent-0.1.6 → cua_agent-0.1.18}/agent/telemetry.py +0 -0
@@ -0,0 +1,165 @@
1
+ Metadata-Version: 2.1
2
+ Name: cua-agent
3
+ Version: 0.1.18
4
+ Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
+ Author-Email: TryCua <gh@trycua.com>
6
+ Requires-Python: <3.13,>=3.10
7
+ Requires-Dist: httpx<0.29.0,>=0.27.0
8
+ Requires-Dist: aiohttp<4.0.0,>=3.9.3
9
+ Requires-Dist: asyncio
10
+ Requires-Dist: anyio<5.0.0,>=4.4.1
11
+ Requires-Dist: typing-extensions<5.0.0,>=4.12.2
12
+ Requires-Dist: pydantic<3.0.0,>=2.6.4
13
+ Requires-Dist: rich<14.0.0,>=13.7.1
14
+ Requires-Dist: python-dotenv<2.0.0,>=1.0.1
15
+ Requires-Dist: cua-computer<0.2.0,>=0.1.0
16
+ Requires-Dist: cua-core<0.2.0,>=0.1.0
17
+ Requires-Dist: certifi>=2024.2.2
18
+ Provides-Extra: anthropic
19
+ Requires-Dist: anthropic>=0.49.0; extra == "anthropic"
20
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "anthropic"
21
+ Provides-Extra: openai
22
+ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "openai"
23
+ Requires-Dist: httpx<0.29.0,>=0.27.0; extra == "openai"
24
+ Provides-Extra: som
25
+ Requires-Dist: torch>=2.2.1; extra == "som"
26
+ Requires-Dist: torchvision>=0.17.1; extra == "som"
27
+ Requires-Dist: ultralytics>=8.0.0; extra == "som"
28
+ Requires-Dist: transformers>=4.38.2; extra == "som"
29
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "som"
30
+ Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "som"
31
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "som"
32
+ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "som"
33
+ Requires-Dist: groq<0.5.0,>=0.4.0; extra == "som"
34
+ Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "som"
35
+ Requires-Dist: requests<3.0.0,>=2.31.0; extra == "som"
36
+ Provides-Extra: all
37
+ Requires-Dist: torch>=2.2.1; extra == "all"
38
+ Requires-Dist: torchvision>=0.17.1; extra == "all"
39
+ Requires-Dist: ultralytics>=8.0.0; extra == "all"
40
+ Requires-Dist: transformers>=4.38.2; extra == "all"
41
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "all"
42
+ Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "all"
43
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "all"
44
+ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "all"
45
+ Requires-Dist: groq<0.5.0,>=0.4.0; extra == "all"
46
+ Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "all"
47
+ Requires-Dist: requests<3.0.0,>=2.31.0; extra == "all"
48
+ Description-Content-Type: text/markdown
49
+
50
+ <div align="center">
51
+ <h1>
52
+ <div class="image-wrapper" style="display: inline-block;">
53
+ <picture>
54
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
55
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
56
+ <img alt="Shows my svg">
57
+ </picture>
58
+ </div>
59
+
60
+ [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
61
+ [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
62
+ [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
63
+ [![PyPI](https://img.shields.io/pypi/v/cua-agent?color=333333)](https://pypi.org/project/cua-agent/)
64
+ </h1>
65
+ </div>
66
+
67
+ **cua-agent** is a general Computer-Use framework for running multi-app agentic workflows targeting macOS and Linux sandboxes created with Cua, supporting local (Ollama) and cloud model providers (OpenAI, Anthropic, Groq, DeepSeek, Qwen).
68
+
69
+ ### Get started with Agent
70
+
71
+ <div align="center">
72
+ <img src="../../img/agent.png"/>
73
+ </div>
74
+
75
+ ## Install
76
+
77
+ ```bash
78
+ pip install "cua-agent[all]"
79
+
80
+ # or install specific loop providers
81
+ pip install "cua-agent[openai]" # OpenAI Cua Loop
82
+ pip install "cua-agent[anthropic]" # Anthropic Cua Loop
83
+ pip install "cua-agent[som]" # Cua Loop based on OmniParser
84
+ ```
85
+
86
+ ## Run
87
+
88
+ ```python
89
+ async with Computer() as macos_computer:
90
+ # Create agent with loop and provider
91
+ agent = ComputerAgent(
92
+ computer=macos_computer,
93
+ loop=AgentLoop.OPENAI,
94
+ model=LLM(provider=LLMProvider.OPENAI)
95
+ )
96
+
97
+ tasks = [
98
+ "Look for a repository named trycua/cua on GitHub.",
99
+ "Check the open issues, open the most recent one and read it.",
100
+ "Clone the repository in users/lume/projects if it doesn't exist yet.",
101
+ "Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
102
+ "From Cursor, open Composer if not already open.",
103
+ "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
104
+ ]
105
+
106
+ for i, task in enumerate(tasks):
107
+ print(f"\nExecuting task {i+1}/{len(tasks)}: {task}")
108
+ async for result in agent.run(task):
109
+ print(result)
110
+
111
+ print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")
112
+ ```
113
+
114
+ Refer to these notebooks for step-by-step guides on how to use the Computer-Use Agent (CUA):
115
+
116
+ - [Agent Notebook](../../notebooks/agent_nb.ipynb) - Complete examples and workflows
117
+
118
+ ## Agent Loops
119
+
120
+ The `cua-agent` package provides three agent loop variations, based on different CUA model providers and techniques:
121
+
122
+ | Agent Loop | Supported Models | Description | Set-Of-Marks |
123
+ |:-----------|:-----------------|:------------|:-------------|
124
+ | `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
125
+ | `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
126
+ | `AgentLoop.OMNI` <br>(preview) | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `gpt-3.5-turbo` | Use OmniParser for element pixel-detection (SoM) and any VLMs | OmniParser |
127
+
128
+ ## AgentResponse
129
+ The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
130
+
131
+ ```python
132
+ async for result in agent.run(task):
133
+ print("Response ID: ", result.get("id"))
134
+
135
+ # Print detailed usage information
136
+ usage = result.get("usage")
137
+ if usage:
138
+ print("\nUsage Details:")
139
+ print(f" Input Tokens: {usage.get('input_tokens')}")
140
+ if "input_tokens_details" in usage:
141
+ print(f" Input Tokens Details: {usage.get('input_tokens_details')}")
142
+ print(f" Output Tokens: {usage.get('output_tokens')}")
143
+ if "output_tokens_details" in usage:
144
+ print(f" Output Tokens Details: {usage.get('output_tokens_details')}")
145
+ print(f" Total Tokens: {usage.get('total_tokens')}")
146
+
147
+ print("Response Text: ", result.get("text"))
148
+
149
+ # Print tools information
150
+ tools = result.get("tools")
151
+ if tools:
152
+ print("\nTools:")
153
+ print(tools)
154
+
155
+ # Print reasoning and tool call outputs
156
+ outputs = result.get("output", [])
157
+ for output in outputs:
158
+ output_type = output.get("type")
159
+ if output_type == "reasoning":
160
+ print("\nReasoning Output:")
161
+ print(output)
162
+ elif output_type == "computer_call":
163
+ print("\nTool Call Output:")
164
+ print(output)
165
+ ```
@@ -0,0 +1,116 @@
1
+ <div align="center">
2
+ <h1>
3
+ <div class="image-wrapper" style="display: inline-block;">
4
+ <picture>
5
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
6
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
7
+ <img alt="Shows my svg">
8
+ </picture>
9
+ </div>
10
+
11
+ [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
12
+ [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
13
+ [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
14
+ [![PyPI](https://img.shields.io/pypi/v/cua-agent?color=333333)](https://pypi.org/project/cua-agent/)
15
+ </h1>
16
+ </div>
17
+
18
+ **cua-agent** is a general Computer-Use framework for running multi-app agentic workflows targeting macOS and Linux sandboxes created with Cua, supporting local (Ollama) and cloud model providers (OpenAI, Anthropic, Groq, DeepSeek, Qwen).
19
+
20
+ ### Get started with Agent
21
+
22
+ <div align="center">
23
+ <img src="../../img/agent.png"/>
24
+ </div>
25
+
26
+ ## Install
27
+
28
+ ```bash
29
+ pip install "cua-agent[all]"
30
+
31
+ # or install specific loop providers
32
+ pip install "cua-agent[openai]" # OpenAI Cua Loop
33
+ pip install "cua-agent[anthropic]" # Anthropic Cua Loop
34
+ pip install "cua-agent[som]" # Cua Loop based on OmniParser
35
+ ```
36
+
37
+ ## Run
38
+
39
+ ```python
40
+ async with Computer() as macos_computer:
41
+ # Create agent with loop and provider
42
+ agent = ComputerAgent(
43
+ computer=macos_computer,
44
+ loop=AgentLoop.OPENAI,
45
+ model=LLM(provider=LLMProvider.OPENAI)
46
+ )
47
+
48
+ tasks = [
49
+ "Look for a repository named trycua/cua on GitHub.",
50
+ "Check the open issues, open the most recent one and read it.",
51
+ "Clone the repository in users/lume/projects if it doesn't exist yet.",
52
+ "Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
53
+ "From Cursor, open Composer if not already open.",
54
+ "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
55
+ ]
56
+
57
+ for i, task in enumerate(tasks):
58
+ print(f"\nExecuting task {i+1}/{len(tasks)}: {task}")
59
+ async for result in agent.run(task):
60
+ print(result)
61
+
62
+ print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")
63
+ ```
64
+
65
+ Refer to these notebooks for step-by-step guides on how to use the Computer-Use Agent (CUA):
66
+
67
+ - [Agent Notebook](../../notebooks/agent_nb.ipynb) - Complete examples and workflows
68
+
69
+ ## Agent Loops
70
+
71
+ The `cua-agent` package provides three agent loop variations, based on different CUA model providers and techniques:
72
+
73
+ | Agent Loop | Supported Models | Description | Set-Of-Marks |
74
+ |:-----------|:-----------------|:------------|:-------------|
75
+ | `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
76
+ | `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
77
+ | `AgentLoop.OMNI` <br>(preview) | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `gpt-3.5-turbo` | Use OmniParser for element pixel-detection (SoM) and any VLMs | OmniParser |
78
+
79
+ ## AgentResponse
80
+ The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
81
+
82
+ ```python
83
+ async for result in agent.run(task):
84
+ print("Response ID: ", result.get("id"))
85
+
86
+ # Print detailed usage information
87
+ usage = result.get("usage")
88
+ if usage:
89
+ print("\nUsage Details:")
90
+ print(f" Input Tokens: {usage.get('input_tokens')}")
91
+ if "input_tokens_details" in usage:
92
+ print(f" Input Tokens Details: {usage.get('input_tokens_details')}")
93
+ print(f" Output Tokens: {usage.get('output_tokens')}")
94
+ if "output_tokens_details" in usage:
95
+ print(f" Output Tokens Details: {usage.get('output_tokens_details')}")
96
+ print(f" Total Tokens: {usage.get('total_tokens')}")
97
+
98
+ print("Response Text: ", result.get("text"))
99
+
100
+ # Print tools information
101
+ tools = result.get("tools")
102
+ if tools:
103
+ print("\nTools:")
104
+ print(tools)
105
+
106
+ # Print reasoning and tool call outputs
107
+ outputs = result.get("output", [])
108
+ for output in outputs:
109
+ output_type = output.get("type")
110
+ if output_type == "reasoning":
111
+ print("\nReasoning Output:")
112
+ print(output)
113
+ elif output_type == "computer_call":
114
+ print("\nTool Call Output:")
115
+ print(output)
116
+ ```
@@ -49,6 +49,7 @@ except Exception as e:
49
49
  logger.warning(f"Error initializing telemetry: {e}")
50
50
 
51
51
  from .providers.omni.types import LLMProvider, LLM
52
- from .types.base import AgentLoop
52
+ from .core.factory import AgentLoop
53
+ from .core.agent import ComputerAgent
53
54
 
54
- __all__ = ["AgentLoop", "LLMProvider", "LLM"]
55
+ __all__ = ["AgentLoop", "LLMProvider", "LLM", "ComputerAgent"]
@@ -1,12 +1,7 @@
1
1
  """Core agent components."""
2
2
 
3
- from .loop import BaseLoop
3
+ from .factory import BaseLoop
4
4
  from .messages import (
5
- create_user_message,
6
- create_assistant_message,
7
- create_system_message,
8
- create_image_message,
9
- create_screen_message,
10
5
  BaseMessageManager,
11
6
  ImageRetentionConfig,
12
7
  )
@@ -3,31 +3,18 @@
3
3
  import asyncio
4
4
  import logging
5
5
  import os
6
- from typing import Any, AsyncGenerator, Dict, Optional, cast
7
- from dataclasses import dataclass
6
+ from typing import AsyncGenerator, Optional
8
7
 
9
8
  from computer import Computer
10
- from ..providers.anthropic.loop import AnthropicLoop
11
- from ..providers.omni.loop import OmniLoop
12
- from ..providers.omni.parser import OmniParser
13
- from ..providers.omni.types import LLMProvider, LLM
9
+ from ..providers.omni.types import LLM
14
10
  from .. import AgentLoop
11
+ from .types import AgentResponse
12
+ from .factory import LoopFactory
13
+ from .provider_config import DEFAULT_MODELS, ENV_VARS
15
14
 
16
15
  logging.basicConfig(level=logging.INFO)
17
16
  logger = logging.getLogger(__name__)
18
17
 
19
- # Default models for different providers
20
- DEFAULT_MODELS = {
21
- LLMProvider.OPENAI: "gpt-4o",
22
- LLMProvider.ANTHROPIC: "claude-3-7-sonnet-20250219",
23
- }
24
-
25
- # Map providers to their environment variable names
26
- ENV_VARS = {
27
- LLMProvider.OPENAI: "OPENAI_API_KEY",
28
- LLMProvider.ANTHROPIC: "ANTHROPIC_API_KEY",
29
- }
30
-
31
18
 
32
19
  class ComputerAgent:
33
20
  """A computer agent that can perform automated tasks using natural language instructions."""
@@ -44,7 +31,6 @@ class ComputerAgent:
44
31
  save_trajectory: bool = True,
45
32
  trajectory_dir: str = "trajectories",
46
33
  only_n_most_recent_images: Optional[int] = None,
47
- parser: Optional[OmniParser] = None,
48
34
  verbosity: int = logging.INFO,
49
35
  ):
50
36
  """Initialize the ComputerAgent.
@@ -61,12 +47,11 @@ class ComputerAgent:
61
47
  save_trajectory: Whether to save the trajectory.
62
48
  trajectory_dir: Directory to save the trajectory.
63
49
  only_n_most_recent_images: Maximum number of recent screenshots to include in API requests.
64
- parser: Parser instance for the OmniLoop. Only used if provider is not ANTHROPIC.
65
50
  verbosity: Logging level.
66
51
  """
67
52
  # Basic agent configuration
68
53
  self.max_retries = max_retries
69
- self.computer = computer or Computer()
54
+ self.computer = computer
70
55
  self.queue = asyncio.Queue()
71
56
  self.screenshot_dir = screenshot_dir
72
57
  self.log_dir = log_dir
@@ -99,39 +84,30 @@ class ComputerAgent:
99
84
  f"No model specified for provider {self.provider} and no default found"
100
85
  )
101
86
 
102
- # Ensure computer is properly cast for typing purposes
103
- computer_instance = cast(Computer, self.computer)
104
-
105
87
  # Get API key from environment if not provided
106
88
  actual_api_key = api_key or os.environ.get(ENV_VARS[self.provider], "")
107
89
  if not actual_api_key:
108
90
  raise ValueError(f"No API key provided for {self.provider}")
109
91
 
110
- # Initialize the appropriate loop based on the loop parameter
111
- if loop == AgentLoop.ANTHROPIC:
112
- self._loop = AnthropicLoop(
113
- api_key=actual_api_key,
114
- model=actual_model_name,
115
- computer=computer_instance,
116
- save_trajectory=save_trajectory,
117
- base_dir=trajectory_dir,
118
- only_n_most_recent_images=only_n_most_recent_images,
119
- )
120
- else:
121
- # Default to OmniLoop for other loop types
122
- # Initialize parser if not provided
123
- actual_parser = parser or OmniParser()
124
-
125
- self._loop = OmniLoop(
92
+ # Create the appropriate loop using the factory
93
+ try:
94
+ # Let the factory create the appropriate loop with needed components
95
+ self._loop = LoopFactory.create_loop(
96
+ loop_type=loop,
126
97
  provider=self.provider,
98
+ computer=self.computer,
99
+ model_name=actual_model_name,
127
100
  api_key=actual_api_key,
128
- model=actual_model_name,
129
- computer=computer_instance,
130
101
  save_trajectory=save_trajectory,
131
- base_dir=trajectory_dir,
102
+ trajectory_dir=trajectory_dir,
132
103
  only_n_most_recent_images=only_n_most_recent_images,
133
- parser=actual_parser,
134
104
  )
105
+ except ValueError as e:
106
+ logger.error(f"Failed to create loop: {str(e)}")
107
+ raise
108
+
109
+ # Initialize the message manager from the loop
110
+ self.message_manager = self._loop.message_manager
135
111
 
136
112
  logger.info(
137
113
  f"ComputerAgent initialized with provider: {self.provider}, model: {actual_model_name}"
@@ -154,21 +130,6 @@ class ComputerAgent:
154
130
  else:
155
131
  logger.info("Computer already initialized, skipping initialization")
156
132
 
157
- # Take a test screenshot to verify the computer is working
158
- logger.info("Testing computer with a screenshot...")
159
- try:
160
- test_screenshot = await self.computer.interface.screenshot()
161
- # Determine the screenshot size based on its type
162
- if isinstance(test_screenshot, (bytes, bytearray, memoryview)):
163
- size = len(test_screenshot)
164
- elif hasattr(test_screenshot, "base64_image"):
165
- size = len(test_screenshot.base64_image)
166
- else:
167
- size = "unknown"
168
- logger.info(f"Screenshot test successful, size: {size}")
169
- except Exception as e:
170
- logger.error(f"Screenshot test failed: {str(e)}")
171
- # Even though screenshot failed, we continue since some tests might not need it
172
133
  except Exception as e:
173
134
  logger.error(f"Error initializing computer in __aenter__: {str(e)}")
174
135
  raise
@@ -201,36 +162,30 @@ class ComputerAgent:
201
162
  await self.computer.run()
202
163
  self._initialized = True
203
164
 
204
- async def _init_if_needed(self):
205
- """Initialize the computer interface if it hasn't been initialized yet."""
206
- if not self.computer._initialized:
207
- logger.info("Computer not initialized, initializing now...")
208
- try:
209
- # Call run directly
210
- await self.computer.run()
211
- logger.info("Computer interface initialized successfully")
212
- except Exception as e:
213
- logger.error(f"Error initializing computer interface: {str(e)}")
214
- raise
215
-
216
- async def run(self, task: str) -> AsyncGenerator[Dict[str, Any], None]:
165
+ async def run(self, task: str) -> AsyncGenerator[AgentResponse, None]:
217
166
  """Run a task using the computer agent.
218
167
 
219
168
  Args:
220
169
  task: Task description
221
170
 
222
171
  Yields:
223
- Task execution updates
172
+ Agent response format
224
173
  """
225
174
  try:
226
175
  logger.info(f"Running task: {task}")
176
+ logger.info(
177
+ f"Message history before task has {len(self.message_manager.messages)} messages"
178
+ )
227
179
 
228
180
  # Initialize the computer if needed
229
181
  if not self._initialized:
230
182
  await self.initialize()
231
183
 
232
- # Format task as a message
233
- messages = [{"role": "user", "content": task}]
184
+ # Add task as a user message using the message manager
185
+ self.message_manager.add_user_message([{"type": "text", "text": task}])
186
+ logger.info(
187
+ f"Added task message. Message history now has {len(self.message_manager.messages)} messages"
188
+ )
234
189
 
235
190
  # Pass properly formatted messages to the loop
236
191
  if self._loop is None:
@@ -239,7 +194,7 @@ class ComputerAgent:
239
194
  return
240
195
 
241
196
  # Execute the task and yield results
242
- async for result in self._loop.run(messages):
197
+ async for result in self._loop.run(self.message_manager.messages):
243
198
  yield result
244
199
 
245
200
  except Exception as e: