cua-agent 0.1.6__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

This version of cua-agent has been marked as a potentially problematic release. See the registry listing for more details.

Files changed (57)
  1. agent/__init__.py +3 -2
  2. agent/core/__init__.py +1 -6
  3. agent/core/{computer_agent.py → agent.py} +31 -76
  4. agent/core/{loop.py → base.py} +68 -127
  5. agent/core/factory.py +104 -0
  6. agent/core/messages.py +279 -125
  7. agent/core/provider_config.py +15 -0
  8. agent/core/types.py +45 -0
  9. agent/core/visualization.py +197 -0
  10. agent/providers/anthropic/api/client.py +142 -1
  11. agent/providers/anthropic/api_handler.py +140 -0
  12. agent/providers/anthropic/callbacks/__init__.py +5 -0
  13. agent/providers/anthropic/loop.py +207 -221
  14. agent/providers/anthropic/response_handler.py +226 -0
  15. agent/providers/anthropic/tools/bash.py +0 -97
  16. agent/providers/anthropic/utils.py +368 -0
  17. agent/providers/omni/__init__.py +1 -20
  18. agent/providers/omni/api_handler.py +42 -0
  19. agent/providers/omni/clients/anthropic.py +4 -0
  20. agent/providers/omni/image_utils.py +0 -72
  21. agent/providers/omni/loop.py +491 -607
  22. agent/providers/omni/parser.py +58 -4
  23. agent/providers/omni/tools/__init__.py +25 -7
  24. agent/providers/omni/tools/base.py +29 -0
  25. agent/providers/omni/tools/bash.py +43 -38
  26. agent/providers/omni/tools/computer.py +144 -182
  27. agent/providers/omni/tools/manager.py +25 -45
  28. agent/providers/omni/types.py +1 -3
  29. agent/providers/omni/utils.py +224 -145
  30. agent/providers/openai/__init__.py +6 -0
  31. agent/providers/openai/api_handler.py +453 -0
  32. agent/providers/openai/loop.py +440 -0
  33. agent/providers/openai/response_handler.py +205 -0
  34. agent/providers/openai/tools/__init__.py +15 -0
  35. agent/providers/openai/tools/base.py +79 -0
  36. agent/providers/openai/tools/computer.py +319 -0
  37. agent/providers/openai/tools/manager.py +106 -0
  38. agent/providers/openai/types.py +36 -0
  39. agent/providers/openai/utils.py +98 -0
  40. cua_agent-0.1.18.dist-info/METADATA +165 -0
  41. cua_agent-0.1.18.dist-info/RECORD +73 -0
  42. agent/README.md +0 -63
  43. agent/providers/anthropic/messages/manager.py +0 -112
  44. agent/providers/omni/callbacks.py +0 -78
  45. agent/providers/omni/clients/groq.py +0 -101
  46. agent/providers/omni/experiment.py +0 -276
  47. agent/providers/omni/messages.py +0 -171
  48. agent/providers/omni/tool_manager.py +0 -91
  49. agent/providers/omni/visualization.py +0 -130
  50. agent/types/__init__.py +0 -23
  51. agent/types/base.py +0 -41
  52. agent/types/messages.py +0 -36
  53. cua_agent-0.1.6.dist-info/METADATA +0 -120
  54. cua_agent-0.1.6.dist-info/RECORD +0 -64
  55. /agent/{types → core}/tools.py +0 -0
  56. {cua_agent-0.1.6.dist-info → cua_agent-0.1.18.dist-info}/WHEEL +0 -0
  57. {cua_agent-0.1.6.dist-info → cua_agent-0.1.18.dist-info}/entry_points.txt +0 -0
cua_agent-0.1.18.dist-info/METADATA ADDED
@@ -0,0 +1,165 @@
+ Metadata-Version: 2.1
+ Name: cua-agent
+ Version: 0.1.18
+ Summary: CUA (Computer Use) Agent for AI-driven computer interaction
+ Author-Email: TryCua <gh@trycua.com>
+ Requires-Python: <3.13,>=3.10
+ Requires-Dist: httpx<0.29.0,>=0.27.0
+ Requires-Dist: aiohttp<4.0.0,>=3.9.3
+ Requires-Dist: asyncio
+ Requires-Dist: anyio<5.0.0,>=4.4.1
+ Requires-Dist: typing-extensions<5.0.0,>=4.12.2
+ Requires-Dist: pydantic<3.0.0,>=2.6.4
+ Requires-Dist: rich<14.0.0,>=13.7.1
+ Requires-Dist: python-dotenv<2.0.0,>=1.0.1
+ Requires-Dist: cua-computer<0.2.0,>=0.1.0
+ Requires-Dist: cua-core<0.2.0,>=0.1.0
+ Requires-Dist: certifi>=2024.2.2
+ Provides-Extra: anthropic
+ Requires-Dist: anthropic>=0.49.0; extra == "anthropic"
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "anthropic"
+ Provides-Extra: openai
+ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "openai"
+ Requires-Dist: httpx<0.29.0,>=0.27.0; extra == "openai"
+ Provides-Extra: som
+ Requires-Dist: torch>=2.2.1; extra == "som"
+ Requires-Dist: torchvision>=0.17.1; extra == "som"
+ Requires-Dist: ultralytics>=8.0.0; extra == "som"
+ Requires-Dist: transformers>=4.38.2; extra == "som"
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "som"
+ Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "som"
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "som"
+ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "som"
+ Requires-Dist: groq<0.5.0,>=0.4.0; extra == "som"
+ Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "som"
+ Requires-Dist: requests<3.0.0,>=2.31.0; extra == "som"
+ Provides-Extra: all
+ Requires-Dist: torch>=2.2.1; extra == "all"
+ Requires-Dist: torchvision>=0.17.1; extra == "all"
+ Requires-Dist: ultralytics>=8.0.0; extra == "all"
+ Requires-Dist: transformers>=4.38.2; extra == "all"
+ Requires-Dist: cua-som<0.2.0,>=0.1.0; extra == "all"
+ Requires-Dist: anthropic<0.47.0,>=0.46.0; extra == "all"
+ Requires-Dist: boto3<2.0.0,>=1.35.81; extra == "all"
+ Requires-Dist: openai<2.0.0,>=1.14.0; extra == "all"
+ Requires-Dist: groq<0.5.0,>=0.4.0; extra == "all"
+ Requires-Dist: dashscope<2.0.0,>=1.13.0; extra == "all"
+ Requires-Dist: requests<3.0.0,>=2.31.0; extra == "all"
+ Description-Content-Type: text/markdown
+
+ <div align="center">
+ <h1>
+ <div class="image-wrapper" style="display: inline-block;">
+ <picture>
+ <source media="(prefers-color-scheme: dark)" alt="logo" height="150" srcset="../../img/logo_white.png" style="display: block; margin: auto;">
+ <source media="(prefers-color-scheme: light)" alt="logo" height="150" srcset="../../img/logo_black.png" style="display: block; margin: auto;">
+ <img alt="Shows my svg">
+ </picture>
+ </div>
+
+ [![Python](https://img.shields.io/badge/Python-333333?logo=python&logoColor=white&labelColor=333333)](#)
+ [![macOS](https://img.shields.io/badge/macOS-000000?logo=apple&logoColor=F0F0F0)](#)
+ [![Discord](https://img.shields.io/badge/Discord-%235865F2.svg?&logo=discord&logoColor=white)](https://discord.com/invite/mVnXXpdE85)
+ [![PyPI](https://img.shields.io/pypi/v/cua-computer?color=333333)](https://pypi.org/project/cua-computer/)
+ </h1>
+ </div>
+
+ **cua-agent** is a general Computer-Use framework for running multi-app agentic workflows targeting macOS and Linux sandboxes created with Cua, supporting local (Ollama) and cloud model providers (OpenAI, Anthropic, Groq, DeepSeek, Qwen).
+
+ ### Get started with Agent
+
+ <div align="center">
+ <img src="../../img/agent.png"/>
+ </div>
+
+ ## Install
+
+ ```bash
+ pip install "cua-agent[all]"
+
+ # or install specific loop providers
+ pip install "cua-agent[openai]" # OpenAI Cua Loop
+ pip install "cua-agent[anthropic]" # Anthropic Cua Loop
+ pip install "cua-agent[omni]" # Cua Loop based on OmniParser
+ ```
+
+ ## Run
+
+ ```python
+ import asyncio
+
+ # Imports assume the top-level `computer` and `agent` packages installed
+ # by cua-computer and cua-agent.
+ from computer import Computer
+ from agent import ComputerAgent, AgentLoop, LLM, LLMProvider
+
+ async def main():
+     async with Computer() as macos_computer:
+         # Create agent with loop and provider
+         agent = ComputerAgent(
+             computer=macos_computer,
+             loop=AgentLoop.OPENAI,
+             model=LLM(provider=LLMProvider.OPENAI)
+         )
+
+         tasks = [
+             "Look for a repository named trycua/cua on GitHub.",
+             "Check the open issues, open the most recent one and read it.",
+             "Clone the repository in users/lume/projects if it doesn't exist yet.",
+             "Open the repository with an app named Cursor (on the dock, black background and white cube icon).",
+             "From Cursor, open Composer if not already open.",
+             "Focus on the Composer text area, then write and submit a task to help resolve the GitHub issue.",
+         ]
+
+         for i, task in enumerate(tasks):
+             print(f"\nExecuting task {i+1}/{len(tasks)}: {task}")
+             async for result in agent.run(task):
+                 print(result)
+
+             print(f"\n✅ Task {i+1}/{len(tasks)} completed: {task}")
+
+ asyncio.run(main())
+ ```
+
+ Refer to these notebooks for step-by-step guides on how to use the Computer-Use Agent (CUA):
+
+ - [Agent Notebook](../../notebooks/agent_nb.ipynb) - Complete examples and workflows
+
+ ## Agent Loops
+
+ The `cua-agent` package provides three agent loop variations, based on different CUA model providers and techniques; a short selection sketch follows the table:
+
+ | Agent Loop | Supported Models | Description | Set-Of-Marks |
+ |:-----------|:-----------------|:------------|:-------------|
+ | `AgentLoop.OPENAI` | • `computer_use_preview` | Use OpenAI Operator CUA model | Not Required |
+ | `AgentLoop.ANTHROPIC` | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219` | Use Anthropic Computer-Use | Not Required |
+ | `AgentLoop.OMNI` <br>(preview) | • `claude-3-5-sonnet-20240620`<br>• `claude-3-7-sonnet-20250219`<br>• `gpt-4.5-preview`<br>• `gpt-4o`<br>• `gpt-4`<br>• `gpt-3.5-turbo` | Use OmniParser for element pixel-detection (SoM) and any VLMs | OmniParser |
+
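Switching between these loops amounts to changing the `loop` and `model` arguments passed to `ComputerAgent`. A minimal selection sketch follows, reusing the imports from the Run example above; the `LLMProvider.ANTHROPIC` member and the `name=` keyword of `LLM` are illustrative assumptions and may differ in the installed package:

```python
# Hypothetical helper: pick one of the three loops from the table above.
# Assumes `from agent import ComputerAgent, AgentLoop, LLM, LLMProvider`
# as in the Run example; model names come from the Supported Models column.
def make_agent(computer, loop_name: str):
    if loop_name == "openai":
        # OpenAI Operator CUA model; no Set-of-Marks parser required
        return ComputerAgent(
            computer=computer,
            loop=AgentLoop.OPENAI,
            model=LLM(provider=LLMProvider.OPENAI),
        )
    if loop_name == "anthropic":
        # Anthropic Computer-Use with a Claude Sonnet model
        # (the provider member and name= keyword are assumptions here)
        return ComputerAgent(
            computer=computer,
            loop=AgentLoop.ANTHROPIC,
            model=LLM(provider=LLMProvider.ANTHROPIC, name="claude-3-7-sonnet-20250219"),
        )
    # OMNI (preview): OmniParser-based Set-of-Marks grounding with any supported VLM
    return ComputerAgent(
        computer=computer,
        loop=AgentLoop.OMNI,
        model=LLM(provider=LLMProvider.OPENAI, name="gpt-4o"),
    )
```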
+ ## AgentResponse
+ The `AgentResponse` class represents the structured output returned after each agent turn. It contains the agent's response, reasoning, tool usage, and other metadata. The response format aligns with the new [OpenAI Agent SDK specification](https://platform.openai.com/docs/api-reference/responses) for better consistency across different agent loops.
+
+ ```python
+ async for result in agent.run(task):
+     print("Response ID: ", result.get("id"))
+
+     # Print detailed usage information
+     usage = result.get("usage")
+     if usage:
+         print("\nUsage Details:")
+         print(f" Input Tokens: {usage.get('input_tokens')}")
+         if "input_tokens_details" in usage:
+             print(f" Input Tokens Details: {usage.get('input_tokens_details')}")
+         print(f" Output Tokens: {usage.get('output_tokens')}")
+         if "output_tokens_details" in usage:
+             print(f" Output Tokens Details: {usage.get('output_tokens_details')}")
+         print(f" Total Tokens: {usage.get('total_tokens')}")
+
+     print("Response Text: ", result.get("text"))
+
+     # Print tools information
+     tools = result.get("tools")
+     if tools:
+         print("\nTools:")
+         print(tools)
+
+     # Print reasoning and tool call outputs
+     outputs = result.get("output", [])
+     for output in outputs:
+         output_type = output.get("type")
+         if output_type == "reasoning":
+             print("\nReasoning Output:")
+             print(output)
+         elif output_type == "computer_call":
+             print("\nTool Call Output:")
+             print(output)
+ ```
cua_agent-0.1.18.dist-info/RECORD ADDED
@@ -0,0 +1,73 @@
+ agent/__init__.py,sha256=ZOK-dcYN3o9CQ9XnUipkDR7YKQNNZ3HL_7MJbqHY_-c,1494
+ agent/core/README.md,sha256=VOXNVbR0ugxf9gCXYmZtUU2kngZhfi29haT_oSxK0Lk,3559
+ agent/core/__init__.py,sha256=7DhJ_6KKooM6uTmDIlumCnd7OFcU67BYIIR1dpIYUB0,506
+ agent/core/agent.py,sha256=WutrveYndS-YBy9BVu1I-Eyj-7AAk1CgACkn5OdWvhU,8010
+ agent/core/base.py,sha256=EoutyMJ2kSJ72Di8KVRiUXc0ZJ1OkA0e7Ej14Y3F87w,7124
+ agent/core/callbacks.py,sha256=VbGIf5QkHh3Q0KsLM6wv7hRdIA5WExTVYLm64bckyUA,4306
+ agent/core/experiment.py,sha256=Ywj6q3JZFDKicfPuQsDl0vSN55HS7-Cnk3u3EcUCKe8,8866
+ agent/core/factory.py,sha256=sy7k323ZPDf8T7UEKNSV9wT2OylckliaPr8KyYhMAi0,3829
+ agent/core/messages.py,sha256=-OVMDqcxK5MUHPEkHliK29XFJYMRAc1keFvzrUyrOmM,16231
+ agent/core/provider_config.py,sha256=GndsvFdFQHNlxOSE6XcxRAZcyOJmLQ_jSOd66QY0uyk,414
+ agent/core/telemetry.py,sha256=HElPd32k_w2SJ6t-Cc3j_2-AKdLbFwh2YlM8QViDgRw,4790
+ agent/core/tools.py,sha256=Jes2CFCFqC727WWHbO-sG7V03rBHnQe5X7Oi9ZkuScI,877
+ agent/core/tools/__init__.py,sha256=xZen-PqUp2dUaMEHJowXCQm33_5Sxhsx9PSoD0rq6tI,489
+ agent/core/tools/base.py,sha256=CdzRFNuOjNfzgyTUN4ZoCGkUDR5HI0ECQVpvrUdEij8,2295
+ agent/core/tools/bash.py,sha256=jnJKVlHn8np8e0gWd8EO0_qqjMkfQzutSugA_Iol4jE,1585
+ agent/core/tools/collection.py,sha256=NuwTn6dXSyznxWodfmFDQwUlxxaGb4oBPym4AEJABSQ,1338
+ agent/core/tools/computer.py,sha256=lT_aW3huoYpcM8kffuokELupSz_WZG_qkaW1gITRC58,3892
+ agent/core/tools/edit.py,sha256=kv4jTKCM0VXrnoNErf7mT-xlr81-7T8v49_VA9y_L4Y,2005
+ agent/core/tools/manager.py,sha256=IRsCXjGc076nncQuyIjODoafnHTDhrf9sP5B4q5Pcdo,1742
+ agent/core/types.py,sha256=fSOrbGVjidfrTA1XEy7k5uAcq3Ir1sMNXojs8AS0B1Y,1202
+ agent/core/visualization.py,sha256=1DuFF5sSeSf5BRSevBMDxml9-ajl7BQLFm5KBUwMbI8,6573
+ agent/providers/__init__.py,sha256=b4tIBAaIB1V7p8V0BWipHVnMhfHH_OuVgP4OWGSHdD8,194
+ agent/providers/anthropic/__init__.py,sha256=Mj11IZnVshZ2iHkvg4Z5-jrQIaD1WvzDz2Zk_pMwqIA,149
+ agent/providers/anthropic/api/client.py,sha256=IVCntHAlkHFHPb6h4cEpb4wsBESy0wC6IPG8_Yydtqw,13258
+ agent/providers/anthropic/api/logging.py,sha256=vHpwkIyOZdkSTVIH4ycbBPd4a_rzhP7Osu1I-Ayouwc,5154
+ agent/providers/anthropic/api_handler.py,sha256=pWXcqDs0ruviDhRNRrz5Ac9ZH4yDv6ZlwpeG3a42cDg,5206
+ agent/providers/anthropic/callbacks/__init__.py,sha256=PciBb6Z6MKSwfXqDjU3pV_0FS4MOn_Np_A7_skD-6dA,104
+ agent/providers/anthropic/callbacks/manager.py,sha256=dRKN7MuBze2dLal0iHDxCKYqMdh_KShSphuwn7zC-c4,1878
+ agent/providers/anthropic/loop.py,sha256=jW2PwJ8EBzfFKyD-cy7hKRsz1ZWyw8m-xHLUojsP4qE,20226
+ agent/providers/anthropic/prompts.py,sha256=nHFfgPrfvnWrEdVP7EUBGUHAI85D2X9HeZirk9EwncU,1941
+ agent/providers/anthropic/response_handler.py,sha256=ZTprV4NTP9Eb9jQ7QgEKZBX0L6rMj5nqBRiE3Zfws8I,8008
+ agent/providers/anthropic/tools/__init__.py,sha256=JyZwuVtPUnZwRSZBSCdQv9yxbLCsygm3l8Ywjjt9qTQ,661
+ agent/providers/anthropic/tools/base.py,sha256=WnRDbqO25tQzLpS2RU2ZXTLF5wd5IqU7SiyRAglQat4,2752
+ agent/providers/anthropic/tools/bash.py,sha256=QODuFjWuHM4GgGTqK2HizSyYqGqQwX70AdwrFiGSp2Q,2218
+ agent/providers/anthropic/tools/collection.py,sha256=RBK_6hxfHExR-EOxadiLl0OznmFj07nyIUjFgaYZ6Eo,960
+ agent/providers/anthropic/tools/computer.py,sha256=vYni1jDOOgzSSBOJxHcEKxvKUYRp5_nQ-9dmpGdLwm4,25858
+ agent/providers/anthropic/tools/edit.py,sha256=EGRP61MDA4Oue1D7Q-_vLpd6LdGbdBA1Z4HSZ66DbmI,13465
+ agent/providers/anthropic/tools/manager.py,sha256=yNvgTkfEqnOz5isDF0RxvmBMZB0uh2PipFEH-PUXpoY,2020
+ agent/providers/anthropic/tools/run.py,sha256=xhXdnBK1di9muaO44CEirL9hpGy3NmKbjfMpyeVmn8Y,1595
+ agent/providers/anthropic/types.py,sha256=SF00kOMC1ui8j9Ah56KaeiR2cL394qCHjFIsBpXxt5w,421
+ agent/providers/anthropic/utils.py,sha256=qDp0bFGQhK1dG9U461iaeCiyoVUsksXmD43g9cedRW8,14367
+ agent/providers/omni/__init__.py,sha256=59Eqpr3Nc3EE61VirUkfecAnQuGELdg0t44q5tg3SW8,172
+ agent/providers/omni/api_handler.py,sha256=7CpD43lYAqTyNKWfrD8XcM9ekbajqKCTH9p0TWtEQyg,1163
+ agent/providers/omni/clients/anthropic.py,sha256=nC_lj3UwrLqx9TIew58yxLqKwrH1_LwJD6EqVSEfp3g,3670
+ agent/providers/omni/clients/base.py,sha256=zAAgPi0jl3SWPC730R9l79E8bfYPSo39UtCSE-mrK6I,1076
+ agent/providers/omni/clients/openai.py,sha256=E4TAXMUFoYTunJETCWCNx5XAc6xutiN4rB6PlVpzC5s,5972
+ agent/providers/omni/clients/utils.py,sha256=Ani9CVVBm_J2Dl51WG6p1GVuoI6cq8scISrG0pmQ37o,688
+ agent/providers/omni/image_utils.py,sha256=wejhWb36yqedsPnLFTFwk2wth8a6txfVWSg4EaNrRdA,908
+ agent/providers/omni/loop.py,sha256=6521KV1hBdxnovzT0wfUcYiRCe7agx2TCC0hgQA3TRw,37459
+ agent/providers/omni/parser.py,sha256=REpQwlwvY1z_N8wbMj6GhOeTiiWVWHhVja_LOxgzbks,11734
+ agent/providers/omni/prompts.py,sha256=Mupjy0bUwBjcAeLXpE1r1jisYPSlhwsp-IXJKEKrEtw,3779
+ agent/providers/omni/tools/__init__.py,sha256=IC1cMEDoR2ljGcNNthzBRF_VtnDbRL5qvHJWErtNp98,774
+ agent/providers/omni/tools/base.py,sha256=HiQ8dp9NbFGlGopbE1wxo0ZbujA7bzCGjCg4tl2lnPE,824
+ agent/providers/omni/tools/bash.py,sha256=wocYvWwoaVjHba19CVqc3bvwj8_1qwqYjNaPBbMRlWA,2241
+ agent/providers/omni/tools/computer.py,sha256=cB5PrhPmk6acKSENIvzw4rdpjeWx4HQHfSxBLGHzGRE,6964
+ agent/providers/omni/tools/manager.py,sha256=GVLudHNpOQnl6aA_IOvqAEMDoKW62ozMZuwst6Z1Hco,2094
+ agent/providers/omni/types.py,sha256=EoEiqtW98R2ZlhZb1-II4t1Ctf7qCOG6rUn_uQUHNdM,1021
+ agent/providers/omni/utils.py,sha256=Ikp6ONL1HO637o3KDtv5yv6q-4uIWAzMSQDvGetWXC8,8724
+ agent/providers/openai/__init__.py,sha256=8DS6YNZp42NLCacwXsfRaghyczaOCVovX8TgzXUZf_o,165
+ agent/providers/openai/api_handler.py,sha256=zIb9JtfTT9TI4Gd7iyjz5m_MCc9sP5mNXLZQHAr0Rw0,17647
+ agent/providers/openai/loop.py,sha256=3gruChABTE6IMQemQPwiER7xk6cmjuHuFssa0KUipAk,18782
+ agent/providers/openai/response_handler.py,sha256=K8v_92uSr9R74Y5INY4naeEZZZm35CLIl4h74MBZhsw,7953
+ agent/providers/openai/tools/__init__.py,sha256=-KbHMWcd2OVTk5RYQ3ACBEMygwbH-VW6n_98p0lwM4A,344
+ agent/providers/openai/tools/base.py,sha256=Np_BC9Cm6TslK99etE9hVTtsBlcEaGhoNCK3NXdB_Lw,2474
+ agent/providers/openai/tools/computer.py,sha256=51uRgCXbeqlnKotlnA0-iHkXHHJkqede2GUZfwtGxx4,12271
+ agent/providers/openai/tools/manager.py,sha256=-wM641dLf8vcv6QF9x_ViGJeDl2YTuUV93j6u7GBI18,3903
+ agent/providers/openai/types.py,sha256=0mFUxeFy23fJhMwc6lAFVXKngg2fJIXkPS5oV284V1M,898
+ agent/providers/openai/utils.py,sha256=YeCZWIqOFSeugWoqAS0rhxOKAfL-9uN9nrYSBGBgPdc,3175
+ agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
+ cua_agent-0.1.18.dist-info/METADATA,sha256=t09Oyz6kHe2fgkGGLwpb2Cyk27I6TRXiMoCL8qQBgD0,7019
+ cua_agent-0.1.18.dist-info/WHEEL,sha256=thaaA2w1JzcGC48WYufAs8nrYZjJm8LqNfnXFOFyCC4,90
+ cua_agent-0.1.18.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+ cua_agent-0.1.18.dist-info/RECORD,,
agent/README.md DELETED
@@ -1,63 +0,0 @@
- # Agent Package Structure
-
- ## Overview
- The agent package provides a modular and extensible framework for AI-powered computer agents.
-
- ## Directory Structure
- ```
- agent/
- ├── __init__.py # Package exports
- ├── core/ # Core functionality
- │   ├── __init__.py
- │   ├── computer_agent.py # Main entry point
- │   └── factory.py # Provider factory
- ├── base/ # Base implementations
- │   ├── __init__.py
- │   ├── agent.py # Base agent class
- │   ├── core/ # Core components
- │   │   ├── callbacks.py
- │   │   ├── loop.py
- │   │   └── messages.py
- │   └── tools/ # Tool implementations
- ├── providers/ # Provider implementations
- │   ├── __init__.py
- │   ├── anthropic/ # Anthropic provider
- │   │   ├── agent.py
- │   │   ├── loop.py
- │   │   └── tool_manager.py
- │   └── omni/ # Omni provider
- │       ├── agent.py
- │       ├── loop.py
- │       └── tool_manager.py
- └── types/ # Type definitions
-     ├── __init__.py
-     ├── base.py # Core types
-     ├── messages.py # Message types
-     ├── tools.py # Tool types
-     └── providers/ # Provider-specific types
-         ├── anthropic.py
-         └── omni.py
- ```
-
- ## Key Components
-
- ### Core
- - `computer_agent.py`: Main entry point for creating and using agents
- - `factory.py`: Factory for creating provider-specific implementations
-
- ### Base
- - `agent.py`: Base agent implementation with shared functionality
- - `core/`: Core components used across providers
- - `tools/`: Shared tool implementations
-
- ### Providers
- Each provider follows the same structure:
- - `agent.py`: Provider-specific agent implementation
- - `loop.py`: Provider-specific message loop
- - `tool_manager.py`: Tool management for provider
-
- ### Types
- - `base.py`: Core type definitions
- - `messages.py`: Message-related types
- - `tools.py`: Tool-related types
- - `providers/`: Provider-specific type definitions
agent/providers/anthropic/messages/manager.py DELETED
@@ -1,112 +0,0 @@
- from dataclasses import dataclass
- from typing import cast
- from anthropic.types.beta import (
-     BetaMessageParam,
-     BetaCacheControlEphemeralParam,
-     BetaToolResultBlockParam,
- )
-
-
- @dataclass
- class ImageRetentionConfig:
-     """Configuration for image retention in messages."""
-
-     num_images_to_keep: int | None = None
-     min_removal_threshold: int = 1
-     enable_caching: bool = True
-
-     def should_retain_images(self) -> bool:
-         """Check if image retention is enabled."""
-         return self.num_images_to_keep is not None and self.num_images_to_keep > 0
-
-
- class MessageManager:
-     """Manages message preparation, including image retention and caching."""
-
-     def __init__(self, image_retention_config: ImageRetentionConfig):
-         """Initialize the message manager.
-
-         Args:
-             image_retention_config: Configuration for image retention
-         """
-         if image_retention_config.min_removal_threshold < 1:
-             raise ValueError("min_removal_threshold must be at least 1")
-         self.image_retention_config = image_retention_config
-
-     def prepare_messages(self, messages: list[BetaMessageParam]) -> list[BetaMessageParam]:
-         """Prepare messages by applying image retention and caching as configured."""
-         if self.image_retention_config.should_retain_images():
-             self._filter_images(messages)
-         if self.image_retention_config.enable_caching:
-             self._inject_caching(messages)
-         return messages
-
-     def _filter_images(self, messages: list[BetaMessageParam]) -> None:
-         """Filter messages to retain only the specified number of most recent images."""
-         tool_result_blocks = cast(
-             list[BetaToolResultBlockParam],
-             [
-                 item
-                 for message in messages
-                 for item in (message["content"] if isinstance(message["content"], list) else [])
-                 if isinstance(item, dict) and item.get("type") == "tool_result"
-             ],
-         )
-
-         total_images = sum(
-             1
-             for tool_result in tool_result_blocks
-             for content in tool_result.get("content", [])
-             if isinstance(content, dict) and content.get("type") == "image"
-         )
-
-         images_to_remove = total_images - (self.image_retention_config.num_images_to_keep or 0)
-         # Round down to nearest min_removal_threshold for better cache behavior
-         images_to_remove -= images_to_remove % self.image_retention_config.min_removal_threshold
-
-         # Remove oldest images first
-         for tool_result in tool_result_blocks:
-             if isinstance(tool_result.get("content"), list):
-                 new_content = []
-                 for content in tool_result.get("content", []):
-                     if isinstance(content, dict) and content.get("type") == "image":
-                         if images_to_remove > 0:
-                             images_to_remove -= 1
-                             continue
-                     new_content.append(content)
-                 tool_result["content"] = new_content
-
-     def _inject_caching(self, messages: list[BetaMessageParam]) -> None:
-         """Inject caching control for the most recent turns, limited to 3 blocks max to avoid API errors."""
-         # Anthropic API allows a maximum of 4 blocks with cache_control
-         # We use 3 here to be safe, as the system block may also have cache_control
-         blocks_with_cache_control = 0
-         max_cache_control_blocks = 3
-
-         for message in reversed(messages):
-             if message["role"] == "user" and isinstance(content := message["content"], list):
-                 # Only add cache control to the latest message in each turn
-                 if blocks_with_cache_control < max_cache_control_blocks:
-                     blocks_with_cache_control += 1
-                     # Add cache control to the last content block only
-                     if content and len(content) > 0:
-                         content[-1]["cache_control"] = BetaCacheControlEphemeralParam(
-                             type="ephemeral"
-                         )
-                 else:
-                     # Remove any existing cache control
-                     if content and len(content) > 0:
-                         content[-1].pop("cache_control", None)
-
-         # Ensure we're not exceeding the limit by checking the total
-         if blocks_with_cache_control > max_cache_control_blocks:
-             # If we somehow exceeded the limit, remove excess cache controls
-             excess = blocks_with_cache_control - max_cache_control_blocks
-             for message in messages:
-                 if excess <= 0:
-                     break
-
-                 if message["role"] == "user" and isinstance(content := message["content"], list):
-                     if content and len(content) > 0 and "cache_control" in content[-1]:
-                         content[-1].pop("cache_control", None)
-                         excess -= 1
agent/providers/omni/callbacks.py DELETED
@@ -1,78 +0,0 @@
- """Omni callback manager implementation."""
-
- import logging
- from typing import Any, Dict, Optional, Set
-
- from ...core.callbacks import BaseCallbackManager, ContentCallback, ToolCallback, APICallback
- from ...types.tools import ToolResult
-
- logger = logging.getLogger(__name__)
-
- class OmniCallbackManager(BaseCallbackManager):
-     """Callback manager for multi-provider support."""
-
-     def __init__(
-         self,
-         content_callback: ContentCallback,
-         tool_callback: ToolCallback,
-         api_callback: APICallback,
-     ):
-         """Initialize Omni callback manager.
-
-         Args:
-             content_callback: Callback for content updates
-             tool_callback: Callback for tool execution results
-             api_callback: Callback for API interactions
-         """
-         super().__init__(
-             content_callback=content_callback,
-             tool_callback=tool_callback,
-             api_callback=api_callback
-         )
-         self._active_tools: Set[str] = set()
-
-     def on_content(self, content: Any) -> None:
-         """Handle content updates.
-
-         Args:
-             content: Content update data
-         """
-         logger.debug(f"Content update: {content}")
-         self.content_callback(content)
-
-     def on_tool_result(self, result: ToolResult, tool_id: str) -> None:
-         """Handle tool execution results.
-
-         Args:
-             result: Tool execution result
-             tool_id: ID of the tool
-         """
-         logger.debug(f"Tool result for {tool_id}: {result}")
-         self.tool_callback(result, tool_id)
-
-     def on_api_interaction(
-         self,
-         request: Any,
-         response: Any,
-         error: Optional[Exception] = None
-     ) -> None:
-         """Handle API interactions.
-
-         Args:
-             request: API request data
-             response: API response data
-             error: Optional error that occurred
-         """
-         if error:
-             logger.error(f"API error: {str(error)}")
-         else:
-             logger.debug(f"API interaction - Request: {request}, Response: {response}")
-         self.api_callback(request, response, error)
-
-     def get_active_tools(self) -> Set[str]:
-         """Get currently active tools.
-
-         Returns:
-             Set of active tool names
-         """
-         return self._active_tools.copy()
agent/providers/omni/clients/groq.py DELETED
@@ -1,101 +0,0 @@
- """Groq client implementation."""
-
- import os
- import logging
- from typing import Dict, List, Optional, Any, Tuple
-
- from groq import Groq
- import re
- from .utils import is_image_path
- from .base import BaseOmniClient
-
- logger = logging.getLogger(__name__)
-
-
- class GroqClient(BaseOmniClient):
-     """Client for making Groq API calls."""
-
-     def __init__(
-         self,
-         api_key: Optional[str] = None,
-         model: str = "deepseek-r1-distill-llama-70b",
-         max_tokens: int = 4096,
-         temperature: float = 0.6,
-     ):
-         """Initialize Groq client.
-
-         Args:
-             api_key: Groq API key (if not provided, will try to get from env)
-             model: Model name to use
-             max_tokens: Maximum tokens to generate
-             temperature: Temperature for sampling
-         """
-         super().__init__(api_key=api_key, model=model)
-         self.api_key = api_key or os.getenv("GROQ_API_KEY")
-         if not self.api_key:
-             raise ValueError("No Groq API key provided")
-
-         self.max_tokens = max_tokens
-         self.temperature = temperature
-         self.client = Groq(api_key=self.api_key)
-         self.model: str = model # Add explicit type annotation
-
-     def run_interleaved(
-         self, messages: List[Dict[str, Any]], system: str, max_tokens: Optional[int] = None
-     ) -> tuple[str, int]:
-         """Run interleaved chat completion.
-
-         Args:
-             messages: List of message dicts
-             system: System prompt
-             max_tokens: Optional max tokens override
-
-         Returns:
-             Tuple of (response text, token usage)
-         """
-         # Avoid using system messages for R1
-         final_messages = [{"role": "user", "content": system}]
-
-         # Process messages
-         if isinstance(messages, list):
-             for item in messages:
-                 if isinstance(item, dict):
-                     # For dict items, concatenate all text content, ignoring images
-                     text_contents = []
-                     for cnt in item["content"]:
-                         if isinstance(cnt, str):
-                             if not is_image_path(cnt): # Skip image paths
-                                 text_contents.append(cnt)
-                         else:
-                             text_contents.append(str(cnt))
-
-                     if text_contents: # Only add if there's text content
-                         message = {"role": "user", "content": " ".join(text_contents)}
-                         final_messages.append(message)
-                 else: # str
-                     message = {"role": "user", "content": item}
-                     final_messages.append(message)
-
-         elif isinstance(messages, str):
-             final_messages.append({"role": "user", "content": messages})
-
-         try:
-             completion = self.client.chat.completions.create( # type: ignore
-                 model=self.model,
-                 messages=final_messages, # type: ignore
-                 temperature=self.temperature,
-                 max_tokens=max_tokens or self.max_tokens,
-                 top_p=0.95,
-                 stream=False,
-             )
-
-             response = completion.choices[0].message.content
-             final_answer = response.split("</think>\n")[-1] if "</think>" in response else response
-             final_answer = final_answer.replace("<output>", "").replace("</output>", "")
-             token_usage = completion.usage.total_tokens
-
-             return final_answer, token_usage
-
-         except Exception as e:
-             logger.error(f"Error in Groq API call: {e}")
-             raise