cua-agent 0.3.2__py3-none-any.whl → 0.4.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent has been flagged as potentially problematic; see the release's details page for more information.
- agent/__init__.py +15 -51
- agent/__main__.py +21 -0
- agent/adapters/__init__.py +9 -0
- agent/adapters/huggingfacelocal_adapter.py +229 -0
- agent/agent.py +577 -0
- agent/callbacks/__init__.py +17 -0
- agent/callbacks/base.py +153 -0
- agent/callbacks/budget_manager.py +44 -0
- agent/callbacks/image_retention.py +139 -0
- agent/callbacks/logging.py +247 -0
- agent/callbacks/pii_anonymization.py +259 -0
- agent/callbacks/trajectory_saver.py +305 -0
- agent/cli.py +290 -0
- agent/computer_handler.py +107 -0
- agent/decorators.py +90 -0
- agent/loops/__init__.py +11 -0
- agent/loops/anthropic.py +728 -0
- agent/loops/omniparser.py +339 -0
- agent/loops/openai.py +95 -0
- agent/loops/uitars.py +688 -0
- agent/responses.py +207 -0
- agent/types.py +79 -0
- agent/ui/__init__.py +7 -1
- agent/ui/gradio/__init__.py +6 -19
- agent/ui/gradio/app.py +80 -1299
- agent/ui/gradio/ui_components.py +703 -0
- cua_agent-0.4.0b2.dist-info/METADATA +424 -0
- cua_agent-0.4.0b2.dist-info/RECORD +30 -0
- agent/core/__init__.py +0 -27
- agent/core/agent.py +0 -210
- agent/core/base.py +0 -217
- agent/core/callbacks.py +0 -200
- agent/core/experiment.py +0 -249
- agent/core/factory.py +0 -122
- agent/core/messages.py +0 -332
- agent/core/provider_config.py +0 -21
- agent/core/telemetry.py +0 -142
- agent/core/tools/__init__.py +0 -21
- agent/core/tools/base.py +0 -74
- agent/core/tools/bash.py +0 -52
- agent/core/tools/collection.py +0 -46
- agent/core/tools/computer.py +0 -113
- agent/core/tools/edit.py +0 -67
- agent/core/tools/manager.py +0 -56
- agent/core/tools.py +0 -32
- agent/core/types.py +0 -88
- agent/core/visualization.py +0 -197
- agent/providers/__init__.py +0 -4
- agent/providers/anthropic/__init__.py +0 -6
- agent/providers/anthropic/api/client.py +0 -360
- agent/providers/anthropic/api/logging.py +0 -150
- agent/providers/anthropic/api_handler.py +0 -140
- agent/providers/anthropic/callbacks/__init__.py +0 -5
- agent/providers/anthropic/callbacks/manager.py +0 -65
- agent/providers/anthropic/loop.py +0 -568
- agent/providers/anthropic/prompts.py +0 -23
- agent/providers/anthropic/response_handler.py +0 -226
- agent/providers/anthropic/tools/__init__.py +0 -33
- agent/providers/anthropic/tools/base.py +0 -88
- agent/providers/anthropic/tools/bash.py +0 -66
- agent/providers/anthropic/tools/collection.py +0 -34
- agent/providers/anthropic/tools/computer.py +0 -396
- agent/providers/anthropic/tools/edit.py +0 -326
- agent/providers/anthropic/tools/manager.py +0 -54
- agent/providers/anthropic/tools/run.py +0 -42
- agent/providers/anthropic/types.py +0 -16
- agent/providers/anthropic/utils.py +0 -381
- agent/providers/omni/__init__.py +0 -8
- agent/providers/omni/api_handler.py +0 -42
- agent/providers/omni/clients/anthropic.py +0 -103
- agent/providers/omni/clients/base.py +0 -35
- agent/providers/omni/clients/oaicompat.py +0 -195
- agent/providers/omni/clients/ollama.py +0 -122
- agent/providers/omni/clients/openai.py +0 -155
- agent/providers/omni/clients/utils.py +0 -25
- agent/providers/omni/image_utils.py +0 -34
- agent/providers/omni/loop.py +0 -990
- agent/providers/omni/parser.py +0 -307
- agent/providers/omni/prompts.py +0 -64
- agent/providers/omni/tools/__init__.py +0 -30
- agent/providers/omni/tools/base.py +0 -29
- agent/providers/omni/tools/bash.py +0 -74
- agent/providers/omni/tools/computer.py +0 -179
- agent/providers/omni/tools/manager.py +0 -61
- agent/providers/omni/utils.py +0 -236
- agent/providers/openai/__init__.py +0 -6
- agent/providers/openai/api_handler.py +0 -456
- agent/providers/openai/loop.py +0 -472
- agent/providers/openai/response_handler.py +0 -205
- agent/providers/openai/tools/__init__.py +0 -15
- agent/providers/openai/tools/base.py +0 -79
- agent/providers/openai/tools/computer.py +0 -326
- agent/providers/openai/tools/manager.py +0 -106
- agent/providers/openai/types.py +0 -36
- agent/providers/openai/utils.py +0 -98
- agent/providers/uitars/__init__.py +0 -1
- agent/providers/uitars/clients/base.py +0 -35
- agent/providers/uitars/clients/mlxvlm.py +0 -263
- agent/providers/uitars/clients/oaicompat.py +0 -214
- agent/providers/uitars/loop.py +0 -660
- agent/providers/uitars/prompts.py +0 -63
- agent/providers/uitars/tools/__init__.py +0 -1
- agent/providers/uitars/tools/computer.py +0 -283
- agent/providers/uitars/tools/manager.py +0 -60
- agent/providers/uitars/utils.py +0 -264
- agent/telemetry.py +0 -21
- agent/ui/__main__.py +0 -15
- cua_agent-0.3.2.dist-info/METADATA +0 -295
- cua_agent-0.3.2.dist-info/RECORD +0 -87
- {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b2.dist-info}/WHEEL +0 -0
- {cua_agent-0.3.2.dist-info → cua_agent-0.4.0b2.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,703 @@
|
|
|
1
|
+
"""
|
|
2
|
+
UI Components for the Gradio interface
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import asyncio
|
|
7
|
+
import logging
|
|
8
|
+
import json
|
|
9
|
+
import platform
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Dict, List, Optional, Any, cast
|
|
12
|
+
import gradio as gr
|
|
13
|
+
from gradio.components.chatbot import MetadataDict
|
|
14
|
+
|
|
15
|
+
from .app import (
|
|
16
|
+
load_settings, save_settings, create_agent, get_model_string,
|
|
17
|
+
get_ollama_models, GradioChatScreenshotHandler, global_agent, global_computer
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def create_gradio_ui() -> gr.Blocks:
    """Create a Gradio UI for the Computer-Use Agent.

    Builds a two-column interface: the left column holds computer/agent
    configuration widgets plus a live "Python Code" preview generated from
    the current settings; the right column hosts the chat interface that
    drives the agent. Returns the assembled ``gr.Blocks`` app.
    """

    # Load persisted UI settings from a previous session.
    saved_settings = load_settings()

    # Check for API keys already present in the environment.
    openai_api_key = os.environ.get("OPENAI_API_KEY", "")
    anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
    cua_api_key = os.environ.get("CUA_API_KEY", "")

    # Model choices offered per provider loop.
    openai_models = ["OpenAI: Computer-Use Preview"]
    anthropic_models = [
        "Anthropic: Claude 4 Opus (20250514)",
        "Anthropic: Claude 4 Sonnet (20250514)",
        "Anthropic: Claude 3.7 Sonnet (20250219)",
        "Anthropic: Claude 3.5 Sonnet (20240620)",
    ]
    omni_models = [
        "OMNI: OpenAI GPT-4o",
        "OMNI: OpenAI GPT-4o mini",
        "OMNI: Claude 3.7 Sonnet (20250219)",
        "OMNI: Claude 3.5 Sonnet (20240620)"
    ]

    # Check if API keys are available (controls visibility of the key inputs).
    has_openai_key = bool(openai_api_key)
    has_anthropic_key = bool(anthropic_api_key)
    has_cua_key = bool(cua_api_key)

    # Get locally installed Ollama models and offer them under the OMNI loop.
    ollama_models = get_ollama_models()
    if ollama_models:
        omni_models += ollama_models

    # Detect platform: the local UI-TARS model is only offered on macOS.
    is_mac = platform.system().lower() == "darwin"

    # Map each agent loop to the models selectable for it.
    provider_to_models = {
        "OPENAI": openai_models,
        "ANTHROPIC": anthropic_models,
        "OMNI": omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"],
        "UITARS": ([
            "huggingface-local/ByteDance-Seed/UI-TARS-1.5-7B",
        ] if is_mac else []) + ["Custom model (OpenAI compatible API)"],
    }

    # Apply saved settings, falling back to the loop's default model when the
    # previously saved choice is no longer available for that loop.
    initial_loop = saved_settings.get("agent_loop", "OMNI")
    available_models_for_loop = provider_to_models.get(initial_loop, [])
    saved_model_choice = saved_settings.get("model_choice")
    if saved_model_choice and saved_model_choice in available_models_for_loop:
        initial_model = saved_model_choice
    else:
        if initial_loop == "OPENAI":
            initial_model = openai_models[0] if openai_models else "No models available"
        elif initial_loop == "ANTHROPIC":
            initial_model = anthropic_models[0] if anthropic_models else "No models available"
        else:  # OMNI
            initial_model = omni_models[0] if omni_models else "Custom model (OpenAI compatible API)"

    initial_custom_model = saved_settings.get("custom_model", "Qwen2.5-VL-7B-Instruct")
    initial_provider_base_url = saved_settings.get("provider_base_url", "http://localhost:1234/v1")
    initial_save_trajectory = saved_settings.get("save_trajectory", True)
    initial_recent_images = saved_settings.get("recent_images", 3)

    # Example prompts shown under the chat input.
    example_messages = [
        "Create a Python virtual environment, install pandas and matplotlib, then plot stock data",
        "Open a PDF in Preview, add annotations, and save it as a compressed version",
        "Open Safari, search for 'macOS automation tools', and save the first three results as bookmarks",
        "Configure SSH keys and set up a connection to a remote server",
    ]

    def generate_python_code(agent_loop_choice, model_name, tasks, recent_images=3, save_trajectory=True, computer_os="linux", computer_provider="cloud", container_name="", cua_cloud_api_key="", max_budget=None):
        """Generate Python code for the current configuration and tasks.

        Returns a runnable script (as a string) mirroring the UI settings:
        a Computer context, a ComputerAgent, and the chat tasks entered so
        far (or a single placeholder task when none were entered).
        """
        # Build the generated `tasks = [...]` entries, skipping blank tasks.
        # NOTE(review): task text is interpolated into a quoted literal
        # unescaped — a task containing '"' or '\' would break the generated
        # script; confirm inputs are plain prose.
        tasks_str = ""
        for task in tasks:
            if task and task.strip():
                tasks_str += f'            "{task}",\n'

        model_string = get_model_string(model_name, agent_loop_choice)

        # Only emit Computer(...) kwargs that differ from the defaults.
        computer_args = []
        if computer_os != "macos":
            computer_args.append(f'os_type="{computer_os}"')
        if computer_provider != "lume":
            computer_args.append(f'provider_type="{computer_provider}"')
        if container_name:
            computer_args.append(f'name="{container_name}"')
        if cua_cloud_api_key:
            computer_args.append(f'api_key="{cua_cloud_api_key}"')

        computer_args_str = ", ".join(computer_args)
        if computer_args_str:
            computer_args_str = f"({computer_args_str})"
        else:
            computer_args_str = "()"

        code = f'''import asyncio
from computer import Computer
from agent import ComputerAgent

async def main():
    async with Computer{computer_args_str} as computer:
        agent = ComputerAgent(
            model="{model_string}",
            tools=[computer],
            only_n_most_recent_images={recent_images},'''

        if save_trajectory:
            code += '''
            trajectory_dir="trajectories",'''

        if max_budget:
            code += f'''
            max_trajectory_budget={{"max_budget": {max_budget}, "raise_error": True}},'''

        code += '''
        )
'''

        if tasks_str:
            code += f'''
        # Prompts for the computer-use agent
        tasks = [
{tasks_str.rstrip()}
        ]

        for task in tasks:
            print(f"Executing task: {{task}}")
            messages = [{{"role": "user", "content": task}}]
            async for result in agent.run(messages):
                for item in result["output"]:
                    if item["type"] == "message":
                        print(item["content"][0]["text"])'''
        else:
            code += f'''
        # Execute a single task
        task = "Search for information about CUA on GitHub"
        print(f"Executing task: {{task}}")
        messages = [{{"role": "user", "content": task}}]
        async for result in agent.run(messages):
            for item in result["output"]:
                if item["type"] == "message":
                    print(item["content"][0]["text"])'''

        code += '''

if __name__ == "__main__":
    asyncio.run(main())'''

        return code

    # Create the Gradio interface
    with gr.Blocks(title="Computer-Use Agent") as demo:
        with gr.Row():
            # Left column for settings
            with gr.Column(scale=1):
                # Logo
                gr.HTML(
                    """
                    <div style="display: flex; justify-content: center; margin-bottom: 0.5em">
                        <img alt="CUA Logo" style="width: 80px;"
                             src="https://github.com/trycua/cua/blob/main/img/logo_black.png?raw=true" />
                    </div>
                    """
                )

                # Python code accordion: read-only preview of the script
                # equivalent to the current UI configuration.
                with gr.Accordion("Python Code", open=False):
                    code_display = gr.Code(
                        language="python",
                        value=generate_python_code(initial_loop, "gpt-4o", []),
                        interactive=False,
                    )

                with gr.Accordion("Computer Configuration", open=True):
                    computer_os = gr.Radio(
                        choices=["macos", "linux", "windows"],
                        label="Operating System",
                        value="macos",
                        info="Select the operating system for the computer",
                    )

                    # Provider availability depends on the host platform.
                    is_windows = platform.system().lower() == "windows"
                    is_mac = platform.system().lower() == "darwin"

                    providers = ["cloud"]
                    if is_mac:
                        providers += ["lume"]
                    if is_windows:
                        providers += ["winsandbox"]

                    computer_provider = gr.Radio(
                        choices=providers,
                        label="Provider",
                        value="lume" if is_mac else "cloud",
                        info="Select the computer provider",
                    )

                    container_name = gr.Textbox(
                        label="Container Name",
                        placeholder="Enter container name (optional)",
                        value=os.environ.get("CUA_CONTAINER_NAME", ""),
                        info="Optional name for the container",
                    )

                    # Only shown when no CUA key is already in the environment.
                    cua_cloud_api_key = gr.Textbox(
                        label="CUA Cloud API Key",
                        placeholder="Enter your CUA Cloud API key",
                        value=os.environ.get("CUA_API_KEY", ""),
                        type="password",
                        info="Required for cloud provider",
                        visible=(not has_cua_key)
                    )

                with gr.Accordion("Agent Configuration", open=True):
                    agent_loop = gr.Dropdown(
                        choices=["OPENAI", "ANTHROPIC", "OMNI", "UITARS"],
                        label="Agent Loop",
                        value=initial_loop,
                        info="Select the agent loop provider",
                    )

                    # Model selection dropdowns — one per loop; only the one
                    # matching the selected loop is visible at a time.
                    with gr.Group() as model_selection_group:
                        openai_model_choice = gr.Dropdown(
                            choices=openai_models,
                            label="OpenAI Model",
                            value=openai_models[0] if openai_models else "No models available",
                            info="Select OpenAI model",
                            interactive=True,
                            visible=(initial_loop == "OPENAI")
                        )

                        anthropic_model_choice = gr.Dropdown(
                            choices=anthropic_models,
                            label="Anthropic Model",
                            value=anthropic_models[0] if anthropic_models else "No models available",
                            info="Select Anthropic model",
                            interactive=True,
                            visible=(initial_loop == "ANTHROPIC")
                        )

                        omni_model_choice = gr.Dropdown(
                            choices=omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"],
                            label="OMNI Model",
                            value=omni_models[0] if omni_models else "Custom model (OpenAI compatible API)",
                            info="Select OMNI model or choose a custom model option",
                            interactive=True,
                            visible=(initial_loop == "OMNI")
                        )

                        uitars_model_choice = gr.Dropdown(
                            choices=provider_to_models.get("UITARS", ["No models available"]),
                            label="UITARS Model",
                            value=provider_to_models.get("UITARS", ["No models available"])[0] if provider_to_models.get("UITARS") else "No models available",
                            info="Select UITARS model",
                            interactive=True,
                            visible=(initial_loop == "UITARS")
                        )

                    # Hidden textbox mirroring whichever dropdown is active,
                    # used as a single input by the code-preview updater.
                    model_choice = gr.Textbox(visible=False)

                    # API key inputs — shown only when the corresponding key
                    # is missing from the environment and relevant to the loop.
                    with gr.Group(visible=not has_openai_key and (initial_loop == "OPENAI" or initial_loop == "OMNI")) as openai_key_group:
                        openai_api_key_input = gr.Textbox(
                            label="OpenAI API Key",
                            placeholder="Enter your OpenAI API key",
                            value=os.environ.get("OPENAI_API_KEY", ""),
                            interactive=True,
                            type="password",
                            info="Required for OpenAI models"
                        )

                    with gr.Group(visible=not has_anthropic_key and (initial_loop == "ANTHROPIC" or initial_loop == "OMNI")) as anthropic_key_group:
                        anthropic_api_key_input = gr.Textbox(
                            label="Anthropic API Key",
                            placeholder="Enter your Anthropic API key",
                            value=os.environ.get("ANTHROPIC_API_KEY", ""),
                            interactive=True,
                            type="password",
                            info="Required for Anthropic models"
                        )

                    # API key handlers: push typed keys into the process
                    # environment so downstream clients pick them up.
                    def set_openai_api_key(key):
                        """Export a non-empty OpenAI key to os.environ; echo the value back."""
                        if key and key.strip():
                            os.environ["OPENAI_API_KEY"] = key.strip()
                            print(f"DEBUG - Set OpenAI API key environment variable")
                        return key

                    def set_anthropic_api_key(key):
                        """Export a non-empty Anthropic key to os.environ; echo the value back."""
                        if key and key.strip():
                            os.environ["ANTHROPIC_API_KEY"] = key.strip()
                            print(f"DEBUG - Set Anthropic API key environment variable")
                        return key

                    openai_api_key_input.change(
                        fn=set_openai_api_key,
                        inputs=[openai_api_key_input],
                        outputs=[openai_api_key_input],
                        queue=False
                    )

                    anthropic_api_key_input.change(
                        fn=set_anthropic_api_key,
                        inputs=[anthropic_api_key_input],
                        outputs=[anthropic_api_key_input],
                        queue=False
                    )

                    # UI update function: recomputes widget visibility when the
                    # loop or any model dropdown changes.
                    def update_ui(loop=None, openai_model=None, anthropic_model=None, omni_model=None, uitars_model=None):
                        """Return visibility updates for the model dropdowns, key
                        groups, custom-model inputs, and the hidden model_choice
                        mirror, based on the selected loop and model."""
                        loop = loop or agent_loop.value

                        # Pick the model value from the dropdown matching the loop.
                        model_value = None
                        if loop == "OPENAI" and openai_model:
                            model_value = openai_model
                        elif loop == "ANTHROPIC" and anthropic_model:
                            model_value = anthropic_model
                        elif loop == "OMNI" and omni_model:
                            model_value = omni_model
                        elif loop == "UITARS" and uitars_model:
                            model_value = uitars_model

                        openai_visible = (loop == "OPENAI")
                        anthropic_visible = (loop == "ANTHROPIC")
                        omni_visible = (loop == "OMNI")
                        uitars_visible = (loop == "UITARS")

                        # Key inputs appear only when the key is missing AND the
                        # chosen (non-custom) model actually needs that vendor.
                        show_openai_key = not has_openai_key and (loop == "OPENAI" or (loop == "OMNI" and model_value and "OpenAI" in model_value and "Custom" not in model_value))
                        show_anthropic_key = not has_anthropic_key and (loop == "ANTHROPIC" or (loop == "OMNI" and model_value and "Claude" in model_value and "Custom" not in model_value))

                        is_custom_openai_api = model_value == "Custom model (OpenAI compatible API)"
                        is_custom_ollama = model_value == "Custom model (ollama)"
                        is_any_custom = is_custom_openai_api or is_custom_ollama

                        model_choice_value = model_value if model_value else ""

                        # Order must match the `outputs=` list of the .change wiring below.
                        return [
                            gr.update(visible=openai_visible),
                            gr.update(visible=anthropic_visible),
                            gr.update(visible=omni_visible),
                            gr.update(visible=uitars_visible),
                            gr.update(visible=show_openai_key),
                            gr.update(visible=show_anthropic_key),
                            gr.update(visible=is_any_custom),
                            gr.update(visible=is_custom_openai_api),
                            gr.update(visible=is_custom_openai_api),
                            gr.update(value=model_choice_value)
                        ]

                    # Custom model inputs (visible only for the "Custom model" choices).
                    custom_model = gr.Textbox(
                        label="Custom Model Name",
                        placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct or llama3)",
                        value=initial_custom_model,
                        visible=(initial_model == "Custom model (OpenAI compatible API)" or initial_model == "Custom model (ollama)"),
                        interactive=True,
                    )

                    provider_base_url = gr.Textbox(
                        label="Provider Base URL",
                        placeholder="Enter provider base URL (e.g., http://localhost:1234/v1)",
                        value=initial_provider_base_url,
                        visible=(initial_model == "Custom model (OpenAI compatible API)"),
                        interactive=True,
                    )

                    provider_api_key = gr.Textbox(
                        label="Provider API Key",
                        placeholder="Enter provider API key (if required)",
                        value="",
                        visible=(initial_model == "Custom model (OpenAI compatible API)"),
                        interactive=True,
                        type="password",
                    )

                    # Connect UI update events: every loop/model dropdown
                    # re-runs update_ui with the full dropdown state.
                    for dropdown in [agent_loop, omni_model_choice, uitars_model_choice, openai_model_choice, anthropic_model_choice]:
                        dropdown.change(
                            fn=update_ui,
                            inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
                            outputs=[
                                openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
                                openai_key_group, anthropic_key_group,
                                custom_model, provider_base_url, provider_api_key,
                                model_choice
                            ],
                            queue=False
                        )

                    save_trajectory = gr.Checkbox(
                        label="Save Trajectory",
                        value=initial_save_trajectory,
                        info="Save the agent's trajectory for debugging",
                        interactive=True,
                    )

                    recent_images = gr.Slider(
                        label="Recent Images",
                        minimum=1,
                        maximum=10,
                        value=initial_recent_images,
                        step=1,
                        info="Number of recent images to keep in context",
                        interactive=True,
                    )

                    # Callable default so each new session starts with no budget set.
                    max_budget = gr.Number(
                        label="Max Budget ($)",
                        value=lambda: None,
                        minimum=-1,
                        maximum=100.0,
                        step=0.1,
                        info="Optional budget limit for trajectory (0 = no limit)",
                        interactive=True,
                    )

            # Right column for chat interface
            with gr.Column(scale=2):
                gr.Markdown(
                    "Ask me to perform tasks in a virtual environment.<br>Built with <a href='https://github.com/trycua/cua' target='_blank'>github.com/trycua/cua</a>."
                )

                chatbot_history = gr.Chatbot(type="messages")
                msg = gr.Textbox(
                    placeholder="Ask me to perform tasks in a virtual environment"
                )
                clear = gr.Button("Clear")
                cancel_button = gr.Button("Cancel", variant="stop")

                # Add examples
                example_group = gr.Examples(examples=example_messages, inputs=msg)

                # Chat submission function: append the user's turn, clear the box.
                def chat_submit(message, history):
                    """Append the user message to history and empty the input."""
                    history.append(gr.ChatMessage(role="user", content=message))
                    return "", history

                # Cancel function.
                # NOTE(review): this only appends a status message — it does not
                # actually stop the running agent task; confirm intended behavior.
                async def cancel_agent_task(history):
                    """Append a cancelled/no-task status message to the chat."""
                    global global_agent
                    if global_agent:
                        print("DEBUG - Cancelling agent task")
                        history.append(gr.ChatMessage(role="assistant", content="Task cancelled by user", metadata={"title": "❌ Cancelled"}))
                    else:
                        history.append(gr.ChatMessage(role="assistant", content="No active agent task to cancel", metadata={"title": "ℹ️ Info"}))
                    return history

                # Process response function: async generator streamed into the chatbot.
                async def process_response(
                    history,
                    openai_model_value,
                    anthropic_model_value,
                    omni_model_value,
                    uitars_model_value,
                    custom_model_value,
                    agent_loop_choice,
                    save_traj,
                    recent_imgs,
                    custom_url_value=None,
                    custom_api_key=None,
                    openai_key_input=None,
                    anthropic_key_input=None,
                    computer_os="linux",
                    computer_provider="cloud",
                    container_name="",
                    cua_cloud_api_key="",
                    max_budget_value=None,
                ):
                    """Build an agent from the current settings, run it on the
                    latest user message, and yield updated chat history as
                    messages / tool calls / outputs stream back."""
                    if not history:
                        yield history
                        return

                    # Get the last user message
                    last_user_message = history[-1]["content"]

                    # Get the appropriate model value based on the agent loop
                    if agent_loop_choice == "OPENAI":
                        model_choice_value = openai_model_value
                    elif agent_loop_choice == "ANTHROPIC":
                        model_choice_value = anthropic_model_value
                    elif agent_loop_choice == "OMNI":
                        model_choice_value = omni_model_value
                    elif agent_loop_choice == "UITARS":
                        model_choice_value = uitars_model_value
                    else:
                        model_choice_value = "No models available"

                    # Determine if this is a custom model selection
                    is_custom_model_selected = model_choice_value in ["Custom model (OpenAI compatible API)", "Custom model (ollama)"]

                    # Determine the model name string to analyze
                    if is_custom_model_selected:
                        model_string_to_analyze = custom_model_value
                    else:
                        model_string_to_analyze = model_choice_value

                    try:
                        # Get the model string
                        model_string = get_model_string(model_string_to_analyze, agent_loop_choice)

                        # Set API keys if provided
                        if openai_key_input:
                            os.environ["OPENAI_API_KEY"] = openai_key_input
                        if anthropic_key_input:
                            os.environ["ANTHROPIC_API_KEY"] = anthropic_key_input
                        if cua_cloud_api_key:
                            os.environ["CUA_API_KEY"] = cua_cloud_api_key

                        # Save settings so the next session restores this setup.
                        current_settings = {
                            "agent_loop": agent_loop_choice,
                            "model_choice": model_choice_value,
                            "custom_model": custom_model_value,
                            "provider_base_url": custom_url_value,
                            "save_trajectory": save_traj,
                            "recent_images": recent_imgs,
                            "computer_os": computer_os,
                            "computer_provider": computer_provider,
                            "container_name": container_name,
                        }
                        save_settings(current_settings)

                        # Create agent
                        # NOTE(review): assigned without a `global` declaration,
                        # so this binds a function-local `global_agent` — the
                        # module-level name checked by cancel_agent_task is not
                        # updated; confirm whether `global global_agent` was intended.
                        global_agent = create_agent(
                            model_string=model_string,
                            save_trajectory=save_traj,
                            only_n_most_recent_images=recent_imgs,
                            custom_model_name=custom_model_value if is_custom_model_selected else None,
                            computer_os=computer_os,
                            computer_provider=computer_provider,
                            computer_name=container_name,
                            computer_api_key=cua_cloud_api_key,
                            verbosity=logging.DEBUG,
                            max_trajectory_budget=max_budget_value if max_budget_value and max_budget_value > 0 else None,
                        )

                        if global_agent is None:
                            history.append(
                                gr.ChatMessage(
                                    role="assistant",
                                    content="Failed to create agent. Check API keys and configuration.",
                                )
                            )
                            yield history
                            return

                        # Create message list for agent
                        messages = [{"role": "user", "content": last_user_message}]

                        # Stream responses from the agent
                        async for result in global_agent.run(messages):
                            print(f"DEBUG - Agent response ------- START")
                            from pprint import pprint
                            pprint(result)
                            print(f"DEBUG - Agent response ------- END")

                            # Process the result output: each item type renders
                            # as a differently formatted assistant message.
                            for item in result.get("output", []):
                                if item.get("type") == "message":
                                    content = item.get("content", [])
                                    for content_part in content:
                                        if content_part.get("text"):
                                            history.append(gr.ChatMessage(
                                                role=item.get("role", "assistant"),
                                                content=content_part.get("text", ""),
                                                metadata=content_part.get("metadata", {})
                                            ))
                                elif item.get("type") == "computer_call":
                                    action = item.get("action", {})
                                    action_type = action.get("type", "")
                                    if action_type:
                                        action_title = f"🛠️ Performing {action_type}"
                                        if action.get("x") and action.get("y"):
                                            action_title += f" at ({action['x']}, {action['y']})"
                                        history.append(gr.ChatMessage(
                                            role="assistant",
                                            content=f"```json\n{json.dumps(action)}\n```",
                                            metadata={"title": action_title}
                                        ))
                                elif item.get("type") == "function_call":
                                    function_name = item.get("name", "")
                                    arguments = item.get("arguments", "{}")
                                    history.append(gr.ChatMessage(
                                        role="assistant",
                                        content=f"🔧 Calling function: {function_name}\n```json\n{arguments}\n```",
                                        metadata={"title": f"Function Call: {function_name}"}
                                    ))
                                elif item.get("type") == "function_call_output":
                                    output = item.get("output", "")
                                    history.append(gr.ChatMessage(
                                        role="assistant",
                                        content=f"📤 Function output:\n```\n{output}\n```",
                                        metadata={"title": "Function Output"}
                                    ))

                            yield history

                    except Exception as e:
                        import traceback
                        traceback.print_exc()
                        history.append(gr.ChatMessage(role="assistant", content=f"Error: {str(e)}"))
                        yield history

                # Connect the submit button: append the turn synchronously,
                # then stream the agent's response into the chatbot.
                submit_event = msg.submit(
                    fn=chat_submit,
                    inputs=[msg, chatbot_history],
                    outputs=[msg, chatbot_history],
                    queue=False,
                ).then(
                    fn=process_response,
                    inputs=[
                        chatbot_history,
                        openai_model_choice,
                        anthropic_model_choice,
                        omni_model_choice,
                        uitars_model_choice,
                        custom_model,
                        agent_loop,
                        save_trajectory,
                        recent_images,
                        provider_base_url,
                        provider_api_key,
                        openai_api_key_input,
                        anthropic_api_key_input,
                        computer_os,
                        computer_provider,
                        container_name,
                        cua_cloud_api_key,
                        max_budget,
                    ],
                    outputs=[chatbot_history],
                    queue=True,
                )

                # Clear button functionality
                clear.click(lambda: None, None, chatbot_history, queue=False)

                # Connect cancel button
                cancel_button.click(
                    cancel_agent_task,
                    [chatbot_history],
                    [chatbot_history],
                    queue=False
                )

                # Code display update function: regenerate the preview script
                # from the current configuration and the user turns so far.
                def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, recent_images_val, save_trajectory_val, computer_os, computer_provider, container_name, cua_cloud_api_key, max_budget_val):
                    """Return generated Python code reflecting the UI state."""
                    # Collect user-authored messages only (dict-shaped turns).
                    messages = []
                    if chat_history:
                        for msg in chat_history:
                            if isinstance(msg, dict) and msg.get("role") == "user":
                                messages.append(msg.get("content", ""))

                    return generate_python_code(
                        agent_loop,
                        model_choice_val or custom_model_val or "gpt-4o",
                        messages,
                        recent_images_val,
                        save_trajectory_val,
                        computer_os,
                        computer_provider,
                        container_name,
                        cua_cloud_api_key,
                        max_budget_val
                    )

                # Update code display when configuration changes
                for component in [agent_loop, model_choice, custom_model, chatbot_history, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key, max_budget]:
                    component.change(
                        update_code_display,
                        inputs=[agent_loop, model_choice, custom_model, chatbot_history, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key, max_budget],
                        outputs=[code_display]
                    )

    return demo