cua-agent 0.1.24__py3-none-any.whl → 0.1.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic.
- agent/core/base.py +20 -0
- agent/core/callbacks.py +57 -2
- agent/providers/anthropic/callbacks/manager.py +20 -10
- agent/providers/omni/clients/oaicompat.py +11 -3
- agent/providers/omni/loop.py +24 -4
- agent/providers/openai/loop.py +13 -4
- agent/ui/gradio/app.py +429 -329
- {cua_agent-0.1.24.dist-info → cua_agent-0.1.26.dist-info}/METADATA +37 -23
- {cua_agent-0.1.24.dist-info → cua_agent-0.1.26.dist-info}/RECORD +11 -11
- {cua_agent-0.1.24.dist-info → cua_agent-0.1.26.dist-info}/WHEEL +1 -1
- {cua_agent-0.1.24.dist-info → cua_agent-0.1.26.dist-info}/entry_points.txt +0 -0
agent/ui/gradio/app.py
CHANGED
@@ -30,11 +30,16 @@ Requirements:
 import os
 import asyncio
 import logging
+import json
+from pathlib import Path
 from typing import Dict, List, Optional, AsyncGenerator, Any, Tuple, Union
 import gradio as gr
+from gradio.components.chatbot import MetadataDict
 
 # Import from agent package
 from agent.core.types import AgentResponse
+from agent.core.callbacks import DefaultCallbackHandler
+from agent.providers.omni.parser import ParseResult
 from computer import Computer
 
 from agent import ComputerAgent, AgentLoop, LLM, LLMProvider
@@ -42,6 +47,86 @@ from agent import ComputerAgent, AgentLoop, LLM, LLMProvider
 # Global variables
 global_agent = None
 global_computer = None
+SETTINGS_FILE = Path(".gradio_settings.json")
+
+# We'll use asyncio.run() instead of a persistent event loop
+
+
+# --- Settings Load/Save Functions ---
+def load_settings() -> Dict[str, Any]:
+    """Loads settings from the JSON file."""
+    if SETTINGS_FILE.exists():
+        try:
+            with open(SETTINGS_FILE, "r") as f:
+                settings = json.load(f)
+            # Basic validation (can be expanded)
+            if isinstance(settings, dict):
+                print(f"DEBUG - Loaded settings from {SETTINGS_FILE}")
+                return settings
+        except (json.JSONDecodeError, IOError) as e:
+            print(f"Warning: Could not load settings from {SETTINGS_FILE}: {e}")
+    return {}
+
+
+def save_settings(settings: Dict[str, Any]):
+    """Saves settings to the JSON file."""
+    # Ensure sensitive keys are not saved
+    settings.pop("provider_api_key", None)
+    try:
+        with open(SETTINGS_FILE, "w") as f:
+            json.dump(settings, f, indent=4)
+        print(f"DEBUG - Saved settings to {SETTINGS_FILE}")
+    except IOError as e:
+        print(f"Warning: Could not save settings to {SETTINGS_FILE}: {e}")
+
+
+# --- End Settings Load/Save ---
+
+
+# Custom Screenshot Handler for Gradio chat
+class GradioChatScreenshotHandler(DefaultCallbackHandler):
+    """Custom handler that adds screenshots to the Gradio chatbot and updates annotated image."""
+
+    def __init__(self, chatbot_history: List[gr.ChatMessage]):
+        """Initialize with reference to chat history and annotated image component.
+
+        Args:
+            chatbot_history: Reference to the Gradio chatbot history list
+            annotated_image: Reference to the annotated image component
+        """
+        self.chatbot_history = chatbot_history
+        print("GradioChatScreenshotHandler initialized")
+
+    async def on_screenshot(
+        self,
+        screenshot_base64: str,
+        action_type: str = "",
+        parsed_screen: Optional[ParseResult] = None,
+    ) -> None:
+        """Add screenshot to chatbot when a screenshot is taken and update the annotated image.
+
+        Args:
+            screenshot_base64: Base64 encoded screenshot
+            action_type: Type of action that triggered the screenshot
+
+        Returns:
+            Original screenshot (does not modify it)
+        """
+        # Create a markdown image element for the screenshot
+        image_markdown = (
+            f"![Screenshot](data:image/png;base64,{screenshot_base64})"
+        )
+
+        # Simply append the screenshot as a new message
+        if self.chatbot_history is not None:
+            self.chatbot_history.append(
+                gr.ChatMessage(
+                    role="assistant",
+                    content=image_markdown,
+                    metadata={"title": f"🖥️ Screenshot - {action_type}", "status": "done"},
+                )
+            )
+
 
 # Map model names to specific provider model names
 MODEL_MAPPINGS = {
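The settings helpers above add a small JSON persistence layer to the Gradio app; note that `save_settings` pops `provider_api_key` before writing, so API keys are never persisted to disk. A minimal round-trip sketch (illustrative usage only, reusing the helpers exactly as defined in this diff):

```python
settings = load_settings()                         # {} on first run or on a parse error
settings["agent_loop"] = "OMNI"
settings["provider_api_key"] = "sk-..."            # supplied at runtime, never persisted
save_settings(settings)                            # pops provider_api_key, then writes JSON
assert "provider_api_key" not in load_settings()   # the secret never reached disk
```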
@@ -53,6 +138,7 @@ MODEL_MAPPINGS = {
         "gpt-4o": "computer_use_preview",
         "gpt-4": "computer_use_preview",
         "gpt-4.5-preview": "computer_use_preview",
+        "gpt-4o-mini": "gpt-4o-mini",
     },
     "anthropic": {
         # Default to newest model
@@ -70,6 +156,7 @@ MODEL_MAPPINGS = {
         # OMNI works with any of these models
         "default": "gpt-4o",
         "gpt-4o": "gpt-4o",
+        "gpt-4o-mini": "gpt-4o-mini",
         "gpt-4": "gpt-4",
         "gpt-4.5-preview": "gpt-4.5-preview",
         "claude-3-5-sonnet-20240620": "claude-3-5-sonnet-20240620",
@@ -119,30 +206,82 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
             model_name.lower(), MODEL_MAPPINGS["anthropic"]["default"]
         )
     elif agent_loop == AgentLoop.OMNI:
-        #
-
+        # Determine provider and clean model name based on the full string from UI
+        cleaned_model_name = model_name  # Default to using the name as-is (for custom)
+
+        if model_name == "Custom model...":
+            # Actual model name comes from custom_model_value via model_to_use.
+            # Assume OAICOMPAT for custom models unless overridden by URL/key later?
+            # get_provider_and_model determines the *initial* provider/model.
+            # The custom URL/key in process_response ultimately dictates the OAICOMPAT setup.
+            provider = LLMProvider.OAICOMPAT
+            # We set cleaned_model_name below outside the checks based on model_to_use
+            cleaned_model_name = ""  # Placeholder, will be set by custom value later
+        elif model_name.startswith("OMNI: Ollama "):
             provider = LLMProvider.OLLAMA
-
-
-
-            model_name_to_use = model_name
-        elif "claude" in model_name.lower():
+            # Extract the part after "OMNI: Ollama "
+            cleaned_model_name = model_name.split("OMNI: Ollama ", 1)[1]
+        elif model_name.startswith("OMNI: Claude "):
             provider = LLMProvider.ANTHROPIC
-
-
-            )
-
+            # Extract the canonical model name based on the UI string
+            # e.g., "OMNI: Claude 3.7 Sonnet (20250219)" -> "3.7 Sonnet" and "20250219"
+            parts = model_name.split(" (")
+            model_key_part = parts[0].replace("OMNI: Claude ", "")
+            date_part = parts[1].replace(")", "") if len(parts) > 1 else ""
+
+            # Normalize the extracted key part for comparison
+            # "3.7 Sonnet" -> "37sonnet"
+            model_key_part_norm = model_key_part.lower().replace(".", "").replace(" ", "")
+
+            cleaned_model_name = MODEL_MAPPINGS["omni"]["default"]  # Default if not found
+            # Find the canonical name in the main Anthropic map
+            for key_anthropic, val_anthropic in MODEL_MAPPINGS["anthropic"].items():
+                # Normalize the canonical key for comparison
+                # "claude-3-7-sonnet-20250219" -> "claude37sonnet20250219"
+                key_anthropic_norm = key_anthropic.lower().replace("-", "")
+
+                # Check if the normalized canonical key starts with "claude" + normalized extracted part
+                # AND contains the date part.
+                if (
+                    key_anthropic_norm.startswith("claude" + model_key_part_norm)
+                    and date_part in key_anthropic_norm
+                ):
+                    cleaned_model_name = (
+                        val_anthropic  # Use the canonical name like "claude-3-7-sonnet-20250219"
+                    )
+                    break
+        elif model_name.startswith("OMNI: OpenAI "):
             provider = LLMProvider.OPENAI
-
-
-
-
-
+            # Extract the model part, e.g., "GPT-4o mini"
+            model_key_part = model_name.replace("OMNI: OpenAI ", "")
+            # Normalize the extracted part: "gpt4omini"
+            model_key_part_norm = model_key_part.lower().replace("-", "").replace(" ", "")
+
+            cleaned_model_name = MODEL_MAPPINGS["omni"]["default"]  # Default if not found
+            # Find the canonical name in the main OMNI map for OpenAI models
+            for key_omni, val_omni in MODEL_MAPPINGS["omni"].items():
+                # Normalize the omni map key: "gpt-4o-mini" -> "gpt4omini"
+                key_omni_norm = key_omni.lower().replace("-", "").replace(" ", "")
+                # Check if the normalized omni key matches the normalized extracted part
+                if key_omni_norm == model_key_part_norm:
+                    cleaned_model_name = (
+                        val_omni  # Use the value from the OMNI map (e.g., gpt-4o-mini)
+                    )
+                    break
+            # Note: No fallback needed here as we explicitly check against omni keys
+
+        else:  # Handles unexpected formats or the raw custom name if "Custom model..." selected
+            # Should only happen if user selected "Custom model..."
+            # Or if a model name format isn't caught above
             provider = LLMProvider.OAICOMPAT
-
-            model_name if model_name.strip() else MODEL_MAPPINGS["oaicompat"]["default"]
+            cleaned_model_name = (
+                model_name.strip() if model_name.strip() else MODEL_MAPPINGS["oaicompat"]["default"]
             )
+
+        # Assign the determined model name
+        model_name_to_use = cleaned_model_name
+        # agent_loop remains AgentLoop.OMNI
+
     else:
         # Default to OpenAI if unrecognized loop
         provider = LLMProvider.OPENAI
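The matching logic above hinges on one normalization trick: lowercase the string and strip `.`, `-`, and spaces, so a UI label like "OMNI: OpenAI GPT-4o mini" lines up with a canonical key like `gpt-4o-mini`. A self-contained sketch of the idea (the `_norm` helper is illustrative and not part of the module):

```python
def _norm(s: str) -> str:
    # Fold case and drop ".", "-", and spaces, as the new matching code does.
    return s.lower().replace(".", "").replace("-", "").replace(" ", "")

assert _norm("GPT-4o mini") == _norm("gpt-4o-mini")  # both become "gpt4omini"

# Claude labels match by prefix plus date: "3.7 Sonnet" and "20250219" together
# identify the canonical key "claude-3-7-sonnet-20250219" once normalized.
key = "claude-3-7-sonnet-20250219".replace("-", "")
assert key.startswith("claude" + _norm("3.7 Sonnet")) and "20250219" in key
```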
@@ -177,17 +316,20 @@ def get_ollama_models() -> List[str]:
         return []
 
 
-def extract_synthesized_text(result: Union[AgentResponse, Dict[str, Any]]) -> str:
+def extract_synthesized_text(
+    result: Union[AgentResponse, Dict[str, Any]],
+) -> Tuple[str, MetadataDict]:
     """Extract synthesized text from the agent result."""
     synthesized_text = ""
+    metadata = MetadataDict()
 
     if "output" in result and result["output"]:
         for output in result["output"]:
             if output.get("type") == "reasoning":
+                metadata["title"] = "🧠 Reasoning"
                 content = output.get("content", "")
                 if content:
                     synthesized_text += f"{content}\n"
-
             elif output.get("type") == "message":
                 # Handle message type outputs - can contain rich content
                 content = output.get("content", [])
@@ -224,7 +366,10 @@ def extract_synthesized_text(result: Union[AgentResponse, Dict[str, Any]]) -> str:
                     else:
                         synthesized_text += f"Performed {action_type} action.\n"
 
-    return synthesized_text.strip()
+    metadata["status"] = "done"
+    metadata["title"] = f"🛠️ {synthesized_text.strip().splitlines()[-1]}"
+
+    return synthesized_text.strip(), metadata
 
 
 def create_computer_instance(verbosity: int = logging.INFO) -> Computer:
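`extract_synthesized_text` now returns a `(text, metadata)` pair instead of a bare string. In `gr.Chatbot`, a message whose metadata carries a `title` renders as a collapsible tool/thought bubble, which is what the 🧠/🛠️ titles above feed into. An illustrative message (the field values are examples, not fixed strings from the module):

```python
msg = gr.ChatMessage(
    role="assistant",
    content="Performed scroll action.",
    metadata={"title": "🛠️ Performed scroll action.", "status": "done"},
)
```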
@@ -245,8 +390,8 @@ def create_agent(
     save_trajectory: bool = True,
     only_n_most_recent_images: int = 3,
     verbosity: int = logging.INFO,
-    use_ollama: bool = False,
     use_oaicompat: bool = False,
+    provider_base_url: Optional[str] = None,
 ) -> ComputerAgent:
     """Create or update the global agent with the specified parameters."""
     global global_agent
@@ -254,15 +399,6 @@ def create_agent(
     # Create the computer if not already done
     computer = create_computer_instance(verbosity=verbosity)
 
-    # Extra configuration to pass to the agent
-    extra_config = {}
-
-    # For Ollama models, we'll pass use_ollama and the model_name directly
-    if use_ollama:
-        extra_config["use_ollama"] = True
-        extra_config["ollama_model"] = model_name
-        print(f"DEBUG - Using Ollama with model: {model_name}")
-
     # Get API key from environment if not provided
     if api_key is None:
         if provider == LLMProvider.OPENAI:
@@ -270,69 +406,52 @@ def create_agent(
         elif provider == LLMProvider.ANTHROPIC:
             api_key = os.environ.get("ANTHROPIC_API_KEY", "")
 
-    #
-
+    # Use provided provider_base_url if available, otherwise use default
+    default_base_url = "http://localhost:1234/v1" if use_oaicompat else None
+    custom_base_url = provider_base_url or default_base_url
 
     if use_oaicompat:
-        # Special handling for OAICOMPAT - use
-        print(f"DEBUG - Creating OAICOMPAT agent with model: {model_name}")
+        # Special handling for OAICOMPAT - use OAICOMPAT provider with custom base URL
+        print(f"DEBUG - Creating OAICOMPAT agent with model: {model_name}, URL: {custom_base_url}")
         llm = LLM(
-            provider=
+            provider=LLMProvider.OAICOMPAT,  # Set to OAICOMPAT instead of using original provider
             name=model_name,
-            provider_base_url=
+            provider_base_url=custom_base_url,
         )
+        print(f"DEBUG - LLM provider is now: {llm.provider}, base URL: {llm.provider_base_url}")
         # Note: Don't pass use_oaicompat to the agent, as it doesn't accept this parameter
     elif provider == LLMProvider.OAICOMPAT:
         # This path is unlikely to be taken with our current approach
-        llm = LLM(provider=provider, name=model_name, provider_base_url=
+        llm = LLM(provider=provider, name=model_name, provider_base_url=custom_base_url)
     else:
        # For other providers, just use standard parameters
        llm = LLM(provider=provider, name=model_name)
 
     # Create or update the agent
-
-
-
-
-
-
-
-
-
-            **extra_config,
-        )
-    else:
-        # Update the existing agent's parameters
-        global_agent._loop = None  # Force recreation of the loop
-        global_agent.provider = provider
-        global_agent.loop = agent_loop
-        global_agent.model = llm
-        global_agent.api_key = api_key
-
-        # Explicitly update these settings to ensure they take effect
-        global_agent.save_trajectory = save_trajectory
-        global_agent.only_n_most_recent_images = only_n_most_recent_images
-
-        # Update Ollama settings if applicable
-        if use_ollama:
-            global_agent.use_ollama = True
-            global_agent.ollama_model = model_name
-        else:
-            global_agent.use_ollama = False
-            global_agent.ollama_model = None
-
-        # Log the updated settings
-        logging.info(
-            f"Updated agent settings: save_trajectory={save_trajectory}, recent_images={only_n_most_recent_images}"
-        )
+    global_agent = ComputerAgent(
+        computer=computer,
+        loop=agent_loop,
+        model=llm,
+        api_key=api_key,
+        save_trajectory=save_trajectory,
+        only_n_most_recent_images=only_n_most_recent_images,
+        verbosity=verbosity,
+    )
 
     return global_agent
 
 
-def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> str:
+def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> Tuple[str, MetadataDict]:
     """Process agent results for the Gradio UI."""
     # Extract text content
     text_obj = result.get("text", {})
+    metadata = result.get("metadata", {})
+
+    # Create a properly typed MetadataDict
+    metadata_dict = MetadataDict()
+    metadata_dict["title"] = metadata.get("title", "")
+    metadata_dict["status"] = "done"
+    metadata = metadata_dict
 
     # For OpenAI's Computer-Use Agent, text field is an object with format property
     if (
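`create_agent` now unconditionally rebuilds the global `ComputerAgent` (the old in-place update branch and the Ollama-specific `extra_config` plumbing are gone), and custom endpoints go through the OAICOMPAT provider with a caller-supplied base URL that falls back to LM Studio's default `http://localhost:1234/v1`. A hypothetical call against a local OpenAI-compatible server, following the signature shown in this diff:

```python
agent = create_agent(
    provider=LLMProvider.OAICOMPAT,
    agent_loop=AgentLoop.OMNI,
    model_name="Qwen2.5-VL-7B-Instruct",
    api_key="",                                    # many local servers ignore the key
    use_oaicompat=True,
    provider_base_url="http://localhost:1234/v1",  # omit to fall back to this default
)
```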
@@ -341,8 +460,11 @@ def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> str:
         and "format" in text_obj
         and not text_obj.get("value", "")
     ):
-        content = extract_synthesized_text(result)
+        content, metadata = extract_synthesized_text(result)
     else:
+        if not text_obj:
+            text_obj = result
+
         # For other types of results, try to get text directly
         if isinstance(text_obj, dict):
             if "value" in text_obj:
@@ -375,177 +497,7 @@ def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> str:
     if not isinstance(content, str):
         content = str(content) if content else ""
 
-    return content
-
-
-def respond(
-    message: str,
-    history: List[Tuple[str, str]],
-    model_choice,  # Accept Gradio Dropdown component
-    agent_loop,  # Accept Gradio Dropdown component
-    save_trajectory,  # Accept Gradio Checkbox component
-    recent_images,  # Accept Gradio Slider component
-    openai_api_key: Optional[str] = None,
-    anthropic_api_key: Optional[str] = None,
-) -> str:
-    """Process a message with the Computer-Use Agent and return the response."""
-    import asyncio
-
-    # Get actual values from Gradio components
-    model_choice_value = model_choice.value if hasattr(model_choice, "value") else model_choice
-    agent_loop_value = agent_loop.value if hasattr(agent_loop, "value") else agent_loop
-    save_trajectory_value = (
-        save_trajectory.value if hasattr(save_trajectory, "value") else save_trajectory
-    )
-    recent_images_value = int(
-        recent_images.value if hasattr(recent_images, "value") else recent_images
-    )
-
-    # Debug logging
-    print(f"DEBUG - Model choice object: {type(model_choice)}")
-    print(f"DEBUG - Model choice value: {model_choice_value}")
-    print(f"DEBUG - Agent loop value: {agent_loop_value}")
-
-    # Create a new event loop for this function call
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    async def _async_respond():
-        # Extract the loop type and model from the selection
-        loop_provider = "OPENAI"
-        if isinstance(model_choice_value, str):
-            # This is the case for a custom text input from textbox
-            if agent_loop_value == "OMNI":
-                loop_provider = "OMNI"
-                # Use the custom model name as is
-                model_id = model_choice_value
-                print(f"DEBUG - Using custom model: {model_id}")
-            else:
-                # Handle regular dropdown value as string
-                if model_choice_value.startswith("OpenAI:"):
-                    loop_provider = "OPENAI"
-                    model_id = model_choice_value.replace("OpenAI: ", "").lower()
-                elif model_choice_value.startswith("Anthropic:"):
-                    loop_provider = "ANTHROPIC"
-                    model_id = model_choice_value.replace("Anthropic: ", "").lower()
-                elif model_choice_value.startswith("OMNI:"):
-                    loop_provider = "OMNI"
-                    if "GPT" in model_choice_value:
-                        model_id = model_choice_value.replace("OMNI: OpenAI ", "").lower()
-                    elif "Claude" in model_choice_value:
-                        model_id = model_choice_value.replace("OMNI: ", "").lower()
-                    elif "Ollama" in model_choice_value:
-                        loop_provider = "OMNI-OLLAMA"
-                        # Extract everything after "OMNI: Ollama " which is the full model name (e.g., phi3:latest)
-                        model_id = model_choice_value.replace("OMNI: Ollama ", "")
-                        print(f"DEBUG - Ollama model ID: {model_id}")
-                    else:
-                        model_id = "default"
-                else:
-                    # Default case
-                    loop_provider = agent_loop_value
-                    model_id = "default"
-        else:
-            # Model choice is not a string (shouldn't happen, but handle anyway)
-            loop_provider = agent_loop_value
-            model_id = "default"
-
-        print(f"DEBUG - Using loop provider: {loop_provider}, model_id: {model_id}")
-
-        # Use the mapping function to get provider, model name and agent loop
-        provider, model_name, agent_loop_type = get_provider_and_model(model_id, loop_provider)
-        print(
-            f"DEBUG - After mapping: provider={provider}, model_name={model_name}, agent_loop={agent_loop_type}"
-        )
-
-        # Special handling for OAICOMPAT to bypass provider-specific errors
-        # Creates the agent with OPENAI provider but using custom model name and provider base URL
-        is_oaicompat = str(provider) == "oaicompat"
-        if is_oaicompat:
-            provider = LLMProvider.OPENAI
-
-        # Get API key based on provider
-        if provider == LLMProvider.OPENAI:
-            api_key = openai_api_key or os.environ.get("OPENAI_API_KEY", "")
-        elif provider == LLMProvider.ANTHROPIC:
-            api_key = anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")
-        else:
-            api_key = ""
-
-        # Check for settings changes if agent already exists
-        settings_changed = False
-        settings_message = ""
-        if global_agent is not None:
-            # Safely check if save_trajectory setting changed
-            current_save_traj = getattr(global_agent, "save_trajectory", None)
-            if current_save_traj is not None and current_save_traj != save_trajectory_value:
-                settings_changed = True
-                settings_message += f"Save trajectory set to: {save_trajectory_value}. "
-
-            # Safely check if recent_images setting changed
-            current_recent_images = getattr(global_agent, "only_n_most_recent_images", None)
-            if current_recent_images is not None and current_recent_images != recent_images_value:
-                settings_changed = True
-                settings_message += f"Recent images set to: {recent_images_value}. "
-
-        # Create or update the agent
-        try:
-            create_agent(
-                provider=provider,
-                agent_loop=agent_loop_type,
-                model_name=model_name,
-                api_key=api_key,
-                save_trajectory=save_trajectory_value,
-                only_n_most_recent_images=recent_images_value,
-                use_ollama=loop_provider == "OMNI-OLLAMA",
-                use_oaicompat=is_oaicompat,
-            )
-
-            if global_agent is None:
-                return "Failed to create agent. Check API keys and configuration."
-        except Exception as e:
-            return f"Error creating agent: {str(e)}"
-
-        # Notify about settings changes if needed
-        if settings_changed:
-            return f"Settings updated: {settings_message}"
-
-        # Collect all responses
-        response_text = []
-
-        # Run the agent
-        try:
-            async for result in global_agent.run(message):
-                # Process result
-                content = process_agent_result(result)
-
-                # Skip empty content
-                if not content:
-                    continue
-
-                # Add content to response list
-                response_text.append(content)
-
-            # Return the full response as a single string
-            return "\n".join(response_text) if response_text else "Task completed."
-
-        except Exception as e:
-            import traceback
-
-            traceback.print_exc()
-            return f"Error: {str(e)}"
-
-    # Run the async function and get the result
-    try:
-        result = loop.run_until_complete(_async_respond())
-        loop.close()
-        return result
-    except Exception as e:
-        loop.close()
-        import traceback
-
-        traceback.print_exc()
-        return f"Error executing async operation: {str(e)}"
+    return content, metadata
 
 
 def create_gradio_ui(
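With the blocking `respond()` helper (and its hand-rolled `asyncio.new_event_loop()` management) removed, `process_agent_result` is the remaining result-shaping step, and it now returns a `(content, metadata)` pair. Roughly, the new contract looks like this (the `result` value is whatever `global_agent.run()` yields):

```python
content, metadata = process_agent_result(result)
# content:  plain text for the chat bubble, possibly ""
# metadata: e.g. {"title": "🧠 Reasoning", "status": "done"}, per Gradio's MetadataDict
```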
@@ -561,6 +513,10 @@ def create_gradio_ui(
     Returns:
         A Gradio Blocks application
     """
+    # --- Load Settings ---
+    saved_settings = load_settings()
+    # --- End Load Settings ---
+
     # Check for API keys
     openai_api_key = os.environ.get("OPENAI_API_KEY", "")
     anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
@@ -574,6 +530,7 @@ def create_gradio_ui(
         openai_models = ["OpenAI: Computer-Use Preview"]
         omni_models += [
             "OMNI: OpenAI GPT-4o",
+            "OMNI: OpenAI GPT-4o mini",
             "OMNI: OpenAI GPT-4.5-preview",
         ]
 
@@ -596,21 +553,33 @@ def create_gradio_ui(
         "OMNI": omni_models + ["Custom model..."],  # Add custom model option
     }
 
-    #
-
-
-
-
-
-    initial_model =
+    # --- Apply Saved Settings (override defaults if available) ---
+    initial_loop = saved_settings.get("agent_loop", "OMNI")
+    # Ensure the saved model is actually available in the choices for the loaded loop
+    available_models_for_loop = provider_to_models.get(initial_loop, [])
+    saved_model_choice = saved_settings.get("model_choice")
+    if saved_model_choice and saved_model_choice in available_models_for_loop:
+        initial_model = saved_model_choice
     else:
-
-        if
-            initial_model =
-
-
+        # If saved model isn't valid for the loop, reset to default for that loop
+        if initial_loop == "OPENAI":
+            initial_model = (
+                "OpenAI: Computer-Use Preview" if openai_models else "No models available"
+            )
+        elif initial_loop == "ANTHROPIC":
+            initial_model = anthropic_models[0] if anthropic_models else "No models available"
+        else:  # OMNI
             initial_model = omni_models[0] if omni_models else "No models available"
+            if "Custom model..." in available_models_for_loop:
+                initial_model = (
+                    "Custom model..."  # Default to custom if available and no other default fits
+                )
+
+    initial_custom_model = saved_settings.get("custom_model", "Qwen2.5-VL-7B-Instruct")
+    initial_provider_base_url = saved_settings.get("provider_base_url", "http://localhost:1234/v1")
+    initial_save_trajectory = saved_settings.get("save_trajectory", True)
+    initial_recent_images = saved_settings.get("recent_images", 3)
+    # --- End Apply Saved Settings ---
 
     # Example prompts
     example_messages = [
@@ -703,7 +672,7 @@ def create_gradio_ui(
     ### 3. Pull the pre-built macOS image
 
     ```bash
-    lume pull macos-sequoia-cua:latest
+    lume pull macos-sequoia-cua:latest
     ```
 
     Initial download requires 80GB storage, but reduces to ~30GB after first run due to macOS's sparse file system.
@@ -720,48 +689,68 @@ def create_gradio_ui(
                 """
                 )
 
-
-
-
-
-
-
-
+                with gr.Accordion("Configuration", open=True):
+                    # Configuration options
+                    agent_loop = gr.Dropdown(
+                        choices=["OPENAI", "ANTHROPIC", "OMNI"],
+                        label="Agent Loop",
+                        value=initial_loop,
+                        info="Select the agent loop provider",
+                    )
 
-
-
-
-
-
-
-
-
+                    # Create model selection dropdown with custom value support for OMNI
+                    model_choice = gr.Dropdown(
+                        choices=provider_to_models.get(initial_loop, ["No models available"]),
+                        label="LLM Provider and Model",
+                        value=initial_model,
+                        info="Select model or choose 'Custom model...' to enter a custom name",
+                        interactive=True,
+                    )
 
-
-
-
-
-
-
-
-
+                    # Add custom model textbox (only visible when "Custom model..." is selected)
+                    custom_model = gr.Textbox(
+                        label="Custom Model Name",
+                        placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct)",
+                        value=initial_custom_model,
+                        visible=(initial_model == "Custom model..."),
+                        interactive=True,
+                    )
 
-
-
-
-
-
-
+                    # Add custom provider base URL textbox (only visible when "Custom model..." is selected)
+                    provider_base_url = gr.Textbox(
+                        label="Provider Base URL",
+                        placeholder="Enter provider base URL (e.g., http://localhost:1234/v1)",
+                        value=initial_provider_base_url,
+                        visible=(initial_model == "Custom model..."),
+                        interactive=True,
+                    )
 
-
-
-
-
-
-
-
-
-
+                    # Add custom API key textbox (only visible when "Custom model..." is selected)
+                    provider_api_key = gr.Textbox(
+                        label="Provider API Key",
+                        placeholder="Enter provider API key (if required)",
+                        value="",
+                        visible=(initial_model == "Custom model..."),
+                        interactive=True,
+                        type="password",
+                    )
+
+                    save_trajectory = gr.Checkbox(
+                        label="Save Trajectory",
+                        value=initial_save_trajectory,
+                        info="Save the agent's trajectory for debugging",
+                        interactive=True,
+                    )
+
+                    recent_images = gr.Slider(
+                        label="Recent Images",
+                        minimum=1,
+                        maximum=10,
+                        value=initial_recent_images,
+                        step=1,
+                        info="Number of recent images to keep in context",
+                        interactive=True,
+                    )
 
             # Right column for chat interface
             with gr.Column(scale=2):
@@ -770,7 +759,7 @@ def create_gradio_ui(
                     "Ask me to perform tasks in a virtual macOS environment.<br>Built with <a href='https://github.com/trycua/cua' target='_blank'>github.com/trycua/cua</a>."
                 )
 
-
+                chatbot_history = gr.Chatbot(type="messages")
                 msg = gr.Textbox(
                     placeholder="Ask me to perform tasks in a virtual macOS environment"
                 )
@@ -782,63 +771,169 @@ def create_gradio_ui(
            # Function to handle chat submission
            def chat_submit(message, history):
                # Add user message to history
-                history =
+                history.append(gr.ChatMessage(role="user", content=message))
                return "", history
 
            # Function to process agent response after user input
-            def process_response(
+            async def process_response(
                history,
                model_choice_value,
                custom_model_value,
                agent_loop_choice,
                save_traj,
                recent_imgs,
+                custom_url_value=None,
+                custom_api_key=None,
            ):
                if not history:
-
+                    yield history
+                    return
 
                # Get the last user message
-                last_user_message = history[-1][
+                last_user_message = history[-1]["content"]
 
-                #
-
+                # Determine the model name string to analyze: custom or from dropdown
+                model_string_to_analyze = (
                    custom_model_value
                    if model_choice_value == "Custom model..."
-                    else model_choice_value
+                    else model_choice_value  # Use the full UI string initially
                )
 
-                #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                # Determine if this is a custom model selection
+                is_custom_model_selected = model_choice_value == "Custom model..."
+
+                try:
+                    # Get the provider, *cleaned* model name, and agent loop type
+                    provider, cleaned_model_name_from_func, agent_loop_type = (
+                        get_provider_and_model(model_string_to_analyze, agent_loop_choice)
+                    )
+
+                    # Determine the final model name to send to the agent
+                    # If custom selected, use the custom text box value, otherwise use the cleaned name
+                    final_model_name_to_send = (
+                        custom_model_value
+                        if is_custom_model_selected
+                        else cleaned_model_name_from_func
+                    )
+
+                    # Determine if OAICOMPAT should be used (only if custom model explicitly selected)
+                    is_oaicompat = is_custom_model_selected
+
+                    # Get API key based on provider determined by get_provider_and_model
+                    if is_oaicompat and custom_api_key:
+                        # Use custom API key if provided for custom model
+                        api_key = custom_api_key
+                        print(
+                            f"DEBUG - Using custom API key for model: {final_model_name_to_send}"
+                        )
+                    elif provider == LLMProvider.OPENAI:
+                        api_key = openai_api_key or os.environ.get("OPENAI_API_KEY", "")
+                    elif provider == LLMProvider.ANTHROPIC:
+                        api_key = anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")
+                    else:
+                        # For Ollama or default OAICOMPAT (without custom key), no key needed/expected
+                        api_key = ""
+
+                    # --- Save Settings Before Running Agent ---
+                    current_settings = {
+                        "agent_loop": agent_loop_choice,
+                        "model_choice": model_choice_value,
+                        "custom_model": custom_model_value,
+                        "provider_base_url": custom_url_value,
+                        "save_trajectory": save_traj,
+                        "recent_images": recent_imgs,
+                    }
+                    save_settings(current_settings)
+                    # --- End Save Settings ---
+
+                    # Create or update the agent
+                    create_agent(
+                        # Provider determined by get_provider_and_model unless custom model selected
+                        provider=LLMProvider.OAICOMPAT if is_oaicompat else provider,
+                        agent_loop=agent_loop_type,
+                        # Pass the FINAL determined model name (cleaned or custom)
+                        model_name=final_model_name_to_send,
+                        api_key=api_key,
+                        save_trajectory=save_traj,
+                        only_n_most_recent_images=recent_imgs,
+                        use_oaicompat=is_oaicompat,  # Set flag if custom model was selected
+                        # Pass custom URL only if custom model was selected
+                        provider_base_url=custom_url_value if is_oaicompat else None,
+                        verbosity=logging.DEBUG,  # Added verbosity here
+                    )
+
+                    if global_agent is None:
+                        # Add initial empty assistant message
+                        history.append(
+                            gr.ChatMessage(
+                                role="assistant",
+                                content="Failed to create agent. Check API keys and configuration.",
+                            )
+                        )
+                        yield history
+                        return
+
+                    # Add the screenshot handler to the agent's loop if available
+                    if global_agent and hasattr(global_agent, "_loop"):
+                        print("DEBUG - Adding screenshot handler to agent loop")
+
+                        # Create the screenshot handler with references to UI components
+                        screenshot_handler = GradioChatScreenshotHandler(history)
+
+                        # Add the handler to the callback manager if it exists AND is not None
+                        if (
+                            hasattr(global_agent._loop, "callback_manager")
+                            and global_agent._loop.callback_manager is not None
+                        ):
+                            global_agent._loop.callback_manager.add_handler(screenshot_handler)
+                            print(
+                                f"DEBUG - Screenshot handler added to callback manager with history: {id(history)}"
+                            )
+                        else:
+                            # Optional: Log a warning if the callback manager is missing/None for a specific loop
+                            print(
+                                f"WARNING - Callback manager not found or is None for loop type: {type(global_agent._loop)}. Screenshot handler not added."
+                            )
+
+                    # Stream responses from the agent
+                    async for result in global_agent.run(last_user_message):
+                        # Process result
+                        content, metadata = process_agent_result(result)
+
+                        # Skip empty content
+                        if content or metadata.get("title"):
+                            history.append(
+                                gr.ChatMessage(
+                                    role="assistant", content=content, metadata=metadata
+                                )
+                            )
+                        yield history
+                except Exception as e:
+                    import traceback
+
+                    traceback.print_exc()
+                    # Update with error message
+                    history.append(gr.ChatMessage(role="assistant", content=f"Error: {str(e)}"))
+                    yield history
 
            # Connect the components
-            msg.submit(chat_submit, [msg,
+            msg.submit(chat_submit, [msg, chatbot_history], [msg, chatbot_history]).then(
                process_response,
                [
-
+                    chatbot_history,
                    model_choice,
                    custom_model,
                    agent_loop,
                    save_trajectory,
                    recent_images,
+                    provider_base_url,
+                    provider_api_key,
                ],
-                [
+                [chatbot_history],
            )
 
            # Clear button functionality
-            clear.click(lambda: None, None,
+            clear.click(lambda: None, None, chatbot_history, queue=False)
 
            # Connect agent_loop changes to model selection
            agent_loop.change(
@@ -848,14 +943,19 @@ def create_gradio_ui(
                queue=False,  # Process immediately without queueing
            )
 
-            # Show/hide custom model
+            # Show/hide custom model, provider base URL, and API key textboxes based on dropdown selection
            def update_custom_model_visibility(model_value):
-
+                is_custom = model_value == "Custom model..."
+                return (
+                    gr.update(visible=is_custom),
+                    gr.update(visible=is_custom),
+                    gr.update(visible=is_custom),
+                )
 
            model_choice.change(
                fn=update_custom_model_visibility,
                inputs=[model_choice],
-                outputs=[custom_model],
+                outputs=[custom_model, provider_base_url, provider_api_key],
                queue=False,  # Process immediately without queueing
            )
 
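Taken together, the chat wiring replaces one synchronous call with a streamed pipeline: `chat_submit` appends the user message, then `process_response`, an async generator, yields the growing message list after each agent step, and Gradio re-renders the `Chatbot` on every yield. A minimal standalone sketch of the same pattern (component and function names here are illustrative, not from the module):

```python
import gradio as gr

async def stream(history):
    # Yield after each step; Gradio re-renders the Chatbot per yield.
    for step in ("thinking...", "clicking...", "done"):
        history.append(gr.ChatMessage(role="assistant", content=step))
        yield history

with gr.Blocks() as demo:
    chat = gr.Chatbot(type="messages")
    box = gr.Textbox()
    box.submit(
        lambda m, h: ("", h + [gr.ChatMessage(role="user", content=m)]),
        [box, chat],
        [box, chat],
    ).then(stream, [chat], [chat])

demo.launch()
```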