cua-agent 0.1.25__py3-none-any.whl → 0.1.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cua-agent might be problematic.
- agent/core/base.py +20 -0
- agent/core/callbacks.py +57 -2
- agent/providers/anthropic/callbacks/manager.py +20 -10
- agent/providers/omni/clients/oaicompat.py +11 -3
- agent/providers/omni/loop.py +8 -2
- agent/providers/openai/loop.py +13 -4
- agent/ui/gradio/app.py +426 -331
- {cua_agent-0.1.25.dist-info → cua_agent-0.1.27.dist-info}/METADATA +30 -37
- {cua_agent-0.1.25.dist-info → cua_agent-0.1.27.dist-info}/RECORD +11 -11
- {cua_agent-0.1.25.dist-info → cua_agent-0.1.27.dist-info}/WHEEL +1 -1
- {cua_agent-0.1.25.dist-info → cua_agent-0.1.27.dist-info}/entry_points.txt +0 -0
agent/ui/gradio/app.py
CHANGED
@@ -30,11 +30,16 @@ Requirements:
 import os
 import asyncio
 import logging
+import json
+from pathlib import Path
 from typing import Dict, List, Optional, AsyncGenerator, Any, Tuple, Union
 import gradio as gr
+from gradio.components.chatbot import MetadataDict
 
 # Import from agent package
 from agent.core.types import AgentResponse
+from agent.core.callbacks import DefaultCallbackHandler
+from agent.providers.omni.parser import ParseResult
 from computer import Computer
 
 from agent import ComputerAgent, AgentLoop, LLM, LLMProvider
@@ -42,6 +47,86 @@ from agent import ComputerAgent, AgentLoop, LLM, LLMProvider
 # Global variables
 global_agent = None
 global_computer = None
+SETTINGS_FILE = Path(".gradio_settings.json")
+
+# We'll use asyncio.run() instead of a persistent event loop
+
+
+# --- Settings Load/Save Functions ---
+def load_settings() -> Dict[str, Any]:
+    """Loads settings from the JSON file."""
+    if SETTINGS_FILE.exists():
+        try:
+            with open(SETTINGS_FILE, "r") as f:
+                settings = json.load(f)
+            # Basic validation (can be expanded)
+            if isinstance(settings, dict):
+                print(f"DEBUG - Loaded settings from {SETTINGS_FILE}")
+                return settings
+        except (json.JSONDecodeError, IOError) as e:
+            print(f"Warning: Could not load settings from {SETTINGS_FILE}: {e}")
+    return {}
+
+
+def save_settings(settings: Dict[str, Any]):
+    """Saves settings to the JSON file."""
+    # Ensure sensitive keys are not saved
+    settings.pop("provider_api_key", None)
+    try:
+        with open(SETTINGS_FILE, "w") as f:
+            json.dump(settings, f, indent=4)
+        print(f"DEBUG - Saved settings to {SETTINGS_FILE}")
+    except IOError as e:
+        print(f"Warning: Could not save settings to {SETTINGS_FILE}: {e}")
+
+
+# --- End Settings Load/Save ---
+
+
+# Custom Screenshot Handler for Gradio chat
+class GradioChatScreenshotHandler(DefaultCallbackHandler):
+    """Custom handler that adds screenshots to the Gradio chatbot and updates annotated image."""
+
+    def __init__(self, chatbot_history: List[gr.ChatMessage]):
+        """Initialize with reference to chat history and annotated image component.
+
+        Args:
+            chatbot_history: Reference to the Gradio chatbot history list
+            annotated_image: Reference to the annotated image component
+        """
+        self.chatbot_history = chatbot_history
+        print("GradioChatScreenshotHandler initialized")
+
+    async def on_screenshot(
+        self,
+        screenshot_base64: str,
+        action_type: str = "",
+        parsed_screen: Optional[ParseResult] = None,
+    ) -> None:
+        """Add screenshot to chatbot when a screenshot is taken and update the annotated image.
+
+        Args:
+            screenshot_base64: Base64 encoded screenshot
+            action_type: Type of action that triggered the screenshot
+
+        Returns:
+            Original screenshot (does not modify it)
+        """
+        # Create a markdown image element for the screenshot
+        image_markdown = (
+            f"![Screenshot](data:image/png;base64,{screenshot_base64})"
+        )
+
+        # Simply append the screenshot as a new message
+        if self.chatbot_history is not None:
+            self.chatbot_history.append(
+                gr.ChatMessage(
+                    role="assistant",
+                    content=image_markdown,
+                    metadata={"title": f"🖥️ Screenshot - {action_type}", "status": "done"},
+                )
+            )
+
 
 # Map model names to specific provider model names
 MODEL_MAPPINGS = {
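Note: a minimal sketch of how the two new persistence helpers round-trip, assuming the wheel is installed and run from the app's working directory (values are illustrative, not from the package):

```python
# Illustrative only: exercises load_settings/save_settings from the hunk above.
from agent.ui.gradio.app import load_settings, save_settings

settings = load_settings()  # returns {} on first run or on an unreadable file
settings.update({
    "agent_loop": "OMNI",
    "recent_images": 3,
    "provider_api_key": "sk-do-not-persist",  # stripped by save_settings
})
save_settings(settings)  # writes .gradio_settings.json without the API key
assert "provider_api_key" not in load_settings()
```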
@@ -53,6 +138,7 @@ MODEL_MAPPINGS = {
         "gpt-4o": "computer_use_preview",
         "gpt-4": "computer_use_preview",
         "gpt-4.5-preview": "computer_use_preview",
+        "gpt-4o-mini": "gpt-4o-mini",
     },
     "anthropic": {
         # Default to newest model
@@ -70,6 +156,7 @@ MODEL_MAPPINGS = {
         # OMNI works with any of these models
         "default": "gpt-4o",
         "gpt-4o": "gpt-4o",
+        "gpt-4o-mini": "gpt-4o-mini",
         "gpt-4": "gpt-4",
         "gpt-4.5-preview": "gpt-4.5-preview",
         "claude-3-5-sonnet-20240620": "claude-3-5-sonnet-20240620",
@@ -119,30 +206,82 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
             model_name.lower(), MODEL_MAPPINGS["anthropic"]["default"]
         )
     elif agent_loop == AgentLoop.OMNI:
-        #
-
+        # Determine provider and clean model name based on the full string from UI
+        cleaned_model_name = model_name  # Default to using the name as-is (for custom)
+
+        if model_name == "Custom model...":
+            # Actual model name comes from custom_model_value via model_to_use.
+            # Assume OAICOMPAT for custom models unless overridden by URL/key later?
+            # get_provider_and_model determines the *initial* provider/model.
+            # The custom URL/key in process_response ultimately dictates the OAICOMPAT setup.
+            provider = LLMProvider.OAICOMPAT
+            # We set cleaned_model_name below outside the checks based on model_to_use
+            cleaned_model_name = ""  # Placeholder, will be set by custom value later
+        elif model_name.startswith("OMNI: Ollama "):
             provider = LLMProvider.OLLAMA
-
-
-
-            model_name_to_use = model_name
-        elif "claude" in model_name.lower():
+            # Extract the part after "OMNI: Ollama "
+            cleaned_model_name = model_name.split("OMNI: Ollama ", 1)[1]
+        elif model_name.startswith("OMNI: Claude "):
             provider = LLMProvider.ANTHROPIC
-
-
-            )
-
+            # Extract the canonical model name based on the UI string
+            # e.g., "OMNI: Claude 3.7 Sonnet (20250219)" -> "3.7 Sonnet" and "20250219"
+            parts = model_name.split(" (")
+            model_key_part = parts[0].replace("OMNI: Claude ", "")
+            date_part = parts[1].replace(")", "") if len(parts) > 1 else ""
+
+            # Normalize the extracted key part for comparison
+            # "3.7 Sonnet" -> "37sonnet"
+            model_key_part_norm = model_key_part.lower().replace(".", "").replace(" ", "")
+
+            cleaned_model_name = MODEL_MAPPINGS["omni"]["default"]  # Default if not found
+            # Find the canonical name in the main Anthropic map
+            for key_anthropic, val_anthropic in MODEL_MAPPINGS["anthropic"].items():
+                # Normalize the canonical key for comparison
+                # "claude-3-7-sonnet-20250219" -> "claude37sonnet20250219"
+                key_anthropic_norm = key_anthropic.lower().replace("-", "")
+
+                # Check if the normalized canonical key starts with "claude" + normalized extracted part
+                # AND contains the date part.
+                if (
+                    key_anthropic_norm.startswith("claude" + model_key_part_norm)
+                    and date_part in key_anthropic_norm
+                ):
+                    cleaned_model_name = (
+                        val_anthropic  # Use the canonical name like "claude-3-7-sonnet-20250219"
+                    )
+                    break
+        elif model_name.startswith("OMNI: OpenAI "):
             provider = LLMProvider.OPENAI
-
-
-
-
-
+            # Extract the model part, e.g., "GPT-4o mini"
+            model_key_part = model_name.replace("OMNI: OpenAI ", "")
+            # Normalize the extracted part: "gpt4omini"
+            model_key_part_norm = model_key_part.lower().replace("-", "").replace(" ", "")
+
+            cleaned_model_name = MODEL_MAPPINGS["omni"]["default"]  # Default if not found
+            # Find the canonical name in the main OMNI map for OpenAI models
+            for key_omni, val_omni in MODEL_MAPPINGS["omni"].items():
+                # Normalize the omni map key: "gpt-4o-mini" -> "gpt4omini"
+                key_omni_norm = key_omni.lower().replace("-", "").replace(" ", "")
+                # Check if the normalized omni key matches the normalized extracted part
+                if key_omni_norm == model_key_part_norm:
+                    cleaned_model_name = (
+                        val_omni  # Use the value from the OMNI map (e.g., gpt-4o-mini)
+                    )
+                    break
+            # Note: No fallback needed here as we explicitly check against omni keys
+
+        else:  # Handles unexpected formats or the raw custom name if "Custom model..." selected
+            # Should only happen if user selected "Custom model..."
+            # Or if a model name format isn't caught above
             provider = LLMProvider.OAICOMPAT
-
-
-            model_name if model_name.strip() else MODEL_MAPPINGS["oaicompat"]["default"]
+            cleaned_model_name = (
+                model_name.strip() if model_name.strip() else MODEL_MAPPINGS["oaicompat"]["default"]
             )
+
+        # Assign the determined model name
+        model_name_to_use = cleaned_model_name
+        # agent_loop remains AgentLoop.OMNI
+
     else:
         # Default to OpenAI if unrecognized loop
         provider = LLMProvider.OPENAI
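Note: the new normalization in `get_provider_and_model` is easier to follow with a concrete value. A standalone replay (not part of the package) of the Anthropic branch above:

```python
# Standalone sketch of the matching logic above; values are illustrative.
ui_string = "OMNI: Claude 3.7 Sonnet (20250219)"
parts = ui_string.split(" (")
model_key_part = parts[0].replace("OMNI: Claude ", "")           # "3.7 Sonnet"
date_part = parts[1].replace(")", "") if len(parts) > 1 else ""  # "20250219"
model_key_part_norm = model_key_part.lower().replace(".", "").replace(" ", "")  # "37sonnet"

key_anthropic = "claude-3-7-sonnet-20250219"                     # canonical map key
key_anthropic_norm = key_anthropic.replace("-", "")              # "claude37sonnet20250219"
assert key_anthropic_norm.startswith("claude" + model_key_part_norm)
assert date_part in key_anthropic_norm  # both checks pass, so the canonical name is used
```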
@@ -177,17 +316,20 @@ def get_ollama_models() -> List[str]:
         return []
 
 
-def extract_synthesized_text(result: Union[AgentResponse, Dict[str, Any]]) -> str:
+def extract_synthesized_text(
+    result: Union[AgentResponse, Dict[str, Any]],
+) -> Tuple[str, MetadataDict]:
     """Extract synthesized text from the agent result."""
     synthesized_text = ""
+    metadata = MetadataDict()
 
     if "output" in result and result["output"]:
         for output in result["output"]:
             if output.get("type") == "reasoning":
+                metadata["title"] = "🧠 Reasoning"
                 content = output.get("content", "")
                 if content:
                     synthesized_text += f"{content}\n"
-
             elif output.get("type") == "message":
                 # Handle message type outputs - can contain rich content
                 content = output.get("content", [])
@@ -224,7 +366,10 @@ def extract_synthesized_text(result: Union[AgentResponse, Dict[str, Any]]) -> st
             else:
                 synthesized_text += f"Performed {action_type} action.\n"
 
-    return synthesized_text.strip()
+    metadata["status"] = "done"
+    metadata["title"] = f"🛠️ {synthesized_text.strip().splitlines()[-1]}"
+
+    return synthesized_text.strip(), metadata
 
 
 def create_computer_instance(verbosity: int = logging.INFO) -> Computer:
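Note: `MetadataDict()` may read like a class instantiation, but gradio defines `MetadataDict` as a `TypedDict`, so at runtime the call simply builds a plain dict; the class exists for static typing. A quick sketch, assuming a gradio version that exports `gradio.components.chatbot.MetadataDict`:

```python
from gradio.components.chatbot import MetadataDict

meta = MetadataDict()  # runtime value is just an empty dict
meta["title"] = "🧠 Reasoning"
meta["status"] = "done"
print(meta)  # {'title': '🧠 Reasoning', 'status': 'done'}
```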
@@ -245,8 +390,8 @@ def create_agent(
     save_trajectory: bool = True,
     only_n_most_recent_images: int = 3,
     verbosity: int = logging.INFO,
-    use_ollama: bool = False,
     use_oaicompat: bool = False,
+    provider_base_url: Optional[str] = None,
 ) -> ComputerAgent:
     """Create or update the global agent with the specified parameters."""
     global global_agent
@@ -254,15 +399,6 @@
     # Create the computer if not already done
     computer = create_computer_instance(verbosity=verbosity)
 
-    # Extra configuration to pass to the agent
-    extra_config = {}
-
-    # For Ollama models, we'll pass use_ollama and the model_name directly
-    if use_ollama:
-        extra_config["use_ollama"] = True
-        extra_config["ollama_model"] = model_name
-        print(f"DEBUG - Using Ollama with model: {model_name}")
-
     # Get API key from environment if not provided
     if api_key is None:
         if provider == LLMProvider.OPENAI:
@@ -270,72 +406,52 @@ def create_agent(
             api_key = os.environ.get("OPENAI_API_KEY", "")
         elif provider == LLMProvider.ANTHROPIC:
             api_key = os.environ.get("ANTHROPIC_API_KEY", "")
 
-    #
-
+    # Use provided provider_base_url if available, otherwise use default
+    default_base_url = "http://localhost:1234/v1" if use_oaicompat else None
+    custom_base_url = provider_base_url or default_base_url
 
     if use_oaicompat:
         # Special handling for OAICOMPAT - use OAICOMPAT provider with custom base URL
-        print(
-            f"DEBUG - Creating OAICOMPAT agent with model: {model_name}, URL: {provider_base_url}"
-        )
+        print(f"DEBUG - Creating OAICOMPAT agent with model: {model_name}, URL: {custom_base_url}")
         llm = LLM(
             provider=LLMProvider.OAICOMPAT,  # Set to OAICOMPAT instead of using original provider
             name=model_name,
-            provider_base_url=
+            provider_base_url=custom_base_url,
         )
         print(f"DEBUG - LLM provider is now: {llm.provider}, base URL: {llm.provider_base_url}")
         # Note: Don't pass use_oaicompat to the agent, as it doesn't accept this parameter
     elif provider == LLMProvider.OAICOMPAT:
         # This path is unlikely to be taken with our current approach
-        llm = LLM(provider=provider, name=model_name, provider_base_url=
+        llm = LLM(provider=provider, name=model_name, provider_base_url=custom_base_url)
     else:
         # For other providers, just use standard parameters
         llm = LLM(provider=provider, name=model_name)
 
     # Create or update the agent
-    if global_agent is None:
-        global_agent = ComputerAgent(
-            computer=computer,
-            loop=agent_loop,
-            model=llm,
-            api_key=api_key,
-            save_trajectory=save_trajectory,
-            only_n_most_recent_images=only_n_most_recent_images,
-            verbosity=verbosity,
-            **extra_config,
-        )
-    else:
-        # Update the existing agent's parameters
-        global_agent._loop = None  # Force recreation of the loop
-        global_agent.provider = provider
-        global_agent.loop = agent_loop
-        global_agent.model = llm
-        global_agent.api_key = api_key
-
-        # Explicitly update these settings to ensure they take effect
-        global_agent.save_trajectory = save_trajectory
-        global_agent.only_n_most_recent_images = only_n_most_recent_images
-
-        # Update Ollama settings if applicable
-        if use_ollama:
-            global_agent.use_ollama = True
-            global_agent.ollama_model = model_name
-        else:
-            global_agent.use_ollama = False
-            global_agent.ollama_model = None
-
-        # Log the updated settings
-        logging.info(
-            f"Updated agent settings: save_trajectory={save_trajectory}, recent_images={only_n_most_recent_images}"
-        )
+    global_agent = ComputerAgent(
+        computer=computer,
+        loop=agent_loop,
+        model=llm,
+        api_key=api_key,
+        save_trajectory=save_trajectory,
+        only_n_most_recent_images=only_n_most_recent_images,
+        verbosity=verbosity,
+    )
 
     return global_agent
 
 
-def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> str:
+def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> Tuple[str, MetadataDict]:
     """Process agent results for the Gradio UI."""
     # Extract text content
     text_obj = result.get("text", {})
+    metadata = result.get("metadata", {})
+
+    # Create a properly typed MetadataDict
+    metadata_dict = MetadataDict()
+    metadata_dict["title"] = metadata.get("title", "")
+    metadata_dict["status"] = "done"
+    metadata = metadata_dict
 
     # For OpenAI's Computer-Use Agent, text field is an object with format property
     if (
@@ -344,8 +460,11 @@ def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> str:
         and "format" in text_obj
         and not text_obj.get("value", "")
     ):
-        content = extract_synthesized_text(result)
+        content, metadata = extract_synthesized_text(result)
     else:
+        if not text_obj:
+            text_obj = result
+
         # For other types of results, try to get text directly
         if isinstance(text_obj, dict):
             if "value" in text_obj:
@@ -378,179 +497,7 @@ def process_agent_result(result: Union[AgentResponse, Dict[str, Any]]) -> st
     if not isinstance(content, str):
         content = str(content) if content else ""
 
-    return content
-
-
-def respond(
-    message: str,
-    history: List[Tuple[str, str]],
-    model_choice,  # Accept Gradio Dropdown component
-    agent_loop,  # Accept Gradio Dropdown component
-    save_trajectory,  # Accept Gradio Checkbox component
-    recent_images,  # Accept Gradio Slider component
-    openai_api_key: Optional[str] = None,
-    anthropic_api_key: Optional[str] = None,
-) -> str:
-    """Process a message with the Computer-Use Agent and return the response."""
-    import asyncio
-
-    # Get actual values from Gradio components
-    model_choice_value = model_choice.value if hasattr(model_choice, "value") else model_choice
-    agent_loop_value = agent_loop.value if hasattr(agent_loop, "value") else agent_loop
-    save_trajectory_value = (
-        save_trajectory.value if hasattr(save_trajectory, "value") else save_trajectory
-    )
-    recent_images_value = int(
-        recent_images.value if hasattr(recent_images, "value") else recent_images
-    )
-
-    # Debug logging
-    print(f"DEBUG - Model choice object: {type(model_choice)}")
-    print(f"DEBUG - Model choice value: {model_choice_value}")
-    print(f"DEBUG - Agent loop value: {agent_loop_value}")
-
-    # Create a new event loop for this function call
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
-
-    async def _async_respond():
-        # Extract the loop type and model from the selection
-        loop_provider = "OPENAI"
-        if isinstance(model_choice_value, str):
-            # This is the case for a custom text input from textbox
-            if agent_loop_value == "OMNI":
-                loop_provider = "OMNI"
-                # Use the custom model name as is
-                model_id = model_choice_value
-                print(f"DEBUG - Using custom model: {model_id}")
-            else:
-                # Handle regular dropdown value as string
-                if model_choice_value.startswith("OpenAI:"):
-                    loop_provider = "OPENAI"
-                    model_id = model_choice_value.replace("OpenAI: ", "").lower()
-                elif model_choice_value.startswith("Anthropic:"):
-                    loop_provider = "ANTHROPIC"
-                    model_id = model_choice_value.replace("Anthropic: ", "").lower()
-                elif model_choice_value.startswith("OMNI:"):
-                    loop_provider = "OMNI"
-                    if "GPT" in model_choice_value:
-                        model_id = model_choice_value.replace("OMNI: OpenAI ", "").lower()
-                    elif "Claude" in model_choice_value:
-                        model_id = model_choice_value.replace("OMNI: ", "").lower()
-                    elif "Ollama" in model_choice_value:
-                        loop_provider = "OMNI-OLLAMA"
-                        # Extract everything after "OMNI: Ollama " which is the full model name (e.g., phi3:latest)
-                        model_id = model_choice_value.replace("OMNI: Ollama ", "")
-                        print(f"DEBUG - Ollama model ID: {model_id}")
-                    else:
-                        model_id = "default"
-                else:
-                    # Default case
-                    loop_provider = agent_loop_value
-                    model_id = "default"
-        else:
-            # Model choice is not a string (shouldn't happen, but handle anyway)
-            loop_provider = agent_loop_value
-            model_id = "default"
-
-        print(f"DEBUG - Using loop provider: {loop_provider}, model_id: {model_id}")
-
-        # Use the mapping function to get provider, model name and agent loop
-        provider, model_name, agent_loop_type = get_provider_and_model(model_id, loop_provider)
-        print(
-            f"DEBUG - After mapping: provider={provider}, model_name={model_name}, agent_loop={agent_loop_type}"
-        )
-
-        # Special handling for OAICOMPAT to bypass provider-specific errors
-        # Creates the agent with OPENAI provider but using custom model name and provider base URL
-        is_oaicompat = str(provider) == "oaicompat"
-
-        # Don't override the provider for OAICOMPAT - instead pass it through
-        # if is_oaicompat:
-        #     provider = LLMProvider.OPENAI
-
-        # Get API key based on provider
-        if provider == LLMProvider.OPENAI:
-            api_key = openai_api_key or os.environ.get("OPENAI_API_KEY", "")
-        elif provider == LLMProvider.ANTHROPIC:
-            api_key = anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")
-        else:
-            api_key = ""
-
-        # Check for settings changes if agent already exists
-        settings_changed = False
-        settings_message = ""
-        if global_agent is not None:
-            # Safely check if save_trajectory setting changed
-            current_save_traj = getattr(global_agent, "save_trajectory", None)
-            if current_save_traj is not None and current_save_traj != save_trajectory_value:
-                settings_changed = True
-                settings_message += f"Save trajectory set to: {save_trajectory_value}. "
-
-            # Safely check if recent_images setting changed
-            current_recent_images = getattr(global_agent, "only_n_most_recent_images", None)
-            if current_recent_images is not None and current_recent_images != recent_images_value:
-                settings_changed = True
-                settings_message += f"Recent images set to: {recent_images_value}. "
-
-        # Create or update the agent
-        try:
-            create_agent(
-                provider=provider,
-                agent_loop=agent_loop_type,
-                model_name=model_name,
-                api_key=api_key,
-                save_trajectory=save_trajectory_value,
-                only_n_most_recent_images=recent_images_value,
-                use_ollama=loop_provider == "OMNI-OLLAMA",
-                use_oaicompat=is_oaicompat,
-            )
-
-            if global_agent is None:
-                return "Failed to create agent. Check API keys and configuration."
-        except Exception as e:
-            return f"Error creating agent: {str(e)}"
-
-        # Notify about settings changes if needed
-        if settings_changed:
-            return f"Settings updated: {settings_message}"
-
-        # Collect all responses
-        response_text = []
-
-        # Run the agent
-        try:
-            async for result in global_agent.run(message):
-                # Process result
-                content = process_agent_result(result)
-
-                # Skip empty content
-                if not content:
-                    continue
-
-                # Add content to response list
-                response_text.append(content)
-
-            # Return the full response as a single string
-            return "\n".join(response_text) if response_text else "Task completed."
-
-        except Exception as e:
-            import traceback
-
-            traceback.print_exc()
-            return f"Error: {str(e)}"
-
-    # Run the async function and get the result
-    try:
-        result = loop.run_until_complete(_async_respond())
-        loop.close()
-        return result
-    except Exception as e:
-        loop.close()
-        import traceback
-
-        traceback.print_exc()
-        return f"Error executing async operation: {str(e)}"
+    return content, metadata
 
 
 def create_gradio_ui(
@@ -566,6 +513,10 @@ def create_gradio_ui(
     Returns:
         A Gradio Blocks application
     """
+    # --- Load Settings ---
+    saved_settings = load_settings()
+    # --- End Load Settings ---
+
     # Check for API keys
     openai_api_key = os.environ.get("OPENAI_API_KEY", "")
     anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY", "")
@@ -579,6 +530,7 @@ def create_gradio_ui(
         openai_models = ["OpenAI: Computer-Use Preview"]
         omni_models += [
             "OMNI: OpenAI GPT-4o",
+            "OMNI: OpenAI GPT-4o mini",
             "OMNI: OpenAI GPT-4.5-preview",
         ]
 
@@ -601,21 +553,33 @@ def create_gradio_ui(
         "OMNI": omni_models + ["Custom model..."],  # Add custom model option
     }
 
-    #
-
-
-
-
-
-    initial_model =
+    # --- Apply Saved Settings (override defaults if available) ---
+    initial_loop = saved_settings.get("agent_loop", "OMNI")
+    # Ensure the saved model is actually available in the choices for the loaded loop
+    available_models_for_loop = provider_to_models.get(initial_loop, [])
+    saved_model_choice = saved_settings.get("model_choice")
+    if saved_model_choice and saved_model_choice in available_models_for_loop:
+        initial_model = saved_model_choice
     else:
-
-        if
-            initial_model =
-
-
-
+        # If saved model isn't valid for the loop, reset to default for that loop
+        if initial_loop == "OPENAI":
+            initial_model = (
+                "OpenAI: Computer-Use Preview" if openai_models else "No models available"
+            )
+        elif initial_loop == "ANTHROPIC":
+            initial_model = anthropic_models[0] if anthropic_models else "No models available"
+        else:  # OMNI
             initial_model = omni_models[0] if omni_models else "No models available"
+            if "Custom model..." in available_models_for_loop:
+                initial_model = (
+                    "Custom model..."  # Default to custom if available and no other default fits
+                )
+
+    initial_custom_model = saved_settings.get("custom_model", "Qwen2.5-VL-7B-Instruct")
+    initial_provider_base_url = saved_settings.get("provider_base_url", "http://localhost:1234/v1")
+    initial_save_trajectory = saved_settings.get("save_trajectory", True)
+    initial_recent_images = saved_settings.get("recent_images", 3)
+    # --- End Apply Saved Settings ---
 
     # Example prompts
     example_messages = [
@@ -708,7 +672,7 @@ def create_gradio_ui(
            ### 3. Pull the pre-built macOS image
 
            ```bash
-           lume pull macos-sequoia-cua:latest
+           lume pull macos-sequoia-cua:latest
            ```
 
            Initial download requires 80GB storage, but reduces to ~30GB after first run due to macOS's sparse file system.
@@ -725,48 +689,68 @@ def create_gradio_ui(
                    """
                )
 
-
-
-
-
-
-
-
+                with gr.Accordion("Configuration", open=True):
+                    # Configuration options
+                    agent_loop = gr.Dropdown(
+                        choices=["OPENAI", "ANTHROPIC", "OMNI"],
+                        label="Agent Loop",
+                        value=initial_loop,
+                        info="Select the agent loop provider",
+                    )
 
-
-
-
-
-
-
-
-
+                    # Create model selection dropdown with custom value support for OMNI
+                    model_choice = gr.Dropdown(
+                        choices=provider_to_models.get(initial_loop, ["No models available"]),
+                        label="LLM Provider and Model",
+                        value=initial_model,
+                        info="Select model or choose 'Custom model...' to enter a custom name",
+                        interactive=True,
+                    )
 
-
-
-
-
-
-
-
-
+                    # Add custom model textbox (only visible when "Custom model..." is selected)
+                    custom_model = gr.Textbox(
+                        label="Custom Model Name",
+                        placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct)",
+                        value=initial_custom_model,
+                        visible=(initial_model == "Custom model..."),
+                        interactive=True,
+                    )
 
-
-
-
-
-
-
+                    # Add custom provider base URL textbox (only visible when "Custom model..." is selected)
+                    provider_base_url = gr.Textbox(
+                        label="Provider Base URL",
+                        placeholder="Enter provider base URL (e.g., http://localhost:1234/v1)",
+                        value=initial_provider_base_url,
+                        visible=(initial_model == "Custom model..."),
+                        interactive=True,
+                    )
 
-
-
-
-
-
-
-
-
-
+                    # Add custom API key textbox (only visible when "Custom model..." is selected)
+                    provider_api_key = gr.Textbox(
+                        label="Provider API Key",
+                        placeholder="Enter provider API key (if required)",
+                        value="",
+                        visible=(initial_model == "Custom model..."),
+                        interactive=True,
+                        type="password",
+                    )
+
+                    save_trajectory = gr.Checkbox(
+                        label="Save Trajectory",
+                        value=initial_save_trajectory,
+                        info="Save the agent's trajectory for debugging",
+                        interactive=True,
+                    )
+
+                    recent_images = gr.Slider(
+                        label="Recent Images",
+                        minimum=1,
+                        maximum=10,
+                        value=initial_recent_images,
+                        step=1,
+                        info="Number of recent images to keep in context",
+                        interactive=True,
+                    )
 
             # Right column for chat interface
             with gr.Column(scale=2):
@@ -775,7 +759,7 @@ def create_gradio_ui(
                    "Ask me to perform tasks in a virtual macOS environment.<br>Built with <a href='https://github.com/trycua/cua' target='_blank'>github.com/trycua/cua</a>."
                )
 
-
+                chatbot_history = gr.Chatbot(type="messages")
                 msg = gr.Textbox(
                     placeholder="Ask me to perform tasks in a virtual macOS environment"
                 )
@@ -787,63 +771,169 @@ def create_gradio_ui(
                # Function to handle chat submission
                def chat_submit(message, history):
                    # Add user message to history
-                    history =
+                    history.append(gr.ChatMessage(role="user", content=message))
                    return "", history
 
                # Function to process agent response after user input
-                def process_response(
+                async def process_response(
                    history,
                    model_choice_value,
                    custom_model_value,
                    agent_loop_choice,
                    save_traj,
                    recent_imgs,
+                    custom_url_value=None,
+                    custom_api_key=None,
                ):
                    if not history:
-
+                        yield history
+                        return
 
                    # Get the last user message
-                    last_user_message = history[-1][
+                    last_user_message = history[-1]["content"]
 
-                    #
-
+                    # Determine the model name string to analyze: custom or from dropdown
+                    model_string_to_analyze = (
                        custom_model_value
                        if model_choice_value == "Custom model..."
-                        else model_choice_value
+                        else model_choice_value  # Use the full UI string initially
                    )
 
-                    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                    # Determine if this is a custom model selection
+                    is_custom_model_selected = model_choice_value == "Custom model..."
+
+                    try:
+                        # Get the provider, *cleaned* model name, and agent loop type
+                        provider, cleaned_model_name_from_func, agent_loop_type = (
+                            get_provider_and_model(model_string_to_analyze, agent_loop_choice)
+                        )
+
+                        # Determine the final model name to send to the agent
+                        # If custom selected, use the custom text box value, otherwise use the cleaned name
+                        final_model_name_to_send = (
+                            custom_model_value
+                            if is_custom_model_selected
+                            else cleaned_model_name_from_func
+                        )
+
+                        # Determine if OAICOMPAT should be used (only if custom model explicitly selected)
+                        is_oaicompat = is_custom_model_selected
+
+                        # Get API key based on provider determined by get_provider_and_model
+                        if is_oaicompat and custom_api_key:
+                            # Use custom API key if provided for custom model
+                            api_key = custom_api_key
+                            print(
+                                f"DEBUG - Using custom API key for model: {final_model_name_to_send}"
+                            )
+                        elif provider == LLMProvider.OPENAI:
+                            api_key = openai_api_key or os.environ.get("OPENAI_API_KEY", "")
+                        elif provider == LLMProvider.ANTHROPIC:
+                            api_key = anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", "")
+                        else:
+                            # For Ollama or default OAICOMPAT (without custom key), no key needed/expected
+                            api_key = ""
+
+                        # --- Save Settings Before Running Agent ---
+                        current_settings = {
+                            "agent_loop": agent_loop_choice,
+                            "model_choice": model_choice_value,
+                            "custom_model": custom_model_value,
+                            "provider_base_url": custom_url_value,
+                            "save_trajectory": save_traj,
+                            "recent_images": recent_imgs,
+                        }
+                        save_settings(current_settings)
+                        # --- End Save Settings ---
+
+                        # Create or update the agent
+                        create_agent(
+                            # Provider determined by get_provider_and_model unless custom model selected
+                            provider=LLMProvider.OAICOMPAT if is_oaicompat else provider,
+                            agent_loop=agent_loop_type,
+                            # Pass the FINAL determined model name (cleaned or custom)
+                            model_name=final_model_name_to_send,
+                            api_key=api_key,
+                            save_trajectory=save_traj,
+                            only_n_most_recent_images=recent_imgs,
+                            use_oaicompat=is_oaicompat,  # Set flag if custom model was selected
+                            # Pass custom URL only if custom model was selected
+                            provider_base_url=custom_url_value if is_oaicompat else None,
+                            verbosity=logging.DEBUG,  # Added verbosity here
+                        )
+
+                        if global_agent is None:
+                            # Add initial empty assistant message
+                            history.append(
+                                gr.ChatMessage(
+                                    role="assistant",
+                                    content="Failed to create agent. Check API keys and configuration.",
+                                )
+                            )
+                            yield history
+                            return
+
+                        # Add the screenshot handler to the agent's loop if available
+                        if global_agent and hasattr(global_agent, "_loop"):
+                            print("DEBUG - Adding screenshot handler to agent loop")
+
+                            # Create the screenshot handler with references to UI components
+                            screenshot_handler = GradioChatScreenshotHandler(history)
+
+                            # Add the handler to the callback manager if it exists AND is not None
+                            if (
+                                hasattr(global_agent._loop, "callback_manager")
+                                and global_agent._loop.callback_manager is not None
+                            ):
+                                global_agent._loop.callback_manager.add_handler(screenshot_handler)
+                                print(
+                                    f"DEBUG - Screenshot handler added to callback manager with history: {id(history)}"
+                                )
+                            else:
+                                # Optional: Log a warning if the callback manager is missing/None for a specific loop
+                                print(
+                                    f"WARNING - Callback manager not found or is None for loop type: {type(global_agent._loop)}. Screenshot handler not added."
+                                )
+
+                        # Stream responses from the agent
+                        async for result in global_agent.run(last_user_message):
+                            # Process result
+                            content, metadata = process_agent_result(result)
+
+                            # Skip empty content
+                            if content or metadata.get("title"):
+                                history.append(
+                                    gr.ChatMessage(
+                                        role="assistant", content=content, metadata=metadata
+                                    )
+                                )
+                            yield history
+                    except Exception as e:
+                        import traceback
+
+                        traceback.print_exc()
+                        # Update with error message
+                        history.append(gr.ChatMessage(role="assistant", content=f"Error: {str(e)}"))
+                        yield history
 
                # Connect the components
-                msg.submit(chat_submit, [msg,
+                msg.submit(chat_submit, [msg, chatbot_history], [msg, chatbot_history]).then(
                    process_response,
                    [
-
+                        chatbot_history,
                        model_choice,
                        custom_model,
                        agent_loop,
                        save_trajectory,
                        recent_images,
+                        provider_base_url,
+                        provider_api_key,
                    ],
-                    [
+                    [chatbot_history],
                )
 
                # Clear button functionality
-                clear.click(lambda: None, None,
+                clear.click(lambda: None, None, chatbot_history, queue=False)
 
                # Connect agent_loop changes to model selection
                agent_loop.change(
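Note: the rewrite of `process_response` into an `async` generator works because Gradio treats a generator bound to an event as a streaming callback: every `yield history` re-renders the chatbot. A stripped-down sketch of the same `submit(...).then(...)` wiring, independent of this package (component names are illustrative; assumes gradio 4.x or later):

```python
import asyncio
import gradio as gr

with gr.Blocks() as demo:
    chat = gr.Chatbot(type="messages")
    box = gr.Textbox()

    def add_user(message, history):
        # First step: append the user turn and clear the textbox.
        history.append(gr.ChatMessage(role="user", content=message))
        return "", history

    async def stream(history):
        # Second step: an async generator; each yield pushes a UI update.
        for step in ("thinking...", "done."):
            history.append(gr.ChatMessage(role="assistant", content=step))
            yield history
            await asyncio.sleep(0.2)

    box.submit(add_user, [box, chat], [box, chat]).then(stream, [chat], [chat])

demo.launch()
```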
@@ -853,14 +943,19 @@ def create_gradio_ui(
                    queue=False,  # Process immediately without queueing
                )
 
-                # Show/hide custom model
+                # Show/hide custom model, provider base URL, and API key textboxes based on dropdown selection
                def update_custom_model_visibility(model_value):
-
+                    is_custom = model_value == "Custom model..."
+                    return (
+                        gr.update(visible=is_custom),
+                        gr.update(visible=is_custom),
+                        gr.update(visible=is_custom),
+                    )
 
                model_choice.change(
                    fn=update_custom_model_visibility,
                    inputs=[model_choice],
-                    outputs=[custom_model],
+                    outputs=[custom_model, provider_base_url, provider_api_key],
                    queue=False,  # Process immediately without queueing
                )
 