cua-agent 0.1.37__tar.gz → 0.1.39__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- {cua_agent-0.1.37 → cua_agent-0.1.39}/PKG-INFO +5 -2
- {cua_agent-0.1.37 → cua_agent-0.1.39}/README.md +4 -1
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/ui/gradio/app.py +423 -149
- {cua_agent-0.1.37 → cua_agent-0.1.39}/pyproject.toml +3 -3
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/agent.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/base.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/callbacks.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/experiment.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/factory.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/messages.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/provider_config.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/telemetry.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/tools/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/tools/base.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/tools/bash.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/tools/collection.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/tools/computer.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/tools/edit.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/tools/manager.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/tools.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/types.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/core/visualization.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/api/client.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/api/logging.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/api_handler.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/callbacks/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/callbacks/manager.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/loop.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/prompts.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/response_handler.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/tools/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/tools/base.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/tools/bash.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/tools/collection.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/tools/computer.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/tools/edit.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/tools/manager.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/tools/run.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/types.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/anthropic/utils.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/api_handler.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/clients/anthropic.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/clients/base.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/clients/oaicompat.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/clients/ollama.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/clients/openai.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/clients/utils.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/image_utils.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/loop.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/parser.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/prompts.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/tools/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/tools/base.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/tools/bash.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/tools/computer.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/tools/manager.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/omni/utils.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/openai/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/openai/api_handler.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/openai/loop.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/openai/response_handler.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/openai/tools/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/openai/tools/base.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/openai/tools/computer.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/openai/tools/manager.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/openai/types.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/openai/utils.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/uitars/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/uitars/clients/base.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/uitars/clients/mlxvlm.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/uitars/clients/oaicompat.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/uitars/loop.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/uitars/prompts.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/uitars/tools/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/uitars/tools/computer.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/uitars/tools/manager.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/providers/uitars/utils.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/telemetry.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/ui/__init__.py +0 -0
- {cua_agent-0.1.37 → cua_agent-0.1.39}/agent/ui/gradio/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cua-agent
|
|
3
|
-
Version: 0.1.37
|
|
3
|
+
Version: 0.1.39
|
|
4
4
|
Summary: CUA (Computer Use) Agent for AI-driven computer interaction
|
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
|
6
6
|
Requires-Python: >=3.10
|
|
@@ -103,9 +103,12 @@ pip install "cua-agent[all]"
|
|
|
103
103
|
pip install "cua-agent[openai]" # OpenAI Cua Loop
|
|
104
104
|
pip install "cua-agent[anthropic]" # Anthropic Cua Loop
|
|
105
105
|
pip install "cua-agent[uitars]" # UI-Tars support
|
|
106
|
-
pip install "cua-agent[uitars-mlx]" # local UI-Tars support with MLXVLM
|
|
107
106
|
pip install "cua-agent[omni]" # Cua Loop based on OmniParser (includes Ollama for local models)
|
|
108
107
|
pip install "cua-agent[ui]" # Gradio UI for the agent
|
|
108
|
+
|
|
109
|
+
# For local UI-TARS with MLX support, you need to manually install mlx-vlm:
|
|
110
|
+
pip install "cua-agent[uitars-mlx]"
|
|
111
|
+
pip install git+https://github.com/ddupont808/mlx-vlm.git@stable/fix/qwen2-position-id # PR: https://github.com/Blaizzy/mlx-vlm/pull/349
|
|
109
112
|
```
|
|
110
113
|
|
|
111
114
|
## Run
|
|
@@ -32,9 +32,12 @@ pip install "cua-agent[all]"
|
|
|
32
32
|
pip install "cua-agent[openai]" # OpenAI Cua Loop
|
|
33
33
|
pip install "cua-agent[anthropic]" # Anthropic Cua Loop
|
|
34
34
|
pip install "cua-agent[uitars]" # UI-Tars support
|
|
35
|
-
pip install "cua-agent[uitars-mlx]" # local UI-Tars support with MLXVLM
|
|
36
35
|
pip install "cua-agent[omni]" # Cua Loop based on OmniParser (includes Ollama for local models)
|
|
37
36
|
pip install "cua-agent[ui]" # Gradio UI for the agent
|
|
37
|
+
|
|
38
|
+
# For local UI-TARS with MLX support, you need to manually install mlx-vlm:
|
|
39
|
+
pip install "cua-agent[uitars-mlx]"
|
|
40
|
+
pip install git+https://github.com/ddupont808/mlx-vlm.git@stable/fix/qwen2-position-id # PR: https://github.com/Blaizzy/mlx-vlm/pull/349
|
|
38
41
|
```
|
|
39
42
|
|
|
40
43
|
## Run
|
|
@@ -6,7 +6,7 @@ with an advanced UI for model selection and configuration.
|
|
|
6
6
|
|
|
7
7
|
Supported Agent Loops and Models:
|
|
8
8
|
- AgentLoop.OPENAI: Uses OpenAI Operator CUA model
|
|
9
|
-
•
|
|
9
|
+
• computer-use-preview
|
|
10
10
|
|
|
11
11
|
- AgentLoop.ANTHROPIC: Uses Anthropic Computer-Use models
|
|
12
12
|
• claude-3-5-sonnet-20240620
|
|
@@ -133,12 +133,12 @@ class GradioChatScreenshotHandler(DefaultCallbackHandler):
|
|
|
133
133
|
MODEL_MAPPINGS = {
|
|
134
134
|
"openai": {
|
|
135
135
|
# Default to operator CUA model
|
|
136
|
-
"default": "
|
|
136
|
+
"default": "computer-use-preview",
|
|
137
137
|
# Map standard OpenAI model names to CUA-specific model names
|
|
138
|
-
"gpt-4-turbo": "
|
|
139
|
-
"gpt-4o": "
|
|
140
|
-
"gpt-4": "
|
|
141
|
-
"gpt-4.5-preview": "
|
|
138
|
+
"gpt-4-turbo": "computer-use-preview",
|
|
139
|
+
"gpt-4o": "computer-use-preview",
|
|
140
|
+
"gpt-4": "computer-use-preview",
|
|
141
|
+
"gpt-4.5-preview": "computer-use-preview",
|
|
142
142
|
"gpt-4o-mini": "gpt-4o-mini",
|
|
143
143
|
},
|
|
144
144
|
"anthropic": {
|
|
@@ -217,7 +217,7 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
|
|
|
217
217
|
# Determine provider and clean model name based on the full string from UI
|
|
218
218
|
cleaned_model_name = model_name # Default to using the name as-is (for custom)
|
|
219
219
|
|
|
220
|
-
if model_name == "Custom model...":
|
|
220
|
+
if model_name == "Custom model (OpenAI compatible API)":
|
|
221
221
|
# Actual model name comes from custom_model_value via model_to_use.
|
|
222
222
|
# Assume OAICOMPAT for custom models unless overridden by URL/key later?
|
|
223
223
|
# get_provider_and_model determines the *initial* provider/model.
|
|
@@ -278,8 +278,8 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
|
|
|
278
278
|
break
|
|
279
279
|
# Note: No fallback needed here as we explicitly check against omni keys
|
|
280
280
|
|
|
281
|
-
else: # Handles unexpected formats or the raw custom name if "Custom model
|
|
282
|
-
# Should only happen if user selected "Custom model..."
|
|
281
|
+
else: # Handles unexpected formats or the raw custom name if "Custom model (OpenAI compatible API)" selected
|
|
282
|
+
# Should only happen if user selected "Custom model (OpenAI compatible API)"
|
|
283
283
|
# Or if a model name format isn't caught above
|
|
284
284
|
provider = LLMProvider.OAICOMPAT
|
|
285
285
|
cleaned_model_name = (
|
|
@@ -291,7 +291,7 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
|
|
|
291
291
|
# agent_loop remains AgentLoop.OMNI
|
|
292
292
|
elif agent_loop == AgentLoop.UITARS:
|
|
293
293
|
# For UITARS, use MLXVLM provider for the MLX models, OAICOMPAT for custom
|
|
294
|
-
if model_name == "Custom model...":
|
|
294
|
+
if model_name == "Custom model (OpenAI compatible API)":
|
|
295
295
|
provider = LLMProvider.OAICOMPAT
|
|
296
296
|
model_name_to_use = "tgi"
|
|
297
297
|
else:
|
|
@@ -449,11 +449,11 @@ def create_gradio_ui(
|
|
|
449
449
|
provider_to_models = {
|
|
450
450
|
"OPENAI": openai_models,
|
|
451
451
|
"ANTHROPIC": anthropic_models,
|
|
452
|
-
"OMNI": omni_models + ["Custom model
|
|
452
|
+
"OMNI": omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"], # Add custom model options
|
|
453
453
|
"UITARS": [
|
|
454
454
|
"mlx-community/UI-TARS-1.5-7B-4bit",
|
|
455
455
|
"mlx-community/UI-TARS-1.5-7B-6bit",
|
|
456
|
-
"Custom model
|
|
456
|
+
"Custom model (OpenAI compatible API)"
|
|
457
457
|
], # UI-TARS options with MLX models
|
|
458
458
|
}
|
|
459
459
|
|
|
@@ -474,9 +474,9 @@ def create_gradio_ui(
|
|
|
474
474
|
initial_model = anthropic_models[0] if anthropic_models else "No models available"
|
|
475
475
|
else: # OMNI
|
|
476
476
|
initial_model = omni_models[0] if omni_models else "No models available"
|
|
477
|
-
if "Custom model..." in available_models_for_loop:
|
|
477
|
+
if "Custom model (OpenAI compatible API)" in available_models_for_loop:
|
|
478
478
|
initial_model = (
|
|
479
|
-
"Custom model..." # Default to custom if available and no other default fits
|
|
479
|
+
"Custom model (OpenAI compatible API)" # Default to custom if available and no other default fits
|
|
480
480
|
)
|
|
481
481
|
|
|
482
482
|
initial_custom_model = saved_settings.get("custom_model", "Qwen2.5-VL-7B-Instruct")
|
|
@@ -499,7 +499,7 @@ def create_gradio_ui(
|
|
|
499
499
|
|
|
500
500
|
Args:
|
|
501
501
|
agent_loop_choice: The agent loop type (e.g., UITARS, OPENAI, ANTHROPIC, OMNI)
|
|
502
|
-
provider: The provider type (e.g., OPENAI, ANTHROPIC, OLLAMA, OAICOMPAT)
|
|
502
|
+
provider: The provider type (e.g., OPENAI, ANTHROPIC, OLLAMA, OAICOMPAT, MLXVLM)
|
|
503
503
|
model_name: The model name
|
|
504
504
|
tasks: List of tasks to execute
|
|
505
505
|
provider_url: The provider base URL for OAICOMPAT providers
|
|
@@ -528,14 +528,58 @@ async def main():
|
|
|
528
528
|
only_n_most_recent_images={recent_images},
|
|
529
529
|
save_trajectory={save_trajectory},'''
|
|
530
530
|
|
|
531
|
-
# Add the model configuration based on provider
|
|
532
|
-
if
|
|
531
|
+
# Add the model configuration based on provider and agent loop
|
|
532
|
+
if agent_loop_choice == "OPENAI":
|
|
533
|
+
# For OPENAI loop, always use OPENAI provider with computer-use-preview
|
|
533
534
|
code += f'''
|
|
535
|
+
model=LLM(
|
|
536
|
+
provider=LLMProvider.OPENAI,
|
|
537
|
+
name="computer-use-preview"
|
|
538
|
+
)'''
|
|
539
|
+
elif agent_loop_choice == "ANTHROPIC":
|
|
540
|
+
# For ANTHROPIC loop, always use ANTHROPIC provider
|
|
541
|
+
code += f'''
|
|
542
|
+
model=LLM(
|
|
543
|
+
provider=LLMProvider.ANTHROPIC,
|
|
544
|
+
name="{model_name}"
|
|
545
|
+
)'''
|
|
546
|
+
elif agent_loop_choice == "UITARS":
|
|
547
|
+
# For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for others
|
|
548
|
+
if provider == LLMProvider.MLXVLM:
|
|
549
|
+
code += f'''
|
|
550
|
+
model=LLM(
|
|
551
|
+
provider=LLMProvider.MLXVLM,
|
|
552
|
+
name="{model_name}"
|
|
553
|
+
)'''
|
|
554
|
+
else: # OAICOMPAT
|
|
555
|
+
code += f'''
|
|
556
|
+
model=LLM(
|
|
557
|
+
provider=LLMProvider.OAICOMPAT,
|
|
558
|
+
name="{model_name}",
|
|
559
|
+
provider_base_url="{provider_url}"
|
|
560
|
+
)'''
|
|
561
|
+
elif agent_loop_choice == "OMNI":
|
|
562
|
+
# For OMNI, provider can be OPENAI, ANTHROPIC, OLLAMA, or OAICOMPAT
|
|
563
|
+
if provider == LLMProvider.OAICOMPAT:
|
|
564
|
+
code += f'''
|
|
534
565
|
model=LLM(
|
|
535
566
|
provider=LLMProvider.OAICOMPAT,
|
|
536
567
|
name="{model_name}",
|
|
537
568
|
provider_base_url="{provider_url}"
|
|
538
569
|
)'''
|
|
570
|
+
else: # OPENAI, ANTHROPIC, OLLAMA
|
|
571
|
+
code += f'''
|
|
572
|
+
model=LLM(
|
|
573
|
+
provider=LLMProvider.{provider.name},
|
|
574
|
+
name="{model_name}"
|
|
575
|
+
)'''
|
|
576
|
+
else:
|
|
577
|
+
# Default case - just use the provided provider and model
|
|
578
|
+
code += f'''
|
|
579
|
+
model=LLM(
|
|
580
|
+
provider=LLMProvider.{provider.name},
|
|
581
|
+
name="{model_name}"
|
|
582
|
+
)'''
|
|
539
583
|
|
|
540
584
|
code += """
|
|
541
585
|
)
|
|
@@ -561,6 +605,8 @@ async def main():
|
|
|
561
605
|
print(f"Executing task: {{task}}")
|
|
562
606
|
async for result in agent.run(task):
|
|
563
607
|
print(result)'''
|
|
608
|
+
|
|
609
|
+
|
|
564
610
|
|
|
565
611
|
# Add the main block
|
|
566
612
|
code += '''
|
|
@@ -570,62 +616,6 @@ if __name__ == "__main__":
|
|
|
570
616
|
|
|
571
617
|
return code
|
|
572
618
|
|
|
573
|
-
# Function to update model choices based on agent loop selection
|
|
574
|
-
def update_model_choices(loop):
|
|
575
|
-
models = provider_to_models.get(loop, [])
|
|
576
|
-
if loop == "OMNI":
|
|
577
|
-
# For OMNI, include the custom model option
|
|
578
|
-
if not models:
|
|
579
|
-
models = ["Custom model..."]
|
|
580
|
-
elif "Custom model..." not in models:
|
|
581
|
-
models.append("Custom model...")
|
|
582
|
-
|
|
583
|
-
# Show both OpenAI and Anthropic key inputs for OMNI if keys aren't set
|
|
584
|
-
return [
|
|
585
|
-
gr.update(choices=models, value=models[0] if models else "Custom model...", interactive=True),
|
|
586
|
-
gr.update(visible=not has_openai_key),
|
|
587
|
-
gr.update(visible=not has_anthropic_key)
|
|
588
|
-
]
|
|
589
|
-
elif loop == "OPENAI":
|
|
590
|
-
# Show only OpenAI key input for OPENAI loop if key isn't set
|
|
591
|
-
if not models:
|
|
592
|
-
return [
|
|
593
|
-
gr.update(choices=["No models available"], value="No models available", interactive=True),
|
|
594
|
-
gr.update(visible=not has_openai_key),
|
|
595
|
-
gr.update(visible=False)
|
|
596
|
-
]
|
|
597
|
-
return [
|
|
598
|
-
gr.update(choices=models, value=models[0] if models else None, interactive=True),
|
|
599
|
-
gr.update(visible=not has_openai_key),
|
|
600
|
-
gr.update(visible=False)
|
|
601
|
-
]
|
|
602
|
-
elif loop == "ANTHROPIC":
|
|
603
|
-
# Show only Anthropic key input for ANTHROPIC loop if key isn't set
|
|
604
|
-
if not models:
|
|
605
|
-
return [
|
|
606
|
-
gr.update(choices=["No models available"], value="No models available", interactive=True),
|
|
607
|
-
gr.update(visible=False),
|
|
608
|
-
gr.update(visible=not has_anthropic_key)
|
|
609
|
-
]
|
|
610
|
-
return [
|
|
611
|
-
gr.update(choices=models, value=models[0] if models else None, interactive=True),
|
|
612
|
-
gr.update(visible=False),
|
|
613
|
-
gr.update(visible=not has_anthropic_key)
|
|
614
|
-
]
|
|
615
|
-
else:
|
|
616
|
-
# For other providers (like UITARS), don't show API key inputs
|
|
617
|
-
if not models:
|
|
618
|
-
return [
|
|
619
|
-
gr.update(choices=["No models available"], value="No models available", interactive=True),
|
|
620
|
-
gr.update(visible=False),
|
|
621
|
-
gr.update(visible=False)
|
|
622
|
-
]
|
|
623
|
-
return [
|
|
624
|
-
gr.update(choices=models, value=models[0] if models else None, interactive=True),
|
|
625
|
-
gr.update(visible=False),
|
|
626
|
-
gr.update(visible=False)
|
|
627
|
-
]
|
|
628
|
-
|
|
629
619
|
# Create the Gradio interface with advanced UI
|
|
630
620
|
with gr.Blocks(title="Computer-Use Agent") as demo:
|
|
631
621
|
with gr.Row():
|
|
@@ -684,14 +674,52 @@ if __name__ == "__main__":
|
|
|
684
674
|
info="Select the agent loop provider",
|
|
685
675
|
)
|
|
686
676
|
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
677
|
+
|
|
678
|
+
# Create separate model selection dropdowns for each provider type
|
|
679
|
+
# This avoids the Gradio bug with updating choices
|
|
680
|
+
with gr.Group() as model_selection_group:
|
|
681
|
+
# OpenAI models dropdown
|
|
682
|
+
openai_model_choice = gr.Dropdown(
|
|
683
|
+
choices=openai_models,
|
|
684
|
+
label="OpenAI Model",
|
|
685
|
+
value=openai_models[0] if openai_models else "No models available",
|
|
686
|
+
info="Select OpenAI model",
|
|
687
|
+
interactive=True,
|
|
688
|
+
visible=(initial_loop == "OPENAI")
|
|
689
|
+
)
|
|
690
|
+
|
|
691
|
+
# Anthropic models dropdown
|
|
692
|
+
anthropic_model_choice = gr.Dropdown(
|
|
693
|
+
choices=anthropic_models,
|
|
694
|
+
label="Anthropic Model",
|
|
695
|
+
value=anthropic_models[0] if anthropic_models else "No models available",
|
|
696
|
+
info="Select Anthropic model",
|
|
697
|
+
interactive=True,
|
|
698
|
+
visible=(initial_loop == "ANTHROPIC")
|
|
699
|
+
)
|
|
700
|
+
|
|
701
|
+
# OMNI models dropdown
|
|
702
|
+
omni_model_choice = gr.Dropdown(
|
|
703
|
+
choices=omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"],
|
|
704
|
+
label="OMNI Model",
|
|
705
|
+
value=omni_models[0] if omni_models else "Custom model (OpenAI compatible API)",
|
|
706
|
+
info="Select OMNI model or choose a custom model option",
|
|
707
|
+
interactive=True,
|
|
708
|
+
visible=(initial_loop == "OMNI")
|
|
709
|
+
)
|
|
710
|
+
|
|
711
|
+
# UITARS models dropdown
|
|
712
|
+
uitars_model_choice = gr.Dropdown(
|
|
713
|
+
choices=provider_to_models.get("UITARS", ["No models available"]),
|
|
714
|
+
label="UITARS Model",
|
|
715
|
+
value=provider_to_models.get("UITARS", ["No models available"])[0] if provider_to_models.get("UITARS") else "No models available",
|
|
716
|
+
info="Select UITARS model",
|
|
717
|
+
interactive=True,
|
|
718
|
+
visible=(initial_loop == "UITARS")
|
|
719
|
+
)
|
|
720
|
+
|
|
721
|
+
# Hidden field to store the selected model (for compatibility with existing code)
|
|
722
|
+
model_choice = gr.Textbox(visible=False)
|
|
695
723
|
|
|
696
724
|
# Add API key inputs for OpenAI and Anthropic
|
|
697
725
|
with gr.Group(visible=not has_openai_key and (initial_loop == "OPENAI" or initial_loop == "OMNI")) as openai_key_group:
|
|
@@ -713,34 +741,176 @@ if __name__ == "__main__":
|
|
|
713
741
|
type="password",
|
|
714
742
|
info="Required for Anthropic models"
|
|
715
743
|
)
|
|
744
|
+
|
|
745
|
+
# Function to set OpenAI API key environment variable
|
|
746
|
+
def set_openai_api_key(key):
|
|
747
|
+
if key and key.strip():
|
|
748
|
+
os.environ["OPENAI_API_KEY"] = key.strip()
|
|
749
|
+
print(f"DEBUG - Set OpenAI API key environment variable")
|
|
750
|
+
return key
|
|
751
|
+
|
|
752
|
+
# Function to set Anthropic API key environment variable
|
|
753
|
+
def set_anthropic_api_key(key):
|
|
754
|
+
if key and key.strip():
|
|
755
|
+
os.environ["ANTHROPIC_API_KEY"] = key.strip()
|
|
756
|
+
print(f"DEBUG - Set Anthropic API key environment variable")
|
|
757
|
+
return key
|
|
758
|
+
|
|
759
|
+
# Add change event handlers for API key inputs
|
|
760
|
+
openai_api_key_input.change(
|
|
761
|
+
fn=set_openai_api_key,
|
|
762
|
+
inputs=[openai_api_key_input],
|
|
763
|
+
outputs=[openai_api_key_input],
|
|
764
|
+
queue=False
|
|
765
|
+
)
|
|
766
|
+
|
|
767
|
+
anthropic_api_key_input.change(
|
|
768
|
+
fn=set_anthropic_api_key,
|
|
769
|
+
inputs=[anthropic_api_key_input],
|
|
770
|
+
outputs=[anthropic_api_key_input],
|
|
771
|
+
queue=False
|
|
772
|
+
)
|
|
716
773
|
|
|
717
|
-
#
|
|
774
|
+
# Combined function to update UI based on selections
|
|
775
|
+
def update_ui(loop=None, openai_model=None, anthropic_model=None, omni_model=None, uitars_model=None):
|
|
776
|
+
# Default values if not provided
|
|
777
|
+
loop = loop or agent_loop.value
|
|
778
|
+
|
|
779
|
+
# Determine which model value to use for custom model checks
|
|
780
|
+
model_value = None
|
|
781
|
+
if loop == "OPENAI" and openai_model:
|
|
782
|
+
model_value = openai_model
|
|
783
|
+
elif loop == "ANTHROPIC" and anthropic_model:
|
|
784
|
+
model_value = anthropic_model
|
|
785
|
+
elif loop == "OMNI" and omni_model:
|
|
786
|
+
model_value = omni_model
|
|
787
|
+
elif loop == "UITARS" and uitars_model:
|
|
788
|
+
model_value = uitars_model
|
|
789
|
+
|
|
790
|
+
# Show/hide appropriate model dropdown based on loop selection
|
|
791
|
+
openai_visible = (loop == "OPENAI")
|
|
792
|
+
anthropic_visible = (loop == "ANTHROPIC")
|
|
793
|
+
omni_visible = (loop == "OMNI")
|
|
794
|
+
uitars_visible = (loop == "UITARS")
|
|
795
|
+
|
|
796
|
+
# Show/hide API key inputs based on loop selection
|
|
797
|
+
show_openai_key = not has_openai_key and (loop == "OPENAI" or (loop == "OMNI" and model_value and "OpenAI" in model_value and "Custom" not in model_value))
|
|
798
|
+
show_anthropic_key = not has_anthropic_key and (loop == "ANTHROPIC" or (loop == "OMNI" and model_value and "Claude" in model_value and "Custom" not in model_value))
|
|
799
|
+
|
|
800
|
+
# Determine custom model visibility
|
|
801
|
+
is_custom_openai_api = model_value == "Custom model (OpenAI compatible API)"
|
|
802
|
+
is_custom_ollama = model_value == "Custom model (ollama)"
|
|
803
|
+
is_any_custom = is_custom_openai_api or is_custom_ollama
|
|
804
|
+
|
|
805
|
+
# Update the hidden model_choice field based on the visible dropdown
|
|
806
|
+
model_choice_value = model_value if model_value else ""
|
|
807
|
+
|
|
808
|
+
# Return all UI updates
|
|
809
|
+
return [
|
|
810
|
+
# Model dropdowns visibility
|
|
811
|
+
gr.update(visible=openai_visible),
|
|
812
|
+
gr.update(visible=anthropic_visible),
|
|
813
|
+
gr.update(visible=omni_visible),
|
|
814
|
+
gr.update(visible=uitars_visible),
|
|
815
|
+
# API key inputs visibility
|
|
816
|
+
gr.update(visible=show_openai_key),
|
|
817
|
+
gr.update(visible=show_anthropic_key),
|
|
818
|
+
# Custom model fields visibility
|
|
819
|
+
gr.update(visible=is_any_custom), # Custom model name always visible for any custom option
|
|
820
|
+
gr.update(visible=is_custom_openai_api), # Provider base URL only for OpenAI compatible API
|
|
821
|
+
# Update the hidden model_choice field
|
|
822
|
+
gr.update(value=model_choice_value)
|
|
823
|
+
]
|
|
824
|
+
|
|
825
|
+
# Add custom model textbox (visible for both custom model options)
|
|
718
826
|
custom_model = gr.Textbox(
|
|
719
827
|
label="Custom Model Name",
|
|
720
|
-
placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct)",
|
|
828
|
+
placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct or llama3)",
|
|
721
829
|
value=initial_custom_model,
|
|
722
|
-
visible=(initial_model == "Custom model..."),
|
|
830
|
+
visible=(initial_model == "Custom model (OpenAI compatible API)" or initial_model == "Custom model (ollama)"),
|
|
723
831
|
interactive=True,
|
|
724
832
|
)
|
|
725
833
|
|
|
726
|
-
# Add custom provider base URL textbox (only visible
|
|
834
|
+
# Add custom provider base URL textbox (only visible for OpenAI compatible API)
|
|
727
835
|
provider_base_url = gr.Textbox(
|
|
728
836
|
label="Provider Base URL",
|
|
729
837
|
placeholder="Enter provider base URL (e.g., http://localhost:1234/v1)",
|
|
730
838
|
value=initial_provider_base_url,
|
|
731
|
-
visible=(initial_model == "Custom model..."),
|
|
839
|
+
visible=(initial_model == "Custom model (OpenAI compatible API)"),
|
|
732
840
|
interactive=True,
|
|
733
841
|
)
|
|
734
842
|
|
|
735
|
-
# Add custom API key textbox (only visible
|
|
843
|
+
# Add custom API key textbox (only visible for OpenAI compatible API)
|
|
736
844
|
provider_api_key = gr.Textbox(
|
|
737
845
|
label="Provider API Key",
|
|
738
846
|
placeholder="Enter provider API key (if required)",
|
|
739
847
|
value="",
|
|
740
|
-
visible=(initial_model == "Custom model..."),
|
|
848
|
+
visible=(initial_model == "Custom model (OpenAI compatible API)"),
|
|
741
849
|
interactive=True,
|
|
742
850
|
type="password",
|
|
743
851
|
)
|
|
852
|
+
|
|
853
|
+
# Connect agent_loop changes to update all UI elements
|
|
854
|
+
agent_loop.change(
|
|
855
|
+
fn=update_ui,
|
|
856
|
+
inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
|
|
857
|
+
outputs=[
|
|
858
|
+
openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
|
|
859
|
+
openai_key_group, anthropic_key_group,
|
|
860
|
+
custom_model, provider_base_url, provider_api_key,
|
|
861
|
+
model_choice # Add model_choice to outputs
|
|
862
|
+
],
|
|
863
|
+
queue=False # Process immediately without queueing
|
|
864
|
+
)
|
|
865
|
+
|
|
866
|
+
# Connect each model dropdown to update UI
|
|
867
|
+
omni_model_choice.change(
|
|
868
|
+
fn=update_ui,
|
|
869
|
+
inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
|
|
870
|
+
outputs=[
|
|
871
|
+
openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
|
|
872
|
+
openai_key_group, anthropic_key_group,
|
|
873
|
+
custom_model, provider_base_url, provider_api_key,
|
|
874
|
+
model_choice # Add model_choice to outputs
|
|
875
|
+
],
|
|
876
|
+
queue=False
|
|
877
|
+
)
|
|
878
|
+
|
|
879
|
+
uitars_model_choice.change(
|
|
880
|
+
fn=update_ui,
|
|
881
|
+
inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
|
|
882
|
+
outputs=[
|
|
883
|
+
openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
|
|
884
|
+
openai_key_group, anthropic_key_group,
|
|
885
|
+
custom_model, provider_base_url, provider_api_key,
|
|
886
|
+
model_choice # Add model_choice to outputs
|
|
887
|
+
],
|
|
888
|
+
queue=False
|
|
889
|
+
)
|
|
890
|
+
|
|
891
|
+
openai_model_choice.change(
|
|
892
|
+
fn=update_ui,
|
|
893
|
+
inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
|
|
894
|
+
outputs=[
|
|
895
|
+
openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
|
|
896
|
+
openai_key_group, anthropic_key_group,
|
|
897
|
+
custom_model, provider_base_url, provider_api_key,
|
|
898
|
+
model_choice # Add model_choice to outputs
|
|
899
|
+
],
|
|
900
|
+
queue=False
|
|
901
|
+
)
|
|
902
|
+
|
|
903
|
+
anthropic_model_choice.change(
|
|
904
|
+
fn=update_ui,
|
|
905
|
+
inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
|
|
906
|
+
outputs=[
|
|
907
|
+
openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
|
|
908
|
+
openai_key_group, anthropic_key_group,
|
|
909
|
+
custom_model, provider_base_url, provider_api_key,
|
|
910
|
+
model_choice # Add model_choice to outputs
|
|
911
|
+
],
|
|
912
|
+
queue=False
|
|
913
|
+
)
|
|
744
914
|
|
|
745
915
|
save_trajectory = gr.Checkbox(
|
|
746
916
|
label="Save Trajectory",
|
|
@@ -772,6 +942,9 @@ if __name__ == "__main__":
|
|
|
772
942
|
placeholder="Ask me to perform tasks in a virtual macOS environment"
|
|
773
943
|
)
|
|
774
944
|
clear = gr.Button("Clear")
|
|
945
|
+
|
|
946
|
+
# Add cancel button
|
|
947
|
+
cancel_button = gr.Button("Cancel", variant="stop")
|
|
775
948
|
|
|
776
949
|
# Add examples
|
|
777
950
|
example_group = gr.Examples(examples=example_messages, inputs=msg)
|
|
@@ -782,10 +955,28 @@ if __name__ == "__main__":
|
|
|
782
955
|
history.append(gr.ChatMessage(role="user", content=message))
|
|
783
956
|
return "", history
|
|
784
957
|
|
|
958
|
+
# Function to cancel the running agent
|
|
959
|
+
async def cancel_agent_task(history):
|
|
960
|
+
global global_agent
|
|
961
|
+
if global_agent and hasattr(global_agent, '_loop'):
|
|
962
|
+
print("DEBUG - Cancelling agent task")
|
|
963
|
+
# Cancel the agent loop
|
|
964
|
+
if hasattr(global_agent._loop, 'cancel') and callable(global_agent._loop.cancel):
|
|
965
|
+
await global_agent._loop.cancel()
|
|
966
|
+
history.append(gr.ChatMessage(role="assistant", content="Task cancelled by user", metadata={"title": "❌ Cancelled"}))
|
|
967
|
+
else:
|
|
968
|
+
history.append(gr.ChatMessage(role="assistant", content="Could not cancel task: cancel method not found", metadata={"title": "⚠️ Warning"}))
|
|
969
|
+
else:
|
|
970
|
+
history.append(gr.ChatMessage(role="assistant", content="No active agent task to cancel", metadata={"title": "ℹ️ Info"}))
|
|
971
|
+
return history
|
|
972
|
+
|
|
785
973
|
# Function to process agent response after user input
|
|
786
974
|
async def process_response(
|
|
787
975
|
history,
|
|
788
|
-
|
|
976
|
+
openai_model_value,
|
|
977
|
+
anthropic_model_value,
|
|
978
|
+
omni_model_value,
|
|
979
|
+
uitars_model_value,
|
|
789
980
|
custom_model_value,
|
|
790
981
|
agent_loop_choice,
|
|
791
982
|
save_traj,
|
|
@@ -802,21 +993,47 @@ if __name__ == "__main__":
|
|
|
802
993
|
# Get the last user message
|
|
803
994
|
last_user_message = history[-1]["content"]
|
|
804
995
|
|
|
996
|
+
# Get the appropriate model value based on the agent loop
|
|
997
|
+
if agent_loop_choice == "OPENAI":
|
|
998
|
+
model_choice_value = openai_model_value
|
|
999
|
+
elif agent_loop_choice == "ANTHROPIC":
|
|
1000
|
+
model_choice_value = anthropic_model_value
|
|
1001
|
+
elif agent_loop_choice == "OMNI":
|
|
1002
|
+
model_choice_value = omni_model_value
|
|
1003
|
+
elif agent_loop_choice == "UITARS":
|
|
1004
|
+
model_choice_value = uitars_model_value
|
|
1005
|
+
else:
|
|
1006
|
+
model_choice_value = "No models available"
|
|
1007
|
+
|
|
1008
|
+
# Determine if this is a custom model selection and which type
|
|
1009
|
+
is_custom_openai_api = model_choice_value == "Custom model (OpenAI compatible API)"
|
|
1010
|
+
is_custom_ollama = model_choice_value == "Custom model (ollama)"
|
|
1011
|
+
is_custom_model_selected = is_custom_openai_api or is_custom_ollama
|
|
1012
|
+
|
|
805
1013
|
# Determine the model name string to analyze: custom or from dropdown
|
|
806
|
-
|
|
807
|
-
custom_model_value
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
)
|
|
811
|
-
|
|
812
|
-
# Determine if this is a custom model selection
|
|
813
|
-
is_custom_model_selected = model_choice_value == "Custom model..."
|
|
1014
|
+
if is_custom_model_selected:
|
|
1015
|
+
model_string_to_analyze = custom_model_value
|
|
1016
|
+
else:
|
|
1017
|
+
model_string_to_analyze = model_choice_value # Use the full UI string initially
|
|
814
1018
|
|
|
815
1019
|
try:
|
|
816
|
-
#
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
1020
|
+
# Special case for UITARS - use MLXVLM provider
|
|
1021
|
+
if agent_loop_choice == "UITARS":
|
|
1022
|
+
provider = LLMProvider.MLXVLM
|
|
1023
|
+
cleaned_model_name_from_func = model_string_to_analyze
|
|
1024
|
+
agent_loop_type = AgentLoop.UITARS
|
|
1025
|
+
print(f"Using MLXVLM provider for UITARS model: {model_string_to_analyze}")
|
|
1026
|
+
# Special case for Ollama custom model
|
|
1027
|
+
elif is_custom_ollama and agent_loop_choice == "OMNI":
|
|
1028
|
+
provider = LLMProvider.OLLAMA
|
|
1029
|
+
cleaned_model_name_from_func = custom_model_value
|
|
1030
|
+
agent_loop_type = AgentLoop.OMNI
|
|
1031
|
+
print(f"Using Ollama provider for custom model: {custom_model_value}")
|
|
1032
|
+
else:
|
|
1033
|
+
# Get the provider, *cleaned* model name, and agent loop type
|
|
1034
|
+
provider, cleaned_model_name_from_func, agent_loop_type = (
|
|
1035
|
+
get_provider_and_model(model_string_to_analyze, agent_loop_choice)
|
|
1036
|
+
)
|
|
820
1037
|
|
|
821
1038
|
print(f"provider={provider} cleaned_model_name_from_func={cleaned_model_name_from_func} agent_loop_type={agent_loop_type} agent_loop_choice={agent_loop_choice}")
|
|
822
1039
|
|
|
@@ -828,26 +1045,34 @@ if __name__ == "__main__":
|
|
|
828
1045
|
else cleaned_model_name_from_func
|
|
829
1046
|
)
|
|
830
1047
|
|
|
831
|
-
# Determine if OAICOMPAT should be used (only
|
|
832
|
-
is_oaicompat =
|
|
1048
|
+
# Determine if OAICOMPAT should be used (only for OpenAI compatible API custom model)
|
|
1049
|
+
is_oaicompat = is_custom_openai_api and agent_loop_choice != "UITARS"
|
|
833
1050
|
|
|
834
1051
|
# Get API key based on provider determined by get_provider_and_model
|
|
835
1052
|
if is_oaicompat and custom_api_key:
|
|
836
|
-
# Use custom API key if provided for custom model
|
|
1053
|
+
# Use custom API key if provided for OpenAI compatible API custom model
|
|
837
1054
|
api_key = custom_api_key
|
|
838
1055
|
print(
|
|
839
|
-
f"DEBUG - Using custom API key for model: {final_model_name_to_send}"
|
|
1056
|
+
f"DEBUG - Using custom API key for OpenAI compatible API model: {final_model_name_to_send}"
|
|
840
1057
|
)
|
|
1058
|
+
elif provider == LLMProvider.OLLAMA:
|
|
1059
|
+
# No API key needed for Ollama
|
|
1060
|
+
api_key = ""
|
|
1061
|
+
print(f"DEBUG - No API key needed for Ollama model: {final_model_name_to_send}")
|
|
841
1062
|
elif provider == LLMProvider.OPENAI:
|
|
842
1063
|
# Use OpenAI key from input if provided, otherwise use environment variable
|
|
843
1064
|
api_key = openai_key_input if openai_key_input else (openai_api_key or os.environ.get("OPENAI_API_KEY", ""))
|
|
844
1065
|
if openai_key_input:
|
|
845
|
-
|
|
1066
|
+
# Set the environment variable for the OpenAI API key
|
|
1067
|
+
os.environ["OPENAI_API_KEY"] = openai_key_input
|
|
1068
|
+
print(f"DEBUG - Using provided OpenAI API key from UI and set as environment variable")
|
|
846
1069
|
elif provider == LLMProvider.ANTHROPIC:
|
|
847
1070
|
# Use Anthropic key from input if provided, otherwise use environment variable
|
|
848
1071
|
api_key = anthropic_key_input if anthropic_key_input else (anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", ""))
|
|
849
1072
|
if anthropic_key_input:
|
|
850
|
-
|
|
1073
|
+
# Set the environment variable for the Anthropic API key
|
|
1074
|
+
os.environ["ANTHROPIC_API_KEY"] = anthropic_key_input
|
|
1075
|
+
print(f"DEBUG - Using provided Anthropic API key from UI and set as environment variable")
|
|
851
1076
|
else:
|
|
852
1077
|
# For Ollama or default OAICOMPAT (without custom key), no key needed/expected
|
|
853
1078
|
api_key = ""
|
|
@@ -866,8 +1091,8 @@ if __name__ == "__main__":
|
|
|
866
1091
|
|
|
867
1092
|
# Create or update the agent
|
|
868
1093
|
create_agent(
|
|
869
|
-
# Provider determined by
|
|
870
|
-
provider=
|
|
1094
|
+
# Provider determined by special cases and get_provider_and_model
|
|
1095
|
+
provider=provider,
|
|
871
1096
|
agent_loop=agent_loop_type,
|
|
872
1097
|
# Pass the FINAL determined model name (cleaned or custom)
|
|
873
1098
|
model_name=final_model_name_to_send,
|
|
@@ -980,13 +1205,21 @@ if __name__ == "__main__":
|
|
|
980
1205
|
# Update with error message
|
|
981
1206
|
history.append(gr.ChatMessage(role="assistant", content=f"Error: {str(e)}"))
|
|
982
1207
|
yield history
|
|
983
|
-
|
|
984
|
-
# Connect the
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
[
|
|
1208
|
+
|
|
1209
|
+
# Connect the submit button to the process_response function
|
|
1210
|
+
submit_event = msg.submit(
|
|
1211
|
+
fn=chat_submit,
|
|
1212
|
+
inputs=[msg, chatbot_history],
|
|
1213
|
+
outputs=[msg, chatbot_history],
|
|
1214
|
+
queue=False,
|
|
1215
|
+
).then(
|
|
1216
|
+
fn=process_response,
|
|
1217
|
+
inputs=[
|
|
988
1218
|
chatbot_history,
|
|
989
|
-
|
|
1219
|
+
openai_model_choice,
|
|
1220
|
+
anthropic_model_choice,
|
|
1221
|
+
omni_model_choice,
|
|
1222
|
+
uitars_model_choice,
|
|
990
1223
|
custom_model,
|
|
991
1224
|
agent_loop,
|
|
992
1225
|
save_trajectory,
|
|
@@ -996,44 +1229,22 @@ if __name__ == "__main__":
|
|
|
996
1229
|
openai_api_key_input,
|
|
997
1230
|
anthropic_api_key_input,
|
|
998
1231
|
],
|
|
999
|
-
[chatbot_history],
|
|
1232
|
+
outputs=[chatbot_history],
|
|
1233
|
+
queue=True,
|
|
1000
1234
|
)
|
|
1001
1235
|
|
|
1002
1236
|
# Clear button functionality
|
|
1003
1237
|
clear.click(lambda: None, None, chatbot_history, queue=False)
|
|
1004
|
-
|
|
1005
|
-
# Connect agent_loop changes to model selection
|
|
1006
|
-
agent_loop.change(
|
|
1007
|
-
fn=update_model_choices,
|
|
1008
|
-
inputs=[agent_loop],
|
|
1009
|
-
outputs=[model_choice],
|
|
1010
|
-
queue=False, # Process immediately without queueing
|
|
1011
|
-
)
|
|
1012
|
-
|
|
1013
|
-
# Show/hide custom model, provider base URL, and API key textboxes based on dropdown selection
|
|
1014
|
-
def update_custom_model_visibility(model_value):
|
|
1015
|
-
is_custom = model_value == "Custom model..."
|
|
1016
|
-
return (
|
|
1017
|
-
gr.update(visible=is_custom),
|
|
1018
|
-
gr.update(visible=is_custom),
|
|
1019
|
-
gr.update(visible=is_custom),
|
|
1020
|
-
)
|
|
1021
|
-
|
|
1022
|
-
model_choice.change(
|
|
1023
|
-
fn=update_custom_model_visibility,
|
|
1024
|
-
inputs=[model_choice],
|
|
1025
|
-
outputs=[custom_model, provider_base_url, provider_api_key],
|
|
1026
|
-
queue=False, # Process immediately without queueing
|
|
1027
|
-
)
|
|
1028
1238
|
|
|
1029
|
-
# Connect
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
queue=False
|
|
1239
|
+
# Connect cancel button to cancel function
|
|
1240
|
+
cancel_button.click(
|
|
1241
|
+
cancel_agent_task,
|
|
1242
|
+
[chatbot_history],
|
|
1243
|
+
[chatbot_history],
|
|
1244
|
+
queue=False # Process immediately without queueing
|
|
1035
1245
|
)
|
|
1036
1246
|
|
|
1247
|
+
|
|
1037
1248
|
# Function to update the code display based on configuration and chat history
|
|
1038
1249
|
def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val):
|
|
1039
1250
|
# Extract messages from chat history
|
|
@@ -1043,9 +1254,72 @@ if __name__ == "__main__":
|
|
|
1043
1254
|
if msg.get("role") == "user":
|
|
1044
1255
|
messages.append(msg.get("content", ""))
|
|
1045
1256
|
|
|
1046
|
-
# Determine
|
|
1047
|
-
|
|
1048
|
-
|
|
1257
|
+
# Determine if this is a custom model selection and which type
|
|
1258
|
+
is_custom_openai_api = model_choice_val == "Custom model (OpenAI compatible API)"
|
|
1259
|
+
is_custom_ollama = model_choice_val == "Custom model (ollama)"
|
|
1260
|
+
is_custom_model_selected = is_custom_openai_api or is_custom_ollama
|
|
1261
|
+
|
|
1262
|
+
# Determine provider and model name based on agent loop
|
|
1263
|
+
if agent_loop == "OPENAI":
|
|
1264
|
+
# For OPENAI loop, always use OPENAI provider with computer-use-preview
|
|
1265
|
+
provider = LLMProvider.OPENAI
|
|
1266
|
+
model_name = "computer-use-preview"
|
|
1267
|
+
elif agent_loop == "ANTHROPIC":
|
|
1268
|
+
# For ANTHROPIC loop, always use ANTHROPIC provider
|
|
1269
|
+
provider = LLMProvider.ANTHROPIC
|
|
1270
|
+
# Extract model name from the UI string
|
|
1271
|
+
if model_choice_val.startswith("Anthropic: Claude "):
|
|
1272
|
+
# Extract the model name based on the UI string
|
|
1273
|
+
model_parts = model_choice_val.replace("Anthropic: Claude ", "").split(" (")
|
|
1274
|
+
version = model_parts[0] # e.g., "3.7 Sonnet"
|
|
1275
|
+
date = model_parts[1].replace(")", "") if len(model_parts) > 1 else "" # e.g., "20250219"
|
|
1276
|
+
|
|
1277
|
+
# Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620
|
|
1278
|
+
version = version.replace(".", "-").replace(" ", "-").lower()
|
|
1279
|
+
model_name = f"claude-{version}-{date}"
|
|
1280
|
+
else:
|
|
1281
|
+
# Use the model_choice_val directly if it doesn't match the expected format
|
|
1282
|
+
model_name = model_choice_val
|
|
1283
|
+
elif agent_loop == "UITARS":
|
|
1284
|
+
# For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for custom
|
|
1285
|
+
if model_choice_val == "Custom model (OpenAI compatible API)":
|
|
1286
|
+
provider = LLMProvider.OAICOMPAT
|
|
1287
|
+
model_name = custom_model_val
|
|
1288
|
+
else:
|
|
1289
|
+
provider = LLMProvider.MLXVLM
|
|
1290
|
+
model_name = model_choice_val
|
|
1291
|
+
elif agent_loop == "OMNI":
|
|
1292
|
+
# For OMNI, provider can be OPENAI, ANTHROPIC, OLLAMA, or OAICOMPAT
|
|
1293
|
+
if is_custom_openai_api:
|
|
1294
|
+
provider = LLMProvider.OAICOMPAT
|
|
1295
|
+
model_name = custom_model_val
|
|
1296
|
+
elif is_custom_ollama:
|
|
1297
|
+
provider = LLMProvider.OLLAMA
|
|
1298
|
+
model_name = custom_model_val
|
|
1299
|
+
elif model_choice_val.startswith("OMNI: OpenAI "):
|
|
1300
|
+
provider = LLMProvider.OPENAI
|
|
1301
|
+
# Extract model name from UI string (e.g., "OMNI: OpenAI GPT-4o" -> "gpt-4o")
|
|
1302
|
+
model_name = model_choice_val.replace("OMNI: OpenAI ", "").lower().replace(" ", "-")
|
|
1303
|
+
elif model_choice_val.startswith("OMNI: Claude "):
|
|
1304
|
+
provider = LLMProvider.ANTHROPIC
|
|
1305
|
+
# Extract model name from UI string (similar to ANTHROPIC loop case)
|
|
1306
|
+
model_parts = model_choice_val.replace("OMNI: Claude ", "").split(" (")
|
|
1307
|
+
version = model_parts[0] # e.g., "3.7 Sonnet"
|
|
1308
|
+
date = model_parts[1].replace(")", "") if len(model_parts) > 1 else "" # e.g., "20250219"
|
|
1309
|
+
|
|
1310
|
+
# Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620
|
|
1311
|
+
version = version.replace(".", "-").replace(" ", "-").lower()
|
|
1312
|
+
model_name = f"claude-{version}-{date}"
|
|
1313
|
+
elif model_choice_val.startswith("OMNI: Ollama "):
|
|
1314
|
+
provider = LLMProvider.OLLAMA
|
|
1315
|
+
# Extract model name from UI string (e.g., "OMNI: Ollama llama3" -> "llama3")
|
|
1316
|
+
model_name = model_choice_val.replace("OMNI: Ollama ", "")
|
|
1317
|
+
else:
|
|
1318
|
+
# Fallback to get_provider_and_model for any other cases
|
|
1319
|
+
provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop)
|
|
1320
|
+
else:
|
|
1321
|
+
# Fallback for any other agent loop
|
|
1322
|
+
provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop)
|
|
1049
1323
|
|
|
1050
1324
|
# Generate and return the code
|
|
1051
1325
|
return generate_python_code(
|
|
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
|
|
|
6
6
|
|
|
7
7
|
[project]
|
|
8
8
|
name = "cua-agent"
|
|
9
|
-
version = "0.1.
|
|
9
|
+
version = "0.1.39"
|
|
10
10
|
description = "CUA (Computer Use) Agent for AI-driven computer interaction"
|
|
11
11
|
readme = "README.md"
|
|
12
12
|
authors = [
|
|
@@ -109,7 +109,7 @@ target-version = [
|
|
|
109
109
|
|
|
110
110
|
[tool.ruff]
|
|
111
111
|
line-length = 100
|
|
112
|
-
target-version = "0.1.
|
|
112
|
+
target-version = "0.1.39"
|
|
113
113
|
select = [
|
|
114
114
|
"E",
|
|
115
115
|
"F",
|
|
@@ -123,7 +123,7 @@ docstring-code-format = true
|
|
|
123
123
|
|
|
124
124
|
[tool.mypy]
|
|
125
125
|
strict = true
|
|
126
|
-
python_version = "0.1.
|
|
126
|
+
python_version = "0.1.39"
|
|
127
127
|
ignore_missing_imports = true
|
|
128
128
|
disallow_untyped_defs = true
|
|
129
129
|
check_untyped_defs = true
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|