cua-agent 0.1.33__py3-none-any.whl → 0.1.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic.
- agent/ui/gradio/app.py +248 -72
- {cua_agent-0.1.33.dist-info → cua_agent-0.1.35.dist-info}/METADATA +1 -1
- {cua_agent-0.1.33.dist-info → cua_agent-0.1.35.dist-info}/RECORD +5 -5
- {cua_agent-0.1.33.dist-info → cua_agent-0.1.35.dist-info}/WHEEL +0 -0
- {cua_agent-0.1.33.dist-info → cua_agent-0.1.35.dist-info}/entry_points.txt +0 -0
agent/ui/gradio/app.py CHANGED
@@ -412,25 +412,23 @@ def create_gradio_ui(
     openai_api_key = os.environ.get("OPENAI_API_KEY", "")
     anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY", "")

-    #
-    openai_models = []
-    anthropic_models = [
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    ]
-    omni_models += ["OMNI: Claude 3.7 Sonnet (20250219)", "OMNI: Claude 3.5 Sonnet (20240620)"]
+    # Always show models regardless of API key availability
+    openai_models = ["OpenAI: Computer-Use Preview"]
+    anthropic_models = [
+        "Anthropic: Claude 3.7 Sonnet (20250219)",
+        "Anthropic: Claude 3.5 Sonnet (20240620)",
+    ]
+    omni_models = [
+        "OMNI: OpenAI GPT-4o",
+        "OMNI: OpenAI GPT-4o mini",
+        "OMNI: OpenAI GPT-4.5-preview",
+        "OMNI: Claude 3.7 Sonnet (20250219)",
+        "OMNI: Claude 3.5 Sonnet (20240620)"
+    ]
+
+    # Check if API keys are available
+    has_openai_key = bool(openai_api_key)
+    has_anthropic_key = bool(anthropic_api_key)

     # Get Ollama models for OMNI
     ollama_models = get_ollama_models()
@@ -480,6 +478,83 @@ def create_gradio_ui(
         "Open Safari, search for 'macOS automation tools', and save the first three results as bookmarks",
         "Configure SSH keys and set up a connection to a remote server",
     ]
+
+    # Function to generate Python code based on configuration and tasks
+    def generate_python_code(agent_loop_choice, provider, model_name, tasks, provider_url, recent_images=3, save_trajectory=True):
+        """Generate Python code for the current configuration and tasks.
+
+        Args:
+            agent_loop_choice: The agent loop type (e.g., UITARS, OPENAI, ANTHROPIC, OMNI)
+            provider: The provider type (e.g., OPENAI, ANTHROPIC, OLLAMA, OAICOMPAT)
+            model_name: The model name
+            tasks: List of tasks to execute
+            provider_url: The provider base URL for OAICOMPAT providers
+            recent_images: Number of recent images to keep in context
+            save_trajectory: Whether to save the agent trajectory
+
+        Returns:
+            Formatted Python code as a string
+        """
+        # Format the tasks as a Python list
+        tasks_str = ""
+        for task in tasks:
+            if task and task.strip():
+                tasks_str += f' "{task}",\n'
+
+        # Create the Python code template
+        code = f'''import asyncio
+from computer import Computer
+from agent import ComputerAgent, LLM, AgentLoop, LLMProvider
+
+async def main():
+    async with Computer() as macos_computer:
+        agent = ComputerAgent(
+            computer=macos_computer,
+            loop=AgentLoop.{agent_loop_choice},
+            only_n_most_recent_images={recent_images},
+            save_trajectory={save_trajectory},'''
+
+        # Add the model configuration based on provider
+        if provider == LLMProvider.OAICOMPAT:
+            code += f'''
+            model=LLM(
+                provider=LLMProvider.OAICOMPAT,
+                name="{model_name}",
+                provider_base_url="{provider_url}"
+            )'''
+
+        code += """
+        )
+"""
+
+        # Add tasks section if there are tasks
+        if tasks_str:
+            code += f'''
+        # Prompts for the computer-use agent
+        tasks = [
+{tasks_str.rstrip()}
+        ]
+
+        for task in tasks:
+            print(f"Executing task: {{task}}")
+            async for result in agent.run(task):
+                print(result)'''
+        else:
+            # If no tasks, just add a placeholder for a single task
+            code += f'''
+        # Execute a single task
+        task = "Search for information about CUA on GitHub"
+        print(f"Executing task: {{task}}")
+        async for result in agent.run(task):
+            print(result)'''
+
+        # Add the main block
+        code += '''
+
+if __name__ == "__main__":
+    asyncio.run(main())'''
+
+        return code

     # Function to update model choices based on agent loop selection
     def update_model_choices(loop):
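For reference, a script emitted by this helper for the OAICOMPAT branch with no tasks entered looks roughly like the following. This is an illustrative reconstruction from the template above, not a verbatim capture of the function's output; the loop, model name, and provider URL are the defaults that appear elsewhere in this diff, and it assumes the cua `computer` and `agent` packages are installed:

```python
import asyncio
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider

async def main():
    async with Computer() as macos_computer:
        agent = ComputerAgent(
            computer=macos_computer,
            loop=AgentLoop.OMNI,            # agent_loop_choice
            only_n_most_recent_images=3,    # recent_images
            save_trajectory=True,           # save_trajectory
            model=LLM(
                provider=LLMProvider.OAICOMPAT,
                name="gpt-4o",                                      # default model name shown in the UI
                provider_base_url="https://openrouter.ai/api/v1"    # default provider_url shown in the UI
            )
        )

        # Placeholder branch used by the template when no tasks are given
        task = "Search for information about CUA on GitHub"
        print(f"Executing task: {task}")
        async for result in agent.run(task):
            print(result)

if __name__ == "__main__":
    asyncio.run(main())
```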
@@ -491,16 +566,51 @@ def create_gradio_ui(
             elif "Custom model..." not in models:
                 models.append("Custom model...")

-
-
-
+            # Show both OpenAI and Anthropic key inputs for OMNI if keys aren't set
+            return [
+                gr.update(choices=models, value=models[0] if models else "Custom model...", interactive=True),
+                gr.update(visible=not has_openai_key),
+                gr.update(visible=not has_anthropic_key)
+            ]
+        elif loop == "OPENAI":
+            # Show only OpenAI key input for OPENAI loop if key isn't set
+            if not models:
+                return [
+                    gr.update(choices=["No models available"], value="No models available", interactive=True),
+                    gr.update(visible=not has_openai_key),
+                    gr.update(visible=False)
+                ]
+            return [
+                gr.update(choices=models, value=models[0] if models else None, interactive=True),
+                gr.update(visible=not has_openai_key),
+                gr.update(visible=False)
+            ]
+        elif loop == "ANTHROPIC":
+            # Show only Anthropic key input for ANTHROPIC loop if key isn't set
+            if not models:
+                return [
+                    gr.update(choices=["No models available"], value="No models available", interactive=True),
+                    gr.update(visible=False),
+                    gr.update(visible=not has_anthropic_key)
+                ]
+            return [
+                gr.update(choices=models, value=models[0] if models else None, interactive=True),
+                gr.update(visible=False),
+                gr.update(visible=not has_anthropic_key)
+            ]
         else:
-            # For other providers,
+            # For other providers (like UITARS), don't show API key inputs
             if not models:
-                return
-                choices=["No models available"], value="No models available", interactive=True
-
-
+                return [
+                    gr.update(choices=["No models available"], value="No models available", interactive=True),
+                    gr.update(visible=False),
+                    gr.update(visible=False)
+                ]
+            return [
+                gr.update(choices=models, value=models[0] if models else None, interactive=True),
+                gr.update(visible=False),
+                gr.update(visible=False)
+            ]

     # Create the Gradio interface with advanced UI
     with gr.Blocks(title="Computer-Use Agent") as demo:
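For context, the three values returned here follow the usual Gradio pattern of returning one `gr.update(...)` per output component; the `.change()` wiring later in this diff maps them onto the model dropdown and the two API-key groups. A minimal, self-contained sketch of that pattern (not taken from the package; the component names and choices below are illustrative):

```python
import gradio as gr

def on_loop_change(loop: str):
    # One gr.update per output component, in the same order as the `outputs` list below.
    return [
        gr.update(visible=loop in ("OPENAI", "OMNI")),     # OpenAI key group
        gr.update(visible=loop in ("ANTHROPIC", "OMNI")),  # Anthropic key group
    ]

with gr.Blocks() as demo:
    loop = gr.Dropdown(choices=["OPENAI", "ANTHROPIC", "OMNI", "UITARS"], value="OPENAI", label="Agent Loop")
    with gr.Group(visible=True) as openai_key_group:
        gr.Textbox(label="OpenAI API Key", type="password")
    with gr.Group(visible=False) as anthropic_key_group:
        gr.Textbox(label="Anthropic API Key", type="password")
    loop.change(on_loop_change, inputs=[loop], outputs=[openai_key_group, anthropic_key_group])

if __name__ == "__main__":
    demo.launch()
```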
@@ -537,50 +647,20 @@ def create_gradio_ui(
                     """
                 )

-                # Add
-                with gr.Accordion("
-                    gr.
-                        ""
-
-
-
-
-
-
-
-
-                        ```bash
-                        sudo /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/trycua/cua/main/libs/lume/scripts/install.sh)"
-                        ```
-
-                        ### 2. Start the Lume daemon service
-
-                        In a separate terminal:
-
-                        ```bash
-                        lume serve
-                        ```
-
-                        ### 3. Pull the pre-built macOS image
-
-                        ```bash
-                        lume pull macos-sequoia-cua:latest
-                        ```
-
-                        Initial download requires 80GB storage, but reduces to ~30GB after first run due to macOS's sparse file system.
-
-                        VMs are stored in `~/.lume`, and locally cached images are stored in `~/.lume/cache`.
-
-                        ### 4. Test the sandbox
-
-                        ```bash
-                        lume run macos-sequoia-cua:latest
-                        ```
-
-                        For more detailed instructions, visit the [CUA GitHub repository](https://github.com/trycua/cua).
-                        """
+                # Add accordion for Python code
+                with gr.Accordion("Python Code", open=False):
+                    code_display = gr.Code(
+                        language="python",
+                        value=generate_python_code(
+                            initial_loop,
+                            LLMProvider.OPENAI,
+                            "gpt-4o",
+                            [],
+                            "https://openrouter.ai/api/v1"
+                        ),
+                        interactive=False,
                     )
-
+
                 with gr.Accordion("Configuration", open=True):
                     # Configuration options
                     agent_loop = gr.Dropdown(
@@ -599,6 +679,27 @@ def create_gradio_ui(
                        interactive=True,
                    )

+                    # Add API key inputs for OpenAI and Anthropic
+                    with gr.Group(visible=not has_openai_key and (initial_loop == "OPENAI" or initial_loop == "OMNI")) as openai_key_group:
+                        openai_api_key_input = gr.Textbox(
+                            label="OpenAI API Key",
+                            placeholder="Enter your OpenAI API key",
+                            value="",
+                            interactive=True,
+                            type="password",
+                            info="Required for OpenAI models"
+                        )
+
+                    with gr.Group(visible=not has_anthropic_key and (initial_loop == "ANTHROPIC" or initial_loop == "OMNI")) as anthropic_key_group:
+                        anthropic_api_key_input = gr.Textbox(
+                            label="Anthropic API Key",
+                            placeholder="Enter your Anthropic API key",
+                            value="",
+                            interactive=True,
+                            type="password",
+                            info="Required for Anthropic models"
+                        )
+
                    # Add custom model textbox (only visible when "Custom model..." is selected)
                    custom_model = gr.Textbox(
                        label="Custom Model Name",
@@ -643,6 +744,7 @@ def create_gradio_ui(
                        info="Number of recent images to keep in context",
                        interactive=True,
                    )
+

            # Right column for chat interface
            with gr.Column(scale=2):
@@ -676,6 +778,8 @@ def create_gradio_ui(
                recent_imgs,
                custom_url_value=None,
                custom_api_key=None,
+                openai_key_input=None,
+                anthropic_key_input=None,
            ):
                if not history:
                    yield history
@@ -721,9 +825,15 @@ def create_gradio_ui(
                        f"DEBUG - Using custom API key for model: {final_model_name_to_send}"
                    )
                elif provider == LLMProvider.OPENAI:
-
+                    # Use OpenAI key from input if provided, otherwise use environment variable
+                    api_key = openai_key_input if openai_key_input else (openai_api_key or os.environ.get("OPENAI_API_KEY", ""))
+                    if openai_key_input:
+                        print(f"DEBUG - Using provided OpenAI API key from UI")
                elif provider == LLMProvider.ANTHROPIC:
-
+                    # Use Anthropic key from input if provided, otherwise use environment variable
+                    api_key = anthropic_key_input if anthropic_key_input else (anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", ""))
+                    if anthropic_key_input:
+                        print(f"DEBUG - Using provided Anthropic API key from UI")
                else:
                    # For Ollama or default OAICOMPAT (without custom key), no key needed/expected
                    api_key = ""
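The resolution order here is: a key typed into the UI wins, then the key captured when the UI was built, then the environment variable. A minimal sketch of that precedence as a standalone helper (hypothetical, not part of the package):

```python
import os
from typing import Optional

def resolve_api_key(ui_value: Optional[str], startup_value: str, env_var: str) -> str:
    """Prefer a key entered in the UI, then the key read at startup, then the environment."""
    if ui_value:
        return ui_value
    return startup_value or os.environ.get(env_var, "")

# e.g. resolve_api_key(openai_key_input, openai_api_key, "OPENAI_API_KEY")
```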
@@ -869,6 +979,8 @@ def create_gradio_ui(
                    recent_images,
                    provider_base_url,
                    provider_api_key,
+                    openai_api_key_input,
+                    anthropic_api_key_input,
                ],
                [chatbot_history],
            )
@@ -899,6 +1011,70 @@ def create_gradio_ui(
            outputs=[custom_model, provider_base_url, provider_api_key],
            queue=False, # Process immediately without queueing
        )
+
+        # Connect agent_loop changes to model selection and API key visibility
+        agent_loop.change(
+            fn=update_model_choices,
+            inputs=[agent_loop],
+            outputs=[model_choice, openai_key_group, anthropic_key_group],
+            queue=False, # Process immediately without queueing
+        )
+
+        # Function to update the code display based on configuration and chat history
+        def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val):
+            # Extract messages from chat history
+            messages = []
+            if chat_history:
+                for msg in chat_history:
+                    if msg.get("role") == "user":
+                        messages.append(msg.get("content", ""))
+
+            # Determine provider and model name based on selection
+            model_string = custom_model_val if model_choice_val == "Custom model..." else model_choice_val
+            provider, model_name, _ = get_provider_and_model(model_string, agent_loop)
+
+            # Generate and return the code
+            return generate_python_code(
+                agent_loop,
+                provider,
+                model_name,
+                messages,
+                provider_base_url,
+                recent_images_val,
+                save_trajectory_val
+            )
+
+        # Update code display when configuration changes
+        agent_loop.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            outputs=[code_display]
+        )
+        model_choice.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            outputs=[code_display]
+        )
+        custom_model.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            outputs=[code_display]
+        )
+        chatbot_history.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            outputs=[code_display]
+        )
+        recent_images.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            outputs=[code_display]
+        )
+        save_trajectory.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            outputs=[code_display]
+        )

    return demo

{cua_agent-0.1.33.dist-info → cua_agent-0.1.35.dist-info}/RECORD CHANGED
@@ -78,8 +78,8 @@ agent/providers/uitars/utils.py,sha256=S6FiZ3P-O4B15P1Gdup2o7SyuIu4nSQbspxcektpw
 agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
 agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
 agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
-agent/ui/gradio/app.py,sha256=
-cua_agent-0.1.
-cua_agent-0.1.
-cua_agent-0.1.
-cua_agent-0.1.
+agent/ui/gradio/app.py,sha256=q_nS6JJLlu1Y9xu56YHR26l_ypgaK3zR3v6BfpZT4qc,49396
+cua_agent-0.1.35.dist-info/METADATA,sha256=C7b0g8sHR6-3eWEJFQwuelXf6MKPeD8_Z8Z5aPwoikQ,11335
+cua_agent-0.1.35.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
+cua_agent-0.1.35.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+cua_agent-0.1.35.dist-info/RECORD,,

{cua_agent-0.1.33.dist-info → cua_agent-0.1.35.dist-info}/WHEEL
File without changes

{cua_agent-0.1.33.dist-info → cua_agent-0.1.35.dist-info}/entry_points.txt
File without changes