cua-agent 0.1.37__py3-none-any.whl → 0.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic; consult the registry's advisory page for details.

agent/ui/gradio/app.py CHANGED
@@ -6,7 +6,7 @@ with an advanced UI for model selection and configuration.
6
6
 
7
7
  Supported Agent Loops and Models:
8
8
  - AgentLoop.OPENAI: Uses OpenAI Operator CUA model
9
- computer_use_preview
9
+ computer-use-preview
10
10
 
11
11
  - AgentLoop.ANTHROPIC: Uses Anthropic Computer-Use models
12
12
  • claude-3-5-sonnet-20240620
@@ -133,12 +133,12 @@ class GradioChatScreenshotHandler(DefaultCallbackHandler):
133
133
  MODEL_MAPPINGS = {
134
134
  "openai": {
135
135
  # Default to operator CUA model
136
- "default": "computer_use_preview",
136
+ "default": "computer-use-preview",
137
137
  # Map standard OpenAI model names to CUA-specific model names
138
- "gpt-4-turbo": "computer_use_preview",
139
- "gpt-4o": "computer_use_preview",
140
- "gpt-4": "computer_use_preview",
141
- "gpt-4.5-preview": "computer_use_preview",
138
+ "gpt-4-turbo": "computer-use-preview",
139
+ "gpt-4o": "computer-use-preview",
140
+ "gpt-4": "computer-use-preview",
141
+ "gpt-4.5-preview": "computer-use-preview",
142
142
  "gpt-4o-mini": "gpt-4o-mini",
143
143
  },
144
144
  "anthropic": {
@@ -217,7 +217,7 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
217
217
  # Determine provider and clean model name based on the full string from UI
218
218
  cleaned_model_name = model_name # Default to using the name as-is (for custom)
219
219
 
220
- if model_name == "Custom model...":
220
+ if model_name == "Custom model (OpenAI compatible API)":
221
221
  # Actual model name comes from custom_model_value via model_to_use.
222
222
  # Assume OAICOMPAT for custom models unless overridden by URL/key later?
223
223
  # get_provider_and_model determines the *initial* provider/model.
@@ -278,8 +278,8 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
278
278
  break
279
279
  # Note: No fallback needed here as we explicitly check against omni keys
280
280
 
281
- else: # Handles unexpected formats or the raw custom name if "Custom model..." selected
282
- # Should only happen if user selected "Custom model..."
281
+ else: # Handles unexpected formats or the raw custom name if "Custom model (OpenAI compatible API)" selected
282
+ # Should only happen if user selected "Custom model (OpenAI compatible API)"
283
283
  # Or if a model name format isn't caught above
284
284
  provider = LLMProvider.OAICOMPAT
285
285
  cleaned_model_name = (
@@ -291,7 +291,7 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
291
291
  # agent_loop remains AgentLoop.OMNI
292
292
  elif agent_loop == AgentLoop.UITARS:
293
293
  # For UITARS, use MLXVLM provider for the MLX models, OAICOMPAT for custom
294
- if model_name == "Custom model...":
294
+ if model_name == "Custom model (OpenAI compatible API)":
295
295
  provider = LLMProvider.OAICOMPAT
296
296
  model_name_to_use = "tgi"
297
297
  else:
@@ -449,11 +449,11 @@ def create_gradio_ui(
449
449
  provider_to_models = {
450
450
  "OPENAI": openai_models,
451
451
  "ANTHROPIC": anthropic_models,
452
- "OMNI": omni_models + ["Custom model..."], # Add custom model option
452
+ "OMNI": omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"], # Add custom model options
453
453
  "UITARS": [
454
454
  "mlx-community/UI-TARS-1.5-7B-4bit",
455
455
  "mlx-community/UI-TARS-1.5-7B-6bit",
456
- "Custom model..."
456
+ "Custom model (OpenAI compatible API)"
457
457
  ], # UI-TARS options with MLX models
458
458
  }
459
459
 
@@ -474,9 +474,9 @@ def create_gradio_ui(
474
474
  initial_model = anthropic_models[0] if anthropic_models else "No models available"
475
475
  else: # OMNI
476
476
  initial_model = omni_models[0] if omni_models else "No models available"
477
- if "Custom model..." in available_models_for_loop:
477
+ if "Custom model (OpenAI compatible API)" in available_models_for_loop:
478
478
  initial_model = (
479
- "Custom model..." # Default to custom if available and no other default fits
479
+ "Custom model (OpenAI compatible API)" # Default to custom if available and no other default fits
480
480
  )
481
481
 
482
482
  initial_custom_model = saved_settings.get("custom_model", "Qwen2.5-VL-7B-Instruct")
@@ -499,7 +499,7 @@ def create_gradio_ui(
499
499
 
500
500
  Args:
501
501
  agent_loop_choice: The agent loop type (e.g., UITARS, OPENAI, ANTHROPIC, OMNI)
502
- provider: The provider type (e.g., OPENAI, ANTHROPIC, OLLAMA, OAICOMPAT)
502
+ provider: The provider type (e.g., OPENAI, ANTHROPIC, OLLAMA, OAICOMPAT, MLXVLM)
503
503
  model_name: The model name
504
504
  tasks: List of tasks to execute
505
505
  provider_url: The provider base URL for OAICOMPAT providers
@@ -528,14 +528,58 @@ async def main():
528
528
  only_n_most_recent_images={recent_images},
529
529
  save_trajectory={save_trajectory},'''
530
530
 
531
- # Add the model configuration based on provider
532
- if provider == LLMProvider.OAICOMPAT:
531
+ # Add the model configuration based on provider and agent loop
532
+ if agent_loop_choice == "OPENAI":
533
+ # For OPENAI loop, always use OPENAI provider with computer-use-preview
533
534
  code += f'''
535
+ model=LLM(
536
+ provider=LLMProvider.OPENAI,
537
+ name="computer-use-preview"
538
+ )'''
539
+ elif agent_loop_choice == "ANTHROPIC":
540
+ # For ANTHROPIC loop, always use ANTHROPIC provider
541
+ code += f'''
542
+ model=LLM(
543
+ provider=LLMProvider.ANTHROPIC,
544
+ name="{model_name}"
545
+ )'''
546
+ elif agent_loop_choice == "UITARS":
547
+ # For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for others
548
+ if provider == LLMProvider.MLXVLM:
549
+ code += f'''
550
+ model=LLM(
551
+ provider=LLMProvider.MLXVLM,
552
+ name="{model_name}"
553
+ )'''
554
+ else: # OAICOMPAT
555
+ code += f'''
556
+ model=LLM(
557
+ provider=LLMProvider.OAICOMPAT,
558
+ name="{model_name}",
559
+ provider_base_url="{provider_url}"
560
+ )'''
561
+ elif agent_loop_choice == "OMNI":
562
+ # For OMNI, provider can be OPENAI, ANTHROPIC, OLLAMA, or OAICOMPAT
563
+ if provider == LLMProvider.OAICOMPAT:
564
+ code += f'''
534
565
  model=LLM(
535
566
  provider=LLMProvider.OAICOMPAT,
536
567
  name="{model_name}",
537
568
  provider_base_url="{provider_url}"
538
569
  )'''
570
+ else: # OPENAI, ANTHROPIC, OLLAMA
571
+ code += f'''
572
+ model=LLM(
573
+ provider=LLMProvider.{provider.name},
574
+ name="{model_name}"
575
+ )'''
576
+ else:
577
+ # Default case - just use the provided provider and model
578
+ code += f'''
579
+ model=LLM(
580
+ provider=LLMProvider.{provider.name},
581
+ name="{model_name}"
582
+ )'''
539
583
 
540
584
  code += """
541
585
  )
@@ -561,6 +605,8 @@ async def main():
561
605
  print(f"Executing task: {{task}}")
562
606
  async for result in agent.run(task):
563
607
  print(result)'''
608
+
609
+
564
610
 
565
611
  # Add the main block
566
612
  code += '''
@@ -570,62 +616,6 @@ if __name__ == "__main__":
570
616
 
571
617
  return code
572
618
 
573
- # Function to update model choices based on agent loop selection
574
- def update_model_choices(loop):
575
- models = provider_to_models.get(loop, [])
576
- if loop == "OMNI":
577
- # For OMNI, include the custom model option
578
- if not models:
579
- models = ["Custom model..."]
580
- elif "Custom model..." not in models:
581
- models.append("Custom model...")
582
-
583
- # Show both OpenAI and Anthropic key inputs for OMNI if keys aren't set
584
- return [
585
- gr.update(choices=models, value=models[0] if models else "Custom model...", interactive=True),
586
- gr.update(visible=not has_openai_key),
587
- gr.update(visible=not has_anthropic_key)
588
- ]
589
- elif loop == "OPENAI":
590
- # Show only OpenAI key input for OPENAI loop if key isn't set
591
- if not models:
592
- return [
593
- gr.update(choices=["No models available"], value="No models available", interactive=True),
594
- gr.update(visible=not has_openai_key),
595
- gr.update(visible=False)
596
- ]
597
- return [
598
- gr.update(choices=models, value=models[0] if models else None, interactive=True),
599
- gr.update(visible=not has_openai_key),
600
- gr.update(visible=False)
601
- ]
602
- elif loop == "ANTHROPIC":
603
- # Show only Anthropic key input for ANTHROPIC loop if key isn't set
604
- if not models:
605
- return [
606
- gr.update(choices=["No models available"], value="No models available", interactive=True),
607
- gr.update(visible=False),
608
- gr.update(visible=not has_anthropic_key)
609
- ]
610
- return [
611
- gr.update(choices=models, value=models[0] if models else None, interactive=True),
612
- gr.update(visible=False),
613
- gr.update(visible=not has_anthropic_key)
614
- ]
615
- else:
616
- # For other providers (like UITARS), don't show API key inputs
617
- if not models:
618
- return [
619
- gr.update(choices=["No models available"], value="No models available", interactive=True),
620
- gr.update(visible=False),
621
- gr.update(visible=False)
622
- ]
623
- return [
624
- gr.update(choices=models, value=models[0] if models else None, interactive=True),
625
- gr.update(visible=False),
626
- gr.update(visible=False)
627
- ]
628
-
629
619
  # Create the Gradio interface with advanced UI
630
620
  with gr.Blocks(title="Computer-Use Agent") as demo:
631
621
  with gr.Row():
@@ -684,14 +674,52 @@ if __name__ == "__main__":
684
674
  info="Select the agent loop provider",
685
675
  )
686
676
 
687
- # Create model selection dropdown with custom value support for OMNI
688
- model_choice = gr.Dropdown(
689
- choices=provider_to_models.get(initial_loop, ["No models available"]),
690
- label="LLM Provider and Model",
691
- value=initial_model,
692
- info="Select model or choose 'Custom model...' to enter a custom name",
693
- interactive=True,
694
- )
677
+
678
+ # Create separate model selection dropdowns for each provider type
679
+ # This avoids the Gradio bug with updating choices
680
+ with gr.Group() as model_selection_group:
681
+ # OpenAI models dropdown
682
+ openai_model_choice = gr.Dropdown(
683
+ choices=openai_models,
684
+ label="OpenAI Model",
685
+ value=openai_models[0] if openai_models else "No models available",
686
+ info="Select OpenAI model",
687
+ interactive=True,
688
+ visible=(initial_loop == "OPENAI")
689
+ )
690
+
691
+ # Anthropic models dropdown
692
+ anthropic_model_choice = gr.Dropdown(
693
+ choices=anthropic_models,
694
+ label="Anthropic Model",
695
+ value=anthropic_models[0] if anthropic_models else "No models available",
696
+ info="Select Anthropic model",
697
+ interactive=True,
698
+ visible=(initial_loop == "ANTHROPIC")
699
+ )
700
+
701
+ # OMNI models dropdown
702
+ omni_model_choice = gr.Dropdown(
703
+ choices=omni_models + ["Custom model (OpenAI compatible API)", "Custom model (ollama)"],
704
+ label="OMNI Model",
705
+ value=omni_models[0] if omni_models else "Custom model (OpenAI compatible API)",
706
+ info="Select OMNI model or choose a custom model option",
707
+ interactive=True,
708
+ visible=(initial_loop == "OMNI")
709
+ )
710
+
711
+ # UITARS models dropdown
712
+ uitars_model_choice = gr.Dropdown(
713
+ choices=provider_to_models.get("UITARS", ["No models available"]),
714
+ label="UITARS Model",
715
+ value=provider_to_models.get("UITARS", ["No models available"])[0] if provider_to_models.get("UITARS") else "No models available",
716
+ info="Select UITARS model",
717
+ interactive=True,
718
+ visible=(initial_loop == "UITARS")
719
+ )
720
+
721
+ # Hidden field to store the selected model (for compatibility with existing code)
722
+ model_choice = gr.Textbox(visible=False)
695
723
 
696
724
  # Add API key inputs for OpenAI and Anthropic
697
725
  with gr.Group(visible=not has_openai_key and (initial_loop == "OPENAI" or initial_loop == "OMNI")) as openai_key_group:
@@ -713,34 +741,176 @@ if __name__ == "__main__":
713
741
  type="password",
714
742
  info="Required for Anthropic models"
715
743
  )
744
+
745
+ # Function to set OpenAI API key environment variable
746
+ def set_openai_api_key(key):
747
+ if key and key.strip():
748
+ os.environ["OPENAI_API_KEY"] = key.strip()
749
+ print(f"DEBUG - Set OpenAI API key environment variable")
750
+ return key
751
+
752
+ # Function to set Anthropic API key environment variable
753
+ def set_anthropic_api_key(key):
754
+ if key and key.strip():
755
+ os.environ["ANTHROPIC_API_KEY"] = key.strip()
756
+ print(f"DEBUG - Set Anthropic API key environment variable")
757
+ return key
758
+
759
+ # Add change event handlers for API key inputs
760
+ openai_api_key_input.change(
761
+ fn=set_openai_api_key,
762
+ inputs=[openai_api_key_input],
763
+ outputs=[openai_api_key_input],
764
+ queue=False
765
+ )
766
+
767
+ anthropic_api_key_input.change(
768
+ fn=set_anthropic_api_key,
769
+ inputs=[anthropic_api_key_input],
770
+ outputs=[anthropic_api_key_input],
771
+ queue=False
772
+ )
716
773
 
717
- # Add custom model textbox (only visible when "Custom model..." is selected)
774
+ # Combined function to update UI based on selections
775
+ def update_ui(loop=None, openai_model=None, anthropic_model=None, omni_model=None, uitars_model=None):
776
+ # Default values if not provided
777
+ loop = loop or agent_loop.value
778
+
779
+ # Determine which model value to use for custom model checks
780
+ model_value = None
781
+ if loop == "OPENAI" and openai_model:
782
+ model_value = openai_model
783
+ elif loop == "ANTHROPIC" and anthropic_model:
784
+ model_value = anthropic_model
785
+ elif loop == "OMNI" and omni_model:
786
+ model_value = omni_model
787
+ elif loop == "UITARS" and uitars_model:
788
+ model_value = uitars_model
789
+
790
+ # Show/hide appropriate model dropdown based on loop selection
791
+ openai_visible = (loop == "OPENAI")
792
+ anthropic_visible = (loop == "ANTHROPIC")
793
+ omni_visible = (loop == "OMNI")
794
+ uitars_visible = (loop == "UITARS")
795
+
796
+ # Show/hide API key inputs based on loop selection
797
+ show_openai_key = not has_openai_key and (loop == "OPENAI" or (loop == "OMNI" and model_value and "OpenAI" in model_value and "Custom" not in model_value))
798
+ show_anthropic_key = not has_anthropic_key and (loop == "ANTHROPIC" or (loop == "OMNI" and model_value and "Claude" in model_value and "Custom" not in model_value))
799
+
800
+ # Determine custom model visibility
801
+ is_custom_openai_api = model_value == "Custom model (OpenAI compatible API)"
802
+ is_custom_ollama = model_value == "Custom model (ollama)"
803
+ is_any_custom = is_custom_openai_api or is_custom_ollama
804
+
805
+ # Update the hidden model_choice field based on the visible dropdown
806
+ model_choice_value = model_value if model_value else ""
807
+
808
+ # Return all UI updates
809
+ return [
810
+ # Model dropdowns visibility
811
+ gr.update(visible=openai_visible),
812
+ gr.update(visible=anthropic_visible),
813
+ gr.update(visible=omni_visible),
814
+ gr.update(visible=uitars_visible),
815
+ # API key inputs visibility
816
+ gr.update(visible=show_openai_key),
817
+ gr.update(visible=show_anthropic_key),
818
+ # Custom model fields visibility
819
+ gr.update(visible=is_any_custom), # Custom model name always visible for any custom option
820
+ gr.update(visible=is_custom_openai_api), # Provider base URL only for OpenAI compatible API
821
+ # Update the hidden model_choice field
822
+ gr.update(value=model_choice_value)
823
+ ]
824
+
825
+ # Add custom model textbox (visible for both custom model options)
718
826
  custom_model = gr.Textbox(
719
827
  label="Custom Model Name",
720
- placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct)",
828
+ placeholder="Enter custom model name (e.g., Qwen2.5-VL-7B-Instruct or llama3)",
721
829
  value=initial_custom_model,
722
- visible=(initial_model == "Custom model..."),
830
+ visible=(initial_model == "Custom model (OpenAI compatible API)" or initial_model == "Custom model (ollama)"),
723
831
  interactive=True,
724
832
  )
725
833
 
726
- # Add custom provider base URL textbox (only visible when "Custom model..." is selected)
834
+ # Add custom provider base URL textbox (only visible for OpenAI compatible API)
727
835
  provider_base_url = gr.Textbox(
728
836
  label="Provider Base URL",
729
837
  placeholder="Enter provider base URL (e.g., http://localhost:1234/v1)",
730
838
  value=initial_provider_base_url,
731
- visible=(initial_model == "Custom model..."),
839
+ visible=(initial_model == "Custom model (OpenAI compatible API)"),
732
840
  interactive=True,
733
841
  )
734
842
 
735
- # Add custom API key textbox (only visible when "Custom model..." is selected)
843
+ # Add custom API key textbox (only visible for OpenAI compatible API)
736
844
  provider_api_key = gr.Textbox(
737
845
  label="Provider API Key",
738
846
  placeholder="Enter provider API key (if required)",
739
847
  value="",
740
- visible=(initial_model == "Custom model..."),
848
+ visible=(initial_model == "Custom model (OpenAI compatible API)"),
741
849
  interactive=True,
742
850
  type="password",
743
851
  )
852
+
853
+ # Connect agent_loop changes to update all UI elements
854
+ agent_loop.change(
855
+ fn=update_ui,
856
+ inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
857
+ outputs=[
858
+ openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
859
+ openai_key_group, anthropic_key_group,
860
+ custom_model, provider_base_url, provider_api_key,
861
+ model_choice # Add model_choice to outputs
862
+ ],
863
+ queue=False # Process immediately without queueing
864
+ )
865
+
866
+ # Connect each model dropdown to update UI
867
+ omni_model_choice.change(
868
+ fn=update_ui,
869
+ inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
870
+ outputs=[
871
+ openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
872
+ openai_key_group, anthropic_key_group,
873
+ custom_model, provider_base_url, provider_api_key,
874
+ model_choice # Add model_choice to outputs
875
+ ],
876
+ queue=False
877
+ )
878
+
879
+ uitars_model_choice.change(
880
+ fn=update_ui,
881
+ inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
882
+ outputs=[
883
+ openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
884
+ openai_key_group, anthropic_key_group,
885
+ custom_model, provider_base_url, provider_api_key,
886
+ model_choice # Add model_choice to outputs
887
+ ],
888
+ queue=False
889
+ )
890
+
891
+ openai_model_choice.change(
892
+ fn=update_ui,
893
+ inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
894
+ outputs=[
895
+ openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
896
+ openai_key_group, anthropic_key_group,
897
+ custom_model, provider_base_url, provider_api_key,
898
+ model_choice # Add model_choice to outputs
899
+ ],
900
+ queue=False
901
+ )
902
+
903
+ anthropic_model_choice.change(
904
+ fn=update_ui,
905
+ inputs=[agent_loop, openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice],
906
+ outputs=[
907
+ openai_model_choice, anthropic_model_choice, omni_model_choice, uitars_model_choice,
908
+ openai_key_group, anthropic_key_group,
909
+ custom_model, provider_base_url, provider_api_key,
910
+ model_choice # Add model_choice to outputs
911
+ ],
912
+ queue=False
913
+ )
744
914
 
745
915
  save_trajectory = gr.Checkbox(
746
916
  label="Save Trajectory",
@@ -772,6 +942,9 @@ if __name__ == "__main__":
772
942
  placeholder="Ask me to perform tasks in a virtual macOS environment"
773
943
  )
774
944
  clear = gr.Button("Clear")
945
+
946
+ # Add cancel button
947
+ cancel_button = gr.Button("Cancel", variant="stop")
775
948
 
776
949
  # Add examples
777
950
  example_group = gr.Examples(examples=example_messages, inputs=msg)
@@ -782,10 +955,28 @@ if __name__ == "__main__":
782
955
  history.append(gr.ChatMessage(role="user", content=message))
783
956
  return "", history
784
957
 
958
+ # Function to cancel the running agent
959
+ async def cancel_agent_task(history):
960
+ global global_agent
961
+ if global_agent and hasattr(global_agent, '_loop'):
962
+ print("DEBUG - Cancelling agent task")
963
+ # Cancel the agent loop
964
+ if hasattr(global_agent._loop, 'cancel') and callable(global_agent._loop.cancel):
965
+ await global_agent._loop.cancel()
966
+ history.append(gr.ChatMessage(role="assistant", content="Task cancelled by user", metadata={"title": "❌ Cancelled"}))
967
+ else:
968
+ history.append(gr.ChatMessage(role="assistant", content="Could not cancel task: cancel method not found", metadata={"title": "⚠️ Warning"}))
969
+ else:
970
+ history.append(gr.ChatMessage(role="assistant", content="No active agent task to cancel", metadata={"title": "ℹ️ Info"}))
971
+ return history
972
+
785
973
  # Function to process agent response after user input
786
974
  async def process_response(
787
975
  history,
788
- model_choice_value,
976
+ openai_model_value,
977
+ anthropic_model_value,
978
+ omni_model_value,
979
+ uitars_model_value,
789
980
  custom_model_value,
790
981
  agent_loop_choice,
791
982
  save_traj,
@@ -802,21 +993,47 @@ if __name__ == "__main__":
802
993
  # Get the last user message
803
994
  last_user_message = history[-1]["content"]
804
995
 
996
+ # Get the appropriate model value based on the agent loop
997
+ if agent_loop_choice == "OPENAI":
998
+ model_choice_value = openai_model_value
999
+ elif agent_loop_choice == "ANTHROPIC":
1000
+ model_choice_value = anthropic_model_value
1001
+ elif agent_loop_choice == "OMNI":
1002
+ model_choice_value = omni_model_value
1003
+ elif agent_loop_choice == "UITARS":
1004
+ model_choice_value = uitars_model_value
1005
+ else:
1006
+ model_choice_value = "No models available"
1007
+
1008
+ # Determine if this is a custom model selection and which type
1009
+ is_custom_openai_api = model_choice_value == "Custom model (OpenAI compatible API)"
1010
+ is_custom_ollama = model_choice_value == "Custom model (ollama)"
1011
+ is_custom_model_selected = is_custom_openai_api or is_custom_ollama
1012
+
805
1013
  # Determine the model name string to analyze: custom or from dropdown
806
- model_string_to_analyze = (
807
- custom_model_value
808
- if model_choice_value == "Custom model..."
809
- else model_choice_value # Use the full UI string initially
810
- )
811
-
812
- # Determine if this is a custom model selection
813
- is_custom_model_selected = model_choice_value == "Custom model..."
1014
+ if is_custom_model_selected:
1015
+ model_string_to_analyze = custom_model_value
1016
+ else:
1017
+ model_string_to_analyze = model_choice_value # Use the full UI string initially
814
1018
 
815
1019
  try:
816
- # Get the provider, *cleaned* model name, and agent loop type
817
- provider, cleaned_model_name_from_func, agent_loop_type = (
818
- get_provider_and_model(model_string_to_analyze, agent_loop_choice)
819
- )
1020
+ # Special case for UITARS - use MLXVLM provider
1021
+ if agent_loop_choice == "UITARS":
1022
+ provider = LLMProvider.MLXVLM
1023
+ cleaned_model_name_from_func = model_string_to_analyze
1024
+ agent_loop_type = AgentLoop.UITARS
1025
+ print(f"Using MLXVLM provider for UITARS model: {model_string_to_analyze}")
1026
+ # Special case for Ollama custom model
1027
+ elif is_custom_ollama and agent_loop_choice == "OMNI":
1028
+ provider = LLMProvider.OLLAMA
1029
+ cleaned_model_name_from_func = custom_model_value
1030
+ agent_loop_type = AgentLoop.OMNI
1031
+ print(f"Using Ollama provider for custom model: {custom_model_value}")
1032
+ else:
1033
+ # Get the provider, *cleaned* model name, and agent loop type
1034
+ provider, cleaned_model_name_from_func, agent_loop_type = (
1035
+ get_provider_and_model(model_string_to_analyze, agent_loop_choice)
1036
+ )
820
1037
 
821
1038
  print(f"provider={provider} cleaned_model_name_from_func={cleaned_model_name_from_func} agent_loop_type={agent_loop_type} agent_loop_choice={agent_loop_choice}")
822
1039
 
@@ -828,26 +1045,34 @@ if __name__ == "__main__":
828
1045
  else cleaned_model_name_from_func
829
1046
  )
830
1047
 
831
- # Determine if OAICOMPAT should be used (only if custom model explicitly selected)
832
- is_oaicompat = is_custom_model_selected
1048
+ # Determine if OAICOMPAT should be used (only for OpenAI compatible API custom model)
1049
+ is_oaicompat = is_custom_openai_api and agent_loop_choice != "UITARS"
833
1050
 
834
1051
  # Get API key based on provider determined by get_provider_and_model
835
1052
  if is_oaicompat and custom_api_key:
836
- # Use custom API key if provided for custom model
1053
+ # Use custom API key if provided for OpenAI compatible API custom model
837
1054
  api_key = custom_api_key
838
1055
  print(
839
- f"DEBUG - Using custom API key for model: {final_model_name_to_send}"
1056
+ f"DEBUG - Using custom API key for OpenAI compatible API model: {final_model_name_to_send}"
840
1057
  )
1058
+ elif provider == LLMProvider.OLLAMA:
1059
+ # No API key needed for Ollama
1060
+ api_key = ""
1061
+ print(f"DEBUG - No API key needed for Ollama model: {final_model_name_to_send}")
841
1062
  elif provider == LLMProvider.OPENAI:
842
1063
  # Use OpenAI key from input if provided, otherwise use environment variable
843
1064
  api_key = openai_key_input if openai_key_input else (openai_api_key or os.environ.get("OPENAI_API_KEY", ""))
844
1065
  if openai_key_input:
845
- print(f"DEBUG - Using provided OpenAI API key from UI")
1066
+ # Set the environment variable for the OpenAI API key
1067
+ os.environ["OPENAI_API_KEY"] = openai_key_input
1068
+ print(f"DEBUG - Using provided OpenAI API key from UI and set as environment variable")
846
1069
  elif provider == LLMProvider.ANTHROPIC:
847
1070
  # Use Anthropic key from input if provided, otherwise use environment variable
848
1071
  api_key = anthropic_key_input if anthropic_key_input else (anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY", ""))
849
1072
  if anthropic_key_input:
850
- print(f"DEBUG - Using provided Anthropic API key from UI")
1073
+ # Set the environment variable for the Anthropic API key
1074
+ os.environ["ANTHROPIC_API_KEY"] = anthropic_key_input
1075
+ print(f"DEBUG - Using provided Anthropic API key from UI and set as environment variable")
851
1076
  else:
852
1077
  # For Ollama or default OAICOMPAT (without custom key), no key needed/expected
853
1078
  api_key = ""
@@ -866,8 +1091,8 @@ if __name__ == "__main__":
866
1091
 
867
1092
  # Create or update the agent
868
1093
  create_agent(
869
- # Provider determined by get_provider_and_model unless custom model selected
870
- provider=LLMProvider.OAICOMPAT if is_oaicompat else provider,
1094
+ # Provider determined by special cases and get_provider_and_model
1095
+ provider=provider,
871
1096
  agent_loop=agent_loop_type,
872
1097
  # Pass the FINAL determined model name (cleaned or custom)
873
1098
  model_name=final_model_name_to_send,
@@ -980,13 +1205,21 @@ if __name__ == "__main__":
980
1205
  # Update with error message
981
1206
  history.append(gr.ChatMessage(role="assistant", content=f"Error: {str(e)}"))
982
1207
  yield history
983
-
984
- # Connect the components
985
- msg.submit(chat_submit, [msg, chatbot_history], [msg, chatbot_history]).then(
986
- process_response,
987
- [
1208
+
1209
+ # Connect the submit button to the process_response function
1210
+ submit_event = msg.submit(
1211
+ fn=chat_submit,
1212
+ inputs=[msg, chatbot_history],
1213
+ outputs=[msg, chatbot_history],
1214
+ queue=False,
1215
+ ).then(
1216
+ fn=process_response,
1217
+ inputs=[
988
1218
  chatbot_history,
989
- model_choice,
1219
+ openai_model_choice,
1220
+ anthropic_model_choice,
1221
+ omni_model_choice,
1222
+ uitars_model_choice,
990
1223
  custom_model,
991
1224
  agent_loop,
992
1225
  save_trajectory,
@@ -996,44 +1229,22 @@ if __name__ == "__main__":
996
1229
  openai_api_key_input,
997
1230
  anthropic_api_key_input,
998
1231
  ],
999
- [chatbot_history],
1232
+ outputs=[chatbot_history],
1233
+ queue=True,
1000
1234
  )
1001
1235
 
1002
1236
  # Clear button functionality
1003
1237
  clear.click(lambda: None, None, chatbot_history, queue=False)
1004
-
1005
- # Connect agent_loop changes to model selection
1006
- agent_loop.change(
1007
- fn=update_model_choices,
1008
- inputs=[agent_loop],
1009
- outputs=[model_choice],
1010
- queue=False, # Process immediately without queueing
1011
- )
1012
-
1013
- # Show/hide custom model, provider base URL, and API key textboxes based on dropdown selection
1014
- def update_custom_model_visibility(model_value):
1015
- is_custom = model_value == "Custom model..."
1016
- return (
1017
- gr.update(visible=is_custom),
1018
- gr.update(visible=is_custom),
1019
- gr.update(visible=is_custom),
1020
- )
1021
-
1022
- model_choice.change(
1023
- fn=update_custom_model_visibility,
1024
- inputs=[model_choice],
1025
- outputs=[custom_model, provider_base_url, provider_api_key],
1026
- queue=False, # Process immediately without queueing
1027
- )
1028
1238
 
1029
- # Connect agent_loop changes to model selection and API key visibility
1030
- agent_loop.change(
1031
- fn=update_model_choices,
1032
- inputs=[agent_loop],
1033
- outputs=[model_choice, openai_key_group, anthropic_key_group],
1034
- queue=False, # Process immediately without queueing
1239
+ # Connect cancel button to cancel function
1240
+ cancel_button.click(
1241
+ cancel_agent_task,
1242
+ [chatbot_history],
1243
+ [chatbot_history],
1244
+ queue=False # Process immediately without queueing
1035
1245
  )
1036
1246
 
1247
+
1037
1248
  # Function to update the code display based on configuration and chat history
1038
1249
  def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val):
1039
1250
  # Extract messages from chat history
@@ -1043,9 +1254,72 @@ if __name__ == "__main__":
1043
1254
  if msg.get("role") == "user":
1044
1255
  messages.append(msg.get("content", ""))
1045
1256
 
1046
- # Determine provider and model name based on selection
1047
- model_string = custom_model_val if model_choice_val == "Custom model..." else model_choice_val
1048
- provider, model_name, _ = get_provider_and_model(model_string, agent_loop)
1257
+ # Determine if this is a custom model selection and which type
1258
+ is_custom_openai_api = model_choice_val == "Custom model (OpenAI compatible API)"
1259
+ is_custom_ollama = model_choice_val == "Custom model (ollama)"
1260
+ is_custom_model_selected = is_custom_openai_api or is_custom_ollama
1261
+
1262
+ # Determine provider and model name based on agent loop
1263
+ if agent_loop == "OPENAI":
1264
+ # For OPENAI loop, always use OPENAI provider with computer-use-preview
1265
+ provider = LLMProvider.OPENAI
1266
+ model_name = "computer-use-preview"
1267
+ elif agent_loop == "ANTHROPIC":
1268
+ # For ANTHROPIC loop, always use ANTHROPIC provider
1269
+ provider = LLMProvider.ANTHROPIC
1270
+ # Extract model name from the UI string
1271
+ if model_choice_val.startswith("Anthropic: Claude "):
1272
+ # Extract the model name based on the UI string
1273
+ model_parts = model_choice_val.replace("Anthropic: Claude ", "").split(" (")
1274
+ version = model_parts[0] # e.g., "3.7 Sonnet"
1275
+ date = model_parts[1].replace(")", "") if len(model_parts) > 1 else "" # e.g., "20250219"
1276
+
1277
+ # Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620
1278
+ version = version.replace(".", "-").replace(" ", "-").lower()
1279
+ model_name = f"claude-{version}-{date}"
1280
+ else:
1281
+ # Use the model_choice_val directly if it doesn't match the expected format
1282
+ model_name = model_choice_val
1283
+ elif agent_loop == "UITARS":
1284
+ # For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for custom
1285
+ if model_choice_val == "Custom model (OpenAI compatible API)":
1286
+ provider = LLMProvider.OAICOMPAT
1287
+ model_name = custom_model_val
1288
+ else:
1289
+ provider = LLMProvider.MLXVLM
1290
+ model_name = model_choice_val
1291
+ elif agent_loop == "OMNI":
1292
+ # For OMNI, provider can be OPENAI, ANTHROPIC, OLLAMA, or OAICOMPAT
1293
+ if is_custom_openai_api:
1294
+ provider = LLMProvider.OAICOMPAT
1295
+ model_name = custom_model_val
1296
+ elif is_custom_ollama:
1297
+ provider = LLMProvider.OLLAMA
1298
+ model_name = custom_model_val
1299
+ elif model_choice_val.startswith("OMNI: OpenAI "):
1300
+ provider = LLMProvider.OPENAI
1301
+ # Extract model name from UI string (e.g., "OMNI: OpenAI GPT-4o" -> "gpt-4o")
1302
+ model_name = model_choice_val.replace("OMNI: OpenAI ", "").lower().replace(" ", "-")
1303
+ elif model_choice_val.startswith("OMNI: Claude "):
1304
+ provider = LLMProvider.ANTHROPIC
1305
+ # Extract model name from UI string (similar to ANTHROPIC loop case)
1306
+ model_parts = model_choice_val.replace("OMNI: Claude ", "").split(" (")
1307
+ version = model_parts[0] # e.g., "3.7 Sonnet"
1308
+ date = model_parts[1].replace(")", "") if len(model_parts) > 1 else "" # e.g., "20250219"
1309
+
1310
+ # Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620
1311
+ version = version.replace(".", "-").replace(" ", "-").lower()
1312
+ model_name = f"claude-{version}-{date}"
1313
+ elif model_choice_val.startswith("OMNI: Ollama "):
1314
+ provider = LLMProvider.OLLAMA
1315
+ # Extract model name from UI string (e.g., "OMNI: Ollama llama3" -> "llama3")
1316
+ model_name = model_choice_val.replace("OMNI: Ollama ", "")
1317
+ else:
1318
+ # Fallback to get_provider_and_model for any other cases
1319
+ provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop)
1320
+ else:
1321
+ # Fallback for any other agent loop
1322
+ provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop)
1049
1323
 
1050
1324
  # Generate and return the code
1051
1325
  return generate_python_code(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.1.37
3
+ Version: 0.1.39
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.10
@@ -103,9 +103,12 @@ pip install "cua-agent[all]"
103
103
  pip install "cua-agent[openai]" # OpenAI Cua Loop
104
104
  pip install "cua-agent[anthropic]" # Anthropic Cua Loop
105
105
  pip install "cua-agent[uitars]" # UI-Tars support
106
- pip install "cua-agent[uitars-mlx]" # local UI-Tars support with MLXVLM
107
106
  pip install "cua-agent[omni]" # Cua Loop based on OmniParser (includes Ollama for local models)
108
107
  pip install "cua-agent[ui]" # Gradio UI for the agent
108
+
109
+ # For local UI-TARS with MLX support, you need to manually install mlx-vlm:
110
+ pip install "cua-agent[uitars-mlx]"
111
+ pip install git+https://github.com/ddupont808/mlx-vlm.git@stable/fix/qwen2-position-id # PR: https://github.com/Blaizzy/mlx-vlm/pull/349
109
112
  ```
110
113
 
111
114
  ## Run
@@ -79,8 +79,8 @@ agent/providers/uitars/utils.py,sha256=493STTEEJcVhVbQgR0e8rNTI1DjkxUx8IgIv3wkJ1
79
79
  agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
80
80
  agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
81
81
  agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
82
- agent/ui/gradio/app.py,sha256=uj6cT0sFgnaN_a7JMy-OMKyOVEiKhwl3b5bJ7RamUQY,50090
83
- cua_agent-0.1.37.dist-info/METADATA,sha256=Zvtfyd23U2UJTko82x6z5jzaJEuySC2TdQddMkHny28,12514
84
- cua_agent-0.1.37.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
85
- cua_agent-0.1.37.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
86
- cua_agent-0.1.37.dist-info/RECORD,,
82
+ agent/ui/gradio/app.py,sha256=dtyolxA53OXYnSuPY5CXXqcadobU2rDwEEjFymbgrZQ,67705
83
+ cua_agent-0.1.39.dist-info/METADATA,sha256=j54pBj9cIEy3Eb2ac8YtVucq8e5dTvKr4a7mJAuorwc,12689
84
+ cua_agent-0.1.39.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
85
+ cua_agent-0.1.39.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
86
+ cua_agent-0.1.39.dist-info/RECORD,,