cua-agent 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of cua-agent has been flagged as potentially problematic.

agent/ui/gradio/app.py CHANGED
@@ -290,7 +290,7 @@ def get_provider_and_model(model_name: str, loop_provider: str) -> tuple:
             model_name_to_use = cleaned_model_name
             # agent_loop remains AgentLoop.OMNI
     elif agent_loop == AgentLoop.UITARS:
-        # For UITARS, use MLXVLM provider for the MLX models, OAICOMPAT for custom
+        # For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for custom
         if model_name == "Custom model (OpenAI compatible API)":
             provider = LLMProvider.OAICOMPAT
             model_name_to_use = "tgi"
@@ -333,12 +333,25 @@ def get_ollama_models() -> List[str]:
         logging.error(f"Error getting Ollama models: {e}")
         return []

-def create_computer_instance(verbosity: int = logging.INFO) -> Computer:
+
+def create_computer_instance(
+    verbosity: int = logging.INFO,
+    os_type: str = "macos",
+    provider_type: str = "lume",
+    name: Optional[str] = None,
+    api_key: Optional[str] = None
+) -> Computer:
     """Create or get the global Computer instance."""
     global global_computer

     if global_computer is None:
-        global_computer = Computer(verbosity=verbosity)
+        global_computer = Computer(
+            verbosity=verbosity,
+            os_type=os_type,
+            provider_type=provider_type,
+            name=name if name else "",
+            api_key=api_key
+        )

     return global_computer

@@ -353,12 +366,22 @@ def create_agent(
     verbosity: int = logging.INFO,
     use_oaicompat: bool = False,
     provider_base_url: Optional[str] = None,
+    computer_os: str = "macos",
+    computer_provider: str = "lume",
+    computer_name: Optional[str] = None,
+    computer_api_key: Optional[str] = None,
 ) -> ComputerAgent:
     """Create or update the global agent with the specified parameters."""
     global global_agent

     # Create the computer if not already done
-    computer = create_computer_instance(verbosity=verbosity)
+    computer = create_computer_instance(
+        verbosity=verbosity,
+        os_type=computer_os,
+        provider_type=computer_provider,
+        name=computer_name,
+        api_key=computer_api_key
+    )

     # Get API key from environment if not provided
     if api_key is None:
@@ -401,6 +424,7 @@ def create_agent(

     return global_agent

+
 def create_gradio_ui(
     provider_name: str = "openai",
     model_name: str = "gpt-4o",
@@ -439,6 +463,9 @@ def create_gradio_ui(
     # Check if API keys are available
     has_openai_key = bool(openai_api_key)
     has_anthropic_key = bool(anthropic_api_key)
+
+    print("has_openai_key", has_openai_key)
+    print("has_anthropic_key", has_anthropic_key)

     # Get Ollama models for OMNI
     ollama_models = get_ollama_models()
@@ -473,7 +500,7 @@ def create_gradio_ui(
     elif initial_loop == "ANTHROPIC":
         initial_model = anthropic_models[0] if anthropic_models else "No models available"
     else:  # OMNI
-        initial_model = omni_models[0] if omni_models else "No models available"
+        initial_model = omni_models[0] if omni_models else "Custom model (OpenAI compatible API)"
         if "Custom model (OpenAI compatible API)" in available_models_for_loop:
             initial_model = (
                 "Custom model (OpenAI compatible API)"  # Default to custom if available and no other default fits
@@ -494,7 +521,7 @@ def create_gradio_ui(
     ]

     # Function to generate Python code based on configuration and tasks
-    def generate_python_code(agent_loop_choice, provider, model_name, tasks, provider_url, recent_images=3, save_trajectory=True):
+    def generate_python_code(agent_loop_choice, provider, model_name, tasks, provider_url, recent_images=3, save_trajectory=True, computer_os="macos", computer_provider="lume", container_name="", cua_cloud_api_key=""):
        """Generate Python code for the current configuration and tasks.

        Args:
@@ -505,6 +532,10 @@ def create_gradio_ui(
            provider_url: The provider base URL for OAICOMPAT providers
            recent_images: Number of recent images to keep in context
            save_trajectory: Whether to save the agent trajectory
+            computer_os: Operating system type for the computer
+            computer_provider: Provider type for the computer
+            container_name: Optional VM name
+            cua_cloud_api_key: Optional CUA Cloud API key

        Returns:
            Formatted Python code as a string
@@ -515,13 +546,29 @@ def create_gradio_ui(
            if task and task.strip():
                tasks_str += f' "{task}",\n'

-        # Create the Python code template
+        # Create the Python code template with computer configuration
+        computer_args = []
+        if computer_os != "macos":
+            computer_args.append(f'os_type="{computer_os}"')
+        if computer_provider != "lume":
+            computer_args.append(f'provider_type="{computer_provider}"')
+        if container_name:
+            computer_args.append(f'name="{container_name}"')
+        if cua_cloud_api_key:
+            computer_args.append(f'api_key="{cua_cloud_api_key}"')
+
+        computer_args_str = ", ".join(computer_args)
+        if computer_args_str:
+            computer_args_str = f"({computer_args_str})"
+        else:
+            computer_args_str = "()"
+
         code = f'''import asyncio
 from computer import Computer
 from agent import ComputerAgent, LLM, AgentLoop, LLMProvider

 async def main():
-    async with Computer() as macos_computer:
+    async with Computer{computer_args_str} as macos_computer:
         agent = ComputerAgent(
             computer=macos_computer,
             loop=AgentLoop.{agent_loop_choice},
@@ -660,12 +707,49 @@ if __name__ == "__main__":
                 LLMProvider.OPENAI,
                 "gpt-4o",
                 [],
-                "https://openrouter.ai/api/v1"
+                "https://openrouter.ai/api/v1",
+                3,  # recent_images default
+                True,  # save_trajectory default
+                "macos",
+                "lume",
+                "",
+                ""
             ),
             interactive=False,
         )

-        with gr.Accordion("Configuration", open=True):
+        with gr.Accordion("Computer Configuration", open=True):
+            # Computer configuration options
+            computer_os = gr.Radio(
+                choices=["macos", "linux"],
+                label="Operating System",
+                value="macos",
+                info="Select the operating system for the computer",
+            )
+
+            computer_provider = gr.Radio(
+                choices=["cloud", "lume"],
+                label="Provider",
+                value="lume",
+                info="Select the computer provider",
+            )
+
+            container_name = gr.Textbox(
+                label="Container Name",
+                placeholder="Enter container name (optional)",
+                value="",
+                info="Optional name for the container",
+            )
+
+            cua_cloud_api_key = gr.Textbox(
+                label="CUA Cloud API Key",
+                placeholder="Enter your CUA Cloud API key",
+                value="",
+                type="password",
+                info="Required for cloud provider",
+            )
+
+        with gr.Accordion("Agent Configuration", open=True):
             # Configuration options
             agent_loop = gr.Dropdown(
                 choices=["OPENAI", "ANTHROPIC", "OMNI", "UITARS"],
@@ -986,6 +1070,10 @@ if __name__ == "__main__":
            custom_api_key=None,
            openai_key_input=None,
            anthropic_key_input=None,
+            computer_os="macos",
+            computer_provider="lume",
+            container_name="",
+            cua_cloud_api_key="",
        ):
            if not history:
                yield history
@@ -1092,6 +1180,10 @@ if __name__ == "__main__":
                "provider_base_url": custom_url_value,
                "save_trajectory": save_traj,
                "recent_images": recent_imgs,
+                "computer_os": computer_os,
+                "computer_provider": computer_provider,
+                "container_name": container_name,
+                "cua_cloud_api_key": cua_cloud_api_key,
            }
            save_settings(current_settings)
            # --- End Save Settings ---
@@ -1109,6 +1201,10 @@ if __name__ == "__main__":
                use_oaicompat=is_oaicompat,  # Set flag if custom model was selected
                # Pass custom URL only if custom model was selected
                provider_base_url=custom_url_value if is_oaicompat else None,
+                computer_os=computer_os,
+                computer_provider=computer_provider,
+                computer_name=container_name,
+                computer_api_key=cua_cloud_api_key,
                verbosity=logging.DEBUG,  # Added verbosity here
            )

@@ -1235,6 +1331,10 @@ if __name__ == "__main__":
                provider_api_key,
                openai_api_key_input,
                anthropic_api_key_input,
+                computer_os,
+                computer_provider,
+                container_name,
+                cua_cloud_api_key,
            ],
            outputs=[chatbot_history],
            queue=True,
@@ -1253,82 +1353,20 @@ if __name__ == "__main__":


        # Function to update the code display based on configuration and chat history
-        def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val):
+        def update_code_display(agent_loop, model_choice_val, custom_model_val, chat_history, provider_base_url, recent_images_val, save_trajectory_val, computer_os, computer_provider, container_name, cua_cloud_api_key):
            # Extract messages from chat history
            messages = []
            if chat_history:
                for msg in chat_history:
-                    if msg.get("role") == "user":
+                    if isinstance(msg, dict) and msg.get("role") == "user":
                        messages.append(msg.get("content", ""))

-            # Determine if this is a custom model selection and which type
-            is_custom_openai_api = model_choice_val == "Custom model (OpenAI compatible API)"
-            is_custom_ollama = model_choice_val == "Custom model (ollama)"
-            is_custom_model_selected = is_custom_openai_api or is_custom_ollama
-
-            # Determine provider and model name based on agent loop
-            if agent_loop == "OPENAI":
-                # For OPENAI loop, always use OPENAI provider with computer-use-preview
-                provider = LLMProvider.OPENAI
-                model_name = "computer-use-preview"
-            elif agent_loop == "ANTHROPIC":
-                # For ANTHROPIC loop, always use ANTHROPIC provider
-                provider = LLMProvider.ANTHROPIC
-                # Extract model name from the UI string
-                if model_choice_val.startswith("Anthropic: Claude "):
-                    # Extract the model name based on the UI string
-                    model_parts = model_choice_val.replace("Anthropic: Claude ", "").split(" (")
-                    version = model_parts[0]  # e.g., "3.7 Sonnet"
-                    date = model_parts[1].replace(")", "") if len(model_parts) > 1 else ""  # e.g., "20250219"
-
-                    # Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620
-                    version = version.replace(".", "-").replace(" ", "-").lower()
-                    model_name = f"claude-{version}-{date}"
-                else:
-                    # Use the model_choice_val directly if it doesn't match the expected format
-                    model_name = model_choice_val
-            elif agent_loop == "UITARS":
-                # For UITARS, use MLXVLM for mlx-community models, OAICOMPAT for custom
-                if model_choice_val == "Custom model (OpenAI compatible API)":
-                    provider = LLMProvider.OAICOMPAT
-                    model_name = custom_model_val
-                else:
-                    provider = LLMProvider.MLXVLM
-                    model_name = model_choice_val
-            elif agent_loop == "OMNI":
-                # For OMNI, provider can be OPENAI, ANTHROPIC, OLLAMA, or OAICOMPAT
-                if is_custom_openai_api:
-                    provider = LLMProvider.OAICOMPAT
-                    model_name = custom_model_val
-                elif is_custom_ollama:
-                    provider = LLMProvider.OLLAMA
-                    model_name = custom_model_val
-                elif model_choice_val.startswith("OMNI: OpenAI "):
-                    provider = LLMProvider.OPENAI
-                    # Extract model name from UI string (e.g., "OMNI: OpenAI GPT-4o" -> "gpt-4o")
-                    model_name = model_choice_val.replace("OMNI: OpenAI ", "").lower().replace(" ", "-")
-                elif model_choice_val.startswith("OMNI: Claude "):
-                    provider = LLMProvider.ANTHROPIC
-                    # Extract model name from UI string (similar to ANTHROPIC loop case)
-                    model_parts = model_choice_val.replace("OMNI: Claude ", "").split(" (")
-                    version = model_parts[0]  # e.g., "3.7 Sonnet"
-                    date = model_parts[1].replace(")", "") if len(model_parts) > 1 else ""  # e.g., "20250219"
-
-                    # Format as claude-3-7-sonnet-20250219 or claude-3-5-sonnet-20240620
-                    version = version.replace(".", "-").replace(" ", "-").lower()
-                    model_name = f"claude-{version}-{date}"
-                elif model_choice_val.startswith("OMNI: Ollama "):
-                    provider = LLMProvider.OLLAMA
-                    # Extract model name from UI string (e.g., "OMNI: Ollama llama3" -> "llama3")
-                    model_name = model_choice_val.replace("OMNI: Ollama ", "")
-                else:
-                    # Fallback to get_provider_and_model for any other cases
-                    provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop)
-            else:
-                # Fallback for any other agent loop
-                provider, model_name, _ = get_provider_and_model(model_choice_val, agent_loop)
+            # Determine provider and model based on current selection
+            provider, model_name, _ = get_provider_and_model(
+                model_choice_val or custom_model_val or "gpt-4o",
+                agent_loop
+            )

-            # Generate and return the code
            return generate_python_code(
                agent_loop,
                provider,
@@ -1336,38 +1374,62 @@ if __name__ == "__main__":
                messages,
                provider_base_url,
                recent_images_val,
-                save_trajectory_val
+                save_trajectory_val,
+                computer_os,
+                computer_provider,
+                container_name,
+                cua_cloud_api_key
            )

        # Update code display when configuration changes
        agent_loop.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )
        model_choice.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )
        custom_model.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )
        chatbot_history.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )
        recent_images.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )
        save_trajectory.change(
            update_code_display,
-            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory],
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
+            outputs=[code_display]
+        )
+        computer_os.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
+            outputs=[code_display]
+        )
+        computer_provider.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
+            outputs=[code_display]
+        )
+        container_name.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
+            outputs=[code_display]
+        )
+        cua_cloud_api_key.change(
+            update_code_display,
+            inputs=[agent_loop, model_choice, custom_model, chatbot_history, provider_base_url, recent_images, save_trajectory, computer_os, computer_provider, container_name, cua_cloud_api_key],
            outputs=[code_display]
        )

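Taken together, the app.py changes above thread four new computer-configuration options (operating system, provider, container name, CUA Cloud API key) from the new Gradio controls through create_computer_instance() and create_agent() and into the template emitted by generate_python_code(). As a rough illustration, here is a minimal sketch of the kind of script that template now produces when a cloud container is configured; the container name and API key are placeholders, the OMNI loop is just an example choice, and the remaining ComputerAgent arguments are elided exactly as they are in the hunk above.

import asyncio
from computer import Computer
from agent import ComputerAgent, LLM, AgentLoop, LLMProvider

async def main():
    # New in 0.2.2: os_type, provider_type, name and api_key are forwarded to Computer
    # (omitting them keeps the previous defaults, i.e. a local macOS VM via lume).
    async with Computer(
        os_type="linux",
        provider_type="cloud",
        name="my-container",            # placeholder container name
        api_key="<cua-cloud-api-key>",  # placeholder CUA Cloud API key
    ) as macos_computer:
        agent = ComputerAgent(
            computer=macos_computer,
            loop=AgentLoop.OMNI,
            # ... LLM/model arguments as in the generated template ...
        )

if __name__ == "__main__":
    asyncio.run(main())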
cua_agent-0.2.1.dist-info/METADATA → cua_agent-0.2.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cua-agent
-Version: 0.2.1
+Version: 0.2.2
 Summary: CUA (Computer Use) Agent for AI-driven computer interaction
 Author-Email: TryCua <gh@trycua.com>
 Requires-Python: >=3.10
cua_agent-0.2.1.dist-info/RECORD → cua_agent-0.2.2.dist-info/RECORD CHANGED
@@ -79,8 +79,8 @@ agent/providers/uitars/utils.py,sha256=493STTEEJcVhVbQgR0e8rNTI1DjkxUx8IgIv3wkJ1
 agent/telemetry.py,sha256=pVGxbj0ewnvq4EGj28CydN4a1iOfvZR_XKL3vIOqhOM,390
 agent/ui/__init__.py,sha256=ohhxJLBin6k1hl5sKcmBST8mgh23WXgAXz3pN4f470E,45
 agent/ui/gradio/__init__.py,sha256=ANKZhv1HqsLheWbLVBlyRQ7Q5qGeXuPi5jDs8vu-ZMo,579
-agent/ui/gradio/app.py,sha256=-ccsE6LrXFfxnPeMlEqm49QGvdjCgm-l6TudZZEM9r0,68241
-cua_agent-0.2.1.dist-info/METADATA,sha256=g3ca5FEJpxPobVoOrOW2ysqNFnEzwFQhTPvtq4zyLNs,12688
-cua_agent-0.2.1.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
-cua_agent-0.2.1.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-cua_agent-0.2.1.dist-info/RECORD,,
+agent/ui/gradio/app.py,sha256=c6K5Pb-iP4N6PuXCkrkbB6g6FFTOjAcYAz7pj-fbBlY,69915
+cua_agent-0.2.2.dist-info/METADATA,sha256=Z5JMKgdDMFXKgOg4-NWPohgSS0pRJdOLdVPzu7J52kc,12688
+cua_agent-0.2.2.dist-info/WHEEL,sha256=tSfRZzRHthuv7vxpI4aehrdN9scLjk-dCJkPLzkHxGg,90
+cua_agent-0.2.2.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+cua_agent-0.2.2.dist-info/RECORD,,