cua-agent 0.4.21__tar.gz → 0.4.23__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. Click here for more details.

Files changed (50)
  1. {cua_agent-0.4.21 → cua_agent-0.4.23}/PKG-INFO +1 -1
  2. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/agent.py +5 -5
  3. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/human_tool/ui.py +121 -105
  4. {cua_agent-0.4.21 → cua_agent-0.4.23}/pyproject.toml +1 -1
  5. {cua_agent-0.4.21 → cua_agent-0.4.23}/README.md +0 -0
  6. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/__init__.py +0 -0
  7. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/__main__.py +0 -0
  8. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/adapters/__init__.py +0 -0
  9. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/adapters/huggingfacelocal_adapter.py +0 -0
  10. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/adapters/human_adapter.py +0 -0
  11. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/adapters/mlxvlm_adapter.py +0 -0
  12. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/callbacks/__init__.py +0 -0
  13. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/callbacks/base.py +0 -0
  14. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/callbacks/budget_manager.py +0 -0
  15. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/callbacks/image_retention.py +0 -0
  16. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/callbacks/logging.py +0 -0
  17. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/callbacks/operator_validator.py +0 -0
  18. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/callbacks/pii_anonymization.py +0 -0
  19. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/callbacks/telemetry.py +0 -0
  20. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/callbacks/trajectory_saver.py +0 -0
  21. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/cli.py +0 -0
  22. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/computers/__init__.py +0 -0
  23. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/computers/base.py +0 -0
  24. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/computers/cua.py +0 -0
  25. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/computers/custom.py +0 -0
  26. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/decorators.py +0 -0
  27. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/human_tool/__init__.py +0 -0
  28. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/human_tool/__main__.py +0 -0
  29. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/human_tool/server.py +0 -0
  30. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/integrations/hud/__init__.py +0 -0
  31. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/integrations/hud/proxy.py +0 -0
  32. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/loops/__init__.py +0 -0
  33. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/loops/anthropic.py +0 -0
  34. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/loops/base.py +0 -0
  35. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/loops/composed_grounded.py +0 -0
  36. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/loops/glm45v.py +0 -0
  37. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/loops/gta1.py +0 -0
  38. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/loops/model_types.csv +0 -0
  39. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/loops/omniparser.py +0 -0
  40. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/loops/openai.py +0 -0
  41. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/loops/uitars.py +0 -0
  42. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/proxy/examples.py +0 -0
  43. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/proxy/handlers.py +0 -0
  44. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/responses.py +0 -0
  45. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/types.py +0 -0
  46. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/ui/__init__.py +0 -0
  47. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/ui/__main__.py +0 -0
  48. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/ui/gradio/__init__.py +0 -0
  49. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/ui/gradio/app.py +0 -0
  50. {cua_agent-0.4.21 → cua_agent-0.4.23}/agent/ui/gradio/ui_components.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.4.21
3
+ Version: 0.4.23
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.12
@@ -188,7 +188,11 @@ class ComputerAgent:
188
188
  max_trajectory_budget: If set, adds BudgetManagerCallback to track usage costs and stop when budget is exceeded
189
189
  telemetry_enabled: If set, adds TelemetryCallback to track anonymized usage data. Enabled by default.
190
190
  **kwargs: Additional arguments passed to the agent loop
191
- """
191
+ """
192
+ # If the loop is "human/human", we need to prefix a grounding model fallback
193
+ if model in ["human/human", "human"]:
194
+ model = "openai/computer-use-preview+human/human"
195
+
192
196
  self.model = model
193
197
  self.tools = tools or []
194
198
  self.custom_loop = custom_loop
@@ -253,10 +257,6 @@ class ComputerAgent:
253
257
 
254
258
  # == Initialize computer agent ==
255
259
 
256
- # If the loop is "human/human", we need to prefix a grounding model fallback
257
- if model in ["human/human", "human"]:
258
- model = "openai/computer-use-preview+human/human"
259
-
260
260
  # Find the appropriate agent loop
261
261
  if custom_loop:
262
262
  self.agent_loop = custom_loop
@@ -196,7 +196,9 @@ class HumanCompletionUI:
196
196
  gr.update(choices=["latest"], value="latest"), # dropdown
197
197
  gr.update(value=None), # image (no image)
198
198
  gr.update(value=[]), # chatbot (empty messages)
199
- gr.update(interactive=False) # submit button
199
+ gr.update(interactive=False), # submit button
200
+ gr.update(visible=False), # click_actions_group hidden
201
+ gr.update(visible=False), # actions_group hidden
200
202
  )
201
203
 
202
204
  # Sort pending calls by created_at to get oldest first
@@ -237,7 +239,9 @@ class HumanCompletionUI:
237
239
  gr.update(choices=choices, value="latest"),
238
240
  gr.update(value=self.last_image),
239
241
  gr.update(value=conversation),
240
- gr.update(interactive=bool(choices))
242
+ gr.update(interactive=bool(choices)),
243
+ gr.update(visible=True), # click_actions_group visible when there is a call
244
+ gr.update(visible=True), # actions_group visible when there is a call
241
245
  )
242
246
 
243
247
  def on_call_selected(self, selected_choice):
@@ -246,7 +250,9 @@ class HumanCompletionUI:
246
250
  return (
247
251
  gr.update(value=None), # no image
248
252
  gr.update(value=[]), # empty chatbot
249
- gr.update(interactive=False)
253
+ gr.update(interactive=False),
254
+ gr.update(visible=False), # click_actions_group hidden
255
+ gr.update(visible=False), # actions_group hidden
250
256
  )
251
257
 
252
258
  pending_calls = self.get_pending_calls()
@@ -254,7 +260,9 @@ class HumanCompletionUI:
254
260
  return (
255
261
  gr.update(value=None), # no image
256
262
  gr.update(value=[]), # empty chatbot
257
- gr.update(interactive=False)
263
+ gr.update(interactive=False),
264
+ gr.update(visible=False), # click_actions_group hidden
265
+ gr.update(visible=False), # actions_group hidden
258
266
  )
259
267
 
260
268
  # Handle "latest" option
@@ -286,7 +294,9 @@ class HumanCompletionUI:
286
294
  return (
287
295
  gr.update(value=None), # no image
288
296
  gr.update(value=[]), # empty chatbot
289
- gr.update(interactive=False)
297
+ gr.update(interactive=False),
298
+ gr.update(visible=False), # click_actions_group hidden
299
+ gr.update(visible=False), # actions_group hidden
290
300
  )
291
301
 
292
302
  conversation = self.format_messages_for_chatbot(selected_call.get("messages", []))
@@ -297,7 +307,9 @@ class HumanCompletionUI:
297
307
  return (
298
308
  gr.update(value=self.last_image),
299
309
  gr.update(value=conversation),
300
- gr.update(interactive=True)
310
+ gr.update(interactive=True),
311
+ gr.update(visible=True), # click_actions_group visible
312
+ gr.update(visible=True), # actions_group visible
301
313
  )
302
314
 
303
315
  def submit_response(self, response_text: str):
@@ -368,6 +380,10 @@ class HumanCompletionUI:
368
380
  """Submit a hotkey action."""
369
381
  return self.submit_action("keypress", keys=keys)
370
382
 
383
+ def submit_wait_action(self) -> str:
384
+ """Submit a wait action with no kwargs."""
385
+ return self.submit_action("wait")
386
+
371
387
  def submit_description_click(self, description: str, action_type: str = "click", button: str = "left") -> str:
372
388
  """Submit a description-based action."""
373
389
  if action_type == "click":
@@ -407,7 +423,7 @@ def create_ui():
407
423
  """Create the Gradio interface."""
408
424
  ui_handler = HumanCompletionUI()
409
425
 
410
- with gr.Blocks(title="Human-in-the-Loop Agent Tool") as demo:
426
+ with gr.Blocks(title="Human-in-the-Loop Agent Tool", fill_width=True) as demo:
411
427
  gr.Markdown("# 🤖 Human-in-the-Loop Agent Tool")
412
428
  gr.Markdown("Review AI conversation requests and provide human responses.")
413
429
 
@@ -415,29 +431,30 @@ def create_ui():
415
431
  with gr.Column(scale=2):
416
432
  with gr.Group():
417
433
  screenshot_image = gr.Image(
418
- label="Screenshot",
434
+ label="Interactive Screenshot",
419
435
  interactive=False,
420
436
  height=600
421
437
  )
422
438
 
423
- # Action type selection for image clicks
424
- with gr.Row():
425
- action_type_radio = gr.Radio(
426
- label="Action Type",
427
- choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down"],
428
- value="click",
429
- scale=2
430
- )
431
- action_button_radio = gr.Radio(
432
- label="Button (for click only)",
433
- choices=["left", "right", "wheel", "back", "forward"],
434
- value="left",
435
- visible=True,
436
- scale=1
437
- )
439
+ # Action type selection for image clicks (wrapped for visibility control)
440
+ with gr.Group(visible=False) as click_actions_group:
441
+ with gr.Row():
442
+ action_type_radio = gr.Dropdown(
443
+ label="Action",
444
+ choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down"],
445
+ value="click",
446
+ scale=2
447
+ )
448
+ action_button_radio = gr.Dropdown(
449
+ label="Button",
450
+ choices=["left", "right", "wheel", "back", "forward"],
451
+ value="left",
452
+ visible=True,
453
+ scale=1
454
+ )
438
455
 
439
456
  conversation_chatbot = gr.Chatbot(
440
- label="Messages",
457
+ label="Conversation",
441
458
  type="messages",
442
459
  height=500,
443
460
  show_copy_button=True
@@ -446,91 +463,83 @@ def create_ui():
446
463
  with gr.Column(scale=1):
447
464
  with gr.Group():
448
465
  call_dropdown = gr.Dropdown(
449
- label="Select a pending call",
466
+ label="Select a pending conversation request",
450
467
  choices=["latest"],
451
468
  interactive=True,
452
469
  value="latest"
453
470
  )
454
471
  refresh_btn = gr.Button("🔄 Refresh", variant="secondary")
472
+ status_display = gr.Textbox(
473
+ label="Status",
474
+ interactive=False,
475
+ value="Ready to receive requests..."
476
+ )
455
477
 
456
478
  with gr.Group():
457
479
  response_text = gr.Textbox(
458
- label="Response",
480
+ label="Message",
459
481
  lines=3,
460
- placeholder="Enter your response here..."
482
+ placeholder="Enter your message here..."
461
483
  )
462
- submit_btn = gr.Button("📤 Submit Response", variant="primary", interactive=False)
484
+ submit_btn = gr.Button("📤 Submit Message", variant="primary", interactive=False)
463
485
 
464
- # Action Accordions
465
- with gr.Accordion("🖱️ Click Actions", open=False):
466
- with gr.Group():
467
- with gr.Row():
468
- click_x = gr.Number(label="X", value=0, minimum=0)
469
- click_y = gr.Number(label="Y", value=0, minimum=0)
470
- with gr.Row():
471
- click_action_type = gr.Dropdown(
472
- label="Action Type",
473
- choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down"],
474
- value="click"
475
- )
476
- click_button = gr.Dropdown(
477
- label="Button (for click only)",
478
- choices=["left", "right", "wheel", "back", "forward"],
479
- value="left"
480
- )
481
- click_submit_btn = gr.Button("Submit Action")
482
-
483
- with gr.Accordion("📝 Type Action", open=False):
484
- with gr.Group():
485
- type_text = gr.Textbox(
486
- label="Text to Type",
487
- placeholder="Enter text to type..."
488
- )
489
- type_submit_btn = gr.Button("Submit Type")
490
-
491
- with gr.Accordion("⌨️ Keypress Action", open=False):
492
- with gr.Group():
493
- keypress_text = gr.Textbox(
494
- label="Keys",
495
- placeholder="e.g., ctrl+c, alt+tab"
496
- )
497
- keypress_submit_btn = gr.Button("Submit Keypress")
498
-
499
- with gr.Accordion("🎯 Description Action", open=False):
500
- with gr.Group():
501
- description_text = gr.Textbox(
502
- label="Element Description",
503
- placeholder="e.g., 'Privacy and security option in left sidebar'"
504
- )
505
- with gr.Row():
506
- description_action_type = gr.Dropdown(
507
- label="Action Type",
508
- choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down"],
509
- value="click"
510
- )
511
- description_button = gr.Radio(
512
- label="Button (for click only)",
513
- choices=["left", "right", "wheel", "back", "forward"],
514
- value="left"
515
- )
516
- description_submit_btn = gr.Button("Submit Description Action")
517
-
518
- status_display = gr.Textbox(
519
- label="Status",
520
- interactive=False,
521
- value="Ready to receive calls..."
522
- )
486
+ # Action Accordions (wrapped for visibility control)
487
+ with gr.Group(visible=False) as actions_group:
488
+ with gr.Tabs():
489
+ with gr.Tab("🖱️ Click Actions"):
490
+ with gr.Group():
491
+ description_text = gr.Textbox(
492
+ label="Element Description",
493
+ placeholder="e.g., 'Privacy and security option in left sidebar'"
494
+ )
495
+ with gr.Row():
496
+ description_action_type = gr.Dropdown(
497
+ label="Action",
498
+ choices=["click", "double_click", "move", "left_mouse_up", "left_mouse_down"],
499
+ value="click"
500
+ )
501
+ description_button = gr.Dropdown(
502
+ label="Button",
503
+ choices=["left", "right", "wheel", "back", "forward"],
504
+ value="left"
505
+ )
506
+ description_submit_btn = gr.Button("Submit Click Action")
507
+
508
+ with gr.Tab("📝 Type Action"):
509
+ with gr.Group():
510
+ type_text = gr.Textbox(
511
+ label="Text to Type",
512
+ placeholder="Enter text to type..."
513
+ )
514
+ type_submit_btn = gr.Button("Submit Type")
515
+
516
+ with gr.Tab("⌨️ Keypress Action"):
517
+ with gr.Group():
518
+ keypress_text = gr.Textbox(
519
+ label="Keys",
520
+ placeholder="e.g., ctrl+c, alt+tab"
521
+ )
522
+ keypress_submit_btn = gr.Button("Submit Keypress")
523
+
524
+ with gr.Tab("🧰 Misc Actions"):
525
+ with gr.Group():
526
+ misc_action_dropdown = gr.Dropdown(
527
+ label="Action",
528
+ choices=["wait"],
529
+ value="wait"
530
+ )
531
+ misc_submit_btn = gr.Button("Submit Action")
523
532
 
524
533
  # Event handlers
525
534
  refresh_btn.click(
526
535
  fn=ui_handler.refresh_pending_calls,
527
- outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
536
+ outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
528
537
  )
529
538
 
530
539
  call_dropdown.change(
531
540
  fn=ui_handler.on_call_selected,
532
541
  inputs=[call_dropdown],
533
- outputs=[screenshot_image, conversation_chatbot, submit_btn]
542
+ outputs=[screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
534
543
  )
535
544
 
536
545
  def handle_image_click(evt: gr.SelectData):
@@ -548,7 +557,7 @@ def create_ui():
548
557
  outputs=[status_display]
549
558
  ).then(
550
559
  fn=ui_handler.wait_for_pending_calls,
551
- outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
560
+ outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
552
561
  )
553
562
 
554
563
  # Response submission
@@ -558,7 +567,7 @@ def create_ui():
558
567
  outputs=[response_text, status_display]
559
568
  ).then(
560
569
  fn=ui_handler.refresh_pending_calls,
561
- outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
570
+ outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
562
571
  )
563
572
 
564
573
  # Toggle button radio visibility based on action type
@@ -570,16 +579,6 @@ def create_ui():
570
579
  inputs=[action_type_radio],
571
580
  outputs=[action_button_radio]
572
581
  )
573
-
574
- # Action accordion handlers
575
- click_submit_btn.click(
576
- fn=ui_handler.submit_click_action,
577
- inputs=[click_x, click_y, click_action_type, click_button],
578
- outputs=[status_display]
579
- ).then(
580
- fn=ui_handler.wait_for_pending_calls,
581
- outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
582
- )
583
582
 
584
583
  type_submit_btn.click(
585
584
  fn=ui_handler.submit_type_action,
@@ -587,7 +586,7 @@ def create_ui():
587
586
  outputs=[status_display]
588
587
  ).then(
589
588
  fn=ui_handler.wait_for_pending_calls,
590
- outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
589
+ outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
591
590
  )
592
591
 
593
592
  keypress_submit_btn.click(
@@ -596,7 +595,7 @@ def create_ui():
596
595
  outputs=[status_display]
597
596
  ).then(
598
597
  fn=ui_handler.wait_for_pending_calls,
599
- outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
598
+ outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
600
599
  )
601
600
 
602
601
  def handle_description_submit(description, action_type, button):
@@ -612,13 +611,30 @@ def create_ui():
612
611
  outputs=[status_display]
613
612
  ).then(
614
613
  fn=ui_handler.wait_for_pending_calls,
615
- outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
614
+ outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
615
+ )
616
+
617
+ # Misc action handler
618
+ def handle_misc_submit(selected_action):
619
+ if selected_action == "wait":
620
+ result = ui_handler.submit_wait_action()
621
+ ui_handler.wait_for_pending_calls()
622
+ return result
623
+ return f"Unsupported misc action: {selected_action}"
624
+
625
+ misc_submit_btn.click(
626
+ fn=handle_misc_submit,
627
+ inputs=[misc_action_dropdown],
628
+ outputs=[status_display]
629
+ ).then(
630
+ fn=ui_handler.wait_for_pending_calls,
631
+ outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
616
632
  )
617
633
 
618
634
  # Load initial data
619
635
  demo.load(
620
636
  fn=ui_handler.refresh_pending_calls,
621
- outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn]
637
+ outputs=[call_dropdown, screenshot_image, conversation_chatbot, submit_btn, click_actions_group, actions_group]
622
638
  )
623
639
 
624
640
  return demo
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
6
6
 
7
7
  [project]
8
8
  name = "cua-agent"
9
- version = "0.4.21"
9
+ version = "0.4.23"
10
10
  description = "CUA (Computer Use) Agent for AI-driven computer interaction"
11
11
  readme = "README.md"
12
12
  authors = [
File without changes
File without changes
File without changes
File without changes
File without changes