cua-agent 0.4.2__tar.gz → 0.4.4__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- {cua_agent-0.4.2 → cua_agent-0.4.4}/PKG-INFO +1 -1
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/cli.py +60 -10
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/loops/anthropic.py +16 -18
- {cua_agent-0.4.2 → cua_agent-0.4.4}/pyproject.toml +1 -1
- {cua_agent-0.4.2 → cua_agent-0.4.4}/README.md +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/__init__.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/__main__.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/adapters/__init__.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/adapters/huggingfacelocal_adapter.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/agent.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/__init__.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/base.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/budget_manager.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/image_retention.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/logging.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/pii_anonymization.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/telemetry.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/callbacks/trajectory_saver.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/computer_handler.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/decorators.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/loops/__init__.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/loops/omniparser.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/loops/openai.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/loops/uitars.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/responses.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/telemetry.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/types.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/ui/__init__.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/ui/__main__.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/ui/gradio/__init__.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/ui/gradio/app.py +0 -0
- {cua_agent-0.4.2 → cua_agent-0.4.4}/agent/ui/gradio/ui_components.py +0 -0
|
@@ -51,9 +51,8 @@ class Colors:
|
|
|
51
51
|
BG_YELLOW = '\033[43m'
|
|
52
52
|
BG_BLUE = '\033[44m'
|
|
53
53
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
"""Print colored text to terminal."""
|
|
54
|
+
def print_colored(text: str, color: str = "", bold: bool = False, dim: bool = False, end: str = "\n", right: str = ""):
|
|
55
|
+
"""Print colored text to terminal with optional right-aligned text."""
|
|
57
56
|
prefix = ""
|
|
58
57
|
if bold:
|
|
59
58
|
prefix += Colors.BOLD
|
|
@@ -62,10 +61,35 @@ def print_colored(text: str, color: str = "", bold: bool = False, dim: bool = Fa
|
|
|
62
61
|
if color:
|
|
63
62
|
prefix += color
|
|
64
63
|
|
|
65
|
-
|
|
64
|
+
if right:
|
|
65
|
+
# Get terminal width (default to 80 if unable to determine)
|
|
66
|
+
try:
|
|
67
|
+
import shutil
|
|
68
|
+
terminal_width = shutil.get_terminal_size().columns
|
|
69
|
+
except:
|
|
70
|
+
terminal_width = 80
|
|
71
|
+
|
|
72
|
+
# Add right margin
|
|
73
|
+
terminal_width -= 1
|
|
74
|
+
|
|
75
|
+
# Calculate padding needed
|
|
76
|
+
# Account for ANSI escape codes not taking visual space
|
|
77
|
+
visible_left_len = len(text)
|
|
78
|
+
visible_right_len = len(right)
|
|
79
|
+
padding = terminal_width - visible_left_len - visible_right_len
|
|
80
|
+
|
|
81
|
+
if padding > 0:
|
|
82
|
+
output = f"{prefix}{text}{' ' * padding}{right}{Colors.RESET}"
|
|
83
|
+
else:
|
|
84
|
+
# If not enough space, just put a single space between
|
|
85
|
+
output = f"{prefix}{text} {right}{Colors.RESET}"
|
|
86
|
+
else:
|
|
87
|
+
output = f"{prefix}{text}{Colors.RESET}"
|
|
88
|
+
|
|
89
|
+
print(output, end=end)
|
|
66
90
|
|
|
67
91
|
|
|
68
|
-
def print_action(action_type: str, details: Dict[str, Any]):
|
|
92
|
+
def print_action(action_type: str, details: Dict[str, Any], total_cost: float):
|
|
69
93
|
"""Print computer action with nice formatting."""
|
|
70
94
|
# Format action details
|
|
71
95
|
args_str = ""
|
|
@@ -81,8 +105,10 @@ def print_action(action_type: str, details: Dict[str, Any]):
|
|
|
81
105
|
elif action_type == "scroll" and "x" in details and "y" in details:
|
|
82
106
|
args_str = f"({details['x']}, {details['y']})"
|
|
83
107
|
|
|
84
|
-
|
|
85
|
-
|
|
108
|
+
if total_cost > 0:
|
|
109
|
+
print_colored(f"🛠️ {action_type}{args_str}", dim=True, right=f"💸 ${total_cost:.2f}")
|
|
110
|
+
else:
|
|
111
|
+
print_colored(f"🛠️ {action_type}{args_str}", dim=True)
|
|
86
112
|
|
|
87
113
|
def print_welcome(model: str, agent_loop: str, container_name: str):
|
|
88
114
|
"""Print welcome message."""
|
|
@@ -92,7 +118,7 @@ def print_welcome(model: str, agent_loop: str, container_name: str):
|
|
|
92
118
|
async def ainput(prompt: str = ""):
|
|
93
119
|
return await asyncio.to_thread(input, prompt)
|
|
94
120
|
|
|
95
|
-
async def chat_loop(agent, model: str, container_name: str, initial_prompt: str = ""):
|
|
121
|
+
async def chat_loop(agent, model: str, container_name: str, initial_prompt: str = "", show_usage: bool = True):
|
|
96
122
|
"""Main chat loop with the agent."""
|
|
97
123
|
print_welcome(model, agent.agent_loop.__name__, container_name)
|
|
98
124
|
|
|
@@ -101,6 +127,8 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
|
|
|
101
127
|
if initial_prompt:
|
|
102
128
|
history.append({"role": "user", "content": initial_prompt})
|
|
103
129
|
|
|
130
|
+
total_cost = 0
|
|
131
|
+
|
|
104
132
|
while True:
|
|
105
133
|
if history[-1].get("role") != "user":
|
|
106
134
|
# Get user input with prompt
|
|
@@ -124,6 +152,9 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
|
|
|
124
152
|
async for result in agent.run(history):
|
|
125
153
|
# Add agent responses to history
|
|
126
154
|
history.extend(result.get("output", []))
|
|
155
|
+
|
|
156
|
+
if show_usage:
|
|
157
|
+
total_cost += result.get("usage", {}).get("response_cost", 0)
|
|
127
158
|
|
|
128
159
|
# Process and display the output
|
|
129
160
|
for item in result.get("output", []):
|
|
@@ -143,7 +174,7 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
|
|
|
143
174
|
action_type = action.get("type", "")
|
|
144
175
|
if action_type:
|
|
145
176
|
spinner.hide()
|
|
146
|
-
print_action(action_type, action)
|
|
177
|
+
print_action(action_type, action, total_cost)
|
|
147
178
|
spinner.text = f"Performing {action_type}..."
|
|
148
179
|
spinner.show()
|
|
149
180
|
|
|
@@ -163,6 +194,8 @@ async def chat_loop(agent, model: str, container_name: str, initial_prompt: str
|
|
|
163
194
|
print_colored(f"📤 {output}", dim=True)
|
|
164
195
|
|
|
165
196
|
spinner.hide()
|
|
197
|
+
if show_usage and total_cost > 0:
|
|
198
|
+
print_colored(f"Total cost: ${total_cost:.2f}", dim=True)
|
|
166
199
|
|
|
167
200
|
|
|
168
201
|
async def main():
|
|
@@ -214,6 +247,20 @@ Examples:
|
|
|
214
247
|
type=str,
|
|
215
248
|
help="Initial prompt to send to the agent. Leave blank for interactive mode."
|
|
216
249
|
)
|
|
250
|
+
|
|
251
|
+
parser.add_argument(
|
|
252
|
+
"-c", "--cache",
|
|
253
|
+
action="store_true",
|
|
254
|
+
help="Tell the API to enable caching"
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
parser.add_argument(
|
|
258
|
+
"-u", "--usage",
|
|
259
|
+
action="store_true",
|
|
260
|
+
help="Show total cost of the agent runs"
|
|
261
|
+
)
|
|
262
|
+
|
|
263
|
+
|
|
217
264
|
|
|
218
265
|
args = parser.parse_args()
|
|
219
266
|
|
|
@@ -294,11 +341,14 @@ Examples:
|
|
|
294
341
|
"raise_error": True,
|
|
295
342
|
"reset_after_each_run": False
|
|
296
343
|
}
|
|
344
|
+
|
|
345
|
+
if args.cache:
|
|
346
|
+
agent_kwargs["use_prompt_caching"] = True
|
|
297
347
|
|
|
298
348
|
agent = ComputerAgent(**agent_kwargs)
|
|
299
349
|
|
|
300
350
|
# Start chat loop
|
|
301
|
-
await chat_loop(agent, args.model, container_name, args.prompt)
|
|
351
|
+
await chat_loop(agent, args.model, container_name, args.prompt, args.usage)
|
|
302
352
|
|
|
303
353
|
|
|
304
354
|
|
|
@@ -606,35 +606,33 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
606
606
|
# Basic actions (all versions)
|
|
607
607
|
if action_type == "screenshot":
|
|
608
608
|
responses_items.append(make_screenshot_item(call_id=call_id))
|
|
609
|
-
elif action_type
|
|
609
|
+
elif action_type in ["click", "left_click"]:
|
|
610
610
|
coordinate = tool_input.get("coordinate", [0, 0])
|
|
611
611
|
responses_items.append(make_click_item(
|
|
612
612
|
x=coordinate[0] if len(coordinate) > 0 else 0,
|
|
613
613
|
y=coordinate[1] if len(coordinate) > 1 else 0,
|
|
614
614
|
call_id=call_id
|
|
615
615
|
))
|
|
616
|
-
elif action_type
|
|
616
|
+
elif action_type in ["type", "type_text"]:
|
|
617
617
|
responses_items.append(make_type_item(
|
|
618
618
|
text=tool_input.get("text", ""),
|
|
619
619
|
call_id=call_id
|
|
620
620
|
))
|
|
621
|
-
elif action_type
|
|
621
|
+
elif action_type in ["key", "keypress", "hotkey"]:
|
|
622
622
|
responses_items.append(make_keypress_item(
|
|
623
623
|
keys=tool_input.get("text", "").replace("+", "-").split("-"),
|
|
624
624
|
call_id=call_id
|
|
625
625
|
))
|
|
626
|
-
elif action_type
|
|
626
|
+
elif action_type in ["mouse_move", "move_cursor", "move"]:
|
|
627
627
|
# Mouse move - create a custom action item
|
|
628
628
|
coordinate = tool_input.get("coordinate", [0, 0])
|
|
629
|
-
responses_items.append(
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
}
|
|
637
|
-
})
|
|
629
|
+
responses_items.append(
|
|
630
|
+
make_move_item(
|
|
631
|
+
x=coordinate[0] if len(coordinate) > 0 else 0,
|
|
632
|
+
y=coordinate[1] if len(coordinate) > 1 else 0,
|
|
633
|
+
call_id=call_id
|
|
634
|
+
)
|
|
635
|
+
)
|
|
638
636
|
|
|
639
637
|
# Enhanced actions (computer_20250124) Available in Claude 4 and Claude Sonnet 3.7
|
|
640
638
|
elif action_type == "scroll":
|
|
@@ -651,7 +649,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
651
649
|
scroll_y=scroll_y,
|
|
652
650
|
call_id=call_id
|
|
653
651
|
))
|
|
654
|
-
elif action_type
|
|
652
|
+
elif action_type in ["left_click_drag", "drag"]:
|
|
655
653
|
start_coord = tool_input.get("start_coordinate", [0, 0])
|
|
656
654
|
end_coord = tool_input.get("end_coordinate", [0, 0])
|
|
657
655
|
responses_items.append(make_drag_item(
|
|
@@ -809,7 +807,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
809
807
|
y=coordinate[1] if len(coordinate) > 1 else 0,
|
|
810
808
|
call_id=call_id
|
|
811
809
|
))
|
|
812
|
-
elif action_type
|
|
810
|
+
elif action_type in ["type", "type_text"]:
|
|
813
811
|
# Input:
|
|
814
812
|
# {
|
|
815
813
|
# "function": {
|
|
@@ -836,7 +834,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
836
834
|
text=args.get("text", ""),
|
|
837
835
|
call_id=call_id
|
|
838
836
|
))
|
|
839
|
-
elif action_type
|
|
837
|
+
elif action_type in ["key", "keypress", "hotkey"]:
|
|
840
838
|
# Input:
|
|
841
839
|
# {
|
|
842
840
|
# "function": {
|
|
@@ -863,7 +861,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
863
861
|
keys=args.get("text", "").replace("+", "-").split("-"),
|
|
864
862
|
call_id=call_id
|
|
865
863
|
))
|
|
866
|
-
elif action_type
|
|
864
|
+
elif action_type in ["mouse_move", "move_cursor", "move"]:
|
|
867
865
|
# Input:
|
|
868
866
|
# {
|
|
869
867
|
# "function": {
|
|
@@ -937,7 +935,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
937
935
|
scroll_y=scroll_y,
|
|
938
936
|
call_id=call_id
|
|
939
937
|
))
|
|
940
|
-
elif action_type
|
|
938
|
+
elif action_type in ["left_click_drag", "drag"]:
|
|
941
939
|
# Input:
|
|
942
940
|
# {
|
|
943
941
|
# "function": {
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|