cua-agent 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/callbacks/telemetry.py +1 -1
- agent/cli.py +27 -15
- agent/loops/anthropic.py +659 -18
- {cua_agent-0.4.1.dist-info → cua_agent-0.4.2.dist-info}/METADATA +2 -2
- {cua_agent-0.4.1.dist-info → cua_agent-0.4.2.dist-info}/RECORD +7 -7
- {cua_agent-0.4.1.dist-info → cua_agent-0.4.2.dist-info}/WHEEL +0 -0
- {cua_agent-0.4.1.dist-info → cua_agent-0.4.2.dist-info}/entry_points.txt +0 -0
agent/callbacks/telemetry.py
CHANGED
|
@@ -60,7 +60,7 @@ class TelemetryCallback(AsyncCallbackHandler):
|
|
|
60
60
|
"""Record agent type/model and session initialization."""
|
|
61
61
|
agent_info = {
|
|
62
62
|
"session_id": self.session_id,
|
|
63
|
-
"agent_type": self.agent.agent_loop.__name__,
|
|
63
|
+
"agent_type": self.agent.agent_loop.__name__ if hasattr(self.agent, 'agent_loop') else 'unknown',
|
|
64
64
|
"model": getattr(self.agent, 'model', 'unknown'),
|
|
65
65
|
**SYSTEM_INFO
|
|
66
66
|
}
|
agent/cli.py
CHANGED
|
@@ -92,26 +92,30 @@ def print_welcome(model: str, agent_loop: str, container_name: str):
|
|
|
92
92
|
async def ainput(prompt: str = ""):
|
|
93
93
|
return await asyncio.to_thread(input, prompt)
|
|
94
94
|
|
|
95
|
-
async def chat_loop(agent, model: str, container_name: str):
|
|
95
|
+
async def chat_loop(agent, model: str, container_name: str, initial_prompt: str = ""):
|
|
96
96
|
"""Main chat loop with the agent."""
|
|
97
97
|
print_welcome(model, agent.agent_loop.__name__, container_name)
|
|
98
98
|
|
|
99
99
|
history = []
|
|
100
100
|
|
|
101
|
+
if initial_prompt:
|
|
102
|
+
history.append({"role": "user", "content": initial_prompt})
|
|
103
|
+
|
|
101
104
|
while True:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
if user_input.lower() in ['exit', 'quit', 'q']:
|
|
107
|
-
print_colored("\n👋 Goodbye!")
|
|
108
|
-
break
|
|
105
|
+
if history[-1].get("role") != "user":
|
|
106
|
+
# Get user input with prompt
|
|
107
|
+
print_colored("> ", end="")
|
|
108
|
+
user_input = await ainput()
|
|
109
109
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
110
|
+
if user_input.lower() in ['exit', 'quit', 'q']:
|
|
111
|
+
print_colored("\n👋 Goodbye!")
|
|
112
|
+
break
|
|
113
|
+
|
|
114
|
+
if not user_input:
|
|
115
|
+
continue
|
|
116
|
+
|
|
117
|
+
# Add user message to history
|
|
118
|
+
history.append({"role": "user", "content": user_input})
|
|
115
119
|
|
|
116
120
|
# Stream responses from the agent with spinner
|
|
117
121
|
with yaspin(text="Thinking...", spinner="line", attrs=["dark"]) as spinner:
|
|
@@ -204,6 +208,12 @@ Examples:
|
|
|
204
208
|
action="store_true",
|
|
205
209
|
help="Enable verbose logging"
|
|
206
210
|
)
|
|
211
|
+
|
|
212
|
+
parser.add_argument(
|
|
213
|
+
"-p", "--prompt",
|
|
214
|
+
type=str,
|
|
215
|
+
help="Initial prompt to send to the agent. Leave blank for interactive mode."
|
|
216
|
+
)
|
|
207
217
|
|
|
208
218
|
args = parser.parse_args()
|
|
209
219
|
|
|
@@ -269,9 +279,11 @@ Examples:
|
|
|
269
279
|
agent_kwargs = {
|
|
270
280
|
"model": args.model,
|
|
271
281
|
"tools": [computer],
|
|
272
|
-
"only_n_most_recent_images": args.images,
|
|
273
282
|
"verbosity": 20 if args.verbose else 30, # DEBUG vs WARNING
|
|
274
283
|
}
|
|
284
|
+
|
|
285
|
+
if args.images > 0:
|
|
286
|
+
agent_kwargs["only_n_most_recent_images"] = args.images
|
|
275
287
|
|
|
276
288
|
if args.trajectory:
|
|
277
289
|
agent_kwargs["trajectory_dir"] = "trajectories"
|
|
@@ -286,7 +298,7 @@ Examples:
|
|
|
286
298
|
agent = ComputerAgent(**agent_kwargs)
|
|
287
299
|
|
|
288
300
|
# Start chat loop
|
|
289
|
-
await chat_loop(agent, args.model, container_name)
|
|
301
|
+
await chat_loop(agent, args.model, container_name, args.prompt)
|
|
290
302
|
|
|
291
303
|
|
|
292
304
|
|
agent/loops/anthropic.py
CHANGED
|
@@ -193,17 +193,98 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[
|
|
|
193
193
|
|
|
194
194
|
tool_use_content = []
|
|
195
195
|
|
|
196
|
+
# Basic actions (all versions)
|
|
196
197
|
if action_type == "click":
|
|
198
|
+
# Input:
|
|
199
|
+
# {
|
|
200
|
+
# "type": "computer_call",
|
|
201
|
+
# "call_id": "call_1",
|
|
202
|
+
# "action": {
|
|
203
|
+
# "type": "click",
|
|
204
|
+
# "x": 100,
|
|
205
|
+
# "y": 200
|
|
206
|
+
# }
|
|
207
|
+
# }
|
|
208
|
+
|
|
209
|
+
# Output:
|
|
210
|
+
# {
|
|
211
|
+
# "function": {
|
|
212
|
+
# "name": "computer",
|
|
213
|
+
# "arguments": json.dumps({
|
|
214
|
+
# "action": "click",
|
|
215
|
+
# "coordinate": [100, 200]
|
|
216
|
+
# })
|
|
217
|
+
# },
|
|
218
|
+
# "id": "call_1",
|
|
219
|
+
# "type": "function"
|
|
220
|
+
# }
|
|
221
|
+
button = action.get("button", "left")
|
|
222
|
+
action_name = "right_click" if button == "right" else "middle_click" if button == "wheel" else "left_click"
|
|
197
223
|
tool_use_content.append({
|
|
198
224
|
"type": "tool_use",
|
|
199
225
|
"id": call_id,
|
|
200
226
|
"name": "computer",
|
|
201
227
|
"input": {
|
|
202
|
-
"action":
|
|
228
|
+
"action": action_name,
|
|
229
|
+
"coordinate": [action.get("x", 0), action.get("y", 0)]
|
|
230
|
+
}
|
|
231
|
+
})
|
|
232
|
+
elif action_type == "double_click":
|
|
233
|
+
# Input:
|
|
234
|
+
# {
|
|
235
|
+
# "type": "computer_call",
|
|
236
|
+
# "call_id": "call_1",
|
|
237
|
+
# "action": {
|
|
238
|
+
# "type": "double_click",
|
|
239
|
+
# "x": 160,
|
|
240
|
+
# "y": 240
|
|
241
|
+
# }
|
|
242
|
+
# }
|
|
243
|
+
|
|
244
|
+
# Output:
|
|
245
|
+
# {
|
|
246
|
+
# "function": {
|
|
247
|
+
# "name": "computer",
|
|
248
|
+
# "arguments": json.dumps({
|
|
249
|
+
# "action": "double_click",
|
|
250
|
+
# "coordinate": [160, 240]
|
|
251
|
+
# })
|
|
252
|
+
# },
|
|
253
|
+
# "id": "call_1",
|
|
254
|
+
# "type": "function"
|
|
255
|
+
# }
|
|
256
|
+
tool_use_content.append({
|
|
257
|
+
"type": "tool_use",
|
|
258
|
+
"id": call_id,
|
|
259
|
+
"name": "computer",
|
|
260
|
+
"input": {
|
|
261
|
+
"action": "double_click",
|
|
203
262
|
"coordinate": [action.get("x", 0), action.get("y", 0)]
|
|
204
263
|
}
|
|
205
264
|
})
|
|
206
265
|
elif action_type == "type":
|
|
266
|
+
# Input:
|
|
267
|
+
# {
|
|
268
|
+
# "type": "computer_call",
|
|
269
|
+
# "call_id": "call_1",
|
|
270
|
+
# "action": {
|
|
271
|
+
# "type": "type",
|
|
272
|
+
# "text": "Hello World"
|
|
273
|
+
# }
|
|
274
|
+
# }
|
|
275
|
+
|
|
276
|
+
# Output:
|
|
277
|
+
# {
|
|
278
|
+
# "function": {
|
|
279
|
+
# "name": "computer",
|
|
280
|
+
# "arguments": json.dumps({
|
|
281
|
+
# "action": "type",
|
|
282
|
+
# "text": "Hello World"
|
|
283
|
+
# })
|
|
284
|
+
# },
|
|
285
|
+
# "id": "call_1",
|
|
286
|
+
# "type": "function"
|
|
287
|
+
# }
|
|
207
288
|
tool_use_content.append({
|
|
208
289
|
"type": "tool_use",
|
|
209
290
|
"id": call_id,
|
|
@@ -213,26 +294,223 @@ def _convert_responses_items_to_completion_messages(messages: Messages) -> List[
|
|
|
213
294
|
"text": action.get("text", "")
|
|
214
295
|
}
|
|
215
296
|
})
|
|
216
|
-
elif action_type == "
|
|
297
|
+
elif action_type == "keypress":
|
|
298
|
+
# Input:
|
|
299
|
+
# {
|
|
300
|
+
# "type": "computer_call",
|
|
301
|
+
# "call_id": "call_1",
|
|
302
|
+
# "action": {
|
|
303
|
+
# "type": "keypress",
|
|
304
|
+
# "keys": ["ctrl", "c"]
|
|
305
|
+
# }
|
|
306
|
+
# }
|
|
307
|
+
|
|
308
|
+
# Output:
|
|
309
|
+
# {
|
|
310
|
+
# "function": {
|
|
311
|
+
# "name": "computer",
|
|
312
|
+
# "arguments": json.dumps({
|
|
313
|
+
# "action": "key",
|
|
314
|
+
# "text": "ctrl+c"
|
|
315
|
+
# })
|
|
316
|
+
# },
|
|
317
|
+
# "id": "call_1",
|
|
318
|
+
# "type": "function"
|
|
319
|
+
# }
|
|
217
320
|
tool_use_content.append({
|
|
218
321
|
"type": "tool_use",
|
|
219
322
|
"id": call_id,
|
|
220
323
|
"name": "computer",
|
|
221
324
|
"input": {
|
|
222
325
|
"action": "key",
|
|
223
|
-
"
|
|
326
|
+
"text": "+".join(action.get("keys", []))
|
|
327
|
+
}
|
|
328
|
+
})
|
|
329
|
+
elif action_type == "mouse_move":
|
|
330
|
+
# Input:
|
|
331
|
+
# {
|
|
332
|
+
# "type": "computer_call",
|
|
333
|
+
# "call_id": "call_1",
|
|
334
|
+
# "action": {
|
|
335
|
+
# "type": "mouse_move",
|
|
336
|
+
# "x": 150,
|
|
337
|
+
# "y": 250
|
|
338
|
+
# }
|
|
339
|
+
# }
|
|
340
|
+
|
|
341
|
+
# Output:
|
|
342
|
+
# {
|
|
343
|
+
# "function": {
|
|
344
|
+
# "name": "computer",
|
|
345
|
+
# "arguments": json.dumps({
|
|
346
|
+
# "action": "mouse_move",
|
|
347
|
+
# "coordinate": [150, 250]
|
|
348
|
+
# })
|
|
349
|
+
# },
|
|
350
|
+
# "id": "call_1",
|
|
351
|
+
# "type": "function"
|
|
352
|
+
# }
|
|
353
|
+
tool_use_content.append({
|
|
354
|
+
"type": "tool_use",
|
|
355
|
+
"id": call_id,
|
|
356
|
+
"name": "computer",
|
|
357
|
+
"input": {
|
|
358
|
+
"action": "mouse_move",
|
|
359
|
+
"coordinate": [action.get("x", 0), action.get("y", 0)]
|
|
360
|
+
}
|
|
361
|
+
})
|
|
362
|
+
elif action_type == "scroll":
|
|
363
|
+
# Input:
|
|
364
|
+
# {
|
|
365
|
+
# "type": "computer_call",
|
|
366
|
+
# "call_id": "call_1",
|
|
367
|
+
# "action": {
|
|
368
|
+
# "type": "scroll",
|
|
369
|
+
# "x": 300,
|
|
370
|
+
# "y": 400,
|
|
371
|
+
# "scroll_x": 0,
|
|
372
|
+
# "scroll_y": -5
|
|
373
|
+
# }
|
|
374
|
+
# }
|
|
375
|
+
|
|
376
|
+
# Output:
|
|
377
|
+
# {
|
|
378
|
+
# "function": {
|
|
379
|
+
# "name": "computer",
|
|
380
|
+
# "arguments": json.dumps({
|
|
381
|
+
# "action": "scroll",
|
|
382
|
+
# "coordinate": [300, 400],
|
|
383
|
+
# "scroll_direction": "down",
|
|
384
|
+
# "scroll_amount": 5
|
|
385
|
+
# })
|
|
386
|
+
# },
|
|
387
|
+
# "id": "call_1",
|
|
388
|
+
# "type": "function"
|
|
389
|
+
# }
|
|
390
|
+
scroll_x = action.get("scroll_x", 0)
|
|
391
|
+
scroll_y = action.get("scroll_y", 0)
|
|
392
|
+
# Determine direction and amount from scroll values
|
|
393
|
+
if scroll_x > 0:
|
|
394
|
+
direction = "left"
|
|
395
|
+
amount = scroll_x
|
|
396
|
+
elif scroll_x < 0:
|
|
397
|
+
direction = "right"
|
|
398
|
+
amount = -scroll_x
|
|
399
|
+
elif scroll_y > 0:
|
|
400
|
+
direction = "up"
|
|
401
|
+
amount = scroll_y
|
|
402
|
+
elif scroll_y < 0:
|
|
403
|
+
direction = "down"
|
|
404
|
+
amount = -scroll_y
|
|
405
|
+
else:
|
|
406
|
+
direction = "down"
|
|
407
|
+
amount = 3
|
|
408
|
+
|
|
409
|
+
tool_use_content.append({
|
|
410
|
+
"type": "tool_use",
|
|
411
|
+
"id": call_id,
|
|
412
|
+
"name": "computer",
|
|
413
|
+
"input": {
|
|
414
|
+
"action": "scroll",
|
|
415
|
+
"coordinate": [action.get("x", 0), action.get("y", 0)],
|
|
416
|
+
"scroll_direction": direction,
|
|
417
|
+
"scroll_amount": amount
|
|
418
|
+
}
|
|
419
|
+
})
|
|
420
|
+
elif action_type == "drag":
|
|
421
|
+
# Input:
|
|
422
|
+
# {
|
|
423
|
+
# "type": "computer_call",
|
|
424
|
+
# "call_id": "call_1",
|
|
425
|
+
# "action": {
|
|
426
|
+
# "type": "drag",
|
|
427
|
+
# "path": [
|
|
428
|
+
# {"x": 100, "y": 150},
|
|
429
|
+
# {"x": 200, "y": 250}
|
|
430
|
+
# ]
|
|
431
|
+
# }
|
|
432
|
+
# }
|
|
433
|
+
|
|
434
|
+
# Output:
|
|
435
|
+
# {
|
|
436
|
+
# "function": {
|
|
437
|
+
# "name": "computer",
|
|
438
|
+
# "arguments": json.dumps({
|
|
439
|
+
# "action": "left_click_drag",
|
|
440
|
+
# "start_coordinate": [100, 150],
|
|
441
|
+
# "end_coordinate": [200, 250]
|
|
442
|
+
# })
|
|
443
|
+
# },
|
|
444
|
+
# "id": "call_1",
|
|
445
|
+
# "type": "function"
|
|
446
|
+
# }
|
|
447
|
+
path = action.get("path", [])
|
|
448
|
+
start_coord = [0, 0]
|
|
449
|
+
end_coord = [0, 0]
|
|
450
|
+
if isinstance(path, list) and len(path) >= 2:
|
|
451
|
+
start_coord = [path[0].get("x", 0), path[0].get("y", 0)]
|
|
452
|
+
end_coord = [path[-1].get("x", 0), path[-1].get("y", 0)]
|
|
453
|
+
|
|
454
|
+
tool_use_content.append({
|
|
455
|
+
"type": "tool_use",
|
|
456
|
+
"id": call_id,
|
|
457
|
+
"name": "computer",
|
|
458
|
+
"input": {
|
|
459
|
+
"action": "left_click_drag",
|
|
460
|
+
"start_coordinate": start_coord,
|
|
461
|
+
"end_coordinate": end_coord
|
|
224
462
|
}
|
|
225
463
|
})
|
|
226
464
|
elif action_type == "wait":
|
|
465
|
+
# Input:
|
|
466
|
+
# {
|
|
467
|
+
# "type": "computer_call",
|
|
468
|
+
# "call_id": "call_1",
|
|
469
|
+
# "action": {
|
|
470
|
+
# "type": "wait"
|
|
471
|
+
# }
|
|
472
|
+
# }
|
|
473
|
+
|
|
474
|
+
# Output:
|
|
475
|
+
# {
|
|
476
|
+
# "function": {
|
|
477
|
+
# "name": "computer",
|
|
478
|
+
# "arguments": json.dumps({
|
|
479
|
+
# "action": "wait"
|
|
480
|
+
# })
|
|
481
|
+
# },
|
|
482
|
+
# "id": "call_1",
|
|
483
|
+
# "type": "function"
|
|
484
|
+
# }
|
|
227
485
|
tool_use_content.append({
|
|
228
486
|
"type": "tool_use",
|
|
229
487
|
"id": call_id,
|
|
230
488
|
"name": "computer",
|
|
231
489
|
"input": {
|
|
232
|
-
"action": "
|
|
490
|
+
"action": "wait"
|
|
233
491
|
}
|
|
234
492
|
})
|
|
235
493
|
elif action_type == "screenshot":
|
|
494
|
+
# Input:
|
|
495
|
+
# {
|
|
496
|
+
# "type": "computer_call",
|
|
497
|
+
# "call_id": "call_1",
|
|
498
|
+
# "action": {
|
|
499
|
+
# "type": "screenshot"
|
|
500
|
+
# }
|
|
501
|
+
# }
|
|
502
|
+
|
|
503
|
+
# Output:
|
|
504
|
+
# {
|
|
505
|
+
# "function": {
|
|
506
|
+
# "name": "computer",
|
|
507
|
+
# "arguments": json.dumps({
|
|
508
|
+
# "action": "screenshot"
|
|
509
|
+
# })
|
|
510
|
+
# },
|
|
511
|
+
# "id": "call_1",
|
|
512
|
+
# "type": "function"
|
|
513
|
+
# }
|
|
236
514
|
tool_use_content.append({
|
|
237
515
|
"type": "tool_use",
|
|
238
516
|
"id": call_id,
|
|
@@ -342,7 +620,7 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
342
620
|
))
|
|
343
621
|
elif action_type == "key":
|
|
344
622
|
responses_items.append(make_keypress_item(
|
|
345
|
-
|
|
623
|
+
keys=tool_input.get("text", "").replace("+", "-").split("-"),
|
|
346
624
|
call_id=call_id
|
|
347
625
|
))
|
|
348
626
|
elif action_type == "mouse_move":
|
|
@@ -361,21 +639,32 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
361
639
|
# Enhanced actions (computer_20250124) Available in Claude 4 and Claude Sonnet 3.7
|
|
362
640
|
elif action_type == "scroll":
|
|
363
641
|
coordinate = tool_input.get("coordinate", [0, 0])
|
|
642
|
+
scroll_amount = tool_input.get("scroll_amount", 3)
|
|
643
|
+
scroll_x = scroll_amount if tool_input.get("scroll_direction", "down") == "right" else \
|
|
644
|
+
-scroll_amount if tool_input.get("scroll_direction", "down") == "left" else 0
|
|
645
|
+
scroll_y = scroll_amount if tool_input.get("scroll_direction", "down") == "down" else \
|
|
646
|
+
-scroll_amount if tool_input.get("scroll_direction", "down") == "up" else 0
|
|
364
647
|
responses_items.append(make_scroll_item(
|
|
365
648
|
x=coordinate[0] if len(coordinate) > 0 else 0,
|
|
366
649
|
y=coordinate[1] if len(coordinate) > 1 else 0,
|
|
367
|
-
|
|
368
|
-
|
|
650
|
+
scroll_x=scroll_x,
|
|
651
|
+
scroll_y=scroll_y,
|
|
369
652
|
call_id=call_id
|
|
370
653
|
))
|
|
371
654
|
elif action_type == "left_click_drag":
|
|
372
655
|
start_coord = tool_input.get("start_coordinate", [0, 0])
|
|
373
656
|
end_coord = tool_input.get("end_coordinate", [0, 0])
|
|
374
657
|
responses_items.append(make_drag_item(
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
658
|
+
path=[
|
|
659
|
+
{
|
|
660
|
+
"x": start_coord[0] if len(start_coord) > 0 else 0,
|
|
661
|
+
"y": start_coord[1] if len(start_coord) > 1 else 0
|
|
662
|
+
},
|
|
663
|
+
{
|
|
664
|
+
"x": end_coord[0] if len(end_coord) > 0 else 0,
|
|
665
|
+
"y": end_coord[1] if len(end_coord) > 1 else 0
|
|
666
|
+
}
|
|
667
|
+
],
|
|
379
668
|
call_id=call_id
|
|
380
669
|
))
|
|
381
670
|
elif action_type == "right_click":
|
|
@@ -459,7 +748,6 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
459
748
|
# Handle tool calls (alternative format)
|
|
460
749
|
if hasattr(message, 'tool_calls') and message.tool_calls:
|
|
461
750
|
for tool_call in message.tool_calls:
|
|
462
|
-
print(tool_call)
|
|
463
751
|
if tool_call.function.name == "computer":
|
|
464
752
|
try:
|
|
465
753
|
args = json.loads(tool_call.function.arguments)
|
|
@@ -468,10 +756,53 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
468
756
|
|
|
469
757
|
# Basic actions (all versions)
|
|
470
758
|
if action_type == "screenshot":
|
|
759
|
+
# Input:
|
|
760
|
+
# {
|
|
761
|
+
# "function": {
|
|
762
|
+
# "name": "computer",
|
|
763
|
+
# "arguments": json.dumps({
|
|
764
|
+
# "action": "screenshot"
|
|
765
|
+
# })
|
|
766
|
+
# },
|
|
767
|
+
# "id": "call_1",
|
|
768
|
+
# "type": "function"
|
|
769
|
+
# }
|
|
770
|
+
|
|
771
|
+
# Output:
|
|
772
|
+
# {
|
|
773
|
+
# "type": "computer_call",
|
|
774
|
+
# "call_id": "call_1",
|
|
775
|
+
# "action": {
|
|
776
|
+
# "type": "screenshot"
|
|
777
|
+
# }
|
|
778
|
+
# }
|
|
471
779
|
responses_items.append(make_screenshot_item(
|
|
472
780
|
call_id=call_id
|
|
473
781
|
))
|
|
474
782
|
elif action_type in ["click", "left_click"]:
|
|
783
|
+
# Input:
|
|
784
|
+
# {
|
|
785
|
+
# "function": {
|
|
786
|
+
# "name": "computer",
|
|
787
|
+
# "arguments": json.dumps({
|
|
788
|
+
# "action": "click",
|
|
789
|
+
# "coordinate": [100, 200]
|
|
790
|
+
# })
|
|
791
|
+
# },
|
|
792
|
+
# "id": "call_1",
|
|
793
|
+
# "type": "function"
|
|
794
|
+
# }
|
|
795
|
+
|
|
796
|
+
# Output:
|
|
797
|
+
# {
|
|
798
|
+
# "type": "computer_call",
|
|
799
|
+
# "call_id": "call_1",
|
|
800
|
+
# "action": {
|
|
801
|
+
# "type": "click",
|
|
802
|
+
# "x": 100,
|
|
803
|
+
# "y": 200
|
|
804
|
+
# }
|
|
805
|
+
# }
|
|
475
806
|
coordinate = args.get("coordinate", [0, 0])
|
|
476
807
|
responses_items.append(make_click_item(
|
|
477
808
|
x=coordinate[0] if len(coordinate) > 0 else 0,
|
|
@@ -479,16 +810,83 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
479
810
|
call_id=call_id
|
|
480
811
|
))
|
|
481
812
|
elif action_type == "type":
|
|
813
|
+
# Input:
|
|
814
|
+
# {
|
|
815
|
+
# "function": {
|
|
816
|
+
# "name": "computer",
|
|
817
|
+
# "arguments": json.dumps({
|
|
818
|
+
# "action": "type",
|
|
819
|
+
# "text": "Hello World"
|
|
820
|
+
# })
|
|
821
|
+
# },
|
|
822
|
+
# "id": "call_1",
|
|
823
|
+
# "type": "function"
|
|
824
|
+
# }
|
|
825
|
+
|
|
826
|
+
# Output:
|
|
827
|
+
# {
|
|
828
|
+
# "type": "computer_call",
|
|
829
|
+
# "call_id": "call_1",
|
|
830
|
+
# "action": {
|
|
831
|
+
# "type": "type",
|
|
832
|
+
# "text": "Hello World"
|
|
833
|
+
# }
|
|
834
|
+
# }
|
|
482
835
|
responses_items.append(make_type_item(
|
|
483
836
|
text=args.get("text", ""),
|
|
484
837
|
call_id=call_id
|
|
485
838
|
))
|
|
486
839
|
elif action_type == "key":
|
|
840
|
+
# Input:
|
|
841
|
+
# {
|
|
842
|
+
# "function": {
|
|
843
|
+
# "name": "computer",
|
|
844
|
+
# "arguments": json.dumps({
|
|
845
|
+
# "action": "key",
|
|
846
|
+
# "text": "ctrl+c"
|
|
847
|
+
# })
|
|
848
|
+
# },
|
|
849
|
+
# "id": "call_1",
|
|
850
|
+
# "type": "function"
|
|
851
|
+
# }
|
|
852
|
+
|
|
853
|
+
# Output:
|
|
854
|
+
# {
|
|
855
|
+
# "type": "computer_call",
|
|
856
|
+
# "call_id": "call_1",
|
|
857
|
+
# "action": {
|
|
858
|
+
# "type": "keypress",
|
|
859
|
+
# "keys": ["ctrl", "c"]
|
|
860
|
+
# }
|
|
861
|
+
# }
|
|
487
862
|
responses_items.append(make_keypress_item(
|
|
488
|
-
|
|
863
|
+
keys=args.get("text", "").replace("+", "-").split("-"),
|
|
489
864
|
call_id=call_id
|
|
490
865
|
))
|
|
491
866
|
elif action_type == "mouse_move":
|
|
867
|
+
# Input:
|
|
868
|
+
# {
|
|
869
|
+
# "function": {
|
|
870
|
+
# "name": "computer",
|
|
871
|
+
# "arguments": json.dumps({
|
|
872
|
+
# "action": "mouse_move",
|
|
873
|
+
# "coordinate": [150, 250]
|
|
874
|
+
# })
|
|
875
|
+
# },
|
|
876
|
+
# "id": "call_1",
|
|
877
|
+
# "type": "function"
|
|
878
|
+
# }
|
|
879
|
+
|
|
880
|
+
# Output:
|
|
881
|
+
# {
|
|
882
|
+
# "type": "computer_call",
|
|
883
|
+
# "call_id": "call_1",
|
|
884
|
+
# "action": {
|
|
885
|
+
# "type": "mouse_move",
|
|
886
|
+
# "x": 150,
|
|
887
|
+
# "y": 250
|
|
888
|
+
# }
|
|
889
|
+
# }
|
|
492
890
|
coordinate = args.get("coordinate", [0, 0])
|
|
493
891
|
responses_items.append(make_move_item(
|
|
494
892
|
x=coordinate[0] if len(coordinate) > 0 else 0,
|
|
@@ -498,6 +896,33 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
498
896
|
|
|
499
897
|
# Enhanced actions (computer_20250124) Available in Claude 4 and Claude Sonnet 3.7
|
|
500
898
|
elif action_type == "scroll":
|
|
899
|
+
# Input:
|
|
900
|
+
# {
|
|
901
|
+
# "function": {
|
|
902
|
+
# "name": "computer",
|
|
903
|
+
# "arguments": json.dumps({
|
|
904
|
+
# "action": "scroll",
|
|
905
|
+
# "coordinate": [300, 400],
|
|
906
|
+
# "scroll_direction": "down",
|
|
907
|
+
# "scroll_amount": 5
|
|
908
|
+
# })
|
|
909
|
+
# },
|
|
910
|
+
# "id": "call_1",
|
|
911
|
+
# "type": "function"
|
|
912
|
+
# }
|
|
913
|
+
|
|
914
|
+
# Output:
|
|
915
|
+
# {
|
|
916
|
+
# "type": "computer_call",
|
|
917
|
+
# "call_id": "call_1",
|
|
918
|
+
# "action": {
|
|
919
|
+
# "type": "scroll",
|
|
920
|
+
# "x": 300,
|
|
921
|
+
# "y": 400,
|
|
922
|
+
# "scroll_x": 0,
|
|
923
|
+
# "scroll_y": -5
|
|
924
|
+
# }
|
|
925
|
+
# }
|
|
501
926
|
coordinate = args.get("coordinate", [0, 0])
|
|
502
927
|
direction = args.get("scroll_direction", "down")
|
|
503
928
|
amount = args.get("scroll_amount", 3)
|
|
@@ -513,16 +938,72 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
513
938
|
call_id=call_id
|
|
514
939
|
))
|
|
515
940
|
elif action_type == "left_click_drag":
|
|
941
|
+
# Input:
|
|
942
|
+
# {
|
|
943
|
+
# "function": {
|
|
944
|
+
# "name": "computer",
|
|
945
|
+
# "arguments": json.dumps({
|
|
946
|
+
# "action": "left_click_drag",
|
|
947
|
+
# "start_coordinate": [100, 150],
|
|
948
|
+
# "end_coordinate": [200, 250]
|
|
949
|
+
# })
|
|
950
|
+
# },
|
|
951
|
+
# "id": "call_1",
|
|
952
|
+
# "type": "function"
|
|
953
|
+
# }
|
|
954
|
+
|
|
955
|
+
# Output:
|
|
956
|
+
# {
|
|
957
|
+
# "type": "computer_call",
|
|
958
|
+
# "call_id": "call_1",
|
|
959
|
+
# "action": {
|
|
960
|
+
# "type": "drag",
|
|
961
|
+
# "path": [
|
|
962
|
+
# {"x": 100, "y": 150},
|
|
963
|
+
# {"x": 200, "y": 250}
|
|
964
|
+
# ]
|
|
965
|
+
# }
|
|
966
|
+
# }
|
|
516
967
|
start_coord = args.get("start_coordinate", [0, 0])
|
|
517
968
|
end_coord = args.get("end_coordinate", [0, 0])
|
|
518
969
|
responses_items.append(make_drag_item(
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
970
|
+
path=[
|
|
971
|
+
{
|
|
972
|
+
"x": start_coord[0] if len(start_coord) > 0 else 0,
|
|
973
|
+
"y": start_coord[1] if len(start_coord) > 1 else 0
|
|
974
|
+
},
|
|
975
|
+
{
|
|
976
|
+
"x": end_coord[0] if len(end_coord) > 0 else 0,
|
|
977
|
+
"y": end_coord[1] if len(end_coord) > 1 else 0
|
|
978
|
+
}
|
|
979
|
+
],
|
|
523
980
|
call_id=call_id
|
|
524
981
|
))
|
|
525
982
|
elif action_type == "right_click":
|
|
983
|
+
# Input:
|
|
984
|
+
# {
|
|
985
|
+
# "function": {
|
|
986
|
+
# "name": "computer",
|
|
987
|
+
# "arguments": json.dumps({
|
|
988
|
+
# "action": "right_click",
|
|
989
|
+
# "coordinate": [120, 180]
|
|
990
|
+
# })
|
|
991
|
+
# },
|
|
992
|
+
# "id": "call_1",
|
|
993
|
+
# "type": "function"
|
|
994
|
+
# }
|
|
995
|
+
|
|
996
|
+
# Output:
|
|
997
|
+
# {
|
|
998
|
+
# "type": "computer_call",
|
|
999
|
+
# "call_id": "call_1",
|
|
1000
|
+
# "action": {
|
|
1001
|
+
# "type": "click",
|
|
1002
|
+
# "x": 120,
|
|
1003
|
+
# "y": 180,
|
|
1004
|
+
# "button": "right"
|
|
1005
|
+
# }
|
|
1006
|
+
# }
|
|
526
1007
|
coordinate = args.get("coordinate", [0, 0])
|
|
527
1008
|
responses_items.append(make_click_item(
|
|
528
1009
|
x=coordinate[0] if len(coordinate) > 0 else 0,
|
|
@@ -531,14 +1012,61 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
531
1012
|
call_id=call_id
|
|
532
1013
|
))
|
|
533
1014
|
elif action_type == "middle_click":
|
|
1015
|
+
# Input:
|
|
1016
|
+
# {
|
|
1017
|
+
# "function": {
|
|
1018
|
+
# "name": "computer",
|
|
1019
|
+
# "arguments": json.dumps({
|
|
1020
|
+
# "action": "middle_click",
|
|
1021
|
+
# "coordinate": [140, 220]
|
|
1022
|
+
# })
|
|
1023
|
+
# },
|
|
1024
|
+
# "id": "call_1",
|
|
1025
|
+
# "type": "function"
|
|
1026
|
+
# }
|
|
1027
|
+
|
|
1028
|
+
# Output:
|
|
1029
|
+
# {
|
|
1030
|
+
# "type": "computer_call",
|
|
1031
|
+
# "call_id": "call_1",
|
|
1032
|
+
# "action": {
|
|
1033
|
+
# "type": "click",
|
|
1034
|
+
# "x": 140,
|
|
1035
|
+
# "y": 220,
|
|
1036
|
+
# "button": "wheel"
|
|
1037
|
+
# }
|
|
1038
|
+
# }
|
|
534
1039
|
coordinate = args.get("coordinate", [0, 0])
|
|
535
1040
|
responses_items.append(make_click_item(
|
|
536
1041
|
x=coordinate[0] if len(coordinate) > 0 else 0,
|
|
537
1042
|
y=coordinate[1] if len(coordinate) > 1 else 0,
|
|
538
|
-
button="
|
|
1043
|
+
button="wheel",
|
|
539
1044
|
call_id=call_id
|
|
540
1045
|
))
|
|
541
1046
|
elif action_type == "double_click":
|
|
1047
|
+
# Input:
|
|
1048
|
+
# {
|
|
1049
|
+
# "function": {
|
|
1050
|
+
# "name": "computer",
|
|
1051
|
+
# "arguments": json.dumps({
|
|
1052
|
+
# "action": "double_click",
|
|
1053
|
+
# "coordinate": [160, 240]
|
|
1054
|
+
# })
|
|
1055
|
+
# },
|
|
1056
|
+
# "id": "call_1",
|
|
1057
|
+
# "type": "function"
|
|
1058
|
+
# }
|
|
1059
|
+
|
|
1060
|
+
# Output:
|
|
1061
|
+
# {
|
|
1062
|
+
# "type": "computer_call",
|
|
1063
|
+
# "call_id": "call_1",
|
|
1064
|
+
# "action": {
|
|
1065
|
+
# "type": "double_click",
|
|
1066
|
+
# "x": 160,
|
|
1067
|
+
# "y": 240
|
|
1068
|
+
# }
|
|
1069
|
+
# }
|
|
542
1070
|
coordinate = args.get("coordinate", [0, 0])
|
|
543
1071
|
responses_items.append(make_double_click_item(
|
|
544
1072
|
x=coordinate[0] if len(coordinate) > 0 else 0,
|
|
@@ -546,14 +1074,127 @@ def _convert_completion_to_responses_items(response: Any) -> List[Dict[str, Any]
|
|
|
546
1074
|
call_id=call_id
|
|
547
1075
|
))
|
|
548
1076
|
elif action_type == "triple_click":
|
|
1077
|
+
# Input:
|
|
1078
|
+
# {
|
|
1079
|
+
# "function": {
|
|
1080
|
+
# "name": "computer",
|
|
1081
|
+
# "arguments": json.dumps({
|
|
1082
|
+
# "action": "triple_click",
|
|
1083
|
+
# "coordinate": [180, 260]
|
|
1084
|
+
# })
|
|
1085
|
+
# },
|
|
1086
|
+
# "id": "call_1",
|
|
1087
|
+
# "type": "function"
|
|
1088
|
+
# }
|
|
1089
|
+
|
|
1090
|
+
# Output:
|
|
1091
|
+
# {
|
|
1092
|
+
# "type": "computer_call",
|
|
1093
|
+
# "call_id": "call_1",
|
|
1094
|
+
# "action": {
|
|
1095
|
+
# "type": "triple_click",
|
|
1096
|
+
# "x": 180,
|
|
1097
|
+
# "y": 260
|
|
1098
|
+
# }
|
|
1099
|
+
# }
|
|
549
1100
|
raise NotImplementedError("triple_click")
|
|
550
1101
|
elif action_type == "left_mouse_down":
|
|
1102
|
+
# Input:
|
|
1103
|
+
# {
|
|
1104
|
+
# "function": {
|
|
1105
|
+
# "name": "computer",
|
|
1106
|
+
# "arguments": json.dumps({
|
|
1107
|
+
# "action": "left_mouse_down",
|
|
1108
|
+
# "coordinate": [200, 280]
|
|
1109
|
+
# })
|
|
1110
|
+
# },
|
|
1111
|
+
# "id": "call_1",
|
|
1112
|
+
# "type": "function"
|
|
1113
|
+
# }
|
|
1114
|
+
|
|
1115
|
+
# Output:
|
|
1116
|
+
# {
|
|
1117
|
+
# "type": "computer_call",
|
|
1118
|
+
# "call_id": "call_1",
|
|
1119
|
+
# "action": {
|
|
1120
|
+
# "type": "mouse_down",
|
|
1121
|
+
# "button": "left",
|
|
1122
|
+
# "x": 200,
|
|
1123
|
+
# "y": 280
|
|
1124
|
+
# }
|
|
1125
|
+
# }
|
|
551
1126
|
raise NotImplementedError("left_mouse_down")
|
|
552
1127
|
elif action_type == "left_mouse_up":
|
|
1128
|
+
# Input:
|
|
1129
|
+
# {
|
|
1130
|
+
# "function": {
|
|
1131
|
+
# "name": "computer",
|
|
1132
|
+
# "arguments": json.dumps({
|
|
1133
|
+
# "action": "left_mouse_up",
|
|
1134
|
+
# "coordinate": [220, 300]
|
|
1135
|
+
# })
|
|
1136
|
+
# },
|
|
1137
|
+
# "id": "call_1",
|
|
1138
|
+
# "type": "function"
|
|
1139
|
+
# }
|
|
1140
|
+
|
|
1141
|
+
# Output:
|
|
1142
|
+
# {
|
|
1143
|
+
# "type": "computer_call",
|
|
1144
|
+
# "call_id": "call_1",
|
|
1145
|
+
# "action": {
|
|
1146
|
+
# "type": "mouse_up",
|
|
1147
|
+
# "button": "left",
|
|
1148
|
+
# "x": 220,
|
|
1149
|
+
# "y": 300
|
|
1150
|
+
# }
|
|
1151
|
+
# }
|
|
553
1152
|
raise NotImplementedError("left_mouse_up")
|
|
554
1153
|
elif action_type == "hold_key":
|
|
1154
|
+
# Input:
|
|
1155
|
+
# {
|
|
1156
|
+
# "function": {
|
|
1157
|
+
# "name": "computer",
|
|
1158
|
+
# "arguments": json.dumps({
|
|
1159
|
+
# "action": "hold_key",
|
|
1160
|
+
# "key": "shift"
|
|
1161
|
+
# })
|
|
1162
|
+
# },
|
|
1163
|
+
# "id": "call_1",
|
|
1164
|
+
# "type": "function"
|
|
1165
|
+
# }
|
|
1166
|
+
|
|
1167
|
+
# Output:
|
|
1168
|
+
# {
|
|
1169
|
+
# "type": "computer_call",
|
|
1170
|
+
# "call_id": "call_1",
|
|
1171
|
+
# "action": {
|
|
1172
|
+
# "type": "key_hold",
|
|
1173
|
+
# "key": "shift"
|
|
1174
|
+
# }
|
|
1175
|
+
# }
|
|
555
1176
|
raise NotImplementedError("hold_key")
|
|
556
1177
|
elif action_type == "wait":
|
|
1178
|
+
# Input:
|
|
1179
|
+
# {
|
|
1180
|
+
# "function": {
|
|
1181
|
+
# "name": "computer",
|
|
1182
|
+
# "arguments": json.dumps({
|
|
1183
|
+
# "action": "wait"
|
|
1184
|
+
# })
|
|
1185
|
+
# },
|
|
1186
|
+
# "id": "call_1",
|
|
1187
|
+
# "type": "function"
|
|
1188
|
+
# }
|
|
1189
|
+
|
|
1190
|
+
# Output:
|
|
1191
|
+
# {
|
|
1192
|
+
# "type": "computer_call",
|
|
1193
|
+
# "call_id": "call_1",
|
|
1194
|
+
# "action": {
|
|
1195
|
+
# "type": "wait"
|
|
1196
|
+
# }
|
|
1197
|
+
# }
|
|
557
1198
|
responses_items.append(make_wait_item(
|
|
558
1199
|
call_id=call_id
|
|
559
1200
|
))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cua-agent
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: CUA (Computer Use) Agent for AI-driven computer interaction
|
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
|
6
6
|
Requires-Python: >=3.11
|
|
@@ -13,7 +13,7 @@ Requires-Dist: pydantic>=2.6.4
|
|
|
13
13
|
Requires-Dist: rich>=13.7.1
|
|
14
14
|
Requires-Dist: python-dotenv>=1.0.1
|
|
15
15
|
Requires-Dist: cua-computer<0.5.0,>=0.3.0
|
|
16
|
-
Requires-Dist: cua-core<0.2.0,>=0.1.
|
|
16
|
+
Requires-Dist: cua-core<0.2.0,>=0.1.8
|
|
17
17
|
Requires-Dist: certifi>=2024.2.2
|
|
18
18
|
Requires-Dist: litellm>=1.74.8
|
|
19
19
|
Provides-Extra: openai
|
|
@@ -9,13 +9,13 @@ agent/callbacks/budget_manager.py,sha256=RyKM-7iXQcDotYvrw3eURzeEHEXvQjID-NobtvQ
|
|
|
9
9
|
agent/callbacks/image_retention.py,sha256=tiuRT5ke9xXTb2eP8Gz-2ITyAMY29LURUH6AbjX3RP8,6165
|
|
10
10
|
agent/callbacks/logging.py,sha256=OOxU97EzrxlnUAtiEnvy9FB7SwCUK90-rdpDFA2Ae4E,10921
|
|
11
11
|
agent/callbacks/pii_anonymization.py,sha256=UKAqNacHG3z92_6uocVzOIl8gJoqyofldCoCmB4UVIE,10268
|
|
12
|
-
agent/callbacks/telemetry.py,sha256=
|
|
12
|
+
agent/callbacks/telemetry.py,sha256=PU7pkK7W1v1xjDN-9gA30lGvn4-WhqK3BPHGW3HpTOc,7497
|
|
13
13
|
agent/callbacks/trajectory_saver.py,sha256=POE8aPT-MBzfW873wr6C7iiVUHtp483KwvLPxC1S3EY,11626
|
|
14
|
-
agent/cli.py,sha256=
|
|
14
|
+
agent/cli.py,sha256=cTH2RT8tLtISseqrtTSau7g3-ILkFnHkR2BDLaNQVVE,10440
|
|
15
15
|
agent/computer_handler.py,sha256=2gfFBeDk9Vd54x9mOqnswMo8BdjUduLo5I0RbBPLovY,3964
|
|
16
16
|
agent/decorators.py,sha256=bCmcCjP31WEjWg1D91OE2jo7AZTfGa9cNgCnYUvjiyw,2832
|
|
17
17
|
agent/loops/__init__.py,sha256=_qpP_--3ePdFkTZP8qmUEFlBsy6m4h8fj0gGLDKA7zw,217
|
|
18
|
-
agent/loops/anthropic.py,sha256=
|
|
18
|
+
agent/loops/anthropic.py,sha256=Za_Qzf4q37CO4QZ0jTnSjHj7RIgaoTLNdrxfPYEysCg,58155
|
|
19
19
|
agent/loops/omniparser.py,sha256=m3bDNQ0Igc_HHVoAbjVNj599uRoC9Eap3DCALg6RZ54,11422
|
|
20
20
|
agent/loops/openai.py,sha256=ArTqadeJY8F9N8ZLKfswlzgHV_54HbWJgLd4l6ele9w,3010
|
|
21
21
|
agent/loops/uitars.py,sha256=L0NYxKoIiMfIHbyomnaiK3ZGLmLv3QMx9nX57GruAk0,26323
|
|
@@ -27,7 +27,7 @@ agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
|
|
|
27
27
|
agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
|
|
28
28
|
agent/ui/gradio/app.py,sha256=X7he4jzyFqWJDP1y_M8yfZvfdy6GHNuclLn4k9iIwAw,8824
|
|
29
29
|
agent/ui/gradio/ui_components.py,sha256=WxFE-4wvdEgj7FPLNXUrs118sXJ9vN3kLkZxtto-weo,34474
|
|
30
|
-
cua_agent-0.4.
|
|
31
|
-
cua_agent-0.4.1.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
|
|
32
|
-
cua_agent-0.4.1.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
33
|
-
cua_agent-0.4.1.dist-info/RECORD,,
|
|
30
|
+
cua_agent-0.4.2.dist-info/METADATA,sha256=mWUkghYqwfpH9ElvfMlzG30mNuDdlnKPr8VmNnZalAg,12060
|
|
31
|
+
cua_agent-0.4.2.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
|
|
32
|
+
cua_agent-0.4.2.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
33
|
+
cua_agent-0.4.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|