cua-agent 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cua-agent might be problematic. Click here for more details.
- agent/__init__.py +2 -2
- agent/adapters/huggingfacelocal_adapter.py +5 -1
- agent/agent.py +82 -15
- agent/cli.py +9 -3
- agent/computer_handler.py +7 -3
- agent/decorators.py +28 -66
- agent/loops/__init__.py +3 -1
- agent/loops/anthropic.py +200 -84
- agent/loops/base.py +76 -0
- agent/loops/composed_grounded.py +318 -0
- agent/loops/gta1.py +178 -0
- agent/loops/model_types.csv +6 -0
- agent/loops/omniparser.py +178 -84
- agent/loops/openai.py +198 -58
- agent/loops/uitars.py +305 -178
- agent/responses.py +477 -1
- agent/types.py +7 -5
- {cua_agent-0.4.7.dist-info → cua_agent-0.4.9.dist-info}/METADATA +2 -2
- cua_agent-0.4.9.dist-info/RECORD +37 -0
- cua_agent-0.4.7.dist-info/RECORD +0 -33
- {cua_agent-0.4.7.dist-info → cua_agent-0.4.9.dist-info}/WHEEL +0 -0
- {cua_agent-0.4.7.dist-info → cua_agent-0.4.9.dist-info}/entry_points.txt +0 -0
agent/responses.py
CHANGED
|
@@ -40,7 +40,7 @@ def make_input_image_item(image_data: Union[str, bytes]) -> EasyInputMessagePara
|
|
|
40
40
|
ResponseInputImageParam(
|
|
41
41
|
type="input_image",
|
|
42
42
|
image_url=f"data:image/png;base64,{base64.b64encode(image_data).decode('utf-8') if isinstance(image_data, bytes) else image_data}"
|
|
43
|
-
)
|
|
43
|
+
) # type: ignore
|
|
44
44
|
],
|
|
45
45
|
role="user",
|
|
46
46
|
type="message"
|
|
@@ -205,3 +205,479 @@ def make_wait_item(call_id: Optional[str] = None) -> ResponseComputerToolCallPar
|
|
|
205
205
|
status="completed",
|
|
206
206
|
type="computer_call"
|
|
207
207
|
)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# Conversion functions between element descriptions and coordinates
|
|
211
|
+
def convert_computer_calls_desc2xy(
    responses_items: List[Dict[str, Any]],
    desc2xy: Dict[str, tuple],
) -> List[Dict[str, Any]]:
    """
    Convert computer calls from element descriptions to x,y coordinates.

    Only items whose ``type`` is ``"computer_call"`` and that carry an
    ``action`` are rewritten; every other item is passed through as the very
    same object. Rewritten items are shallow copies with a shallow-copied
    ``action`` dict, so the caller's items and actions are not mutated.

    Two action shapes are recognised:

    * ``element_description`` -> replaced by ``x`` / ``y``.
    * ``start_element_description`` + ``end_element_description`` (drag) ->
      replaced by a two-point ``path``. This shape is only examined when the
      action has no ``element_description`` key.

    A description that is missing from ``desc2xy`` is left untouched (for a
    drag, both descriptions must be known, otherwise neither is replaced).

    Args:
        responses_items: List of response items containing computer calls with element_description
        desc2xy: Dictionary mapping element descriptions to (x, y) coordinate tuples

    Returns:
        List of response items with element_description replaced by x,y coordinates
    """
    converted_items = []

    for item in responses_items:
        # Anything that is not a computer call with an action is forwarded as-is.
        if item.get("type") != "computer_call" or "action" not in item:
            converted_items.append(item)
            continue

        # Work on a copy so the input action is never modified in place.
        action = item["action"].copy()

        if "element_description" in action:
            # Single-target action (click, hover, ...).
            desc = action["element_description"]
            if desc in desc2xy:
                x, y = desc2xy[desc]
                action["x"] = x
                action["y"] = y
                del action["element_description"]
        elif "start_element_description" in action and "end_element_description" in action:
            # Drag action: both endpoints must resolve before anything is replaced.
            start_desc = action["start_element_description"]
            end_desc = action["end_element_description"]
            if start_desc in desc2xy and end_desc in desc2xy:
                start_x, start_y = desc2xy[start_desc]
                end_x, end_y = desc2xy[end_desc]
                action["path"] = [{"x": start_x, "y": start_y}, {"x": end_x, "y": end_y}]
                del action["start_element_description"]
                del action["end_element_description"]

        converted_item = item.copy()
        converted_item["action"] = action
        converted_items.append(converted_item)

    return converted_items
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def convert_computer_calls_xy2desc(
    responses_items: List[Dict[str, Any]],
    desc2xy: Dict[str, tuple],
) -> List[Dict[str, Any]]:
    """
    Convert computer calls from x,y coordinates to element descriptions.

    Only items whose ``type`` is ``"computer_call"`` and that carry an
    ``action`` are rewritten; every other item is passed through as the very
    same object. Rewritten items are shallow copies with a shallow-copied
    ``action`` dict, so the caller's items and actions are not mutated.

    Two action shapes are recognised:

    * ``x`` + ``y`` -> replaced by ``element_description``.
    * a two-point ``path`` (drag) -> replaced by
      ``start_element_description`` / ``end_element_description``. This shape
      is only examined when the action does not have both ``x`` and ``y``.

    Coordinates are matched by exact equality against the values of
    ``desc2xy``; coordinates with no matching description are left untouched.
    If several descriptions share the same coordinates, the one that appears
    last in ``desc2xy`` wins.

    Args:
        responses_items: List of response items containing computer calls with x,y coordinates
        desc2xy: Dictionary mapping element descriptions to (x, y) coordinate tuples.
            List-valued coordinates (e.g. from decoded JSON) are accepted too.

    Returns:
        List of response items with x,y coordinates replaced by element_description
    """
    # Create reverse mapping from coordinates to descriptions. Lists are not
    # hashable, so list coordinates are normalised to tuples; anything else is
    # used as the key unchanged.
    xy2desc = {
        (tuple(coords) if isinstance(coords, list) else coords): desc
        for desc, coords in desc2xy.items()
    }

    converted_items = []

    for item in responses_items:
        # Anything that is not a computer call with an action is forwarded as-is.
        if item.get("type") != "computer_call" or "action" not in item:
            converted_items.append(item)
            continue

        # Work on a copy so the input action is never modified in place.
        action = item["action"].copy()

        if "x" in action and "y" in action:
            # Single-target action (click, hover, ...).
            coords = (action["x"], action["y"])
            if coords in xy2desc:
                action["element_description"] = xy2desc[coords]
                del action["x"]
                del action["y"]
        elif "path" in action and isinstance(action["path"], list) and len(action["path"]) == 2:
            # Drag action expressed as exactly two points.
            start_point, end_point = action["path"]
            if ("x" in start_point and "y" in start_point and
                    "x" in end_point and "y" in end_point):
                start_coords = (start_point["x"], start_point["y"])
                end_coords = (end_point["x"], end_point["y"])
                # Both endpoints must resolve before the path is replaced.
                if start_coords in xy2desc and end_coords in xy2desc:
                    action["start_element_description"] = xy2desc[start_coords]
                    action["end_element_description"] = xy2desc[end_coords]
                    del action["path"]

        converted_item = item.copy()
        converted_item["action"] = action
        converted_items.append(converted_item)

    return converted_items
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def get_all_element_descriptions(responses_items: List[Dict[str, Any]]) -> List[str]:
    """
    Extract all element descriptions from computer calls in responses items.

    Looks at every item whose ``type`` is ``"computer_call"`` and that has an
    ``action``, and collects the values stored under ``element_description``,
    ``start_element_description`` and ``end_element_description``.

    Args:
        responses_items: List of response items containing computer calls

    Returns:
        List of unique element descriptions found in computer calls, in the
        order in which each description is first encountered.
    """
    # A dict is used as an insertion-ordered set so the result is
    # deterministic from run to run (a plain set of strings is not).
    descriptions: Dict[Any, None] = {}
    description_keys = (
        "element_description",        # single-target actions
        "start_element_description",  # drag start
        "end_element_description",    # drag end
    )

    for item in responses_items:
        if item.get("type") == "computer_call" and "action" in item:
            action = item["action"]
            for key in description_keys:
                if key in action:
                    descriptions[action[key]] = None

    return list(descriptions)
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
# Conversion functions between responses_items and completion messages formats
|
|
342
|
+
def convert_responses_items_to_completion_messages(
    messages: List[Dict[str, Any]],
    allow_images_in_tool_results: bool = True,
) -> List[Dict[str, Any]]:
    """Convert responses_items message format to liteLLM completion format.

    Each input item is dispatched on its ``role`` / ``type``:

    * user messages -> ``{"role": "user", ...}``; ``input_image`` parts become
      ``image_url`` parts, ``input_text`` / ``text`` parts become ``text``
      parts, other parts are dropped.
    * assistant messages -> one assistant message whose content is the
      ``output_text`` / ``text`` parts joined with newlines (nothing is
      emitted when there is no text). A non-empty plain-string content on a
      ``role == "assistant"`` message is forwarded as-is.
    * ``reasoning`` items -> an assistant message built from the
      ``summary_text`` parts of ``summary``.
    * ``function_call`` / ``computer_call`` -> a ``tool_calls`` entry attached
      to the immediately preceding assistant message, or to a new empty one.
      Computer calls are emitted as a function named ``"computer"`` whose
      arguments are the JSON-encoded action.
    * ``function_call_output`` / ``computer_call_output`` -> a ``tool``
      message (see ``allow_images_in_tool_results`` for image outputs).

    Items matching none of the above are ignored.

    Args:
        messages: List of responses_items format messages
        allow_images_in_tool_results: If True, include images in tool role messages.
            If False, send tool message + separate user message with image.

    Returns:
        List of completion-format message dicts.
    """
    completion_messages: List[Dict[str, Any]] = []

    for message in messages:
        msg_type = message.get("type")
        role = message.get("role")

        # Handle user messages (both with and without explicit type)
        if role == "user" or msg_type == "user":
            content = message.get("content", "")
            if isinstance(content, list):
                # Handle list content (images, text blocks)
                completion_content = []
                for item in content:
                    item_type = item.get("type")
                    if item_type == "input_image":
                        completion_content.append({
                            "type": "image_url",
                            "image_url": {
                                "url": item.get("image_url")
                            }
                        })
                    elif item_type in ("input_text", "text"):
                        completion_content.append({
                            "type": "text",
                            "text": item.get("text")
                        })

                completion_messages.append({
                    "role": "user",
                    "content": completion_content
                })
            elif isinstance(content, str):
                # Handle string content
                completion_messages.append({
                    "role": "user",
                    "content": content
                })

        # Handle assistant messages
        elif role == "assistant" or msg_type == "message":
            content = message.get("content", [])
            if isinstance(content, list):
                text_parts = [
                    item.get("text", "")
                    for item in content
                    if item.get("type") in ("output_text", "text")
                ]
                if text_parts:
                    completion_messages.append({
                        "role": "assistant",
                        "content": "\n".join(text_parts)
                    })
            elif role == "assistant" and isinstance(content, str) and content:
                # Plain-string assistant content used to be dropped silently.
                # Restricted to an explicit assistant role so that other
                # "message"-typed items are not relabelled as assistant text.
                completion_messages.append({
                    "role": "assistant",
                    "content": content
                })

        # Handle reasoning items (convert to assistant message)
        elif msg_type == "reasoning":
            text_parts = [
                item.get("text", "")
                for item in message.get("summary", [])
                if item.get("type") == "summary_text"
            ]
            if text_parts:
                completion_messages.append({
                    "role": "assistant",
                    "content": "\n".join(text_parts)
                })

        # Handle function calls
        elif msg_type == "function_call":
            _append_completion_tool_call(
                completion_messages,
                message.get("call_id"),
                message.get("name"),
                message.get("arguments"),
            )

        # Handle computer calls
        elif msg_type == "computer_call":
            action = message.get("action", {})
            _append_completion_tool_call(
                completion_messages,
                message.get("call_id"),
                "computer",
                json.dumps(action),
            )

        # Handle function/computer call outputs
        elif msg_type in ["function_call_output", "computer_call_output"]:
            output = message.get("output")
            call_id = message.get("call_id")

            if isinstance(output, dict) and output.get("type") == "input_image":
                image_part = {
                    "type": "image_url",
                    "image_url": {
                        "url": output.get("image_url")
                    }
                }
                if allow_images_in_tool_results:
                    # Handle image output as tool response (may not work with all APIs)
                    completion_messages.append({
                        "role": "tool",
                        "tool_call_id": call_id,
                        "content": [image_part]
                    })
                else:
                    # Send tool message + separate user message with image (OpenAI compatible)
                    completion_messages += [{
                        "role": "tool",
                        "tool_call_id": call_id,
                        "content": "[Execution completed. See screenshot below]"
                    }, {
                        "role": "user",
                        "content": [image_part]
                    }]
            else:
                # Handle text output as tool response
                completion_messages.append({
                    "role": "tool",
                    "tool_call_id": call_id,
                    "content": str(output)
                })

    return completion_messages


def _append_completion_tool_call(
    completion_messages: List[Dict[str, Any]],
    call_id: Any,
    name: Any,
    arguments: Any,
) -> None:
    """Attach a function-style tool call to the trailing assistant message.

    If the last message is not an assistant message (or there is none), a new
    assistant message with empty content is appended first.
    """
    if not completion_messages or completion_messages[-1]["role"] != "assistant":
        completion_messages.append({
            "role": "assistant",
            "content": "",
            "tool_calls": []
        })

    # A text-only assistant message may not have a tool_calls list yet.
    completion_messages[-1].setdefault("tool_calls", []).append({
        "id": call_id,
        "type": "function",
        "function": {
            "name": name,
            "arguments": arguments
        }
    })
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def convert_completion_messages_to_responses_items(completion_messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert completion messages format to responses_items message format.

    Each completion message is translated as follows:

    * assistant message with non-empty string content -> a ``message`` item
      holding one ``output_text`` part.
    * every ``tool_calls`` entry of type ``"function"`` -> a ``computer_call``
      item when the function is named ``"computer"`` and its arguments decode
      to a JSON object (an ``"action"`` key is renamed to ``"type"``),
      otherwise a ``function_call`` item.
    * tool message -> ``computer_call_output`` or ``function_call_output``:
        - the placeholder ``"[Execution completed. See screenshot below]"``
          directly followed by a user message containing an image is merged
          into a single image ``computer_call_output`` and the user message is
          consumed; without such an image the placeholder text itself is kept
          as the output;
        - a string that decodes to a JSON object -> ``computer_call_output``
          (the decoded object for ``input_image`` payloads, the raw string
          otherwise);
        - any other string -> ``function_call_output``;
        - list content -> one output item per ``image_url`` / ``text`` part.
    * user message -> a user item; list content is mapped part by part
      (``image_url`` -> ``input_image``, ``text`` -> ``input_text``), string
      content is passed through.

    Messages with empty content (and no tool calls) produce no item.

    Args:
        completion_messages: List of completion-format message dicts.

    Returns:
        List of responses_items-format dicts.
    """
    screenshot_placeholder = "[Execution completed. See screenshot below]"
    responses_items: List[Dict[str, Any]] = []
    skip_next = False

    for i, message in enumerate(completion_messages):
        # The previous tool message already consumed this (screenshot) message.
        if skip_next:
            skip_next = False
            continue

        role = message.get("role")
        content = message.get("content")
        tool_calls = message.get("tool_calls", [])

        # Handle assistant messages with text content
        if role == "assistant" and content and isinstance(content, str):
            responses_items.append({
                "type": "message",
                "role": "assistant",
                "content": [{
                    "type": "output_text",
                    "text": content
                }]
            })

        # Handle tool calls
        if tool_calls:
            for tool_call in tool_calls:
                if tool_call.get("type") == "function":
                    responses_items.append(_completion_tool_call_to_responses_item(tool_call))

        # Handle tool messages (function/computer call outputs)
        elif role == "tool" and content:
            tool_call_id = message.get("tool_call_id")
            if isinstance(content, str):
                if content == screenshot_placeholder:
                    # Look ahead for the next user message with image
                    screenshot_part = None
                    next_idx = i + 1
                    if next_idx < len(completion_messages):
                        follow_up = completion_messages[next_idx]
                        if follow_up.get("role") == "user" and isinstance(follow_up.get("content"), list):
                            screenshot_part = next(
                                (part for part in follow_up["content"] if part.get("type") == "image_url"),
                                None,
                            )

                    if screenshot_part is not None:
                        responses_items.append({
                            "type": "computer_call_output",
                            "call_id": tool_call_id,
                            "output": {
                                "type": "input_image",
                                "image_url": screenshot_part.get("image_url", {}).get("url")
                            }
                        })
                        # Skip the next user message since we processed it
                        skip_next = True
                    else:
                        # No screenshot to merge: keep the tool output as plain
                        # text rather than losing it. Any following user
                        # message is then converted normally.
                        responses_items.append({
                            "type": "computer_call_output",
                            "call_id": tool_call_id,
                            "output": content
                        })
                else:
                    # Determine if this is a computer call or function call output
                    try:
                        parsed_content = json.loads(content)
                    except json.JSONDecodeError:
                        parsed_content = None

                    if isinstance(parsed_content, dict):
                        # Structured output; only image payloads are stored decoded.
                        is_image = parsed_content.get("type") == "input_image"
                        responses_items.append({
                            "type": "computer_call_output",
                            "call_id": tool_call_id,
                            "output": parsed_content if is_image else content
                        })
                    else:
                        # Plain text, or JSON that is not an object (e.g. "42"):
                        # treated as a function call output.
                        responses_items.append({
                            "type": "function_call_output",
                            "call_id": tool_call_id,
                            "output": content
                        })
            elif isinstance(content, list):
                # Handle structured content (e.g., images)
                for item in content:
                    item_type = item.get("type")
                    if item_type == "image_url":
                        responses_items.append({
                            "type": "computer_call_output",
                            "call_id": tool_call_id,
                            "output": {
                                "type": "input_image",
                                "image_url": item.get("image_url", {}).get("url")
                            }
                        })
                    elif item_type == "text":
                        responses_items.append({
                            "type": "function_call_output",
                            "call_id": tool_call_id,
                            "output": item.get("text")
                        })

        # Handle actual user messages
        elif role == "user" and content:
            if isinstance(content, list):
                # Handle structured user content (e.g., text + images)
                user_content = []
                for item in content:
                    item_type = item.get("type")
                    if item_type == "image_url":
                        user_content.append({
                            "type": "input_image",
                            "image_url": item.get("image_url", {}).get("url")
                        })
                    elif item_type == "text":
                        user_content.append({
                            "type": "input_text",
                            "text": item.get("text")
                        })

                if user_content:
                    responses_items.append({
                        "role": "user",
                        "type": "message",
                        "content": user_content
                    })
            elif isinstance(content, str):
                # Handle simple text user message
                responses_items.append({
                    "role": "user",
                    "content": content
                })

    return responses_items


def _completion_tool_call_to_responses_item(tool_call: Dict[str, Any]) -> Dict[str, Any]:
    """Translate one function-type tool call into a computer_call or function_call item."""
    function = tool_call.get("function", {})
    function_name = function.get("name")

    if function_name == "computer":
        # Parse computer action; anything that does not decode to a JSON
        # object falls back to the generic function call format below.
        try:
            action = json.loads(function.get("arguments", "{}"))
        except (json.JSONDecodeError, TypeError):
            action = None

        if isinstance(action, dict):
            # Change key from "action" -> "type"
            if action.get("action"):
                action["type"] = action.pop("action")
            return {
                "type": "computer_call",
                "call_id": tool_call.get("id"),
                "action": action,
                "status": "completed"
            }

    # Regular function call (or unparseable computer call)
    return {
        "type": "function_call",
        "call_id": tool_call.get("id"),
        "name": function_name,
        "arguments": function.get("arguments", "{}"),
        "status": "completed"
    }
|
agent/types.py
CHANGED
|
@@ -14,16 +14,18 @@ Tools = Optional[Iterable[ToolParam]]
|
|
|
14
14
|
|
|
15
15
|
# Agent output types
|
|
16
16
|
AgentResponse = ResponsesAPIResponse
|
|
17
|
+
AgentCapability = Literal["step", "click"]
|
|
17
18
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
19
|
+
|
|
20
|
+
# Agent config registration
|
|
21
|
+
class AgentConfigInfo(BaseModel):
|
|
22
|
+
"""Information about a registered agent config"""
|
|
23
|
+
agent_class: type
|
|
22
24
|
models_regex: str
|
|
23
25
|
priority: int = 0
|
|
24
26
|
|
|
25
27
|
def matches_model(self, model: str) -> bool:
|
|
26
|
-
"""Check if this
|
|
28
|
+
"""Check if this agent config matches the given model"""
|
|
27
29
|
return bool(re.match(self.models_regex, model))
|
|
28
30
|
|
|
29
31
|
# Computer tool interface
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cua-agent
|
|
3
|
-
Version: 0.4.7
|
|
3
|
+
Version: 0.4.9
|
|
4
4
|
Summary: CUA (Computer Use) Agent for AI-driven computer interaction
|
|
5
5
|
Author-Email: TryCua <gh@trycua.com>
|
|
6
6
|
Requires-Python: >=3.11
|
|
@@ -15,7 +15,7 @@ Requires-Dist: python-dotenv>=1.0.1
|
|
|
15
15
|
Requires-Dist: cua-computer<0.5.0,>=0.4.0
|
|
16
16
|
Requires-Dist: cua-core<0.2.0,>=0.1.8
|
|
17
17
|
Requires-Dist: certifi>=2024.2.2
|
|
18
|
-
Requires-Dist: litellm>=1.74.
|
|
18
|
+
Requires-Dist: litellm>=1.74.12
|
|
19
19
|
Provides-Extra: openai
|
|
20
20
|
Provides-Extra: anthropic
|
|
21
21
|
Provides-Extra: omni
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
agent/__init__.py,sha256=vWbQYgjkzIso7zILSm4OAbNU_vrmN4HyYkfX8vC-Yi0,1547
|
|
2
|
+
agent/__main__.py,sha256=lBUe8Niqa5XoCjwFfXyX7GtnUwjjZXC1-j4V9mvUYSc,538
|
|
3
|
+
agent/adapters/__init__.py,sha256=szM2HMten2WkcqXeRnan__-sXjpyS4eyvIW0LXSfj4U,178
|
|
4
|
+
agent/adapters/huggingfacelocal_adapter.py,sha256=CT3dwJnOWItB5eTqpn5i0Y1Ec6yRjaW7zhA14Ot9gz8,8066
|
|
5
|
+
agent/agent.py,sha256=PP6UvNq_QYBYuEt97Dhono7g3hz1fQlMIapSQRhw59c,27761
|
|
6
|
+
agent/callbacks/__init__.py,sha256=yxxBXUqpXQ-jRi_ixJMtmQPxoNRy5Vz1PUBzNNa1Dwg,538
|
|
7
|
+
agent/callbacks/base.py,sha256=UnnnYlh6XCm6HKZZsAPaT_Eyo9LUYLyjyNwF-QRm6Ns,4691
|
|
8
|
+
agent/callbacks/budget_manager.py,sha256=RyKM-7iXQcDotYvrw3eURzeEHEXvQjID-NobtvQWE7k,1832
|
|
9
|
+
agent/callbacks/image_retention.py,sha256=tiuRT5ke9xXTb2eP8Gz-2ITyAMY29LURUH6AbjX3RP8,6165
|
|
10
|
+
agent/callbacks/logging.py,sha256=OOxU97EzrxlnUAtiEnvy9FB7SwCUK90-rdpDFA2Ae4E,10921
|
|
11
|
+
agent/callbacks/pii_anonymization.py,sha256=NEkUTUjQBi82nqus7kT-1E4RaeQ2hQrY7YCnKndLhP8,3272
|
|
12
|
+
agent/callbacks/telemetry.py,sha256=PU7pkK7W1v1xjDN-9gA30lGvn4-WhqK3BPHGW3HpTOc,7497
|
|
13
|
+
agent/callbacks/trajectory_saver.py,sha256=POE8aPT-MBzfW873wr6C7iiVUHtp483KwvLPxC1S3EY,11626
|
|
14
|
+
agent/cli.py,sha256=LDKjahBcZLFjUSq3c4MYELJ3dxJ0n5lgY2WKTC3rXQY,12375
|
|
15
|
+
agent/computer_handler.py,sha256=WBE78movl9ZHKWl-63GDoVSdYt71q4hUwQ6MvUfa140,4112
|
|
16
|
+
agent/decorators.py,sha256=n8VvMsififWkmuk75Q7HIpo0xAA2yAeQ6J-OOiwbAKc,1836
|
|
17
|
+
agent/loops/__init__.py,sha256=AQ8eLgAo9ZiSaRC8n9nMOudF2IWgIKd8130uWwQlIJg,297
|
|
18
|
+
agent/loops/anthropic.py,sha256=joz7ibK6B4rTR3ue8a8rRcY0K0FIAv0TwcH2BL46Nmg,62557
|
|
19
|
+
agent/loops/base.py,sha256=LK7kSTnc2CB88LI7qr2VP7LMq0eS5r2bSEnrxO6IN5U,2345
|
|
20
|
+
agent/loops/composed_grounded.py,sha256=BgxufIyJCkWnJpp29PE1V2ce4iB9ictGjuVqFDx17B8,12122
|
|
21
|
+
agent/loops/gta1.py,sha256=ha5TaUWqUzTffx_ow1WiBU8i3VNP-6FL5XC66ajPFjg,5829
|
|
22
|
+
agent/loops/model_types.csv,sha256=GmFn4x80yoUpQZuQ-GXtJkPVlOLYWZ5u_5A73HRyeNE,112
|
|
23
|
+
agent/loops/omniparser.py,sha256=-db8JUL2Orn47ERIaLbuNShAXn4LeIgYzRWphn_9Dg4,15071
|
|
24
|
+
agent/loops/openai.py,sha256=8Ad_XufpENmLq1nEnhzF3oswPrPK1EPz-C5NU8UOEs0,8035
|
|
25
|
+
agent/loops/uitars.py,sha256=EDq8AO20lrnwB013uJoWSkkz3TVRU9oG8DQ1VviXltc,31445
|
|
26
|
+
agent/responses.py,sha256=ZI1nUYyyDNiiI7PwJpfta4RlGeuxpT-Tm-ptF6-ys9c,27498
|
|
27
|
+
agent/telemetry.py,sha256=87ZTyBaT0wEPQn4v76II3g0V3GERuIVbypoX-Ug6FKQ,4786
|
|
28
|
+
agent/types.py,sha256=zXev_CV9LvlYRkxzO_EmW1ZT70Z8qeGG3iHbzyYmV30,2425
|
|
29
|
+
agent/ui/__init__.py,sha256=DTZpK85QXscXK2nM9HtpAhVBF13yAamUrtwrQSuV-kM,126
|
|
30
|
+
agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
|
|
31
|
+
agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
|
|
32
|
+
agent/ui/gradio/app.py,sha256=9UOPwuwspLrnHGY91zdzuRqkMH4cmwOBH-f-BC0gVC4,9077
|
|
33
|
+
agent/ui/gradio/ui_components.py,sha256=hVMGZxAEq1LBHOqKj-RbDXJsj1j0Qw5dOV0ecWIHxmc,35397
|
|
34
|
+
cua_agent-0.4.9.dist-info/METADATA,sha256=lTkzRc98XnFsPDzmDZ7WAwfU3BNM_sJljP5R8yAV5yw,12061
|
|
35
|
+
cua_agent-0.4.9.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
|
|
36
|
+
cua_agent-0.4.9.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
37
|
+
cua_agent-0.4.9.dist-info/RECORD,,
|
cua_agent-0.4.7.dist-info/RECORD
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
agent/__init__.py,sha256=PfRgVa_aJQL9fK0D1g2r__Kdg3627EigNS31_M8Ivkk,1539
|
|
2
|
-
agent/__main__.py,sha256=lBUe8Niqa5XoCjwFfXyX7GtnUwjjZXC1-j4V9mvUYSc,538
|
|
3
|
-
agent/adapters/__init__.py,sha256=szM2HMten2WkcqXeRnan__-sXjpyS4eyvIW0LXSfj4U,178
|
|
4
|
-
agent/adapters/huggingfacelocal_adapter.py,sha256=o2IQI1wuZWDYgGPj92dkxTb3uk07XjJdvC19O2_aeak,7963
|
|
5
|
-
agent/agent.py,sha256=bSmc_5Jr4CTvTut8lgNwNpnk9w4sD9SACQb0GbT4zwg,24770
|
|
6
|
-
agent/callbacks/__init__.py,sha256=yxxBXUqpXQ-jRi_ixJMtmQPxoNRy5Vz1PUBzNNa1Dwg,538
|
|
7
|
-
agent/callbacks/base.py,sha256=UnnnYlh6XCm6HKZZsAPaT_Eyo9LUYLyjyNwF-QRm6Ns,4691
|
|
8
|
-
agent/callbacks/budget_manager.py,sha256=RyKM-7iXQcDotYvrw3eURzeEHEXvQjID-NobtvQWE7k,1832
|
|
9
|
-
agent/callbacks/image_retention.py,sha256=tiuRT5ke9xXTb2eP8Gz-2ITyAMY29LURUH6AbjX3RP8,6165
|
|
10
|
-
agent/callbacks/logging.py,sha256=OOxU97EzrxlnUAtiEnvy9FB7SwCUK90-rdpDFA2Ae4E,10921
|
|
11
|
-
agent/callbacks/pii_anonymization.py,sha256=NEkUTUjQBi82nqus7kT-1E4RaeQ2hQrY7YCnKndLhP8,3272
|
|
12
|
-
agent/callbacks/telemetry.py,sha256=PU7pkK7W1v1xjDN-9gA30lGvn4-WhqK3BPHGW3HpTOc,7497
|
|
13
|
-
agent/callbacks/trajectory_saver.py,sha256=POE8aPT-MBzfW873wr6C7iiVUHtp483KwvLPxC1S3EY,11626
|
|
14
|
-
agent/cli.py,sha256=odI7cdl1psOGK-mEQzezsPzbRcLFwDbi7A2ukvYq8dk,12130
|
|
15
|
-
agent/computer_handler.py,sha256=2gfFBeDk9Vd54x9mOqnswMo8BdjUduLo5I0RbBPLovY,3964
|
|
16
|
-
agent/decorators.py,sha256=bCmcCjP31WEjWg1D91OE2jo7AZTfGa9cNgCnYUvjiyw,2832
|
|
17
|
-
agent/loops/__init__.py,sha256=_qpP_--3ePdFkTZP8qmUEFlBsy6m4h8fj0gGLDKA7zw,217
|
|
18
|
-
agent/loops/anthropic.py,sha256=MlEgQwuqHVQ5mMU9U36Gvd0YqR9_Jj1CFEczOgjSbFc,58200
|
|
19
|
-
agent/loops/omniparser.py,sha256=m3bDNQ0Igc_HHVoAbjVNj599uRoC9Eap3DCALg6RZ54,11422
|
|
20
|
-
agent/loops/openai.py,sha256=ArTqadeJY8F9N8ZLKfswlzgHV_54HbWJgLd4l6ele9w,3010
|
|
21
|
-
agent/loops/uitars.py,sha256=L0NYxKoIiMfIHbyomnaiK3ZGLmLv3QMx9nX57GruAk0,26323
|
|
22
|
-
agent/responses.py,sha256=ztSMEz8q4ykQAXF21UyQxkSZ6GeoDMydT5HZqKBPAXg,6660
|
|
23
|
-
agent/telemetry.py,sha256=87ZTyBaT0wEPQn4v76II3g0V3GERuIVbypoX-Ug6FKQ,4786
|
|
24
|
-
agent/types.py,sha256=GiLxIcF7s1XIh_WaY7tjdQPFpdTXb5MWVe_ZUPA0gkY,2364
|
|
25
|
-
agent/ui/__init__.py,sha256=DTZpK85QXscXK2nM9HtpAhVBF13yAamUrtwrQSuV-kM,126
|
|
26
|
-
agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
|
|
27
|
-
agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
|
|
28
|
-
agent/ui/gradio/app.py,sha256=9UOPwuwspLrnHGY91zdzuRqkMH4cmwOBH-f-BC0gVC4,9077
|
|
29
|
-
agent/ui/gradio/ui_components.py,sha256=hVMGZxAEq1LBHOqKj-RbDXJsj1j0Qw5dOV0ecWIHxmc,35397
|
|
30
|
-
cua_agent-0.4.7.dist-info/METADATA,sha256=wCahxHMvzKL-FkTFy4XlZZirBwl1v-RYWRcYbFcJBDk,12060
|
|
31
|
-
cua_agent-0.4.7.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
|
|
32
|
-
cua_agent-0.4.7.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
|
|
33
|
-
cua_agent-0.4.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|