cua-agent 0.4.23__tar.gz → 0.4.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cua-agent might be problematic. See the registry's advisory page for more details.

Files changed (51)
  1. {cua_agent-0.4.23 → cua_agent-0.4.24}/PKG-INFO +1 -1
  2. cua_agent-0.4.24/agent/callbacks/image_retention.py +90 -0
  3. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/callbacks/operator_validator.py +32 -32
  4. {cua_agent-0.4.23 → cua_agent-0.4.24}/pyproject.toml +1 -1
  5. cua_agent-0.4.23/agent/callbacks/image_retention.py +0 -139
  6. {cua_agent-0.4.23 → cua_agent-0.4.24}/README.md +0 -0
  7. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/__init__.py +0 -0
  8. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/__main__.py +0 -0
  9. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/adapters/__init__.py +0 -0
  10. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/adapters/huggingfacelocal_adapter.py +0 -0
  11. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/adapters/human_adapter.py +0 -0
  12. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/adapters/mlxvlm_adapter.py +0 -0
  13. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/agent.py +0 -0
  14. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/callbacks/__init__.py +0 -0
  15. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/callbacks/base.py +0 -0
  16. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/callbacks/budget_manager.py +0 -0
  17. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/callbacks/logging.py +0 -0
  18. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/callbacks/pii_anonymization.py +0 -0
  19. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/callbacks/telemetry.py +0 -0
  20. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/callbacks/trajectory_saver.py +0 -0
  21. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/cli.py +0 -0
  22. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/computers/__init__.py +0 -0
  23. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/computers/base.py +0 -0
  24. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/computers/cua.py +0 -0
  25. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/computers/custom.py +0 -0
  26. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/decorators.py +0 -0
  27. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/human_tool/__init__.py +0 -0
  28. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/human_tool/__main__.py +0 -0
  29. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/human_tool/server.py +0 -0
  30. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/human_tool/ui.py +0 -0
  31. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/integrations/hud/__init__.py +0 -0
  32. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/integrations/hud/proxy.py +0 -0
  33. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/loops/__init__.py +0 -0
  34. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/loops/anthropic.py +0 -0
  35. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/loops/base.py +0 -0
  36. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/loops/composed_grounded.py +0 -0
  37. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/loops/glm45v.py +0 -0
  38. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/loops/gta1.py +0 -0
  39. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/loops/model_types.csv +0 -0
  40. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/loops/omniparser.py +0 -0
  41. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/loops/openai.py +0 -0
  42. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/loops/uitars.py +0 -0
  43. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/proxy/examples.py +0 -0
  44. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/proxy/handlers.py +0 -0
  45. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/responses.py +0 -0
  46. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/types.py +0 -0
  47. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/ui/__init__.py +0 -0
  48. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/ui/__main__.py +0 -0
  49. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/ui/gradio/__init__.py +0 -0
  50. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/ui/gradio/app.py +0 -0
  51. {cua_agent-0.4.23 → cua_agent-0.4.24}/agent/ui/gradio/ui_components.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cua-agent
3
- Version: 0.4.23
3
+ Version: 0.4.24
4
4
  Summary: CUA (Computer Use) Agent for AI-driven computer interaction
5
5
  Author-Email: TryCua <gh@trycua.com>
6
6
  Requires-Python: >=3.12
@@ -0,0 +1,90 @@
1
+ """
2
+ Image retention callback handler that limits the number of recent images in message history.
3
+ """
4
+
5
+ from typing import List, Dict, Any, Optional
6
+ from .base import AsyncCallbackHandler
7
+
8
+
9
class ImageRetentionCallback(AsyncCallbackHandler):
    """
    Callback handler that applies image retention policy to limit the number
    of recent images in message history to prevent context window overflow.
    """

    def __init__(self, only_n_most_recent_images: Optional[int] = None):
        """
        Initialize the image retention callback.

        Args:
            only_n_most_recent_images: If set, only keep the N most recent images
                in message history. ``0`` removes every screenshot; ``None``
                disables trimming entirely.
        """
        self.only_n_most_recent_images = only_n_most_recent_images

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Apply image retention policy to messages before sending to agent loop.

        Args:
            messages: List of message dictionaries

        Returns:
            List of messages with image retention policy applied
        """
        if self.only_n_most_recent_images is None:
            return messages

        return self._apply_image_retention(messages)

    def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Apply image retention policy to keep only the N most recent images.

        Removes computer_call_output items with image_url together with their
        corresponding computer_call items (and a single reasoning item directly
        preceding that computer_call), keeping only the most recent N screenshots.

        Args:
            messages: List of message dictionaries

        Returns:
            Filtered list of messages with image retention applied
        """
        n = self.only_n_most_recent_images
        if n is None:
            return messages

        # Gather indices of all computer_call_output messages that contain an image_url
        output_indices: List[int] = []
        for idx, msg in enumerate(messages):
            if msg.get("type") == "computer_call_output":
                out = msg.get("output")
                if isinstance(out, dict) and ("image_url" in out):
                    output_indices.append(idx)

        # Nothing to trim
        if len(output_indices) <= n:
            return messages

        # Determine which outputs to keep (most recent N).
        # BUG FIX: guard n > 0 explicitly — for n == 0, `output_indices[-0:]`
        # is the WHOLE list, which previously kept every screenshot instead of
        # removing them all.
        keep_output_indices = set(output_indices[-n:]) if n > 0 else set()

        # Build set of indices to remove in one pass
        to_remove: set[int] = set()

        for idx in output_indices:
            if idx in keep_output_indices:
                continue  # keep this screenshot and its context

            to_remove.add(idx)  # remove the computer_call_output itself

            # Remove the immediately preceding computer_call with matching call_id (if present)
            call_id = messages[idx].get("call_id")
            prev_idx = idx - 1
            if (
                prev_idx >= 0
                and messages[prev_idx].get("type") == "computer_call"
                and messages[prev_idx].get("call_id") == call_id
            ):
                to_remove.add(prev_idx)
                # Check a single reasoning immediately before that computer_call
                r_idx = prev_idx - 1
                if r_idx >= 0 and messages[r_idx].get("type") == "reasoning":
                    to_remove.add(r_idx)

        # Construct filtered list
        return [m for i, m in enumerate(messages) if i not in to_remove]
@@ -102,37 +102,37 @@ class OperatorNormalizerCallback(AsyncCallbackHandler):
102
102
  _keep_keys(action, keep)
103
103
 
104
104
 
105
- # Second pass: if an assistant message is immediately followed by a computer_call,
106
- # replace the assistant message itself with a reasoning message with summary text.
107
- if isinstance(output, list):
108
- for i, item in enumerate(output):
109
- # AssistantMessage shape: { type: 'message', role: 'assistant', content: OutputContent[] }
110
- if item.get("type") == "message" and item.get("role") == "assistant":
111
- next_idx = i + 1
112
- if next_idx >= len(output):
113
- continue
114
- next_item = output[next_idx]
115
- if not isinstance(next_item, dict):
116
- continue
117
- if next_item.get("type") != "computer_call":
118
- continue
119
- contents = item.get("content") or []
120
- # Extract text from OutputContent[]
121
- text_parts: List[str] = []
122
- if isinstance(contents, list):
123
- for c in contents:
124
- if isinstance(c, dict) and c.get("type") == "output_text" and isinstance(c.get("text"), str):
125
- text_parts.append(c["text"])
126
- text_content = "\n".join(text_parts).strip()
127
- # Replace assistant message with reasoning message
128
- output[i] = {
129
- "type": "reasoning",
130
- "summary": [
131
- {
132
- "type": "summary_text",
133
- "text": text_content,
134
- }
135
- ],
136
- }
105
+ # # Second pass: if an assistant message is immediately followed by a computer_call,
106
+ # # replace the assistant message itself with a reasoning message with summary text.
107
+ # if isinstance(output, list):
108
+ # for i, item in enumerate(output):
109
+ # # AssistantMessage shape: { type: 'message', role: 'assistant', content: OutputContent[] }
110
+ # if item.get("type") == "message" and item.get("role") == "assistant":
111
+ # next_idx = i + 1
112
+ # if next_idx >= len(output):
113
+ # continue
114
+ # next_item = output[next_idx]
115
+ # if not isinstance(next_item, dict):
116
+ # continue
117
+ # if next_item.get("type") != "computer_call":
118
+ # continue
119
+ # contents = item.get("content") or []
120
+ # # Extract text from OutputContent[]
121
+ # text_parts: List[str] = []
122
+ # if isinstance(contents, list):
123
+ # for c in contents:
124
+ # if isinstance(c, dict) and c.get("type") == "output_text" and isinstance(c.get("text"), str):
125
+ # text_parts.append(c["text"])
126
+ # text_content = "\n".join(text_parts).strip()
127
+ # # Replace assistant message with reasoning message
128
+ # output[i] = {
129
+ # "type": "reasoning",
130
+ # "summary": [
131
+ # {
132
+ # "type": "summary_text",
133
+ # "text": text_content,
134
+ # }
135
+ # ],
136
+ # }
137
137
 
138
138
  return output
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
6
6
 
7
7
  [project]
8
8
  name = "cua-agent"
9
- version = "0.4.23"
9
+ version = "0.4.24"
10
10
  description = "CUA (Computer Use) Agent for AI-driven computer interaction"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -1,139 +0,0 @@
1
- """
2
- Image retention callback handler that limits the number of recent images in message history.
3
- """
4
-
5
- from typing import List, Dict, Any, Optional
6
- from .base import AsyncCallbackHandler
7
-
8
-
9
class ImageRetentionCallback(AsyncCallbackHandler):
    """
    Callback handler that applies image retention policy to limit the number
    of recent images in message history to prevent context window overflow.
    """

    def __init__(self, only_n_most_recent_images: Optional[int] = None):
        """
        Initialize the image retention callback.

        Args:
            only_n_most_recent_images: If set, only keep the N most recent images in message history
        """
        self.only_n_most_recent_images = only_n_most_recent_images

    async def on_llm_start(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Apply image retention policy to messages before sending to agent loop.

        Args:
            messages: List of message dictionaries

        Returns:
            List of messages with image retention policy applied
        """
        if self.only_n_most_recent_images is None:
            return messages

        return self._apply_image_retention(messages)

    def _apply_image_retention(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Apply image retention policy to keep only the N most recent images.

        Removes computer_call_output items with image_url, their corresponding
        computer_call items, and any reasoning items paired with them, keeping
        only the most recent N screenshots per ``only_n_most_recent_images``.

        Args:
            messages: List of message dictionaries

        Returns:
            Filtered list of messages with image retention applied
        """
        limit = self.only_n_most_recent_images
        if limit is None:
            return messages

        def _is_image_output(m: Dict[str, Any]) -> bool:
            # A screenshot result: computer_call_output whose output dict has an image_url.
            return (
                m.get("type") == "computer_call_output"
                and isinstance(m.get("output"), dict)
                and "image_url" in m.get("output", {})
            )

        # Pass 1: tag each reasoning item lacking a call_id with the call_id of
        # the next computer_call that follows it, so it can be paired later.
        tagged: List[Dict[str, Any]] = []
        for i, original in enumerate(messages):
            item = original.copy() if isinstance(original, dict) else original
            if item.get("type") == "reasoning" and not item.get("call_id"):
                for later in messages[i + 1:]:
                    if later.get("type") == "computer_call" and later.get("call_id"):
                        item["call_id"] = later.get("call_id")
                        break
            tagged.append(item)

        # Pass 2: walk backwards collecting call_ids of the most recent screenshots.
        recent_ids: List[Any] = []
        for item in reversed(tagged):
            if _is_image_output(item):
                cid = item.get("call_id")
                if cid and cid not in recent_ids:
                    recent_ids.append(cid)
                if len(recent_ids) >= limit:
                    break

        # The most recent N image call_ids survive the trim.
        keep_ids = set(recent_ids[:limit])

        # call_ids that have at least one screenshot output anywhere in history
        # (precomputed once instead of re-scanning per message).
        image_ids = {m.get("call_id") for m in tagged if _is_image_output(m)}

        # Pass 3: drop the computer_call, computer_call_output, and reasoning
        # items belonging to screenshots that are being evicted.
        kept: List[Dict[str, Any]] = []
        for item in tagged:
            kind = item.get("type")
            cid = item.get("call_id")

            if kind == "computer_call" and cid not in keep_ids and cid in image_ids:
                continue  # its screenshot is being evicted

            if kind == "computer_call_output" and cid not in keep_ids and _is_image_output(item):
                continue  # evicted screenshot itself

            if kind == "reasoning" and cid and cid not in keep_ids and cid in image_ids:
                continue  # reasoning paired with an evicted screenshot

            kept.append(item)

        # Pass 4: strip the temporary call_id tag from reasoning items.
        cleaned: List[Dict[str, Any]] = []
        for item in kept:
            if item.get("type") == "reasoning" and "call_id" in item:
                cleaned.append({k: v for k, v in item.items() if k != "call_id"})
            else:
                cleaned.append(item)

        return cleaned
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes