PyPI - cua-agent - Versions diffs - 0.4.23__py3-none-any.whl → 0.4.24__py3-none-any.whl - Mend

cua-agent 0.4.23__py3-none-any.whl → 0.4.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (6) hide show

agent/callbacks/image_retention.py CHANGED Viewed

@@ -50,90 +50,41 @@ class ImageRetentionCallback(AsyncCallbackHandler):
         """
         if self.only_n_most_recent_images is None:
             return messages
-        # First pass: Assign call_id to reasoning items based on the next computer_call
-        messages_with_call_ids = []
-        for i, msg in enumerate(messages):
-            msg_copy = msg.copy() if isinstance(msg, dict) else msg
-            # If this is a reasoning item without a call_id, find the next computer_call
-            if (msg_copy.get("type") == "reasoning" and
-                not msg_copy.get("call_id")):
-                # Look ahead for the next computer_call
-                for j in range(i + 1, len(messages)):
-                    next_msg = messages[j]
-                    if (next_msg.get("type") == "computer_call" and
-                        next_msg.get("call_id")):
-                        msg_copy["call_id"] = next_msg.get("call_id")
-                        break
-            messages_with_call_ids.append(msg_copy)
-        # Find all computer_call_output items with images and their call_ids
-        image_call_ids = []
-        for msg in reversed(messages_with_call_ids):  # Process in reverse to get most recent first
-            if (msg.get("type") == "computer_call_output" and
-                isinstance(msg.get("output"), dict) and
-                "image_url" in msg.get("output", {})):
-                call_id = msg.get("call_id")
-                if call_id and call_id not in image_call_ids:
-                    image_call_ids.append(call_id)
-                    if len(image_call_ids) >= self.only_n_most_recent_images:
-                        break
-        # Keep the most recent N image call_ids (reverse to get chronological order)
-        keep_call_ids = set(image_call_ids[:self.only_n_most_recent_images])
-        # Filter messages: remove computer_call, computer_call_output, and reasoning for old images
-        filtered_messages = []
-        for msg in messages_with_call_ids:
-            msg_type = msg.get("type")
-            call_id = msg.get("call_id")
-            # Remove old computer_call items
-            if msg_type == "computer_call" and call_id not in keep_call_ids:
-                # Check if this call_id corresponds to an image call
-                has_image_output = any(
-                    m.get("type") == "computer_call_output" and
-                    m.get("call_id") == call_id and
-                    isinstance(m.get("output"), dict) and
-                    "image_url" in m.get("output", {})
-                    for m in messages_with_call_ids
-                )
-                if has_image_output:
-                    continue  # Skip this computer_call
-            # Remove old computer_call_output items with images
-            if (msg_type == "computer_call_output" and
-                call_id not in keep_call_ids and
-                isinstance(msg.get("output"), dict) and
-                "image_url" in msg.get("output", {})):
-                continue  # Skip this computer_call_output
-            # Remove old reasoning items that are paired with removed computer calls
-            if (msg_type == "reasoning" and
-                call_id and call_id not in keep_call_ids):
-                # Check if this call_id corresponds to an image call that's being removed
-                has_image_output = any(
-                    m.get("type") == "computer_call_output" and
-                    m.get("call_id") == call_id and
-                    isinstance(m.get("output"), dict) and
-                    "image_url" in m.get("output", {})
-                    for m in messages_with_call_ids
-                )
-                if has_image_output:
-                    continue  # Skip this reasoning item
-            filtered_messages.append(msg)
-        # Clean up: Remove call_id from reasoning items before returning
-        final_messages = []
-        for msg in filtered_messages:
-            if msg.get("type") == "reasoning" and "call_id" in msg:
-                # Create a copy without call_id for reasoning items
-                cleaned_msg = {k: v for k, v in msg.items() if k != "call_id"}
-                final_messages.append(cleaned_msg)
-            else:
-                final_messages.append(msg)
-        return final_messages
+        # Gather indices of all computer_call_output messages that contain an image_url
+        output_indices: List[int] = []
+        for idx, msg in enumerate(messages):
+            if msg.get("type") == "computer_call_output":
+                out = msg.get("output")
+                if isinstance(out, dict) and ("image_url" in out):
+                    output_indices.append(idx)
+        # Nothing to trim
+        if len(output_indices) <= self.only_n_most_recent_images:
+            return messages
+        # Determine which outputs to keep (most recent N)
+        keep_output_indices = set(output_indices[-self.only_n_most_recent_images :])
+        # Build set of indices to remove in one pass
+        to_remove: set[int] = set()
+        for idx in output_indices:
+            if idx in keep_output_indices:
+                continue  # keep this screenshot and its context
+            to_remove.add(idx)  # remove the computer_call_output itself
+            # Remove the immediately preceding computer_call with matching call_id (if present)
+            call_id = messages[idx].get("call_id")
+            prev_idx = idx - 1
+            if prev_idx >= 0 and messages[prev_idx].get("type") == "computer_call" and messages[prev_idx].get("call_id") == call_id:
+                to_remove.add(prev_idx)
+                # Check a single reasoning immediately before that computer_call
+                r_idx = prev_idx - 1
+                if r_idx >= 0 and messages[r_idx].get("type") == "reasoning":
+                    to_remove.add(r_idx)
+        # Construct filtered list
+        filtered = [m for i, m in enumerate(messages) if i not in to_remove]
+        return filtered

agent/callbacks/operator_validator.py CHANGED Viewed

@@ -102,37 +102,37 @@ class OperatorNormalizerCallback(AsyncCallbackHandler):
                 _keep_keys(action, keep)
-        # Second pass: if an assistant message is immediately followed by a computer_call,
-        # replace the assistant message itself with a reasoning message with summary text.
-        if isinstance(output, list):
-            for i, item in enumerate(output):
-                # AssistantMessage shape: { type: 'message', role: 'assistant', content: OutputContent[] }
-                if item.get("type") == "message" and item.get("role") == "assistant":
-                    next_idx = i + 1
-                    if next_idx >= len(output):
-                        continue
-                    next_item = output[next_idx]
-                    if not isinstance(next_item, dict):
-                        continue
-                    if next_item.get("type") != "computer_call":
-                        continue
-                    contents = item.get("content") or []
-                    # Extract text from OutputContent[]
-                    text_parts: List[str] = []
-                    if isinstance(contents, list):
-                        for c in contents:
-                            if isinstance(c, dict) and c.get("type") == "output_text" and isinstance(c.get("text"), str):
-                                text_parts.append(c["text"])
-                    text_content = "\n".join(text_parts).strip()
-                    # Replace assistant message with reasoning message
-                    output[i] = {
-                        "type": "reasoning",
-                        "summary": [
-                            {
-                                "type": "summary_text",
-                                "text": text_content,
-                            }
-                        ],
-                    }
+        # # Second pass: if an assistant message is immediately followed by a computer_call,
+        # # replace the assistant message itself with a reasoning message with summary text.
+        # if isinstance(output, list):
+        #     for i, item in enumerate(output):
+        #         # AssistantMessage shape: { type: 'message', role: 'assistant', content: OutputContent[] }
+        #         if item.get("type") == "message" and item.get("role") == "assistant":
+        #             next_idx = i + 1
+        #             if next_idx >= len(output):
+        #                 continue
+        #             next_item = output[next_idx]
+        #             if not isinstance(next_item, dict):
+        #                 continue
+        #             if next_item.get("type") != "computer_call":
+        #                 continue
+        #             contents = item.get("content") or []
+        #             # Extract text from OutputContent[]
+        #             text_parts: List[str] = []
+        #             if isinstance(contents, list):
+        #                 for c in contents:
+        #                     if isinstance(c, dict) and c.get("type") == "output_text" and isinstance(c.get("text"), str):
+        #                         text_parts.append(c["text"])
+        #             text_content = "\n".join(text_parts).strip()
+        #             # Replace assistant message with reasoning message
+        #             output[i] = {
+        #                 "type": "reasoning",
+        #                 "summary": [
+        #                     {
+        #                         "type": "summary_text",
+        #                         "text": text_content,
+        #                     }
+        #                 ],
+        #             }
         return output

{cua_agent-0.4.23.dist-info → cua_agent-0.4.24.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cua-agent
-Version: 0.4.23
+Version: 0.4.24
 Summary: CUA (Computer Use) Agent for AI-driven computer interaction
 Author-Email: TryCua <gh@trycua.com>
 Requires-Python: >=3.12

{cua_agent-0.4.23.dist-info → cua_agent-0.4.24.dist-info}/RECORD RENAMED Viewed

@@ -8,9 +8,9 @@ agent/agent.py,sha256=ao3SKnZoX5_P6mTzEg0hgOUam6bNRwpbitzlyvwI3bg,29826
 agent/callbacks/__init__.py,sha256=et6pNfX_AiJqhVzUfCvcjzFbDhfLoHazKCXN5sqwxaM,631
 agent/callbacks/base.py,sha256=UnnnYlh6XCm6HKZZsAPaT_Eyo9LUYLyjyNwF-QRm6Ns,4691
 agent/callbacks/budget_manager.py,sha256=RyKM-7iXQcDotYvrw3eURzeEHEXvQjID-NobtvQWE7k,1832
-agent/callbacks/image_retention.py,sha256=tiuRT5ke9xXTb2eP8Gz-2ITyAMY29LURUH6AbjX3RP8,6165
+agent/callbacks/image_retention.py,sha256=8MeLo5-Y7cACpsNk2p_bvnZIYKpW6XgyukmdYGX23rE,3588
 agent/callbacks/logging.py,sha256=OOxU97EzrxlnUAtiEnvy9FB7SwCUK90-rdpDFA2Ae4E,10921
-agent/callbacks/operator_validator.py,sha256=dLvR749glMPiGt8UP-XMLm_LcaTUUhWvRZJN_qkHV7Y,6430
+agent/callbacks/operator_validator.py,sha256=T5tp62pkShkcdHu2rgREUGdk8fryL_ziJsItXsfgYUQ,6494
 agent/callbacks/pii_anonymization.py,sha256=NEkUTUjQBi82nqus7kT-1E4RaeQ2hQrY7YCnKndLhP8,3272
 agent/callbacks/telemetry.py,sha256=RbUDhE41mTi8g9hNre0EpltK_NUZkLj8buJLWBzs0Ek,7363
 agent/callbacks/trajectory_saver.py,sha256=rslgg4Ak7JHSNmmJgANRQ5TsUYWGuUJDZ6amureaz_o,15963
@@ -45,7 +45,7 @@ agent/ui/__main__.py,sha256=vudWXYvGM0aNT5aZ94HPtGW8YXOZ4cLXepHyhUM_k1g,73
 agent/ui/gradio/__init__.py,sha256=yv4Mrfo-Sj2U5sVn_UJHAuwYCezo-5O4ItR2C9jzNko,145
 agent/ui/gradio/app.py,sha256=Ol97YEbwREZZQ9_PMjVHlfOcu9BGsawxgAGAm79hT80,9117
 agent/ui/gradio/ui_components.py,sha256=dJUvKDmc1oSejtoR_gU_oWWYwxaOOQyPloSYRGMrUCQ,36068
-cua_agent-0.4.23.dist-info/METADATA,sha256=pBBwUDqUNHDWsNyueuDPr3N3OMHWvfTD8uPncS7rWVg,12712
-cua_agent-0.4.23.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
-cua_agent-0.4.23.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
-cua_agent-0.4.23.dist-info/RECORD,,
+cua_agent-0.4.24.dist-info/METADATA,sha256=-yvFHUziugRMdDqtf_NDVnQfcNbHKut_rr-yswIDYkM,12712
+cua_agent-0.4.24.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
+cua_agent-0.4.24.dist-info/entry_points.txt,sha256=6OYgBcLyFCUgeqLgnvMyOJxPCWzgy7se4rLPKtNonMs,34
+cua_agent-0.4.24.dist-info/RECORD,,

{cua_agent-0.4.23.dist-info → cua_agent-0.4.24.dist-info}/WHEEL RENAMED Viewed

File without changes

{cua_agent-0.4.23.dist-info → cua_agent-0.4.24.dist-info}/entry_points.txt RENAMED Viewed

File without changes

cua-agent 0.4.23__py3-none-any.whl → 0.4.24__py3-none-any.whl

Potentially problematic release.

cua-agent 0.4.23__py3-none-any.whl → 0.4.24__py3-none-any.whl