PyPI - orgo - Versions diffs - 0.0.11__tar.gz → 0.0.12__tar.gz - Mend

orgo 0.0.11.tar.gz → 0.0.12.tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (20) hide show

{orgo-0.0.11 → orgo-0.0.12}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: orgo
-Version: 0.0.11
+Version: 0.0.12
 Summary: Computers for AI agents
 Author: Orgo Team
 License: MIT

{orgo-0.0.11 → orgo-0.0.12}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "orgo"
-version = "0.0.11"
+version = "0.0.12"
 description = "Computers for AI agents"
 authors = [{name = "Orgo Team"}]
 license = {text = "MIT"}

{orgo-0.0.11 → orgo-0.0.12}/src/orgo/prompt.py RENAMED Viewed

@@ -3,6 +3,7 @@ Prompt module for interacting with virtual computers using AI models.
 """
 import os
+import base64
 from typing import Dict, List, Any, Optional, Callable, Union, Protocol
@@ -105,7 +106,14 @@ class AnthropicProvider:
   - File browser items: DOUBLE-CLICK to open folders and files
   - When submitting, use the 'Enter' key, not the 'Return' key.
 * If you see an icon on the desktop that you need to open, ALWAYS use the double_click action, never use left_click.
-</UBUNTU_DESKTOP_GUIDELINES>"""
+</UBUNTU_DESKTOP_GUIDELINES>
+<SCREENSHOT_GUIDELINES>
+* Be mindful of how many screenshots you take - they consume significant memory.
+* Only take screenshots when you need to see the current state of the screen.
+* Try to batch multiple actions before taking another screenshot.
+* For better performance, limit the number of screenshots you take.
+</SCREENSHOT_GUIDELINES>"""
         try:
             # Define the computer tool per Anthropic's documentation
@@ -138,9 +146,9 @@ class AnthropicProvider:
             while iteration < max_iterations:
                 iteration += 1
-                # Prune old screenshots if we've exceeded our limit
+                # Filter to keep only the N most recent screenshots
                 if screenshot_count > max_saved_screenshots:
-                    self._prune_old_screenshots(messages, screenshot_count - max_saved_screenshots)
+                    self._filter_to_n_most_recent_images(messages, max_saved_screenshots)
                     screenshot_count = max_saved_screenshots
                 # Create the request parameters
@@ -161,7 +169,20 @@ class AnthropicProvider:
                     }
                 # Create message request to Claude
-                response = client.beta.messages.create(**request_params)
+                try:
+                    response = client.beta.messages.create(**request_params)
+                except Exception as e:
+                    if "base64" in str(e).lower():
+                        # If we get a base64 error, try again after more aggressively filtering images
+                        if callback:
+                            callback("error", f"Base64 error detected. Attempting recovery...")
+                        # Remove all but the most recent image and try again
+                        self._filter_to_n_most_recent_images(messages, 1)
+                        response = client.beta.messages.create(**request_params)
+                    else:
+                        # Not a base64 error, re-raise
+                        raise
                 # Extract the content from the response
                 response_content = response.content
@@ -226,49 +247,57 @@ class AnthropicProvider:
             if callback:
                 callback("error", str(e))
             raise
-    def _prune_old_screenshots(self, messages: List[Dict[str, Any]], num_to_prune: int):
+    def _filter_to_n_most_recent_images(self, messages: List[Dict[str, Any]], max_images: int):
         """
-        Remove old screenshots from the conversation history.
+        Keep only the N most recent images in the conversation history.
         Args:
             messages: The conversation history
-            num_to_prune: Number of screenshots to remove
+            max_images: Maximum number of images to keep
         """
-        screenshots_pruned = 0
+        # Find all the image blocks in the conversation history
+        image_blocks = []
-        # Start from the beginning of the messages (excluding the first user message)
-        for i in range(1, len(messages)):
-            if messages[i]["role"] != "user":
+        for msg_idx, msg in enumerate(messages):
+            if msg["role"] != "user":
                 continue
-            content = messages[i]["content"]
+            content = msg.get("content", [])
             if not isinstance(content, list):
                 continue
-            # Look for tool_result blocks in the content
-            for j, block in enumerate(content):
+            for content_idx, block in enumerate(content):
                 if not isinstance(block, dict):
                     continue
                 if block.get("type") != "tool_result":
                     continue
-                # Check if this tool_result contains an image
                 block_content = block.get("content", [])
-                for k, content_item in enumerate(block_content):
+                for content_item_idx, content_item in enumerate(block_content):
                     if not isinstance(content_item, dict):
                         continue
-                    if content_item.get("type") == "image":
-                        # This is a screenshot, remove it
-                        if "source" in content_item and "data" in content_item["source"]:
-                            # Replace the base64 data with a placeholder to save space
-                            content_item["source"]["data"] = "[IMAGE DATA REMOVED]"
-                            screenshots_pruned += 1
-                            if screenshots_pruned >= num_to_prune:
-                                return
+                    if content_item.get("type") == "image" and "source" in content_item:
+                        image_blocks.append({
+                            "msg_idx": msg_idx,
+                            "content_idx": content_idx,
+                            "block": block,
+                            "content_item_idx": content_item_idx,
+                            "content_item": content_item
+                        })
+        # If we have more images than our limit, remove the oldest ones
+        if len(image_blocks) > max_images:
+            # Keep only the most recent ones (which are at the end of the list)
+            images_to_remove = image_blocks[:-max_images]
+            for img_block in images_to_remove:
+                content_item = img_block["content_item"]
+                if "source" in content_item and "data" in content_item["source"]:
+                    # Replace the base64 data with a placeholder
+                    content_item["source"]["data"] = "[IMAGE DATA REMOVED]"
     def _execute_tool(self,
                       computer_id: str,

{orgo-0.0.11 → orgo-0.0.12}/src/orgo.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: orgo
-Version: 0.0.11
+Version: 0.0.12
 Summary: Computers for AI agents
 Author: Orgo Team
 License: MIT