PyPI - cua-agent - Versions diffs - 0.1.41__tar.gz → 0.1.43__tar.gz - Mend

cua-agent 0.1.41.tar.gz → 0.1.43.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of cua-agent might be problematic. Click here for more details.

Files changed (85)

{cua_agent-0.1.41 → cua_agent-0.1.43}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cua-agent
-Version: 0.1.41
+Version: 0.1.43
 Summary: CUA (Computer Use) Agent for AI-driven computer interaction
 Author-Email: TryCua <gh@trycua.com>
 Requires-Python: >=3.10

{cua_agent-0.1.41 → cua_agent-0.1.43}/agent/core/base.py RENAMED Viewed

@@ -131,6 +131,15 @@ class BaseLoop(ABC):
             An async generator that yields agent responses
         """
         raise NotImplementedError
+    @abstractmethod
+    async def cancel(self) -> None:
+        """Cancel the currently running agent loop task.
+        This method should stop any ongoing processing in the agent loop
+        and clean up resources appropriately.
+        """
+        raise NotImplementedError
     ###########################################
     # EXPERIMENT AND TRAJECTORY MANAGEMENT

{cua_agent-0.1.41 → cua_agent-0.1.43}/agent/providers/anthropic/loop.py RENAMED Viewed

@@ -101,6 +101,7 @@ class AnthropicLoop(BaseLoop):
         self.tool_manager = None
         self.callback_manager = None
         self.queue = asyncio.Queue()  # Initialize queue
+        self.loop_task = None  # Store the loop task for cancellation
         # Initialize handlers
         self.api_handler = AnthropicAPIHandler(self)
@@ -169,7 +170,7 @@ class AnthropicLoop(BaseLoop):
                 logger.info("Client initialized successfully")
             # Start loop in background task
-            loop_task = asyncio.create_task(self._run_loop(queue, messages))
+            self.loop_task = asyncio.create_task(self._run_loop(queue, messages))
             # Process and yield messages as they arrive
             while True:
@@ -184,7 +185,7 @@ class AnthropicLoop(BaseLoop):
                     continue
             # Wait for loop to complete
-            await loop_task
+            await self.loop_task
             # Send completion message
             yield {
@@ -200,6 +201,31 @@ class AnthropicLoop(BaseLoop):
                 "content": f"Error: {str(e)}",
                 "metadata": {"title": "❌ Error"},
             }
+    async def cancel(self) -> None:
+        """Cancel the currently running agent loop task.
+        This method stops the ongoing processing in the agent loop
+        by cancelling the loop_task if it exists and is running.
+        """
+        if self.loop_task and not self.loop_task.done():
+            logger.info("Cancelling Anthropic loop task")
+            self.loop_task.cancel()
+            try:
+                # Wait for the task to be cancelled with a timeout
+                await asyncio.wait_for(self.loop_task, timeout=2.0)
+            except asyncio.TimeoutError:
+                logger.warning("Timeout while waiting for loop task to cancel")
+            except asyncio.CancelledError:
+                logger.info("Loop task cancelled successfully")
+            except Exception as e:
+                logger.error(f"Error while cancelling loop task: {str(e)}")
+            finally:
+                # Put None in the queue to signal any waiting consumers to stop
+                await self.queue.put(None)
+                logger.info("Anthropic loop task cancelled")
+        else:
+            logger.info("No active Anthropic loop task to cancel")
     ###########################################
     # AGENT LOOP IMPLEMENTATION

{cua_agent-0.1.41 → cua_agent-0.1.43}/agent/providers/omni/loop.py RENAMED Viewed

@@ -105,6 +105,7 @@ class OmniLoop(BaseLoop):
         # Set API client attributes
         self.client = None
         self.retry_count = 0
+        self.loop_task = None  # Store the loop task for cancellation
         # Initialize handlers
         self.api_handler = OmniAPIHandler(loop=self)
@@ -580,10 +581,55 @@ class OmniLoop(BaseLoop):
         Yields:
             Agent response format
         """
-        # Initialize the message manager with the provided messages
-        self.message_manager.messages = messages.copy()
-        logger.info(f"Starting OmniLoop run with {len(self.message_manager.messages)} messages")
+        try:
+            logger.info(f"Starting OmniLoop run with {len(messages)} messages")
+            # Initialize the message manager with the provided messages
+            self.message_manager.messages = messages.copy()
+            # Create queue for response streaming
+            queue = asyncio.Queue()
+            # Start loop in background task
+            self.loop_task = asyncio.create_task(self._run_loop(queue, messages))
+            # Process and yield messages as they arrive
+            while True:
+                try:
+                    item = await queue.get()
+                    if item is None:  # Stop signal
+                        break
+                    yield item
+                    queue.task_done()
+                except Exception as e:
+                    logger.error(f"Error processing queue item: {str(e)}")
+                    continue
+            # Wait for loop to complete
+            await self.loop_task
+            # Send completion message
+            yield {
+                "role": "assistant",
+                "content": "Task completed successfully.",
+                "metadata": {"title": "✅ Complete"},
+            }
+        except Exception as e:
+            logger.error(f"Error in run method: {str(e)}")
+            yield {
+                "role": "assistant",
+                "content": f"Error: {str(e)}",
+                "metadata": {"title": "❌ Error"},
+            }
+    async def _run_loop(self, queue: asyncio.Queue, messages: List[Dict[str, Any]]) -> None:
+        """Internal method to run the agent loop with provided messages.
+        Args:
+            queue: Queue to put responses into
+            messages: List of messages in standard OpenAI format
+        """
         # Continue running until explicitly told to stop
         running = True
         turn_created = False
@@ -673,8 +719,8 @@ class OmniLoop(BaseLoop):
                 # Log standardized response for ease of parsing
                 self._log_api_call("agent_response", request=None, response=openai_compatible_response)
-                # Yield the response to the caller
-                yield openai_compatible_response
+                # Put the response in the queue
+                await queue.put(openai_compatible_response)
                 # Check if we should continue this conversation
                 running = should_continue
@@ -688,20 +734,47 @@ class OmniLoop(BaseLoop):
             except Exception as e:
                 attempt += 1
-                error_msg = f"Error in run method (attempt {attempt}/{max_attempts}): {str(e)}"
+                error_msg = f"Error in _run_loop method (attempt {attempt}/{max_attempts}): {str(e)}"
                 logger.error(error_msg)
                 # If this is our last attempt, provide more info about the error
                 if attempt >= max_attempts:
                     logger.error(f"Maximum retry attempts reached. Last error was: {str(e)}")
-                yield {
-                    "error": str(e),
+                await queue.put({
+                    "role": "assistant",
+                    "content": f"Error: {str(e)}",
                     "metadata": {"title": "❌ Error"},
-                }
+                })
                 # Create a brief delay before retrying
                 await asyncio.sleep(1)
+        finally:
+            # Signal that we're done
+            await queue.put(None)
+    async def cancel(self) -> None:
+        """Cancel the currently running agent loop task.
+        This method stops the ongoing processing in the agent loop
+        by cancelling the loop_task if it exists and is running.
+        """
+        if self.loop_task and not self.loop_task.done():
+            logger.info("Cancelling Omni loop task")
+            self.loop_task.cancel()
+            try:
+                # Wait for the task to be cancelled with a timeout
+                await asyncio.wait_for(self.loop_task, timeout=2.0)
+            except asyncio.TimeoutError:
+                logger.warning("Timeout while waiting for loop task to cancel")
+            except asyncio.CancelledError:
+                logger.info("Loop task cancelled successfully")
+            except Exception as e:
+                logger.error(f"Error while cancelling loop task: {str(e)}")
+            finally:
+                logger.info("Omni loop task cancelled")
+        else:
+            logger.info("No active Omni loop task to cancel")
     async def process_model_response(self, response_text: str) -> Optional[Dict[str, Any]]:
         """Process model response to extract tool calls.

{cua_agent-0.1.41 → cua_agent-0.1.43}/agent/providers/openai/loop.py RENAMED Viewed

@@ -87,6 +87,7 @@ class OpenAILoop(BaseLoop):
         self.acknowledge_safety_check_callback = acknowledge_safety_check_callback
         self.queue = asyncio.Queue()  # Initialize queue
         self.last_response_id = None  # Store the last response ID across runs
+        self.loop_task = None  # Store the loop task for cancellation
         # Initialize handlers
         self.api_handler = OpenAIAPIHandler(self)
@@ -132,28 +133,28 @@ class OpenAILoop(BaseLoop):
             logger.info("Starting OpenAI loop run")
             # Create queue for response streaming
-            queue = asyncio.Queue()
+            self.queue = asyncio.Queue()
             # Ensure tool manager is initialized
             await self.tool_manager.initialize()
             # Start loop in background task
-            loop_task = asyncio.create_task(self._run_loop(queue, messages))
+            self.loop_task = asyncio.create_task(self._run_loop(self.queue, messages))
             # Process and yield messages as they arrive
             while True:
                 try:
-                    item = await queue.get()
+                    item = await self.queue.get()
                     if item is None:  # Stop signal
                         break
                     yield item
-                    queue.task_done()
+                    self.queue.task_done()
                 except Exception as e:
                     logger.error(f"Error processing queue item: {str(e)}")
                     continue
             # Wait for loop to complete
-            await loop_task
+            await self.loop_task
             # Send completion message
             yield {
@@ -169,6 +170,31 @@ class OpenAILoop(BaseLoop):
                 "content": f"Error: {str(e)}",
                 "metadata": {"title": "❌ Error"},
             }
+    async def cancel(self) -> None:
+        """Cancel the currently running agent loop task.
+        This method stops the ongoing processing in the agent loop
+        by cancelling the loop_task if it exists and is running.
+        """
+        if self.loop_task and not self.loop_task.done():
+            logger.info("Cancelling OpenAI loop task")
+            self.loop_task.cancel()
+            try:
+                # Wait for the task to be cancelled with a timeout
+                await asyncio.wait_for(self.loop_task, timeout=2.0)
+            except asyncio.TimeoutError:
+                logger.warning("Timeout while waiting for loop task to cancel")
+            except asyncio.CancelledError:
+                logger.info("Loop task cancelled successfully")
+            except Exception as e:
+                logger.error(f"Error while cancelling loop task: {str(e)}")
+            finally:
+                # Put None in the queue to signal any waiting consumers to stop
+                await self.queue.put(None)
+                logger.info("OpenAI loop task cancelled")
+        else:
+            logger.info("No active OpenAI loop task to cancel")
     ###########################################
     # AGENT LOOP IMPLEMENTATION

{cua_agent-0.1.41 → cua_agent-0.1.43}/agent/providers/uitars/loop.py RENAMED Viewed

@@ -93,6 +93,7 @@ class UITARSLoop(BaseLoop):
         # Set API client attributes
         self.client = None
         self.retry_count = 0
+        self.loop_task = None  # Store the loop task for cancellation
         # Initialize visualization helper
         self.viz_helper = VisualizationHelper(agent=self)
@@ -462,10 +463,55 @@ class UITARSLoop(BaseLoop):
         Yields:
             Agent response format
         """
-        # Initialize the message manager with the provided messages
-        self.message_manager.messages = messages.copy()
-        logger.info(f"Starting UITARSLoop run with {len(self.message_manager.messages)} messages")
+        try:
+            logger.info(f"Starting UITARSLoop run with {len(messages)} messages")
+            # Initialize the message manager with the provided messages
+            self.message_manager.messages = messages.copy()
+            # Create queue for response streaming
+            queue = asyncio.Queue()
+            # Start loop in background task
+            self.loop_task = asyncio.create_task(self._run_loop(queue, messages))
+            # Process and yield messages as they arrive
+            while True:
+                try:
+                    item = await queue.get()
+                    if item is None:  # Stop signal
+                        break
+                    yield item
+                    queue.task_done()
+                except Exception as e:
+                    logger.error(f"Error processing queue item: {str(e)}")
+                    continue
+            # Wait for loop to complete
+            await self.loop_task
+            # Send completion message
+            yield {
+                "role": "assistant",
+                "content": "Task completed successfully.",
+                "metadata": {"title": "✅ Complete"},
+            }
+        except Exception as e:
+            logger.error(f"Error in run method: {str(e)}")
+            yield {
+                "role": "assistant",
+                "content": f"Error: {str(e)}",
+                "metadata": {"title": "❌ Error"},
+            }
+    async def _run_loop(self, queue: asyncio.Queue, messages: List[Dict[str, Any]]) -> None:
+        """Internal method to run the agent loop with provided messages.
+        Args:
+            queue: Queue to put responses into
+            messages: List of messages in standard OpenAI format
+        """
         # Continue running until explicitly told to stop
         running = True
         turn_created = False
@@ -475,88 +521,117 @@ class UITARSLoop(BaseLoop):
         attempt = 0
         max_attempts = 3
-        while running and attempt < max_attempts:
-            try:
-                # Create a new turn directory if it's not already created
-                if not turn_created:
-                    self._create_turn_dir()
-                    turn_created = True
+        try:
+            while running and attempt < max_attempts:
+                try:
+                    # Create a new turn directory if it's not already created
+                    if not turn_created:
+                        self._create_turn_dir()
+                        turn_created = True
-                # Ensure client is initialized
-                if self.client is None:
-                    logger.info("Initializing client...")
-                    await self.initialize_client()
+                    # Ensure client is initialized
                     if self.client is None:
-                        raise RuntimeError("Failed to initialize client")
-                    logger.info("Client initialized successfully")
-                # Get current screen
-                base64_screenshot = await self._get_current_screen()
-                # Add screenshot to message history
-                self.message_manager.add_user_message(
-                    [
-                        {
-                            "type": "image_url",
-                            "image_url": {"url": f"data:image/png;base64,{base64_screenshot}"},
-                        }
-                    ]
-                )
-                logger.info("Added screenshot to message history")
+                        logger.info("Initializing client...")
+                        await self.initialize_client()
+                        if self.client is None:
+                            raise RuntimeError("Failed to initialize client")
+                        logger.info("Client initialized successfully")
+                    # Get current screen
+                    base64_screenshot = await self._get_current_screen()
+                    # Add screenshot to message history
+                    self.message_manager.add_user_message(
+                        [
+                            {
+                                "type": "image_url",
+                                "image_url": {"url": f"data:image/png;base64,{base64_screenshot}"},
+                            }
+                        ]
+                    )
+                    logger.info("Added screenshot to message history")
-                # Get system prompt
-                system_prompt = self._get_system_prompt()
+                    # Get system prompt
+                    system_prompt = self._get_system_prompt()
-                # Make API call with retries
-                response = await self._make_api_call(
-                    self.message_manager.messages, system_prompt
-                )
+                    # Make API call with retries
+                    response = await self._make_api_call(
+                        self.message_manager.messages, system_prompt
+                    )
-                # Handle the response (may execute actions)
-                # Returns: (should_continue, action_screenshot_saved)
-                should_continue, new_screenshot_saved = await self._handle_response(
-                    response, self.message_manager.messages
-                )
+                    # Handle the response (may execute actions)
+                    # Returns: (should_continue, action_screenshot_saved)
+                    should_continue, new_screenshot_saved = await self._handle_response(
+                        response, self.message_manager.messages
+                    )
-                # Update whether an action screenshot was saved this turn
-                action_screenshot_saved = action_screenshot_saved or new_screenshot_saved
-                agent_response = await to_agent_response_format(
-                    response,
-                    messages,
-                    model=self.model,
-                )
-                # Log standardized response for ease of parsing
-                self._log_api_call("agent_response", request=None, response=agent_response)
-                yield agent_response
-                # Check if we should continue this conversation
-                running = should_continue
+                    # Update whether an action screenshot was saved this turn
+                    action_screenshot_saved = action_screenshot_saved or new_screenshot_saved
+                    agent_response = await to_agent_response_format(
+                        response,
+                        messages,
+                        model=self.model,
+                    )
+                    # Log standardized response for ease of parsing
+                    self._log_api_call("agent_response", request=None, response=agent_response)
+                    # Put the response in the queue
+                    await queue.put(agent_response)
+                    # Check if we should continue this conversation
+                    running = should_continue
-                # Create a new turn directory if we're continuing
-                if running:
-                    turn_created = False
+                    # Create a new turn directory if we're continuing
+                    if running:
+                        turn_created = False
-                # Reset attempt counter on success
-                attempt = 0
+                    # Reset attempt counter on success
+                    attempt = 0
+                except Exception as e:
+                    attempt += 1
+                    error_msg = f"Error in run method (attempt {attempt}/{max_attempts}): {str(e)}"
+                    logger.error(error_msg)
+                    # If this is our last attempt, provide more info about the error
+                    if attempt >= max_attempts:
+                        logger.error(f"Maximum retry attempts reached. Last error was: {str(e)}")
+                    await queue.put({
+                        "role": "assistant",
+                        "content": f"Error: {str(e)}",
+                        "metadata": {"title": "❌ Error"},
+                    })
+                    # Create a brief delay before retrying
+                    await asyncio.sleep(1)
+        finally:
+            # Signal that we're done
+            await queue.put(None)
+    async def cancel(self) -> None:
+        """Cancel the currently running agent loop task.
+        This method stops the ongoing processing in the agent loop
+        by cancelling the loop_task if it exists and is running.
+        """
+        if self.loop_task and not self.loop_task.done():
+            logger.info("Cancelling UITARS loop task")
+            self.loop_task.cancel()
+            try:
+                # Wait for the task to be cancelled with a timeout
+                await asyncio.wait_for(self.loop_task, timeout=2.0)
+            except asyncio.TimeoutError:
+                logger.warning("Timeout while waiting for loop task to cancel")
+            except asyncio.CancelledError:
+                logger.info("Loop task cancelled successfully")
             except Exception as e:
-                attempt += 1
-                error_msg = f"Error in run method (attempt {attempt}/{max_attempts}): {str(e)}"
-                logger.error(error_msg)
-                # If this is our last attempt, provide more info about the error
-                if attempt >= max_attempts:
-                    logger.error(f"Maximum retry attempts reached. Last error was: {str(e)}")
-                yield {
-                    "role": "assistant",
-                    "content": f"Error: {str(e)}",
-                    "metadata": {"title": "❌ Error"},
-                }
-                # Create a brief delay before retrying
-                await asyncio.sleep(1)
+                logger.error(f"Error while cancelling loop task: {str(e)}")
+            finally:
+                logger.info("UITARS loop task cancelled")
+        else:
+            logger.info("No active UITARS loop task to cancel")
     ###########################################
     # UTILITY METHODS

{cua_agent-0.1.41 → cua_agent-0.1.43}/pyproject.toml RENAMED Viewed

@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
 [project]
 name = "cua-agent"
-version = "0.1.41"
+version = "0.1.43"
 description = "CUA (Computer Use) Agent for AI-driven computer interaction"
 readme = "README.md"
 authors = [
@@ -109,7 +109,7 @@ target-version = [
 [tool.ruff]
 line-length = 100
-target-version = "0.1.41"
+target-version = "0.1.43"
 select = [
     "E",
     "F",
@@ -123,7 +123,7 @@ docstring-code-format = true
 [tool.mypy]
 strict = true
-python_version = "0.1.41"
+python_version = "0.1.43"
 ignore_missing_imports = true
 disallow_untyped_defs = true
 check_untyped_defs = true