camel-ai 0.2.71a4__py3-none-any.whl → 0.2.71a6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai has been flagged as potentially problematic; see the registry's advisory page for more details.

Files changed (36):
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +1533 -135
  3. camel/agents/repo_agent.py +2 -1
  4. camel/benchmarks/browsecomp.py +6 -6
  5. camel/logger.py +1 -1
  6. camel/messages/base.py +12 -1
  7. camel/models/azure_openai_model.py +96 -7
  8. camel/models/base_model.py +68 -10
  9. camel/models/deepseek_model.py +5 -0
  10. camel/models/gemini_model.py +5 -0
  11. camel/models/litellm_model.py +48 -16
  12. camel/models/model_manager.py +24 -6
  13. camel/models/openai_compatible_model.py +109 -5
  14. camel/models/openai_model.py +117 -8
  15. camel/societies/workforce/prompts.py +68 -5
  16. camel/societies/workforce/role_playing_worker.py +65 -7
  17. camel/societies/workforce/single_agent_worker.py +72 -18
  18. camel/societies/workforce/structured_output_handler.py +500 -0
  19. camel/societies/workforce/utils.py +67 -2
  20. camel/societies/workforce/workforce.py +527 -114
  21. camel/societies/workforce/workforce_logger.py +0 -8
  22. camel/tasks/task.py +3 -1
  23. camel/toolkits/__init__.py +2 -0
  24. camel/toolkits/file_write_toolkit.py +526 -121
  25. camel/toolkits/hybrid_browser_toolkit/actions.py +235 -60
  26. camel/toolkits/hybrid_browser_toolkit/agent.py +25 -8
  27. camel/toolkits/hybrid_browser_toolkit/browser_session.py +574 -164
  28. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +996 -126
  29. camel/toolkits/hybrid_browser_toolkit/stealth_config.py +116 -0
  30. camel/toolkits/hybrid_browser_toolkit/stealth_script.js +0 -0
  31. camel/toolkits/message_agent_toolkit.py +608 -0
  32. camel/toolkits/note_taking_toolkit.py +7 -13
  33. {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a6.dist-info}/METADATA +6 -4
  34. {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a6.dist-info}/RECORD +36 -32
  35. {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a6.dist-info}/WHEEL +0 -0
  36. {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a6.dist-info}/licenses/LICENSE +0 -0
@@ -14,6 +14,7 @@
14
14
  from __future__ import annotations
15
15
 
16
16
  import asyncio
17
+ import concurrent.futures
17
18
  import json
18
19
  import time
19
20
  import uuid
@@ -28,6 +29,7 @@ from typing import (
28
29
  Optional,
29
30
  Set,
30
31
  Tuple,
32
+ Union,
31
33
  )
32
34
 
33
35
  from colorama import Fore
@@ -40,12 +42,19 @@ from camel.societies.workforce.base import BaseNode
40
42
  from camel.societies.workforce.prompts import (
41
43
  ASSIGN_TASK_PROMPT,
42
44
  CREATE_NODE_PROMPT,
45
+ FAILURE_ANALYSIS_PROMPT,
43
46
  WF_TASK_DECOMPOSE_PROMPT,
44
47
  )
45
48
  from camel.societies.workforce.role_playing_worker import RolePlayingWorker
46
49
  from camel.societies.workforce.single_agent_worker import SingleAgentWorker
50
+ from camel.societies.workforce.structured_output_handler import (
51
+ StructuredOutputHandler,
52
+ )
47
53
  from camel.societies.workforce.task_channel import TaskChannel
48
54
  from camel.societies.workforce.utils import (
55
+ FailureContext,
56
+ RecoveryDecision,
57
+ RecoveryStrategy,
49
58
  TaskAssignment,
50
59
  TaskAssignResult,
51
60
  WorkerConf,
@@ -162,6 +171,14 @@ class Workforce(BaseNode):
162
171
  SingleAgentWorker instances; RolePlayingWorker and nested
163
172
  Workforce instances do not participate in memory sharing.
164
173
  (default: :obj:`False`)
174
+ use_structured_output_handler (bool, optional): Whether to use the
175
+ structured output handler instead of native structured output.
176
+ When enabled, the workforce will use prompts with structured
177
+ output instructions and regex extraction to parse responses.
178
+ This ensures compatibility with agents that don't reliably
179
+ support native structured output. When disabled, the workforce
180
+ uses the native response_format parameter.
181
+ (default: :obj:`True`)
165
182
 
166
183
  Example:
167
184
  >>> import asyncio
@@ -212,13 +229,19 @@ class Workforce(BaseNode):
212
229
  new_worker_agent: Optional[ChatAgent] = None,
213
230
  graceful_shutdown_timeout: float = 15.0,
214
231
  share_memory: bool = False,
232
+ use_structured_output_handler: bool = True,
215
233
  ) -> None:
216
234
  super().__init__(description)
217
- self._child_listening_tasks: Deque[asyncio.Task] = deque()
235
+ self._child_listening_tasks: Deque[
236
+ Union[asyncio.Task, concurrent.futures.Future]
237
+ ] = deque()
218
238
  self._children = children or []
219
239
  self.new_worker_agent = new_worker_agent
220
240
  self.graceful_shutdown_timeout = graceful_shutdown_timeout
221
241
  self.share_memory = share_memory
242
+ self.use_structured_output_handler = use_structured_output_handler
243
+ if self.use_structured_output_handler:
244
+ self.structured_handler = StructuredOutputHandler()
222
245
  self.metrics_logger = WorkforceLogger(workforce_id=self.node_id)
223
246
  self._task: Optional[Task] = None
224
247
  self._pending_tasks: Deque[Task] = deque()
@@ -611,6 +634,31 @@ class Workforce(BaseNode):
611
634
  # Remove original task dependencies as it's now decomposed
612
635
  del self._task_dependencies[original_task_id]
613
636
 
637
+ def _increment_in_flight_tasks(self, task_id: str) -> None:
638
+ r"""Safely increment the in-flight tasks counter with logging."""
639
+ self._in_flight_tasks += 1
640
+ logger.debug(
641
+ f"Incremented in-flight tasks for {task_id}. "
642
+ f"Count: {self._in_flight_tasks}"
643
+ )
644
+
645
+ def _decrement_in_flight_tasks(
646
+ self, task_id: str, context: str = ""
647
+ ) -> None:
648
+ r"""Safely decrement the in-flight tasks counter with safety checks."""
649
+ if self._in_flight_tasks > 0:
650
+ self._in_flight_tasks -= 1
651
+ logger.debug(
652
+ f"Decremented in-flight tasks for {task_id} ({context}). "
653
+ f"Count: {self._in_flight_tasks}"
654
+ )
655
+ else:
656
+ logger.debug(
657
+ f"Attempted to decrement in-flight tasks for {task_id} "
658
+ f"({context}) but counter is already 0. "
659
+ f"Counter: {self._in_flight_tasks}"
660
+ )
661
+
614
662
  def _cleanup_task_tracking(self, task_id: str) -> None:
615
663
  r"""Clean up tracking data for a task to prevent memory leaks.
616
664
 
@@ -634,9 +682,6 @@ class Workforce(BaseNode):
634
682
  )
635
683
  self.task_agent.reset()
636
684
  subtasks = task.decompose(self.task_agent, decompose_prompt)
637
- task.subtasks = subtasks
638
- for subtask in subtasks:
639
- subtask.parent = task
640
685
 
641
686
  # Update dependency tracking for decomposed task
642
687
  if subtasks:
@@ -644,6 +689,122 @@ class Workforce(BaseNode):
644
689
 
645
690
  return subtasks
646
691
 
692
+ def _analyze_failure(
693
+ self, task: Task, error_message: str
694
+ ) -> RecoveryDecision:
695
+ r"""Analyze a task failure and decide on the best recovery strategy.
696
+
697
+ Args:
698
+ task (Task): The failed task
699
+ error_message (str): The error message from the failure
700
+
701
+ Returns:
702
+ RecoveryDecision: The decided recovery strategy with reasoning
703
+ """
704
+ # First, do a quick smart analysis based on error patterns
705
+ error_msg_lower = error_message.lower()
706
+ if any(
707
+ keyword in error_msg_lower
708
+ for keyword in [
709
+ 'connection',
710
+ 'network',
711
+ 'server disconnected',
712
+ 'timeout',
713
+ 'apiconnectionerror',
714
+ ]
715
+ ):
716
+ return RecoveryDecision(
717
+ strategy=RecoveryStrategy.RETRY,
718
+ reasoning="Network/connection error detected, retrying task",
719
+ modified_task_content=None,
720
+ )
721
+
722
+ # Create failure context
723
+ failure_context = FailureContext(
724
+ task_id=task.id,
725
+ task_content=task.content,
726
+ failure_count=task.failure_count,
727
+ error_message=error_message,
728
+ worker_id=task.assigned_worker_id,
729
+ task_depth=task.get_depth(),
730
+ additional_info=str(task.additional_info)
731
+ if task.additional_info
732
+ else None,
733
+ )
734
+
735
+ # Format the analysis prompt
736
+ analysis_prompt = FAILURE_ANALYSIS_PROMPT.format(
737
+ task_id=failure_context.task_id,
738
+ task_content=failure_context.task_content,
739
+ failure_count=failure_context.failure_count,
740
+ error_message=failure_context.error_message,
741
+ worker_id=failure_context.worker_id or "unknown",
742
+ task_depth=failure_context.task_depth,
743
+ additional_info=failure_context.additional_info or "None",
744
+ )
745
+
746
+ try:
747
+ # Check if we should use structured handler
748
+ if self.use_structured_output_handler:
749
+ # Use structured handler
750
+ enhanced_prompt = (
751
+ self.structured_handler.generate_structured_prompt(
752
+ base_prompt=analysis_prompt,
753
+ schema=RecoveryDecision,
754
+ examples=[
755
+ {
756
+ "strategy": "RETRY",
757
+ "reasoning": "Temporary network error, "
758
+ "worth retrying",
759
+ "modified_task_content": None,
760
+ }
761
+ ],
762
+ )
763
+ )
764
+
765
+ self.task_agent.reset()
766
+ response = self.task_agent.step(enhanced_prompt)
767
+
768
+ result = self.structured_handler.parse_structured_response(
769
+ response.msg.content if response.msg else "",
770
+ schema=RecoveryDecision,
771
+ fallback_values={
772
+ "strategy": RecoveryStrategy.RETRY,
773
+ "reasoning": "Defaulting to retry due to parsing "
774
+ "issues",
775
+ "modified_task_content": None,
776
+ },
777
+ )
778
+ # Ensure we return a RecoveryDecision instance
779
+ if isinstance(result, RecoveryDecision):
780
+ return result
781
+ elif isinstance(result, dict):
782
+ return RecoveryDecision(**result)
783
+ else:
784
+ return RecoveryDecision(
785
+ strategy=RecoveryStrategy.RETRY,
786
+ reasoning="Failed to parse recovery decision",
787
+ modified_task_content=None,
788
+ )
789
+ else:
790
+ # Use existing native structured output code
791
+ self.task_agent.reset()
792
+ response = self.task_agent.step(
793
+ analysis_prompt, response_format=RecoveryDecision
794
+ )
795
+ return response.msg.parsed
796
+
797
+ except Exception as e:
798
+ logger.warning(
799
+ f"Error during failure analysis: {e}, defaulting to RETRY"
800
+ )
801
+ return RecoveryDecision(
802
+ strategy=RecoveryStrategy.RETRY,
803
+ reasoning=f"Analysis failed due to error: {e!s}, "
804
+ f"defaulting to retry",
805
+ modified_task_content=None,
806
+ )
807
+
647
808
  # Human intervention methods
648
809
  async def _async_pause(self) -> None:
649
810
  r"""Async implementation of pause to run on the event loop."""
@@ -1029,9 +1190,6 @@ class Workforce(BaseNode):
1029
1190
  needed
1030
1191
  >>> print(result.result)
1031
1192
  """
1032
- import asyncio
1033
- import concurrent.futures
1034
-
1035
1193
  # Check if we're already in an event loop
1036
1194
  try:
1037
1195
  current_loop = asyncio.get_running_loop()
@@ -1206,7 +1364,42 @@ class Workforce(BaseNode):
1206
1364
 
1207
1365
  return self._task
1208
1366
 
1209
- @check_if_running(False)
1367
+ def _start_child_node_when_paused(
1368
+ self, start_coroutine: Coroutine
1369
+ ) -> None:
1370
+ r"""Helper to start a child node when workforce is paused.
1371
+
1372
+ Args:
1373
+ start_coroutine: The coroutine to start (e.g., worker_node.start())
1374
+ """
1375
+ if self._state == WorkforceState.PAUSED and hasattr(
1376
+ self, '_child_listening_tasks'
1377
+ ):
1378
+ if self._loop and not self._loop.is_closed():
1379
+ # Use thread-safe coroutine execution for dynamic addition
1380
+ child_task: Union[asyncio.Task, concurrent.futures.Future]
1381
+ try:
1382
+ # Check if we're in the same thread as the loop
1383
+ current_loop = asyncio.get_running_loop()
1384
+ if current_loop is self._loop:
1385
+ # Same loop context - use create_task
1386
+ child_task = self._loop.create_task(start_coroutine)
1387
+ else:
1388
+ # Different loop context - use thread-safe approach
1389
+ child_task = asyncio.run_coroutine_threadsafe(
1390
+ start_coroutine, self._loop
1391
+ )
1392
+ except RuntimeError:
1393
+ # No running loop in current thread - use thread-safe
1394
+ # approach
1395
+ child_task = asyncio.run_coroutine_threadsafe(
1396
+ start_coroutine, self._loop
1397
+ )
1398
+ self._child_listening_tasks.append(child_task)
1399
+ else:
1400
+ # Close the coroutine to prevent RuntimeWarning
1401
+ start_coroutine.close()
1402
+
1210
1403
  def add_single_agent_worker(
1211
1404
  self,
1212
1405
  description: str,
@@ -1214,6 +1407,7 @@ class Workforce(BaseNode):
1214
1407
  pool_max_size: int = DEFAULT_WORKER_POOL_SIZE,
1215
1408
  ) -> Workforce:
1216
1409
  r"""Add a worker node to the workforce that uses a single agent.
1410
+ Can be called when workforce is paused to dynamically add workers.
1217
1411
 
1218
1412
  Args:
1219
1413
  description (str): Description of the worker node.
@@ -1223,7 +1417,15 @@ class Workforce(BaseNode):
1223
1417
 
1224
1418
  Returns:
1225
1419
  Workforce: The workforce node itself.
1420
+
1421
+ Raises:
1422
+ RuntimeError: If called while workforce is running (not paused).
1226
1423
  """
1424
+ if self._state == WorkforceState.RUNNING:
1425
+ raise RuntimeError(
1426
+ "Cannot add workers while workforce is running. "
1427
+ "Pause the workforce first."
1428
+ )
1227
1429
  # Ensure the worker agent shares this workforce's pause control
1228
1430
  self._attach_pause_event_to_agent(worker)
1229
1431
 
@@ -1231,8 +1433,17 @@ class Workforce(BaseNode):
1231
1433
  description=description,
1232
1434
  worker=worker,
1233
1435
  pool_max_size=pool_max_size,
1436
+ use_structured_output_handler=self.use_structured_output_handler,
1234
1437
  )
1235
1438
  self._children.append(worker_node)
1439
+
1440
+ # If we have a channel set up, set it for the new worker
1441
+ if hasattr(self, '_channel') and self._channel is not None:
1442
+ worker_node.set_channel(self._channel)
1443
+
1444
+ # If workforce is paused, start the worker's listening task
1445
+ self._start_child_node_when_paused(worker_node.start())
1446
+
1236
1447
  if self.metrics_logger:
1237
1448
  self.metrics_logger.log_worker_created(
1238
1449
  worker_id=worker_node.node_id,
@@ -1241,7 +1452,6 @@ class Workforce(BaseNode):
1241
1452
  )
1242
1453
  return self
1243
1454
 
1244
- @check_if_running(False)
1245
1455
  def add_role_playing_worker(
1246
1456
  self,
1247
1457
  description: str,
@@ -1253,6 +1463,7 @@ class Workforce(BaseNode):
1253
1463
  chat_turn_limit: int = 3,
1254
1464
  ) -> Workforce:
1255
1465
  r"""Add a worker node to the workforce that uses `RolePlaying` system.
1466
+ Can be called when workforce is paused to dynamically add workers.
1256
1467
 
1257
1468
  Args:
1258
1469
  description (str): Description of the node.
@@ -1272,7 +1483,15 @@ class Workforce(BaseNode):
1272
1483
 
1273
1484
  Returns:
1274
1485
  Workforce: The workforce node itself.
1486
+
1487
+ Raises:
1488
+ RuntimeError: If called while workforce is running (not paused).
1275
1489
  """
1490
+ if self._state == WorkforceState.RUNNING:
1491
+ raise RuntimeError(
1492
+ "Cannot add workers while workforce is running. "
1493
+ "Pause the workforce first."
1494
+ )
1276
1495
  # Ensure provided kwargs carry pause_event so that internally created
1277
1496
  # ChatAgents (assistant/user/summarizer) inherit it.
1278
1497
  assistant_agent_kwargs = self._ensure_pause_event_in_kwargs(
@@ -1293,8 +1512,17 @@ class Workforce(BaseNode):
1293
1512
  user_agent_kwargs=user_agent_kwargs,
1294
1513
  summarize_agent_kwargs=summarize_agent_kwargs,
1295
1514
  chat_turn_limit=chat_turn_limit,
1515
+ use_structured_output_handler=self.use_structured_output_handler,
1296
1516
  )
1297
1517
  self._children.append(worker_node)
1518
+
1519
+ # If we have a channel set up, set it for the new worker
1520
+ if hasattr(self, '_channel') and self._channel is not None:
1521
+ worker_node.set_channel(self._channel)
1522
+
1523
+ # If workforce is paused, start the worker's listening task
1524
+ self._start_child_node_when_paused(worker_node.start())
1525
+
1298
1526
  if self.metrics_logger:
1299
1527
  self.metrics_logger.log_worker_created(
1300
1528
  worker_id=worker_node.node_id,
@@ -1303,20 +1531,35 @@ class Workforce(BaseNode):
1303
1531
  )
1304
1532
  return self
1305
1533
 
1306
- @check_if_running(False)
1307
1534
  def add_workforce(self, workforce: Workforce) -> Workforce:
1308
1535
  r"""Add a workforce node to the workforce.
1536
+ Can be called when workforce is paused to dynamically add workers.
1309
1537
 
1310
1538
  Args:
1311
1539
  workforce (Workforce): The workforce node to be added.
1312
1540
 
1313
1541
  Returns:
1314
1542
  Workforce: The workforce node itself.
1543
+
1544
+ Raises:
1545
+ RuntimeError: If called while workforce is running (not paused).
1315
1546
  """
1547
+ if self._state == WorkforceState.RUNNING:
1548
+ raise RuntimeError(
1549
+ "Cannot add workers while workforce is running. "
1550
+ "Pause the workforce first."
1551
+ )
1316
1552
  # Align child workforce's pause_event with this one for unified
1317
1553
  # control of worker agents only.
1318
1554
  workforce._pause_event = self._pause_event
1319
1555
  self._children.append(workforce)
1556
+
1557
+ # If we have a channel set up, set it for the new workforce
1558
+ if hasattr(self, '_channel') and self._channel is not None:
1559
+ workforce.set_channel(self._channel)
1560
+
1561
+ # If workforce is paused, start the child workforce's listening task
1562
+ self._start_child_node_when_paused(workforce.start())
1320
1563
  return self
1321
1564
 
1322
1565
  async def _async_reset(self) -> None:
@@ -1443,26 +1686,73 @@ class Workforce(BaseNode):
1443
1686
  )
1444
1687
  prompt = prompt + f"\n\n{feedback}"
1445
1688
 
1446
- response = self.coordinator_agent.step(
1447
- prompt, response_format=TaskAssignResult
1448
- )
1449
-
1450
- if response.msg is None or response.msg.content is None:
1451
- logger.error(
1452
- "Coordinator agent returned empty response for task assignment"
1689
+ # Check if we should use structured handler
1690
+ if self.use_structured_output_handler:
1691
+ # Use structured handler for prompt-based extraction
1692
+ enhanced_prompt = (
1693
+ self.structured_handler.generate_structured_prompt(
1694
+ base_prompt=prompt,
1695
+ schema=TaskAssignResult,
1696
+ examples=[
1697
+ {
1698
+ "assignments": [
1699
+ {
1700
+ "task_id": "task_1",
1701
+ "assignee_id": "worker_123",
1702
+ "dependencies": [],
1703
+ }
1704
+ ]
1705
+ }
1706
+ ],
1707
+ )
1453
1708
  )
1454
- return TaskAssignResult(assignments=[])
1455
1709
 
1456
- try:
1457
- result_dict = json.loads(response.msg.content, parse_int=str)
1458
- return TaskAssignResult(**result_dict)
1459
- except json.JSONDecodeError as e:
1460
- logger.error(
1461
- f"JSON parsing error in task assignment: Invalid response "
1462
- f"format - {e}. Response content: "
1463
- f"{response.msg.content[:50]}..."
1710
+ # Get response without structured format
1711
+ response = self.coordinator_agent.step(enhanced_prompt)
1712
+
1713
+ if response.msg is None or response.msg.content is None:
1714
+ logger.error(
1715
+ "Coordinator agent returned empty response for "
1716
+ "task assignment"
1717
+ )
1718
+ return TaskAssignResult(assignments=[])
1719
+
1720
+ # Parse with structured handler
1721
+ result = self.structured_handler.parse_structured_response(
1722
+ response.msg.content,
1723
+ schema=TaskAssignResult,
1724
+ fallback_values={"assignments": []},
1725
+ )
1726
+ # Ensure we return a TaskAssignResult instance
1727
+ if isinstance(result, TaskAssignResult):
1728
+ return result
1729
+ elif isinstance(result, dict):
1730
+ return TaskAssignResult(**result)
1731
+ else:
1732
+ return TaskAssignResult(assignments=[])
1733
+ else:
1734
+ # Use existing native structured output code
1735
+ response = self.coordinator_agent.step(
1736
+ prompt, response_format=TaskAssignResult
1464
1737
  )
1465
- return TaskAssignResult(assignments=[])
1738
+
1739
+ if response.msg is None or response.msg.content is None:
1740
+ logger.error(
1741
+ "Coordinator agent returned empty response for "
1742
+ "task assignment"
1743
+ )
1744
+ return TaskAssignResult(assignments=[])
1745
+
1746
+ try:
1747
+ result_dict = json.loads(response.msg.content, parse_int=str)
1748
+ return TaskAssignResult(**result_dict)
1749
+ except json.JSONDecodeError as e:
1750
+ logger.error(
1751
+ f"JSON parsing error in task assignment: Invalid response "
1752
+ f"format - {e}. Response content: "
1753
+ f"{response.msg.content[:50]}..."
1754
+ )
1755
+ return TaskAssignResult(assignments=[])
1466
1756
 
1467
1757
  def _validate_assignments(
1468
1758
  self, assignments: List[TaskAssignment], valid_ids: Set[str]
@@ -1654,18 +1944,20 @@ class Workforce(BaseNode):
1654
1944
  )
1655
1945
 
1656
1946
  try:
1657
- self._in_flight_tasks += 1
1658
1947
  await self._channel.post_task(task, self.node_id, assignee_id)
1948
+ self._increment_in_flight_tasks(task.id)
1659
1949
  logger.debug(
1660
1950
  f"Posted task {task.id} to {assignee_id}. "
1661
1951
  f"In-flight tasks: {self._in_flight_tasks}"
1662
1952
  )
1663
1953
  except Exception as e:
1664
- # Decrement counter if posting failed
1665
- self._in_flight_tasks -= 1
1666
1954
  logger.error(
1667
1955
  f"Failed to post task {task.id} to {assignee_id}: {e}"
1668
1956
  )
1957
+ print(
1958
+ f"{Fore.RED}Failed to post task {task.id} to {assignee_id}: "
1959
+ f"{e}{Fore.RESET}"
1960
+ )
1669
1961
 
1670
1962
  async def _post_dependency(self, dependency: Task) -> None:
1671
1963
  await self._channel.post_dependency(dependency, self.node_id)
@@ -1686,35 +1978,92 @@ class Workforce(BaseNode):
1686
1978
  child_nodes_info=self._get_child_nodes_info(),
1687
1979
  additional_info=task.additional_info,
1688
1980
  )
1689
- response = self.coordinator_agent.step(
1690
- prompt, response_format=WorkerConf
1691
- )
1692
- if response.msg is None or response.msg.content is None:
1693
- logger.error(
1694
- "Coordinator agent returned empty response for worker creation"
1695
- )
1696
- # Create a fallback worker configuration
1697
- new_node_conf = WorkerConf(
1698
- description=f"Fallback worker for "
1699
- f"task: {task.content[:50]}...",
1700
- role="General Assistant",
1701
- sys_msg="You are a general assistant that can help "
1702
- "with various tasks.",
1981
+ # Check if we should use structured handler
1982
+ if self.use_structured_output_handler:
1983
+ # Use structured handler
1984
+ enhanced_prompt = (
1985
+ self.structured_handler.generate_structured_prompt(
1986
+ base_prompt=prompt,
1987
+ schema=WorkerConf,
1988
+ examples=[
1989
+ {
1990
+ "description": "Data analysis specialist",
1991
+ "role": "Data Analyst",
1992
+ "sys_msg": "You are an expert data analyst.",
1993
+ }
1994
+ ],
1995
+ )
1703
1996
  )
1997
+
1998
+ response = self.coordinator_agent.step(enhanced_prompt)
1999
+
2000
+ if response.msg is None or response.msg.content is None:
2001
+ logger.error(
2002
+ "Coordinator agent returned empty response for "
2003
+ "worker creation"
2004
+ )
2005
+ new_node_conf = WorkerConf(
2006
+ description=f"Fallback worker for task: "
2007
+ f"{task.content[:50]}...",
2008
+ role="General Assistant",
2009
+ sys_msg="You are a general assistant that can help "
2010
+ "with various tasks.",
2011
+ )
2012
+ else:
2013
+ result = self.structured_handler.parse_structured_response(
2014
+ response.msg.content,
2015
+ schema=WorkerConf,
2016
+ fallback_values={
2017
+ "description": f"Worker for task: "
2018
+ f"{task.content[:50]}...",
2019
+ "role": "Task Specialist",
2020
+ "sys_msg": f"You are a specialist for: {task.content}",
2021
+ },
2022
+ )
2023
+ # Ensure we have a WorkerConf instance
2024
+ if isinstance(result, WorkerConf):
2025
+ new_node_conf = result
2026
+ elif isinstance(result, dict):
2027
+ new_node_conf = WorkerConf(**result)
2028
+ else:
2029
+ new_node_conf = WorkerConf(
2030
+ description=f"Worker for task: {task.content[:50]}...",
2031
+ role="Task Specialist",
2032
+ sys_msg=f"You are a specialist for: {task.content}",
2033
+ )
1704
2034
  else:
1705
- try:
1706
- result_dict = json.loads(response.msg.content)
1707
- new_node_conf = WorkerConf(**result_dict)
1708
- except json.JSONDecodeError as e:
2035
+ # Use existing native structured output code
2036
+ response = self.coordinator_agent.step(
2037
+ prompt, response_format=WorkerConf
2038
+ )
2039
+ if response.msg is None or response.msg.content is None:
1709
2040
  logger.error(
1710
- f"JSON parsing error in worker creation: Invalid response "
1711
- f"format - {e}. Response content: "
1712
- f"{response.msg.content[:100]}..."
2041
+ "Coordinator agent returned empty response for "
2042
+ "worker creation"
1713
2043
  )
1714
- raise RuntimeError(
1715
- f"Failed to create worker for task {task.id}: "
1716
- f"Coordinator agent returned malformed JSON response. "
2044
+ # Create a fallback worker configuration
2045
+ new_node_conf = WorkerConf(
2046
+ description=f"Fallback worker for "
2047
+ f"task: {task.content[:50]}...",
2048
+ role="General Assistant",
2049
+ sys_msg="You are a general assistant that can help "
2050
+ "with various tasks.",
1717
2051
  )
2052
+ else:
2053
+ try:
2054
+ result_dict = json.loads(response.msg.content)
2055
+ new_node_conf = WorkerConf(**result_dict)
2056
+ except json.JSONDecodeError as e:
2057
+ logger.error(
2058
+ f"JSON parsing error in worker creation: Invalid "
2059
+ f"response format - {e}. Response content: "
2060
+ f"format - {e}. Response content: "
2061
+ f"{response.msg.content[:100]}..."
2062
+ )
2063
+ raise RuntimeError(
2064
+ f"Failed to create worker for task {task.id}: "
2065
+ f"Coordinator agent returned malformed JSON response. "
2066
+ ) from e
1718
2067
 
1719
2068
  new_agent = await self._create_new_agent(
1720
2069
  new_node_conf.role,
@@ -1725,6 +2074,7 @@ class Workforce(BaseNode):
1725
2074
  description=new_node_conf.description,
1726
2075
  worker=new_agent,
1727
2076
  pool_max_size=DEFAULT_WORKER_POOL_SIZE,
2077
+ use_structured_output_handler=self.use_structured_output_handler,
1728
2078
  )
1729
2079
  new_node.set_channel(self._channel)
1730
2080
 
@@ -1789,10 +2139,6 @@ class Workforce(BaseNode):
1789
2139
  timeout=TASK_TIMEOUT_SECONDS,
1790
2140
  )
1791
2141
  except Exception as e:
1792
- # Decrement in-flight counter to prevent hanging
1793
- if self._in_flight_tasks > 0:
1794
- self._in_flight_tasks -= 1
1795
-
1796
2142
  error_msg = (
1797
2143
  f"Error getting returned task {e} in "
1798
2144
  f"workforce {self.node_id}. "
@@ -1804,8 +2150,11 @@ class Workforce(BaseNode):
1804
2150
  if self._pending_tasks and self._assignees:
1805
2151
  for task in self._pending_tasks:
1806
2152
  if task.id in self._assignees:
1807
- # Mark this real task as failed
2153
+ # Mark task as failed and decrement counter
1808
2154
  task.set_state(TaskState.FAILED)
2155
+ self._decrement_in_flight_tasks(
2156
+ task.id, "timeout/error in _get_returned_task"
2157
+ )
1809
2158
  return task
1810
2159
  return None
1811
2160
 
@@ -1905,7 +2254,6 @@ class Workforce(BaseNode):
1905
2254
  task_id=task.id,
1906
2255
  worker_id=worker_id,
1907
2256
  error_message=detailed_error,
1908
- error_type="TaskFailure",
1909
2257
  metadata={
1910
2258
  'failure_count': task.failure_count,
1911
2259
  'task_content': task.content,
@@ -1944,67 +2292,116 @@ class Workforce(BaseNode):
1944
2292
  await self._channel.archive_task(task.id)
1945
2293
  return True
1946
2294
 
1947
- if task.get_depth() > 3:
1948
- # Create a new worker node and reassign
1949
- assignee = await self._create_worker_node_for_task(task)
2295
+ # Use intelligent failure analysis to decide recovery strategy
2296
+ recovery_decision = self._analyze_failure(task, detailed_error)
1950
2297
 
1951
- # Sync shared memory after creating new worker to provide context
1952
- if self.share_memory:
1953
- logger.info(
1954
- f"Syncing shared memory after creating new worker "
1955
- f"{assignee.node_id} for failed task {task.id}"
1956
- )
1957
- self._sync_shared_memory()
2298
+ logger.info(
2299
+ f"Task {task.id} failure "
2300
+ f"analysis: {recovery_decision.strategy.value} - "
2301
+ f"{recovery_decision.reasoning}"
2302
+ )
1958
2303
 
1959
- await self._post_task(task, assignee.node_id)
1960
- action_taken = f"reassigned to new worker {assignee.node_id}"
1961
- else:
1962
- subtasks = self._decompose_task(task)
1963
- if self.metrics_logger and subtasks:
1964
- self.metrics_logger.log_task_decomposed(
1965
- parent_task_id=task.id,
1966
- subtask_ids=[st.id for st in subtasks],
1967
- )
1968
- for subtask in subtasks:
1969
- self.metrics_logger.log_task_created(
1970
- task_id=subtask.id,
1971
- description=subtask.content,
1972
- parent_task_id=task.id,
1973
- task_type=subtask.type,
1974
- metadata=subtask.additional_info,
2304
+ # Clean up tracking before attempting recovery
2305
+ if task.id in self._assignees:
2306
+ await self._channel.archive_task(task.id)
2307
+ self._cleanup_task_tracking(task.id)
2308
+
2309
+ try:
2310
+ if recovery_decision.strategy == RecoveryStrategy.RETRY:
2311
+ # Simply retry the task by reposting it
2312
+ if task.id in self._assignees:
2313
+ assignee_id = self._assignees[task.id]
2314
+ await self._post_task(task, assignee_id)
2315
+ action_taken = f"retried with same worker {assignee_id}"
2316
+ else:
2317
+ # Find a new assignee and retry
2318
+ batch_result = await self._find_assignee([task])
2319
+ assignment = batch_result.assignments[0]
2320
+ self._assignees[task.id] = assignment.assignee_id
2321
+ await self._post_task(task, assignment.assignee_id)
2322
+ action_taken = (
2323
+ f"retried with new worker {assignment.assignee_id}"
1975
2324
  )
1976
- # Insert packets at the head of the queue
1977
- self._pending_tasks.extendleft(reversed(subtasks))
1978
2325
 
1979
- await self._post_ready_tasks()
1980
- action_taken = f"decomposed into {len(subtasks)} subtasks"
2326
+ elif recovery_decision.strategy == RecoveryStrategy.REPLAN:
2327
+ # Modify the task content and retry
2328
+ if recovery_decision.modified_task_content:
2329
+ task.content = recovery_decision.modified_task_content
2330
+ logger.info(f"Task {task.id} content modified for replan")
1981
2331
 
1982
- # Handle task completion differently for decomposed tasks
1983
- if task.id in self._assignees:
1984
- await self._channel.archive_task(task.id)
2332
+ # Repost the modified task
2333
+ if task.id in self._assignees:
2334
+ assignee_id = self._assignees[task.id]
2335
+ await self._post_task(task, assignee_id)
2336
+ action_taken = (
2337
+ f"replanned and retried with worker {assignee_id}"
2338
+ )
2339
+ else:
2340
+ # Find a new assignee for the replanned task
2341
+ batch_result = await self._find_assignee([task])
2342
+ assignment = batch_result.assignments[0]
2343
+ self._assignees[task.id] = assignment.assignee_id
2344
+ await self._post_task(task, assignment.assignee_id)
2345
+ action_taken = (
2346
+ f"replanned and assigned to "
2347
+ f"worker {assignment.assignee_id}"
2348
+ )
1985
2349
 
1986
- self._cleanup_task_tracking(task.id)
1987
- logger.debug(
1988
- f"Task {task.id} failed and was {action_taken}. "
1989
- f"Dependencies updated for subtasks."
1990
- )
2350
+ elif recovery_decision.strategy == RecoveryStrategy.DECOMPOSE:
2351
+ # Decompose the task into subtasks
2352
+ subtasks = self._decompose_task(task)
2353
+ if self.metrics_logger and subtasks:
2354
+ self.metrics_logger.log_task_decomposed(
2355
+ parent_task_id=task.id,
2356
+ subtask_ids=[st.id for st in subtasks],
2357
+ )
2358
+ for subtask in subtasks:
2359
+ self.metrics_logger.log_task_created(
2360
+ task_id=subtask.id,
2361
+ description=subtask.content,
2362
+ parent_task_id=task.id,
2363
+ task_type=subtask.type,
2364
+ metadata=subtask.additional_info,
2365
+ )
2366
+ # Insert packets at the head of the queue
2367
+ self._pending_tasks.extendleft(reversed(subtasks))
1991
2368
 
1992
- # Sync shared memory after task decomposition
1993
- if self.share_memory:
1994
- logger.info(
1995
- f"Syncing shared memory after task {task.id} decomposition"
2369
+ await self._post_ready_tasks()
2370
+ action_taken = f"decomposed into {len(subtasks)} subtasks"
2371
+
2372
+ logger.debug(
2373
+ f"Task {task.id} failed and was {action_taken}. "
2374
+ f"Dependencies updated for subtasks."
1996
2375
  )
1997
- self._sync_shared_memory()
1998
2376
 
1999
- # Check if any pending tasks are now ready to execute
2000
- await self._post_ready_tasks()
2001
- return False
2377
+ # Sync shared memory after task decomposition
2378
+ if self.share_memory:
2379
+ logger.info(
2380
+ f"Syncing shared memory after "
2381
+ f"task {task.id} decomposition"
2382
+ )
2383
+ self._sync_shared_memory()
2002
2384
 
2003
- # For reassigned tasks (depth > 3), handle normally
2004
- if task.id in self._assignees:
2005
- await self._channel.archive_task(task.id)
2385
+ # Check if any pending tasks are now ready to execute
2386
+ await self._post_ready_tasks()
2387
+ return False
2388
+
2389
+ elif recovery_decision.strategy == RecoveryStrategy.CREATE_WORKER:
2390
+ assignee = await self._create_worker_node_for_task(task)
2391
+ await self._post_task(task, assignee.node_id)
2392
+ action_taken = (
2393
+ f"created new worker {assignee.node_id} and assigned "
2394
+ f"task {task.id} to it"
2395
+ )
2396
+ except Exception as e:
2397
+ logger.error(f"Recovery strategy failed for task {task.id}: {e}")
2398
+ # If max retries reached, halt the workforce
2399
+ if task.failure_count >= MAX_TASK_RETRIES:
2400
+ self._completed_tasks.append(task)
2401
+ return True
2402
+ self._completed_tasks.append(task)
2403
+ return False
2006
2404
 
2007
- self._cleanup_task_tracking(task.id)
2008
2405
  logger.debug(
2009
2406
  f"Task {task.id} failed and was {action_taken}. "
2010
2407
  f"Updating dependency state."
@@ -2275,7 +2672,9 @@ class Workforce(BaseNode):
2275
2672
  await self._post_ready_tasks()
2276
2673
  continue
2277
2674
 
2278
- self._in_flight_tasks -= 1
2675
+ self._decrement_in_flight_tasks(
2676
+ returned_task.id, "task returned successfully"
2677
+ )
2279
2678
 
2280
2679
  # Check for stop request after getting task
2281
2680
  if self._stop_requested:
@@ -2360,8 +2759,9 @@ class Workforce(BaseNode):
2360
2759
 
2361
2760
  except Exception as e:
2362
2761
  # Decrement in-flight counter to prevent hanging
2363
- if self._in_flight_tasks > 0:
2364
- self._in_flight_tasks -= 1
2762
+ self._decrement_in_flight_tasks(
2763
+ "unknown", "exception in task processing loop"
2764
+ )
2365
2765
 
2366
2766
  logger.error(
2367
2767
  f"Error processing task in workforce {self.node_id}: {e}"
@@ -2440,8 +2840,20 @@ class Workforce(BaseNode):
2440
2840
  for task in self._child_listening_tasks:
2441
2841
  if not task.done():
2442
2842
  task.cancel()
2843
+
2844
+ # Handle both asyncio.Task and concurrent.futures.
2845
+ # Future
2846
+ awaitables = []
2847
+ for task in self._child_listening_tasks:
2848
+ if isinstance(task, concurrent.futures.Future):
2849
+ # Convert Future to awaitable
2850
+ awaitables.append(asyncio.wrap_future(task))
2851
+ else:
2852
+ # Already an asyncio.Task
2853
+ awaitables.append(task)
2854
+
2443
2855
  await asyncio.gather(
2444
- *self._child_listening_tasks,
2856
+ *awaitables,
2445
2857
  return_exceptions=True,
2446
2858
  )
2447
2859
 
@@ -2482,6 +2894,7 @@ class Workforce(BaseNode):
2482
2894
  else None,
2483
2895
  graceful_shutdown_timeout=self.graceful_shutdown_timeout,
2484
2896
  share_memory=self.share_memory,
2897
+ use_structured_output_handler=self.use_structured_output_handler,
2485
2898
  )
2486
2899
 
2487
2900
  for child in self._children: