PyPI - camel-ai - Versions diffs - 0.2.69a7__py3-none-any.whl → 0.2.71a1__py3-none-any.whl - Mend

camel-ai: 0.2.69a7 (py3-none-any.whl) → 0.2.71a1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of camel-ai might be problematic. Click here for more details.

Files changed (23)

camel/__init__.py +1 -1
camel/societies/role_playing.py +26 -28
camel/societies/workforce/role_playing_worker.py +4 -4
camel/societies/workforce/single_agent_worker.py +4 -4
camel/societies/workforce/workforce.py +462 -159
camel/societies/workforce/workforce_logger.py +37 -24
camel/storages/__init__.py +2 -0
camel/storages/vectordb_storages/__init__.py +2 -0
camel/storages/vectordb_storages/pgvector.py +349 -0
camel/tasks/task.py +83 -7
camel/toolkits/file_write_toolkit.py +21 -7
camel/toolkits/human_toolkit.py +23 -8
camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +23 -2
camel/toolkits/non_visual_browser_toolkit/nv_browser_session.py +53 -11
camel/toolkits/non_visual_browser_toolkit/snapshot.js +211 -131
camel/toolkits/non_visual_browser_toolkit/snapshot.py +9 -8
camel/toolkits/terminal_toolkit.py +28 -20
camel/toolkits/video_download_toolkit.py +5 -1
camel/types/enums.py +3 -0
{camel_ai-0.2.69a7.dist-info → camel_ai-0.2.71a1.dist-info}/METADATA +5 -1
{camel_ai-0.2.69a7.dist-info → camel_ai-0.2.71a1.dist-info}/RECORD +23 -22
{camel_ai-0.2.69a7.dist-info → camel_ai-0.2.71a1.dist-info}/WHEEL +0 -0
{camel_ai-0.2.69a7.dist-info → camel_ai-0.2.71a1.dist-info}/licenses/LICENSE +0 -0

camel/societies/workforce/workforce.py CHANGED Viewed

@@ -43,7 +43,12 @@ from camel.societies.workforce.utils import (
     check_if_running,
 )
 from camel.societies.workforce.worker import Worker
-from camel.tasks.task import Task, TaskState, validate_task_content
+from camel.tasks.task import (
+    Task,
+    TaskState,
+    is_task_result_insufficient,
+    validate_task_content,
+)
 from camel.toolkits import (
     CodeExecutionToolkit,
     SearchToolkit,
@@ -57,6 +62,12 @@ from .workforce_logger import WorkforceLogger
 logger = get_logger(__name__)
+# Constants for configuration values
+MAX_TASK_RETRIES = 3
+MAX_PENDING_TASKS_LIMIT = 20
+TASK_TIMEOUT_SECONDS = 180.0
+DEFAULT_WORKER_POOL_SIZE = 10
 class WorkforceState(Enum):
     r"""Workforce execution state for human intervention support."""
@@ -111,27 +122,24 @@ class Workforce(BaseNode):
         children (Optional[List[BaseNode]], optional): List of child nodes
             under this node. Each child node can be a worker node or
             another workforce node. (default: :obj:`None`)
-        coordinator_agent_kwargs (Optional[Dict], optional): Keyword
-            arguments passed directly to the coordinator :obj:`ChatAgent`
-            constructor. The coordinator manages task assignment and failure
-            handling strategies. See :obj:`ChatAgent` documentation
-            for all available parameters.
-            (default: :obj:`None` - uses ModelPlatformType.DEFAULT,
-            ModelType.DEFAULT)
-        task_agent_kwargs (Optional[Dict], optional): Keyword arguments
-            passed directly to the task planning :obj:`ChatAgent` constructor.
-            The task agent handles task decomposition into subtasks and result
-            composition. See :obj:`ChatAgent` documentation for all
-            available parameters.
-            (default: :obj:`None` - uses ModelPlatformType.DEFAULT,
-            ModelType.DEFAULT)
-        new_worker_agent_kwargs (Optional[Dict], optional): Default keyword
-            arguments passed to :obj:`ChatAgent` constructor for workers
-            created dynamically at runtime when existing workers cannot handle
-            failed tasks. See :obj:`ChatAgent` documentation for all
-            available parameters.
-            (default: :obj:`None` - creates workers with SearchToolkit,
-            CodeExecutionToolkit, and ThinkingToolkit)
+        coordinator_agent (Optional[ChatAgent], optional): A custom coordinator
+            agent instance for task assignment and worker creation. If
+            provided, the workforce will create a new agent using this agent's
+            model configuration but with the required system message and
+            functionality.
+            If None, a default agent will be created using DEFAULT model
+            settings. (default: :obj:`None`)
+        task_agent (Optional[ChatAgent], optional): A custom task planning
+            agent instance for task decomposition and composition. If
+            provided, the workforce will create a new agent using this agent's
+            model configuration but with the required system message and tools
+            (TaskPlanningToolkit). If None, a default agent will be created
+            using DEFAULT model settings. (default: :obj:`None`)
+        new_worker_agent (Optional[ChatAgent], optional): A template agent for
+            workers created dynamically at runtime when existing workers cannot
+            handle failed tasks. If None, workers will be created with default
+            settings including SearchToolkit, CodeExecutionToolkit, and
+            ThinkingToolkit. (default: :obj:`None`)
         graceful_shutdown_timeout (float, optional): The timeout in seconds
             for graceful shutdown when a task fails 3 times. During this
             period, the workforce remains active for debugging.
@@ -147,40 +155,59 @@ class Workforce(BaseNode):
             (default: :obj:`False`)
     Example:
-        >>> # Configure with custom model and shared memory
         >>> import asyncio
+        >>> from camel.agents import ChatAgent
+        >>> from camel.models import ModelFactory
+        >>> from camel.types import ModelPlatformType, ModelType
+        >>> from camel.tasks import Task
+        >>>
+        >>> # Simple workforce with default agents
+        >>> workforce = Workforce("Research Team")
+        >>>
+        >>> # Workforce with custom model configuration
         >>> model = ModelFactory.create(
-        ...     ModelPlatformType.OPENAI, ModelType.GPT_4O
+        ...     ModelPlatformType.OPENAI, model_type=ModelType.GPT_4O
         ... )
+        >>> coordinator_agent = ChatAgent(model=model)
+        >>> task_agent = ChatAgent(model=model)
+        >>>
         >>> workforce = Workforce(
         ...     "Research Team",
-        ...     coordinator_agent_kwargs={"model": model, "token_limit": 4000},
-        ...     task_agent_kwargs={"model": model, "token_limit": 8000},
-        ...     share_memory=True  # Enable shared memory
+        ...     coordinator_agent=coordinator_agent,
+        ...     task_agent=task_agent,
         ... )
         >>>
         >>> # Process a task
         >>> async def main():
         ...     task = Task(content="Research AI trends", id="1")
-        ...     result = workforce.process_task(task)
+        ...     result = await workforce.process_task_async(task)
         ...     return result
-        >>> asyncio.run(main())
+        >>>
+        >>> result_task = asyncio.run(main())
+    Note:
+        When custom coordinator_agent or task_agent are provided, the workforce
+        will preserve the user's system message and append the required
+        workforce coordination or task planning instructions to it. This
+        ensures both the user's intent is preserved and proper workforce
+        functionality is maintained. All other agent configurations (model,
+        memory, tools, etc.) will also be preserved.
     """
     def __init__(
         self,
         description: str,
         children: Optional[List[BaseNode]] = None,
-        coordinator_agent_kwargs: Optional[Dict] = None,
-        task_agent_kwargs: Optional[Dict] = None,
-        new_worker_agent_kwargs: Optional[Dict] = None,
+        coordinator_agent: Optional[ChatAgent] = None,
+        task_agent: Optional[ChatAgent] = None,
+        new_worker_agent: Optional[ChatAgent] = None,  # TODO: use MCP Agent
         graceful_shutdown_timeout: float = 15.0,
         share_memory: bool = False,
     ) -> None:
         super().__init__(description)
         self._child_listening_tasks: Deque[asyncio.Task] = deque()
         self._children = children or []
-        self.new_worker_agent_kwargs = new_worker_agent_kwargs
+        self.new_worker_agent = new_worker_agent
         self.graceful_shutdown_timeout = graceful_shutdown_timeout
         self.share_memory = share_memory
         self.metrics_logger = WorkforceLogger(workforce_id=self.node_id)
@@ -200,6 +227,7 @@ class Workforce(BaseNode):
         self._completed_tasks: List[Task] = []
         self._loop: Optional[asyncio.AbstractEventLoop] = None
         self._main_task_future: Optional[asyncio.Future] = None
+        self._cleanup_task: Optional[asyncio.Task] = None
         # Snapshot throttle support
         self._last_snapshot_time: float = 0.0
         # Minimum seconds between automatic snapshots
@@ -214,58 +242,72 @@ class Workforce(BaseNode):
                     role=role_or_desc,
                 )
-        # Warning messages for default model usage
-        if coordinator_agent_kwargs is None:
-            logger.warning(
-                "No coordinator_agent_kwargs provided. Using default "
-                "ChatAgent settings (ModelPlatformType.DEFAULT, "
-                "ModelType.DEFAULT). To customize the coordinator agent "
-                "that assigns tasks and handles failures, pass a dictionary "
-                "with ChatAgent parameters, e.g.: {'model': your_model, "
-                "'tools': your_tools, 'token_limit': 8000}. See ChatAgent "
-                "documentation for all available options."
-            )
-        if task_agent_kwargs is None:
-            logger.warning(
-                "No task_agent_kwargs provided. Using default ChatAgent "
-                "settings (ModelPlatformType.DEFAULT, ModelType.DEFAULT). "
-                "To customize the task planning agent that "
-                "decomposes/composes tasks, pass a dictionary with "
-                "ChatAgent parameters, e.g.: {'model': your_model, "
-                "'token_limit': 16000}. See ChatAgent documentation for "
-                "all available options."
-            )
-        if new_worker_agent_kwargs is None:
-            logger.warning(
-                "No new_worker_agent_kwargs provided. Workers created at "
-                "runtime will use default ChatAgent settings with "
-                "SearchToolkit, CodeExecutionToolkit, and ThinkingToolkit. "
-                "To customize runtime worker creation, pass a dictionary "
-                "with ChatAgent parameters, e.g.: {'model': your_model, "
-                "'tools': your_tools}. See ChatAgent documentation for all "
-                "available options."
-            )
-        if self.share_memory:
-            logger.info(
-                "Shared memory enabled. All agents will share their complete "
-                "conversation history and function-calling trajectory for "
-                "better context continuity during task handoffs."
-            )
+        # Set up coordinator agent with default system message
         coord_agent_sys_msg = BaseMessage.make_assistant_message(
             role_name="Workforce Manager",
-            content="You are coordinating a group of workers. A worker can be "
-            "a group of agents or a single agent. Each worker is "
+            content="You are coordinating a group of workers. A worker "
+            "can be a group of agents or a single agent. Each worker is "
             "created to solve a specific kind of task. Your job "
             "includes assigning tasks to a existing worker, creating "
             "a new worker for a task, etc.",
         )
-        self.coordinator_agent = ChatAgent(
-            coord_agent_sys_msg,
-            **(coordinator_agent_kwargs or {}),
-        )
+        if coordinator_agent is None:
+            logger.warning(
+                "No coordinator_agent provided. Using default "
+                "ChatAgent settings (ModelPlatformType.DEFAULT, "
+                "ModelType.DEFAULT) with default system message."
+            )
+            self.coordinator_agent = ChatAgent(coord_agent_sys_msg)
+        else:
+            logger.info(
+                "Custom coordinator_agent provided. Preserving user's "
+                "system message and appending workforce coordination "
+                "instructions to ensure proper functionality."
+            )
+            if coordinator_agent.system_message is not None:
+                user_sys_msg_content = coordinator_agent.system_message.content
+                combined_content = (
+                    f"{user_sys_msg_content}\n\n"
+                    f"{coord_agent_sys_msg.content}"
+                )
+                combined_sys_msg = BaseMessage.make_assistant_message(
+                    role_name=coordinator_agent.system_message.role_name,
+                    content=combined_content,
+                )
+            else:
+                combined_sys_msg = coord_agent_sys_msg
+            # Create a new agent with the provided agent's configuration
+            # but with the combined system message
+            self.coordinator_agent = ChatAgent(
+                system_message=combined_sys_msg,
+                model=coordinator_agent.model_backend,
+                memory=coordinator_agent.memory,
+                message_window_size=getattr(
+                    coordinator_agent.memory, "window_size", None
+                ),
+                token_limit=getattr(
+                    coordinator_agent.memory.get_context_creator(),
+                    "token_limit",
+                    None,
+                ),
+                output_language=coordinator_agent.output_language,
+                tools=[
+                    tool.func
+                    for tool in coordinator_agent._internal_tools.values()
+                ],
+                external_tools=[
+                    schema
+                    for schema in coordinator_agent._external_tool_schemas.values()  # noqa: E501
+                ],
+                response_terminators=coordinator_agent.response_terminators,
+                max_iteration=coordinator_agent.max_iteration,
+                stop_event=coordinator_agent.stop_event,
+            )
+        # Set up task agent with default system message and required tools
         task_sys_msg = BaseMessage.make_assistant_message(
             role_name="Task Planner",
             content="You are going to compose and decompose tasks. Keep "
@@ -275,13 +317,83 @@ class Workforce(BaseNode):
             "of agents. This ensures efficient execution by minimizing "
             "context switching between agents.",
         )
-        _task_agent_kwargs = dict(task_agent_kwargs or {})
-        extra_tools = TaskPlanningToolkit().get_tools()
-        _task_agent_kwargs["tools"] = [
-            *_task_agent_kwargs.get("tools", []),
-            *extra_tools,
-        ]
-        self.task_agent = ChatAgent(task_sys_msg, **_task_agent_kwargs)
+        task_planning_tools = TaskPlanningToolkit().get_tools()
+        if task_agent is None:
+            logger.warning(
+                "No task_agent provided. Using default ChatAgent "
+                "settings (ModelPlatformType.DEFAULT, ModelType.DEFAULT) "
+                "with default system message and TaskPlanningToolkit."
+            )
+            self.task_agent = ChatAgent(
+                task_sys_msg,
+                tools=TaskPlanningToolkit().get_tools(),  # type: ignore[arg-type]
+            )
+        else:
+            logger.info(
+                "Custom task_agent provided. Preserving user's "
+                "system message and appending task planning "
+                "instructions to ensure proper functionality."
+            )
+            if task_agent.system_message is not None:
+                user_task_sys_msg_content = task_agent.system_message.content
+                combined_task_content = (
+                    f"{user_task_sys_msg_content}\n\n"
+                    f"{task_sys_msg.content}"
+                )
+                combined_task_sys_msg = BaseMessage.make_assistant_message(
+                    role_name=task_agent.system_message.role_name,
+                    content=combined_task_content,
+                )
+            else:
+                combined_task_sys_msg = task_sys_msg
+            # Since ChatAgent constructor uses a dictionary with
+            # function names as keys, we don't need to manually deduplicate.
+            combined_tools = [
+                tool.func for tool in task_agent._internal_tools.values()
+            ] + [tool.func for tool in task_planning_tools]
+            # Create a new agent with the provided agent's configuration
+            # but with the combined system message and tools
+            self.task_agent = ChatAgent(
+                system_message=combined_task_sys_msg,
+                model=task_agent.model_backend,
+                memory=task_agent.memory,
+                message_window_size=getattr(
+                    task_agent.memory, "window_size", None
+                ),
+                token_limit=getattr(
+                    task_agent.memory.get_context_creator(),
+                    "token_limit",
+                    None,
+                ),
+                output_language=task_agent.output_language,
+                tools=combined_tools,
+                external_tools=[
+                    schema
+                    for schema in task_agent._external_tool_schemas.values()
+                ],
+                response_terminators=task_agent.response_terminators,
+                max_iteration=task_agent.max_iteration,
+                stop_event=task_agent.stop_event,
+            )
+        if new_worker_agent is None:
+            logger.info(
+                "No new_worker_agent provided. Workers created at runtime "
+                "will use default ChatAgent settings with SearchToolkit, "
+                "CodeExecutionToolkit, and ThinkingToolkit. To customize "
+                "runtime worker creation, pass a ChatAgent instance."
+            )
+        if self.share_memory:
+            logger.info(
+                "Shared memory enabled. All agents will share their complete "
+                "conversation history and function-calling trajectory for "
+                "better context continuity during task handoffs."
+            )
     def __repr__(self):
         return (
@@ -417,6 +529,15 @@ class Workforce(BaseNode):
         except Exception as e:
             logger.warning(f"Error synchronizing shared memory: {e}")
+    def _cleanup_task_tracking(self, task_id: str) -> None:
+        r"""Clean up tracking data for a task to prevent memory leaks.
+        Args:
+            task_id (str): The ID of the task to clean up.
+        """
+        if task_id in self._task_start_times:
+            del self._task_start_times[task_id]
     def _decompose_task(self, task: Task) -> List[Task]:
         r"""Decompose the task into subtasks. This method will also set the
         relationship between the task and its subtasks.
@@ -1004,7 +1125,7 @@ class Workforce(BaseNode):
         self,
         description: str,
         worker: ChatAgent,
-        pool_max_size: int = 10,
+        pool_max_size: int = DEFAULT_WORKER_POOL_SIZE,
     ) -> Workforce:
         r"""Add a worker node to the workforce that uses a single agent.
@@ -1133,7 +1254,7 @@ class Workforce(BaseNode):
             except RuntimeError:
                 asyncio.run(self._async_reset())
-        if hasattr(self, 'logger') and self.metrics_logger is not None:
+        if hasattr(self, 'metrics_logger') and self.metrics_logger is not None:
             self.metrics_logger.reset_task_data()
         else:
             self.metrics_logger = WorkforceLogger(workforce_id=self.node_id)
@@ -1225,8 +1346,16 @@ class Workforce(BaseNode):
             )
             return TaskAssignResult(assignments=[])
-        result_dict = json.loads(response.msg.content, parse_int=str)
-        return TaskAssignResult(**result_dict)
+        try:
+            result_dict = json.loads(response.msg.content, parse_int=str)
+            return TaskAssignResult(**result_dict)
+        except json.JSONDecodeError as e:
+            logger.error(
+                f"JSON parsing error in task assignment: Invalid response "
+                f"format - {e}. Response content: "
+                f"{response.msg.content[:50]}..."
+            )
+            return TaskAssignResult(assignments=[])
     def _validate_assignments(
         self, assignments: List[TaskAssignment], valid_ids: Set[str]
@@ -1408,12 +1537,26 @@ class Workforce(BaseNode):
         # Record the start time when a task is posted
         self._task_start_times[task.id] = time.time()
+        task.assigned_worker_id = assignee_id
         if self.metrics_logger:
             self.metrics_logger.log_task_started(
                 task_id=task.id, worker_id=assignee_id
             )
-        self._in_flight_tasks += 1
-        await self._channel.post_task(task, self.node_id, assignee_id)
+        try:
+            self._in_flight_tasks += 1
+            await self._channel.post_task(task, self.node_id, assignee_id)
+            logger.debug(
+                f"Posted task {task.id} to {assignee_id}. "
+                f"In-flight tasks: {self._in_flight_tasks}"
+            )
+        except Exception as e:
+            # Decrement counter if posting failed
+            self._in_flight_tasks -= 1
+            logger.error(
+                f"Failed to post task {task.id} to {assignee_id}: {e}"
+            )
     async def _post_dependency(self, dependency: Task) -> None:
         await self._channel.post_dependency(dependency, self.node_id)
@@ -1450,8 +1593,19 @@ class Workforce(BaseNode):
                 "with various tasks.",
             )
         else:
-            result_dict = json.loads(response.msg.content)
-            new_node_conf = WorkerConf(**result_dict)
+            try:
+                result_dict = json.loads(response.msg.content)
+                new_node_conf = WorkerConf(**result_dict)
+            except json.JSONDecodeError as e:
+                logger.error(
+                    f"JSON parsing error in worker creation: Invalid response "
+                    f"format - {e}. Response content: "
+                    f"{response.msg.content[:100]}..."
+                )
+                raise RuntimeError(
+                    f"Failed to create worker for task {task.id}: "
+                    f"Coordinator agent returned malformed JSON response. "
+                )
         new_agent = self._create_new_agent(
             new_node_conf.role,
@@ -1461,7 +1615,7 @@ class Workforce(BaseNode):
         new_node = SingleAgentWorker(
             description=new_node_conf.description,
             worker=new_agent,
-            pool_max_size=10,  # TODO: make this configurable
+            pool_max_size=DEFAULT_WORKER_POOL_SIZE,
         )
         new_node.set_channel(self._channel)
@@ -1486,25 +1640,25 @@ class Workforce(BaseNode):
             content=sys_msg,
         )
-        if self.new_worker_agent_kwargs is not None:
-            return ChatAgent(worker_sys_msg, **self.new_worker_agent_kwargs)
-        # Default tools for a new agent
-        function_list = [
-            SearchToolkit().search_duckduckgo,
-            *CodeExecutionToolkit().get_tools(),
-            *ThinkingToolkit().get_tools(),
-        ]
+        if self.new_worker_agent is not None:
+            return self.new_worker_agent
+        else:
+            # Default tools for a new agent
+            function_list = [
+                SearchToolkit().search_duckduckgo,
+                *CodeExecutionToolkit().get_tools(),
+                *ThinkingToolkit().get_tools(),
+            ]
-        model = ModelFactory.create(
-            model_platform=ModelPlatformType.DEFAULT,
-            model_type=ModelType.DEFAULT,
-            model_config_dict={"temperature": 0},
-        )
+            model = ModelFactory.create(
+                model_platform=ModelPlatformType.DEFAULT,
+                model_type=ModelType.DEFAULT,
+                model_config_dict={"temperature": 0},
+            )
-        return ChatAgent(worker_sys_msg, model=model, tools=function_list)  # type: ignore[arg-type]
+            return ChatAgent(worker_sys_msg, model=model, tools=function_list)  # type: ignore[arg-type]
-    async def _get_returned_task(self) -> Task:
+    async def _get_returned_task(self) -> Optional[Task]:
         r"""Get the task that's published by this node and just get returned
         from the assignee. Includes timeout handling to prevent indefinite
         waiting.
@@ -1513,17 +1667,28 @@ class Workforce(BaseNode):
             # Add timeout to prevent indefinite waiting
             return await asyncio.wait_for(
                 self._channel.get_returned_task_by_publisher(self.node_id),
-                timeout=180.0,  # 3 minute timeout
+                timeout=TASK_TIMEOUT_SECONDS,
             )
-        except asyncio.TimeoutError:
-            logger.warning(
-                f"Timeout waiting for returned task in "
+        except Exception as e:
+            # Decrement in-flight counter to prevent hanging
+            if self._in_flight_tasks > 0:
+                self._in_flight_tasks -= 1
+            error_msg = (
+                f"Error getting returned task {e} in "
                 f"workforce {self.node_id}. "
-                f"This may indicate an issue with async tool execution. "
                 f"Current pending tasks: {len(self._pending_tasks)}, "
                 f"In-flight tasks: {self._in_flight_tasks}"
             )
-            raise
+            logger.warning(error_msg)
+            if self._pending_tasks and self._assignees:
+                for task in self._pending_tasks:
+                    if task.id in self._assignees:
+                        # Mark this real task as failed
+                        task.set_state(TaskState.FAILED)
+                        return task
+            return None
     async def _post_ready_tasks(self) -> None:
         r"""Checks for unassigned tasks, assigns them, and then posts any
@@ -1563,6 +1728,9 @@ class Workforce(BaseNode):
         # Step 2: Iterate through all pending tasks and post those that are
         # ready
         posted_tasks = []
+        # Pre-compute completed task IDs set for O(1) lookups
+        completed_task_ids = {t.id for t in self._completed_tasks}
         for task in self._pending_tasks:
             # A task must be assigned to be considered for posting
             if task.id in self._task_dependencies:
@@ -1570,8 +1738,7 @@ class Workforce(BaseNode):
                 # Check if all dependencies for this task are in the completed
                 # set
                 if all(
-                    dep_id in {t.id for t in self._completed_tasks}
-                    for dep_id in dependencies
+                    dep_id in completed_task_ids for dep_id in dependencies
                 ):
                     assignee_id = self._assignees[task.id]
                     logger.debug(
@@ -1593,17 +1760,67 @@ class Workforce(BaseNode):
     async def _handle_failed_task(self, task: Task) -> bool:
         task.failure_count += 1
+        # Determine detailed failure information
+        if is_task_result_insufficient(task):
+            failure_reason = "Worker returned unhelpful "
+            f"response: {task.result[:100] if task.result else ''}..."
+        else:
+            failure_reason = "Task marked as failed despite "
+            f"having result: {(task.result or '')[:100]}..."
+        # Add context about the worker and task
+        worker_id = task.assigned_worker_id or "unknown"
+        worker_info = f" (assigned to worker: {worker_id})"
+        detailed_error = f"{failure_reason}{worker_info}"
+        logger.error(
+            f"Task {task.id} failed (attempt "
+            f"{task.failure_count}/3): {detailed_error}"
+        )
         if self.metrics_logger:
-            worker_id = self._assignees.get(task.id)
             self.metrics_logger.log_task_failed(
                 task_id=task.id,
                 worker_id=worker_id,
-                error_message=task.result or "Task execution failed",
+                error_message=detailed_error,
                 error_type="TaskFailure",
-                metadata={'failure_count': task.failure_count},
+                metadata={
+                    'failure_count': task.failure_count,
+                    'task_content': task.content,
+                    'result_length': len(task.result) if task.result else 0,
+                },
             )
-        if task.failure_count > 3:
+        # Check for immediate halt conditions - return immediately if we
+        # should halt
+        if task.failure_count >= MAX_TASK_RETRIES:
+            logger.error(
+                f"Task {task.id} has exceeded maximum retry attempts "
+                f"({MAX_TASK_RETRIES}). Final failure "
+                f"reason: {detailed_error}. "
+                f"Task content: '{task.content[:100]}...'"
+            )
+            self._cleanup_task_tracking(task.id)
+            # Mark task as completed for dependency tracking before halting
+            self._completed_tasks.append(task)
+            if task.id in self._assignees:
+                await self._channel.archive_task(task.id)
+            return True
+        # If too many tasks are failing rapidly, also halt to prevent infinite
+        # loops
+        if len(self._pending_tasks) > MAX_PENDING_TASKS_LIMIT:
+            logger.error(
+                f"Too many pending tasks ({len(self._pending_tasks)} > "
+                f"{MAX_PENDING_TASKS_LIMIT}). Halting to prevent task "
+                f"explosion. Last failed task: {task.id}"
+            )
+            self._cleanup_task_tracking(task.id)
+            # Mark task as completed for dependency tracking before halting
+            self._completed_tasks.append(task)
+            if task.id in self._assignees:
+                await self._channel.archive_task(task.id)
             return True
         if task.get_depth() > 3:
@@ -1658,8 +1875,6 @@ class Workforce(BaseNode):
         # Mark task as completed for dependency tracking
         self._completed_tasks.append(task)
-        # Post next ready tasks
         # Sync shared memory after task completion to share knowledge
         if self.share_memory:
             logger.info(
@@ -1673,7 +1888,7 @@ class Workforce(BaseNode):
     async def _handle_completed_task(self, task: Task) -> None:
         if self.metrics_logger:
-            worker_id = self._assignees.get(task.id, "unknown")
+            worker_id = task.assigned_worker_id or "unknown"
             processing_time_seconds = None
             token_usage = None
@@ -1682,7 +1897,7 @@ class Workforce(BaseNode):
                 processing_time_seconds = (
                     time.time() - self._task_start_times[task.id]
                 )
-                del self._task_start_times[task.id]  # Prevent memory leaks
+                self._cleanup_task_tracking(task.id)
             elif (
                 task.additional_info is not None
                 and 'processing_time_seconds' in task.additional_info
@@ -1876,8 +2091,19 @@ class Workforce(BaseNode):
                         )
                         self._last_snapshot_time = time.time()
-                # Get returned task (this may block until a task is returned)
+                # Get returned task
                 returned_task = await self._get_returned_task()
+                # If no task was returned, continue
+                if returned_task is None:
+                    logger.debug(
+                        f"No task returned in workforce {self.node_id}. "
+                        f"Pending: {len(self._pending_tasks)}, "
+                        f"In-flight: {self._in_flight_tasks}"
+                    )
+                    await self._post_ready_tasks()
+                    continue
                 self._in_flight_tasks -= 1
                 # Check for stop request after getting task
@@ -1887,22 +2113,72 @@ class Workforce(BaseNode):
                 # Process the returned task based on its state
                 if returned_task.state == TaskState.DONE:
-                    print(
-                        f"{Fore.CYAN}🎯 Task {returned_task.id} completed "
-                        f"successfully.{Fore.RESET}"
-                    )
-                    await self._handle_completed_task(returned_task)
+                    # Check if the "completed" task actually failed to provide
+                    # useful results
+                    if is_task_result_insufficient(returned_task):
+                        result_preview = (
+                            returned_task.result[:100] + "..."
+                            if returned_task.result
+                            else "No result"
+                        )
+                        logger.warning(
+                            f"Task {returned_task.id} marked as DONE but "
+                            f"result is insufficient. "
+                            f"Treating as failed. Result: '{result_preview}'"
+                        )
+                        returned_task.state = TaskState.FAILED
+                        try:
+                            halt = await self._handle_failed_task(
+                                returned_task
+                            )
+                            if not halt:
+                                continue
+                            print(
+                                f"{Fore.RED}Task {returned_task.id} has "
+                                f"failed for {MAX_TASK_RETRIES} times after "
+                                f"insufficient results, halting the "
+                                f"workforce. Final error: "
+                                f"{returned_task.result or 'Unknown error'}"
+                                f"{Fore.RESET}"
+                            )
+                            await self._graceful_shutdown(returned_task)
+                            break
+                        except Exception as e:
+                            logger.error(
+                                f"Error handling insufficient task result "
+                                f"{returned_task.id}: {e}",
+                                exc_info=True,
+                            )
+                            continue
+                    else:
+                        print(
+                            f"{Fore.CYAN}🎯 Task {returned_task.id} completed "
+                            f"successfully.{Fore.RESET}"
+                        )
+                        await self._handle_completed_task(returned_task)
                 elif returned_task.state == TaskState.FAILED:
-                    halt = await self._handle_failed_task(returned_task)
-                    if not halt:
+                    try:
+                        halt = await self._handle_failed_task(returned_task)
+                        if not halt:
+                            continue
+                        print(
+                            f"{Fore.RED}Task {returned_task.id} has failed "
+                            f"for {MAX_TASK_RETRIES} times, halting "
+                            f"the workforce. Final error: "
+                            f"{returned_task.result or 'Unknown error'}"
+                            f"{Fore.RESET}"
+                        )
+                        # Graceful shutdown instead of immediate break
+                        await self._graceful_shutdown(returned_task)
+                        break
+                    except Exception as e:
+                        logger.error(
+                            f"Error handling failed task "
+                            f"{returned_task.id}: {e}",
+                            exc_info=True,
+                        )
+                        # Continue to prevent hanging
                         continue
-                    print(
-                        f"{Fore.RED}Task {returned_task.id} has failed "
-                        f"for 3 times, halting the workforce.{Fore.RESET}"
-                    )
-                    # Graceful shutdown instead of immediate break
-                    await self._graceful_shutdown(returned_task)
-                    break
                 elif returned_task.state == TaskState.OPEN:
                     # TODO: Add logic for OPEN
                     pass
@@ -1912,7 +2188,18 @@ class Workforce(BaseNode):
                     )
             except Exception as e:
-                logger.error(f"Error processing task: {e}")
+                # Decrement in-flight counter to prevent hanging
+                if self._in_flight_tasks > 0:
+                    self._in_flight_tasks -= 1
+                logger.error(
+                    f"Error processing task in workforce {self.node_id}: {e}"
+                    f"Workforce state - Pending tasks: "
+                    f"{len(self._pending_tasks)}, "
+                    f"In-flight tasks: {self._in_flight_tasks}, "
+                    f"Completed tasks: {len(self._completed_tasks)}"
+                )
                 if self._stop_requested:
                     break
                 # Continue with next iteration unless stop is requested
@@ -1966,11 +2253,38 @@ class Workforce(BaseNode):
         r"""Stop all the child nodes under it. The node itself will be stopped
         by its parent node.
         """
+        # Stop all child nodes first
         for child in self._children:
             if child._running:
                 child.stop()
-        for child_task in self._child_listening_tasks:
-            child_task.cancel()
+        # Cancel child listening tasks
+        if self._child_listening_tasks:
+            try:
+                loop = asyncio.get_running_loop()
+                if loop and not loop.is_closed():
+                    # Create graceful cleanup task
+                    async def cleanup():
+                        await asyncio.sleep(0.1)  # Brief grace period
+                        for task in self._child_listening_tasks:
+                            if not task.done():
+                                task.cancel()
+                        await asyncio.gather(
+                            *self._child_listening_tasks,
+                            return_exceptions=True,
+                        )
+                    self._cleanup_task = loop.create_task(cleanup())
+                else:
+                    # No active loop, cancel immediately
+                    for task in self._child_listening_tasks:
+                        task.cancel()
+            except (RuntimeError, Exception) as e:
+                # Fallback: cancel immediately
+                logger.debug(f"Exception during task cleanup: {e}")
+                for task in self._child_listening_tasks:
+                    task.cancel()
         self._running = False
     def clone(self, with_memory: bool = False) -> 'Workforce':
@@ -1988,28 +2302,17 @@ class Workforce(BaseNode):
         """
         # Create a new instance with the same configuration
-        # Extract the original kwargs from the agents to properly clone them
-        coordinator_kwargs = (
-            getattr(self.coordinator_agent, 'init_kwargs', {}) or {}
-        )
-        task_kwargs = getattr(self.task_agent, 'init_kwargs', {}) or {}
         new_instance = Workforce(
             description=self.description,
-            coordinator_agent_kwargs=coordinator_kwargs.copy(),
-            task_agent_kwargs=task_kwargs.copy(),
-            new_worker_agent_kwargs=self.new_worker_agent_kwargs.copy()
-            if self.new_worker_agent_kwargs
+            coordinator_agent=self.coordinator_agent.clone(with_memory),
+            task_agent=self.task_agent.clone(with_memory),
+            new_worker_agent=self.new_worker_agent.clone(with_memory)
+            if self.new_worker_agent
             else None,
             graceful_shutdown_timeout=self.graceful_shutdown_timeout,
             share_memory=self.share_memory,
         )
-        new_instance.task_agent = self.task_agent.clone(with_memory)
-        new_instance.coordinator_agent = self.coordinator_agent.clone(
-            with_memory
-        )
         for child in self._children:
             if isinstance(child, SingleAgentWorker):
                 cloned_worker = child.worker.clone(with_memory)

camel-ai 0.2.69a7__py3-none-any.whl → 0.2.71a1__py3-none-any.whl

Potentially problematic release.

camel-ai 0.2.69a7-py3-none-any.whl → 0.2.71a1-py3-none-any.whl