camel-ai 0.2.76a14__py3-none-any.whl → 0.2.77__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

@@ -22,6 +22,7 @@ import uuid
22
22
  from collections import deque
23
23
  from enum import Enum
24
24
  from typing import (
25
+ TYPE_CHECKING,
25
26
  Any,
26
27
  Callable,
27
28
  Coroutine,
@@ -36,6 +37,9 @@ from typing import (
36
37
  cast,
37
38
  )
38
39
 
40
+ if TYPE_CHECKING:
41
+ from camel.utils.context_utils import ContextUtility
42
+
39
43
  from colorama import Fore
40
44
 
41
45
  from camel.agents import ChatAgent
@@ -46,19 +50,23 @@ from camel.societies.workforce.base import BaseNode
46
50
  from camel.societies.workforce.prompts import (
47
51
  ASSIGN_TASK_PROMPT,
48
52
  CREATE_NODE_PROMPT,
49
- FAILURE_ANALYSIS_PROMPT,
53
+ FAILURE_ANALYSIS_RESPONSE_FORMAT,
54
+ QUALITY_EVALUATION_RESPONSE_FORMAT,
55
+ TASK_AGENT_SYSTEM_MESSAGE,
56
+ TASK_ANALYSIS_PROMPT,
50
57
  TASK_DECOMPOSE_PROMPT,
51
58
  )
52
59
  from camel.societies.workforce.role_playing_worker import RolePlayingWorker
53
- from camel.societies.workforce.single_agent_worker import SingleAgentWorker
60
+ from camel.societies.workforce.single_agent_worker import (
61
+ SingleAgentWorker,
62
+ )
54
63
  from camel.societies.workforce.structured_output_handler import (
55
64
  StructuredOutputHandler,
56
65
  )
57
66
  from camel.societies.workforce.task_channel import TaskChannel
58
67
  from camel.societies.workforce.utils import (
59
- FailureContext,
60
- RecoveryDecision,
61
68
  RecoveryStrategy,
69
+ TaskAnalysisResult,
62
70
  TaskAssignment,
63
71
  TaskAssignResult,
64
72
  WorkerConf,
@@ -324,8 +332,7 @@ class Workforce(BaseNode):
324
332
  if coordinator_agent.system_message is not None:
325
333
  user_sys_msg_content = coordinator_agent.system_message.content
326
334
  combined_content = (
327
- f"{user_sys_msg_content}\n\n"
328
- f"{coord_agent_sys_msg.content}"
335
+ f"{user_sys_msg_content}\n\n{coord_agent_sys_msg.content}"
329
336
  )
330
337
  combined_sys_msg = BaseMessage.make_assistant_message(
331
338
  role_name=coordinator_agent.system_message.role_name,
@@ -362,7 +369,7 @@ class Workforce(BaseNode):
362
369
  # Set up task agent with default system message and required tools
363
370
  task_sys_msg = BaseMessage.make_assistant_message(
364
371
  role_name="Task Planner",
365
- content="You are going to handle tasks.",
372
+ content=TASK_AGENT_SYSTEM_MESSAGE,
366
373
  )
367
374
  task_planning_tools = TaskPlanningToolkit().get_tools()
368
375
 
@@ -387,8 +394,7 @@ class Workforce(BaseNode):
387
394
  if task_agent.system_message is not None:
388
395
  user_task_sys_msg_content = task_agent.system_message.content
389
396
  combined_task_content = (
390
- f"{user_task_sys_msg_content}\n\n"
391
- f"{task_sys_msg.content}"
397
+ f"{user_task_sys_msg_content}\n\n{task_sys_msg.content}"
392
398
  )
393
399
  combined_task_sys_msg = BaseMessage.make_assistant_message(
394
400
  role_name=task_agent.system_message.role_name,
@@ -450,6 +456,30 @@ class Workforce(BaseNode):
450
456
  "better context continuity during task handoffs."
451
457
  )
452
458
 
459
+ # Shared context utility for workflow management (created lazily)
460
+ self._shared_context_utility: Optional["ContextUtility"] = None
461
+
462
+ # ------------------------------------------------------------------
463
+ # Helper for propagating pause control to externally supplied agents
464
+ # ------------------------------------------------------------------
465
+
466
+ def _get_or_create_shared_context_utility(self) -> "ContextUtility":
467
+ r"""Get or create the shared context utility for workflow management.
468
+
469
+ This method creates the context utility only when needed, avoiding
470
+ unnecessary session folder creation during initialization.
471
+
472
+ Returns:
473
+ ContextUtility: The shared context utility instance.
474
+ """
475
+ if self._shared_context_utility is None:
476
+ from camel.utils.context_utils import ContextUtility
477
+
478
+ self._shared_context_utility = (
479
+ ContextUtility.get_workforce_shared()
480
+ )
481
+ return self._shared_context_utility
482
+
453
483
  def _validate_agent_compatibility(
454
484
  self, agent: ChatAgent, agent_context: str = "agent"
455
485
  ) -> None:
@@ -776,76 +806,124 @@ class Workforce(BaseNode):
776
806
  self._update_dependencies_for_decomposition(task, subtasks)
777
807
  return subtasks
778
808
 
779
- def _analyze_failure(
780
- self, task: Task, error_message: str
781
- ) -> RecoveryDecision:
782
- r"""Analyze a task failure and decide on the best recovery strategy.
809
+ def _analyze_task(
810
+ self,
811
+ task: Task,
812
+ *,
813
+ for_failure: bool,
814
+ error_message: Optional[str] = None,
815
+ ) -> TaskAnalysisResult:
816
+ r"""Unified task analysis for both failures and quality evaluation.
817
+
818
+ This method consolidates the logic for analyzing task failures and
819
+ evaluating task quality, using the unified TASK_ANALYSIS_PROMPT.
783
820
 
784
821
  Args:
785
- task (Task): The failed task
786
- error_message (str): The error message from the failure
822
+ task (Task): The task to analyze
823
+ for_failure (bool): True for failure analysis, False for quality
824
+ evaluation
825
+ error_message (Optional[str]): Error message, required when
826
+ for_failure=True
787
827
 
788
828
  Returns:
789
- RecoveryDecision: The decided recovery strategy with reasoning
829
+ TaskAnalysisResult: Unified analysis result with recovery strategy
830
+ and optional quality metrics
831
+
832
+ Raises:
833
+ ValueError: If for_failure=True but error_message is None
790
834
  """
791
- # First, do a quick smart analysis based on error patterns
792
- error_msg_lower = error_message.lower()
793
- if any(
794
- keyword in error_msg_lower
795
- for keyword in [
796
- 'connection',
797
- 'network',
798
- 'server disconnected',
799
- 'timeout',
800
- 'apiconnectionerror',
835
+ # Validate required parameters
836
+ if for_failure and error_message is None:
837
+ raise ValueError("error_message is required when for_failure=True")
838
+
839
+ # Determine task result and issue-specific analysis based on context
840
+ if for_failure:
841
+ task_result = "N/A (task failed)"
842
+ issue_type = "Task Failure"
843
+ issue_analysis = f"**Error Message:** {error_message}"
844
+ response_format = FAILURE_ANALYSIS_RESPONSE_FORMAT
845
+ result_schema = TaskAnalysisResult
846
+ fallback_values: Dict[str, Any] = {
847
+ "reasoning": "Defaulting to retry due to parsing error",
848
+ "recovery_strategy": RecoveryStrategy.RETRY,
849
+ "modified_task_content": None,
850
+ "issues": [error_message] if error_message else [],
851
+ }
852
+ examples: List[Dict[str, Any]] = [
853
+ {
854
+ "reasoning": "Temporary network error, worth retrying",
855
+ "recovery_strategy": "retry",
856
+ "modified_task_content": None,
857
+ "issues": ["Network timeout"],
858
+ }
801
859
  ]
802
- ):
803
- return RecoveryDecision(
804
- strategy=RecoveryStrategy.RETRY,
805
- reasoning="Network/connection error detected, retrying task",
806
- modified_task_content=None,
860
+ else:
861
+ # Quality evaluation
862
+ task_result = task.result or "No result available"
863
+ issue_type = "Quality Evaluation"
864
+ issue_analysis = (
865
+ "Provide a quality score (0-100) and list any specific "
866
+ "issues found."
807
867
  )
868
+ response_format = QUALITY_EVALUATION_RESPONSE_FORMAT
869
+ result_schema = TaskAnalysisResult
870
+ fallback_values = {
871
+ "reasoning": (
872
+ "Defaulting to acceptable quality due to parsing error"
873
+ ),
874
+ "issues": [],
875
+ "recovery_strategy": None,
876
+ "modified_task_content": None,
877
+ "quality_score": 80,
878
+ }
879
+ examples = [
880
+ {
881
+ "reasoning": (
882
+ "Excellent implementation with comprehensive tests"
883
+ ),
884
+ "issues": [],
885
+ "recovery_strategy": None,
886
+ "modified_task_content": None,
887
+ "quality_score": 98,
888
+ },
889
+ {
890
+ "reasoning": (
891
+ "Implementation incomplete with missing features"
892
+ ),
893
+ "issues": [
894
+ "Incomplete implementation",
895
+ "Missing error handling",
896
+ ],
897
+ "recovery_strategy": "replan",
898
+ "modified_task_content": (
899
+ "Previous attempt was incomplete. "
900
+ "Please implement with: 1) Full feature "
901
+ "coverage, 2) Proper error handling"
902
+ ),
903
+ "quality_score": 45,
904
+ },
905
+ ]
808
906
 
809
- # Create failure context
810
- failure_context = FailureContext(
907
+ # Format the unified analysis prompt
908
+ analysis_prompt = TASK_ANALYSIS_PROMPT.format(
811
909
  task_id=task.id,
812
910
  task_content=task.content,
911
+ task_result=task_result,
813
912
  failure_count=task.failure_count,
814
- error_message=error_message,
815
- worker_id=task.assigned_worker_id,
816
913
  task_depth=task.get_depth(),
817
- additional_info=str(task.additional_info)
818
- if task.additional_info
819
- else None,
820
- )
821
-
822
- # Format the analysis prompt
823
- analysis_prompt = FAILURE_ANALYSIS_PROMPT.format(
824
- task_id=failure_context.task_id,
825
- task_content=failure_context.task_content,
826
- failure_count=failure_context.failure_count,
827
- error_message=failure_context.error_message,
828
- worker_id=failure_context.worker_id or "unknown",
829
- task_depth=failure_context.task_depth,
830
- additional_info=failure_context.additional_info or "None",
914
+ assigned_worker=task.assigned_worker_id or "unknown",
915
+ issue_type=issue_type,
916
+ issue_specific_analysis=issue_analysis,
917
+ response_format=response_format,
831
918
  )
832
919
 
833
920
  try:
834
- # Check if we should use structured handler
835
921
  if self.use_structured_output_handler:
836
- # Use structured handler
837
922
  enhanced_prompt = (
838
923
  self.structured_handler.generate_structured_prompt(
839
924
  base_prompt=analysis_prompt,
840
- schema=RecoveryDecision,
841
- examples=[
842
- {
843
- "strategy": "RETRY",
844
- "reasoning": "Temporary network error, "
845
- "worth retrying",
846
- "modified_task_content": None,
847
- }
848
- ],
925
+ schema=result_schema,
926
+ examples=examples,
849
927
  )
850
928
  )
851
929
 
@@ -854,43 +932,220 @@ class Workforce(BaseNode):
854
932
 
855
933
  result = self.structured_handler.parse_structured_response(
856
934
  response.msg.content if response.msg else "",
857
- schema=RecoveryDecision,
858
- fallback_values={
859
- "strategy": RecoveryStrategy.RETRY,
860
- "reasoning": "Defaulting to retry due to parsing "
861
- "issues",
862
- "modified_task_content": None,
863
- },
935
+ schema=result_schema,
936
+ fallback_values=fallback_values,
864
937
  )
865
- # Ensure we return a RecoveryDecision instance
866
- if isinstance(result, RecoveryDecision):
938
+
939
+ if isinstance(result, TaskAnalysisResult):
867
940
  return result
868
941
  elif isinstance(result, dict):
869
- return RecoveryDecision(**result)
942
+ return result_schema(**result)
870
943
  else:
871
- return RecoveryDecision(
872
- strategy=RecoveryStrategy.RETRY,
873
- reasoning="Failed to parse recovery decision",
874
- modified_task_content=None,
875
- )
944
+ # Fallback based on context
945
+ return TaskAnalysisResult(**fallback_values)
876
946
  else:
877
- # Use existing native structured output code
878
947
  self.task_agent.reset()
879
948
  response = self.task_agent.step(
880
- analysis_prompt, response_format=RecoveryDecision
949
+ analysis_prompt, response_format=result_schema
881
950
  )
882
951
  return response.msg.parsed
883
952
 
884
953
  except Exception as e:
885
954
  logger.warning(
886
- f"Error during failure analysis: {e}, defaulting to RETRY"
955
+ f"Error during task analysis "
956
+ f"({'failure' if for_failure else 'quality'}): {e}, "
957
+ f"using fallback"
887
958
  )
888
- return RecoveryDecision(
889
- strategy=RecoveryStrategy.RETRY,
890
- reasoning=f"Analysis failed due to error: {e!s}, "
891
- f"defaulting to retry",
892
- modified_task_content=None,
959
+ return TaskAnalysisResult(**fallback_values)
960
+
961
+ async def _apply_recovery_strategy(
962
+ self,
963
+ task: Task,
964
+ recovery_decision: TaskAnalysisResult,
965
+ ) -> bool:
966
+ r"""Apply the recovery strategy from a task analysis result.
967
+
968
+ This method centralizes the recovery logic for both execution failures
969
+ and quality-based failures.
970
+
971
+ Args:
972
+ task (Task): The task that needs recovery
973
+ recovery_decision (TaskAnalysisResult): The analysis result with
974
+ recovery strategy
975
+
976
+ Returns:
977
+ bool: True if workforce should halt (e.g., decompose needs
978
+ different handling), False otherwise
979
+ """
980
+ strategy = (
981
+ recovery_decision.recovery_strategy or RecoveryStrategy.RETRY
982
+ )
983
+ action_taken = ""
984
+
985
+ try:
986
+ if strategy == RecoveryStrategy.RETRY:
987
+ # Simply retry the task by reposting it to the same worker
988
+ # Check both _assignees dict and task.assigned_worker_id
989
+ assignee_id = (
990
+ self._assignees.get(task.id) or task.assigned_worker_id
991
+ )
992
+
993
+ if assignee_id:
994
+ # Retry with the same worker - no coordinator call needed
995
+ await self._post_task(task, assignee_id)
996
+ action_taken = f"retried with same worker {assignee_id}"
997
+ logger.info(
998
+ f"Task {task.id} retrying with same worker "
999
+ f"{assignee_id} (no coordinator call)"
1000
+ )
1001
+ else:
1002
+ # No previous assignment exists - find a new assignee
1003
+ logger.info(
1004
+ f"Task {task.id} has no previous assignee, "
1005
+ f"calling coordinator"
1006
+ )
1007
+ batch_result = await self._find_assignee([task])
1008
+ assignment = batch_result.assignments[0]
1009
+ self._assignees[task.id] = assignment.assignee_id
1010
+ await self._post_task(task, assignment.assignee_id)
1011
+ action_taken = (
1012
+ f"retried with new worker {assignment.assignee_id}"
1013
+ )
1014
+
1015
+ elif strategy == RecoveryStrategy.REPLAN:
1016
+ # Modify the task content and retry
1017
+ if recovery_decision.modified_task_content:
1018
+ task.content = recovery_decision.modified_task_content
1019
+ logger.info(f"Task {task.id} content modified for replan")
1020
+
1021
+ # Repost the modified task
1022
+ if task.id in self._assignees:
1023
+ assignee_id = self._assignees[task.id]
1024
+ await self._post_task(task, assignee_id)
1025
+ action_taken = (
1026
+ f"replanned and retried with worker {assignee_id}"
1027
+ )
1028
+ else:
1029
+ # Find a new assignee for the replanned task
1030
+ batch_result = await self._find_assignee([task])
1031
+ assignment = batch_result.assignments[0]
1032
+ self._assignees[task.id] = assignment.assignee_id
1033
+ await self._post_task(task, assignment.assignee_id)
1034
+ action_taken = (
1035
+ f"replanned and assigned to "
1036
+ f"worker {assignment.assignee_id}"
1037
+ )
1038
+
1039
+ elif strategy == RecoveryStrategy.REASSIGN:
1040
+ # Reassign to a different worker
1041
+ old_worker = task.assigned_worker_id
1042
+ logger.info(
1043
+ f"Task {task.id} will be reassigned from worker "
1044
+ f"{old_worker}"
1045
+ )
1046
+
1047
+ # Find a different worker
1048
+ batch_result = await self._find_assignee([task])
1049
+ assignment = batch_result.assignments[0]
1050
+ new_worker = assignment.assignee_id
1051
+
1052
+ # If same worker, force find another
1053
+ if new_worker == old_worker and len(self._children) > 1:
1054
+ logger.info("Same worker selected, finding alternative")
1055
+ # Try to find different worker by adding note to
1056
+ # task content
1057
+ task.content = (
1058
+ f"{task.content}\n\n"
1059
+ f"Note: Previous worker {old_worker} had quality "
1060
+ f"issues. Needs different approach."
1061
+ )
1062
+ batch_result = await self._find_assignee([task])
1063
+ assignment = batch_result.assignments[0]
1064
+ new_worker = assignment.assignee_id
1065
+
1066
+ self._assignees[task.id] = new_worker
1067
+ await self._post_task(task, new_worker)
1068
+ action_taken = f"reassigned from {old_worker} to {new_worker}"
1069
+ logger.info(
1070
+ f"Task {task.id} reassigned from {old_worker} to "
1071
+ f"{new_worker}"
1072
+ )
1073
+
1074
+ elif strategy == RecoveryStrategy.DECOMPOSE:
1075
+ # Decompose the task into subtasks
1076
+ reason = (
1077
+ "failure"
1078
+ if not recovery_decision.is_quality_evaluation
1079
+ else "quality issues"
1080
+ )
1081
+ logger.info(
1082
+ f"Task {task.id} will be decomposed due to {reason}"
1083
+ )
1084
+ subtasks_result = self._decompose_task(task)
1085
+
1086
+ # Handle both streaming and non-streaming results
1087
+ if isinstance(subtasks_result, Generator):
1088
+ subtasks = []
1089
+ for new_tasks in subtasks_result:
1090
+ subtasks.extend(new_tasks)
1091
+ else:
1092
+ subtasks = subtasks_result
1093
+
1094
+ if self.metrics_logger and subtasks:
1095
+ self.metrics_logger.log_task_decomposed(
1096
+ parent_task_id=task.id,
1097
+ subtask_ids=[st.id for st in subtasks],
1098
+ )
1099
+ for subtask in subtasks:
1100
+ self.metrics_logger.log_task_created(
1101
+ task_id=subtask.id,
1102
+ description=subtask.content,
1103
+ parent_task_id=task.id,
1104
+ task_type=subtask.type,
1105
+ metadata=subtask.additional_info,
1106
+ )
1107
+
1108
+ # Insert subtasks at the head of the queue
1109
+ self._pending_tasks.extendleft(reversed(subtasks))
1110
+ await self._post_ready_tasks()
1111
+ action_taken = f"decomposed into {len(subtasks)} subtasks"
1112
+
1113
+ logger.info(
1114
+ f"Task {task.id} decomposed into {len(subtasks)} subtasks"
1115
+ )
1116
+
1117
+ # Sync shared memory after task decomposition
1118
+ if self.share_memory:
1119
+ logger.info(
1120
+ f"Syncing shared memory after task {task.id} "
1121
+ f"decomposition"
1122
+ )
1123
+ self._sync_shared_memory()
1124
+
1125
+ # For decompose, we return early with special handling
1126
+ return True
1127
+
1128
+ elif strategy == RecoveryStrategy.CREATE_WORKER:
1129
+ assignee = await self._create_worker_node_for_task(task)
1130
+ await self._post_task(task, assignee.node_id)
1131
+ action_taken = (
1132
+ f"created new worker {assignee.node_id} and assigned "
1133
+ f"task {task.id} to it"
1134
+ )
1135
+
1136
+ except Exception as e:
1137
+ logger.error(
1138
+ f"Recovery strategy {strategy} failed for task {task.id}: {e}",
1139
+ exc_info=True,
893
1140
  )
1141
+ raise
1142
+
1143
+ logger.debug(
1144
+ f"Task {task.id} recovery: {action_taken}. "
1145
+ f"Strategy: {strategy.value}"
1146
+ )
1147
+
1148
+ return False
894
1149
 
895
1150
  # Human intervention methods
896
1151
  async def _async_pause(self) -> None:
@@ -1660,6 +1915,7 @@ class Workforce(BaseNode):
1660
1915
  description: str,
1661
1916
  worker: ChatAgent,
1662
1917
  pool_max_size: int = DEFAULT_WORKER_POOL_SIZE,
1918
+ enable_workflow_memory: bool = False,
1663
1919
  ) -> Workforce:
1664
1920
  r"""Add a worker node to the workforce that uses a single agent.
1665
1921
  Can be called when workforce is paused to dynamically add workers.
@@ -1669,6 +1925,9 @@ class Workforce(BaseNode):
1669
1925
  worker (ChatAgent): The agent to be added.
1670
1926
  pool_max_size (int): Maximum size of the agent pool.
1671
1927
  (default: :obj:`10`)
1928
+ enable_workflow_memory (bool): Whether to enable workflow memory
1929
+ accumulation. Set to True if you plan to call
1930
+ save_workflow_memories(). (default: :obj:`False`)
1672
1931
 
1673
1932
  Returns:
1674
1933
  Workforce: The workforce node itself.
@@ -1695,6 +1954,8 @@ class Workforce(BaseNode):
1695
1954
  worker=worker,
1696
1955
  pool_max_size=pool_max_size,
1697
1956
  use_structured_output_handler=self.use_structured_output_handler,
1957
+ context_utility=None, # Will be set during save/load operations
1958
+ enable_workflow_memory=enable_workflow_memory,
1698
1959
  )
1699
1960
  self._children.append(worker_node)
1700
1961
 
@@ -1871,6 +2132,237 @@ class Workforce(BaseNode):
1871
2132
  else:
1872
2133
  self.metrics_logger = WorkforceLogger(workforce_id=self.node_id)
1873
2134
 
2135
+ def save_workflow_memories(self) -> Dict[str, str]:
2136
+ r"""Save workflow memories for all SingleAgentWorker instances in the
2137
+ workforce.
2138
+
2139
+ This method iterates through all child workers and triggers workflow
2140
+ saving for SingleAgentWorker instances using their
2141
+ save_workflow_memories()
2142
+ method.
2143
+ Other worker types are skipped.
2144
+
2145
+ Returns:
2146
+ Dict[str, str]: Dictionary mapping worker node IDs to save results.
2147
+ Values are either file paths (success) or error messages
2148
+ (failure).
2149
+
2150
+ Example:
2151
+ >>> workforce = Workforce("My Team")
2152
+ >>> # ... add workers and process tasks ...
2153
+ >>> results = workforce.save_workflow_memories()
2154
+ >>> print(results)
2155
+ {'worker_123': '/path/to/data_analyst_workflow_20250122.md',
2156
+ 'worker_456': 'error: No conversation context available'}
2157
+ """
2158
+ results = {}
2159
+
2160
+ # Get or create shared context utility for this save operation
2161
+ shared_context_utility = self._get_or_create_shared_context_utility()
2162
+
2163
+ for child in self._children:
2164
+ if isinstance(child, SingleAgentWorker):
2165
+ try:
2166
+ # Set the shared context utility for this operation
2167
+ child._shared_context_utility = shared_context_utility
2168
+ child.worker.set_context_utility(shared_context_utility)
2169
+
2170
+ result = child.save_workflow_memories()
2171
+ if result.get("status") == "success":
2172
+ results[child.node_id] = result.get(
2173
+ "file_path", "unknown_path"
2174
+ )
2175
+ else:
2176
+ # Error: check if there's a separate message field,
2177
+ # otherwise use the status itself
2178
+ error_msg = result.get(
2179
+ "message", result.get("status", "Unknown error")
2180
+ )
2181
+ results[child.node_id] = f"error: {error_msg}"
2182
+
2183
+ except Exception as e:
2184
+ results[child.node_id] = f"error: {e!s}"
2185
+ else:
2186
+ # Skip non-SingleAgentWorker types
2187
+ results[child.node_id] = (
2188
+ f"skipped: {type(child).__name__} not supported"
2189
+ )
2190
+
2191
+ logger.info(f"Workflow save completed for {len(results)} workers")
2192
+ return results
2193
+
2194
+ def load_workflow_memories(
2195
+ self,
2196
+ max_files_to_load: int = 3,
2197
+ session_id: Optional[str] = None,
2198
+ ) -> Dict[str, bool]:
2199
+ r"""Load workflow memories for all SingleAgentWorker instances in the
2200
+ workforce.
2201
+
2202
+ This method iterates through all child workers and loads relevant
2203
+ workflow files for SingleAgentWorker instances using their
2204
+ load_workflow_memories()
2205
+ method. Workers match files based on their description names.
2206
+
2207
+ Args:
2208
+ max_files_to_load (int): Maximum number of workflow files to load
2209
+ per worker. (default: :obj:`3`)
2210
+ session_id (Optional[str]): Specific workforce session ID to load
2211
+ from. If None, searches across all sessions.
2212
+ (default: :obj:`None`)
2213
+
2214
+ Returns:
2215
+ Dict[str, bool]: Dictionary mapping worker node IDs to load
2216
+ success status.
2217
+ True indicates successful loading, False indicates failure.
2218
+
2219
+ Example:
2220
+ >>> workforce = Workforce("My Team")
2221
+ >>> workforce.add_single_agent_worker(
2222
+ ... "data_analyst", analyst_agent
2223
+ ... )
2224
+ >>> success_status = workforce.load_workflow_memories()
2225
+ >>> print(success_status)
2226
+ {'worker_123': True} # Successfully loaded workflows for
2227
+ # data_analyst
2228
+ """
2229
+ results = {}
2230
+
2231
+ # For loading, we don't create a new session - instead we search
2232
+ # existing ones
2233
+ # Each worker will search independently across all existing sessions
2234
+
2235
+ # First, load workflows for SingleAgentWorker instances
2236
+ for child in self._children:
2237
+ if isinstance(child, SingleAgentWorker):
2238
+ try:
2239
+ # For loading, don't set shared context utility
2240
+ # Let each worker search across existing sessions
2241
+ success = child.load_workflow_memories(
2242
+ max_files_to_load=max_files_to_load,
2243
+ session_id=session_id,
2244
+ )
2245
+ results[child.node_id] = success
2246
+
2247
+ except Exception as e:
2248
+ logger.error(
2249
+ f"Failed to load workflow for {child.node_id}: {e!s}"
2250
+ )
2251
+ results[child.node_id] = False
2252
+ else:
2253
+ # Skip non-SingleAgentWorker types
2254
+ results[child.node_id] = False
2255
+
2256
+ # Load aggregated workflow summaries for coordinator and task agents
2257
+ self._load_management_agent_workflows(max_files_to_load, session_id)
2258
+
2259
+ logger.info(f"Workflow load completed for {len(results)} workers")
2260
+ return results
2261
+
2262
+ def _load_management_agent_workflows(
2263
+ self, max_files_to_load: int, session_id: Optional[str] = None
2264
+ ) -> None:
2265
+ r"""Load workflow summaries for coordinator and task planning agents.
2266
+
2267
+ This method loads aggregated workflow summaries to help:
2268
+ - Coordinator agent: understand task assignment patterns and worker
2269
+ capabilities
2270
+ - Task agent: understand task decomposition patterns and
2271
+ successful strategies
2272
+
2273
+ Args:
2274
+ max_files_to_load (int): Maximum number of workflow files to load.
2275
+ session_id (Optional[str]): Specific session ID to load from.
2276
+ If None, searches across all sessions.
2277
+ """
2278
+ try:
2279
+ import glob
2280
+ import os
2281
+ from pathlib import Path
2282
+
2283
+ from camel.utils.context_utils import ContextUtility
2284
+
2285
+ # For loading management workflows, search across all sessions
2286
+ camel_workdir = os.environ.get("CAMEL_WORKDIR")
2287
+ if camel_workdir:
2288
+ base_dir = os.path.join(camel_workdir, "workforce_workflows")
2289
+ else:
2290
+ base_dir = "workforce_workflows"
2291
+
2292
+ # Search for workflow files in specified or all session directories
2293
+ if session_id:
2294
+ search_path = str(
2295
+ Path(base_dir) / session_id / "*_workflow*.md"
2296
+ )
2297
+ else:
2298
+ search_path = str(Path(base_dir) / "*" / "*_workflow*.md")
2299
+ workflow_files = glob.glob(search_path)
2300
+
2301
+ if not workflow_files:
2302
+ logger.info(
2303
+ "No workflow files found for management agent context"
2304
+ )
2305
+ return
2306
+
2307
+ # Sort by modification time (most recent first)
2308
+ workflow_files.sort(
2309
+ key=lambda x: os.path.getmtime(x), reverse=True
2310
+ )
2311
+
2312
+ # Load workflows for coordinator agent (up to 5 most recent)
2313
+ coordinator_loaded = 0
2314
+ for file_path in workflow_files[:max_files_to_load]:
2315
+ try:
2316
+ filename = os.path.basename(file_path).replace('.md', '')
2317
+ session_dir = os.path.dirname(file_path)
2318
+ session_id = os.path.basename(session_dir)
2319
+
2320
+ # Use shared context utility with specific session
2321
+ temp_utility = ContextUtility.get_workforce_shared(
2322
+ session_id
2323
+ )
2324
+
2325
+ status = temp_utility.load_markdown_context_to_memory(
2326
+ self.coordinator_agent, filename
2327
+ )
2328
+ if "Context appended" in status:
2329
+ coordinator_loaded += 1
2330
+ except Exception as e:
2331
+ logger.warning(
2332
+ f"Failed to load coordinator workflow {file_path}: {e}"
2333
+ )
2334
+
2335
+ # Load workflows for task agent (up to 3 most recent)
2336
+ task_agent_loaded = 0
2337
+ for file_path in workflow_files[:max_files_to_load]:
2338
+ try:
2339
+ filename = os.path.basename(file_path).replace('.md', '')
2340
+ session_dir = os.path.dirname(file_path)
2341
+ session_id = os.path.basename(session_dir)
2342
+
2343
+ # Use shared context utility with specific session
2344
+ temp_utility = ContextUtility.get_workforce_shared(
2345
+ session_id
2346
+ )
2347
+
2348
+ status = temp_utility.load_markdown_context_to_memory(
2349
+ self.task_agent, filename
2350
+ )
2351
+ if "Context appended" in status:
2352
+ task_agent_loaded += 1
2353
+ except Exception as e:
2354
+ logger.warning(
2355
+ f"Failed to load task agent workflow {file_path}: {e}"
2356
+ )
2357
+
2358
+ logger.info(
2359
+ f"Loaded {coordinator_loaded} workflows for coordinator, "
2360
+ f"{task_agent_loaded} workflows for task agent"
2361
+ )
2362
+
2363
+ except Exception as e:
2364
+ logger.error(f"Error loading management agent workflows: {e}")
2365
+
1874
2366
  @check_if_running(False)
1875
2367
  def set_channel(self, channel: TaskChannel) -> None:
1876
2368
  r"""Set the channel for the node and all the child nodes under it."""
@@ -2380,8 +2872,7 @@ class Workforce(BaseNode):
2380
2872
  "worker creation"
2381
2873
  )
2382
2874
  new_node_conf = WorkerConf(
2383
- description=f"Fallback worker for task: "
2384
- f"{task.content}",
2875
+ description=f"Fallback worker for task: {task.content}",
2385
2876
  role="General Assistant",
2386
2877
  sys_msg="You are a general assistant that can help "
2387
2878
  "with various tasks.",
@@ -2391,7 +2882,7 @@ class Workforce(BaseNode):
2391
2882
  response.msg.content,
2392
2883
  schema=WorkerConf,
2393
2884
  fallback_values={
2394
- "description": f"Worker for task: " f"{task.content}",
2885
+ "description": f"Worker for task: {task.content}",
2395
2886
  "role": "Task Specialist",
2396
2887
  "sys_msg": f"You are a specialist for: {task.content}",
2397
2888
  },
@@ -2419,8 +2910,7 @@ class Workforce(BaseNode):
2419
2910
  )
2420
2911
  # Create a fallback worker configuration
2421
2912
  new_node_conf = WorkerConf(
2422
- description=f"Fallback worker for "
2423
- f"task: {task.content}",
2913
+ description=f"Fallback worker for task: {task.content}",
2424
2914
  role="General Assistant",
2425
2915
  sys_msg="You are a general assistant that can help "
2426
2916
  "with various tasks.",
@@ -2619,20 +3109,119 @@ class Workforce(BaseNode):
2619
3109
  f"Assigning task: {e}"
2620
3110
  )
2621
3111
  dependencies = self._task_dependencies[task.id]
2622
- # Check if all dependencies for this task are in the completed
2623
- # set and their state is DONE
2624
- if all(
2625
- dep_id in completed_tasks_info
2626
- and completed_tasks_info[dep_id] == TaskState.DONE
2627
- for dep_id in dependencies
2628
- ):
2629
- assignee_id = self._assignees[task.id]
2630
- logger.debug(
2631
- f"Posting task {task.id} to assignee {assignee_id}. "
2632
- f"Dependencies met."
3112
+
3113
+ # Check if all dependencies are in completed state
3114
+ all_deps_completed = all(
3115
+ dep_id in completed_tasks_info for dep_id in dependencies
3116
+ )
3117
+
3118
+ # Only proceed with dependency checks if all deps are completed
3119
+ if all_deps_completed:
3120
+ # Check if all dependencies succeeded (state is DONE)
3121
+ all_deps_done = all(
3122
+ completed_tasks_info[dep_id] == TaskState.DONE
3123
+ for dep_id in dependencies
2633
3124
  )
2634
- await self._post_task(task, assignee_id)
2635
- posted_tasks.append(task)
3125
+
3126
+ # Check if any dependency failed
3127
+ any_dep_failed = any(
3128
+ completed_tasks_info[dep_id] == TaskState.FAILED
3129
+ for dep_id in dependencies
3130
+ )
3131
+
3132
+ if all_deps_done:
3133
+ # All dependencies completed successfully - post the
3134
+ # task
3135
+ assignee_id = self._assignees[task.id]
3136
+ logger.debug(
3137
+ f"Posting task {task.id} to "
3138
+ f"assignee {assignee_id}. "
3139
+ f"Dependencies met."
3140
+ )
3141
+ await self._post_task(task, assignee_id)
3142
+ posted_tasks.append(task)
3143
+ elif any_dep_failed:
3144
+ # Check if any failed dependencies can still be retried
3145
+ failed_deps = [
3146
+ dep_id
3147
+ for dep_id in dependencies
3148
+ if completed_tasks_info[dep_id] == TaskState.FAILED
3149
+ ]
3150
+
3151
+ # Check if any failed dependency is still retryable
3152
+ failed_tasks_with_retry_potential = []
3153
+ permanently_failed_deps = []
3154
+
3155
+ for dep_id in failed_deps:
3156
+ # Find the failed dependency task
3157
+ failed_task = next(
3158
+ (
3159
+ t
3160
+ for t in self._completed_tasks
3161
+ if t.id == dep_id
3162
+ ),
3163
+ None,
3164
+ )
3165
+ if (
3166
+ failed_task
3167
+ and failed_task.failure_count
3168
+ < MAX_TASK_RETRIES
3169
+ ):
3170
+ failed_tasks_with_retry_potential.append(
3171
+ dep_id
3172
+ )
3173
+ else:
3174
+ permanently_failed_deps.append(dep_id)
3175
+
3176
+ # Only fail the task if ALL dependencies are
3177
+ # permanently failed
3178
+ if (
3179
+ permanently_failed_deps
3180
+ and not failed_tasks_with_retry_potential
3181
+ ):
3182
+ logger.error(
3183
+ f"Task {task.id} cannot proceed: dependencies "
3184
+ f"{permanently_failed_deps} have "
3185
+ f"permanently failed. "
3186
+ f"Marking task as failed."
3187
+ )
3188
+ task.state = TaskState.FAILED
3189
+ task.result = (
3190
+ f"Task failed due to permanently "
3191
+ f"failed dependencies: "
3192
+ f"{permanently_failed_deps}"
3193
+ )
3194
+
3195
+ # Log the failure to metrics
3196
+ if self.metrics_logger:
3197
+ self.metrics_logger.log_task_failed(
3198
+ task_id=task.id,
3199
+ worker_id=task.assigned_worker_id
3200
+ or "unknown",
3201
+ error_message=task.result,
3202
+ metadata={
3203
+ 'failure_reason': (
3204
+ 'dependency_failure'
3205
+ ),
3206
+ 'failed_dependencies': (
3207
+ permanently_failed_deps
3208
+ ),
3209
+ },
3210
+ )
3211
+
3212
+ self._completed_tasks.append(task)
3213
+ self._cleanup_task_tracking(task.id)
3214
+ posted_tasks.append(task) # Remove from pending
3215
+ else:
3216
+ # Some dependencies may still be retried, keep
3217
+ # task pending
3218
+ logger.debug(
3219
+ f"Task {task.id} waiting: dependencies "
3220
+ f"{failed_tasks_with_retry_potential} "
3221
+ f"failed but may be retried "
3222
+ f"(attempt < {MAX_TASK_RETRIES})"
3223
+ )
3224
+ # else: Not all dependencies completed yet, skip this task
2636
3225
 
2637
3226
  # Step 3: Remove the posted tasks from the pending list
2638
3227
  for task in posted_tasks:
@@ -2644,21 +3233,30 @@ class Workforce(BaseNode):
2644
3233
  pass
2645
3234
 
2646
3235
  async def _handle_failed_task(self, task: Task) -> bool:
3236
+ r"""Handle a task that failed during execution.
3237
+
3238
+ Args:
3239
+ task (Task): The failed task
3240
+
3241
+ Returns:
3242
+ bool: True if workforce should halt, False otherwise
3243
+ """
2647
3244
  task.failure_count += 1
2648
3245
 
2649
3246
  # Determine detailed failure information
2650
- # Use the actual error/result stored in task.result
2651
3247
  failure_reason = task.result or "Unknown error"
2652
-
2653
- # Add context about the worker and task
2654
3248
  worker_id = task.assigned_worker_id or "unknown"
2655
- worker_info = f" (assigned to worker: {worker_id})"
2656
-
2657
- detailed_error = f"{failure_reason}{worker_info}"
3249
+ detailed_error = f"{failure_reason} (assigned to worker: {worker_id})"
2658
3250
 
2659
3251
  logger.error(
2660
3252
  f"Task {task.id} failed (attempt "
2661
- f"{task.failure_count}/3): {detailed_error}"
3253
+ f"{task.failure_count}/{MAX_TASK_RETRIES}): {detailed_error}"
3254
+ )
3255
+
3256
+ print(
3257
+ f"{Fore.RED}❌ Task {task.id} failed "
3258
+ f"(attempt {task.failure_count}/{MAX_TASK_RETRIES}): "
3259
+ f"{failure_reason}{Fore.RESET}"
2662
3260
  )
2663
3261
 
2664
3262
  if self.metrics_logger:
@@ -2673,24 +3271,20 @@ class Workforce(BaseNode):
2673
3271
  },
2674
3272
  )
2675
3273
 
2676
- # Check for immediate halt conditions - return immediately if we
2677
- # should halt
3274
+ # Check for immediate halt conditions
2678
3275
  if task.failure_count >= MAX_TASK_RETRIES:
2679
3276
  logger.error(
2680
3277
  f"Task {task.id} has exceeded maximum retry attempts "
2681
- f"({MAX_TASK_RETRIES}). Final failure "
2682
- f"reason: {detailed_error}. "
3278
+ f"({MAX_TASK_RETRIES}). Final failure reason: "
3279
+ f"{detailed_error}. "
2683
3280
  f"Task content: '{task.content}'"
2684
3281
  )
2685
3282
  self._cleanup_task_tracking(task.id)
2686
- # Mark task as completed for dependency tracking before halting
2687
3283
  self._completed_tasks.append(task)
2688
3284
  if task.id in self._assignees:
2689
3285
  await self._channel.archive_task(task.id)
2690
3286
  return True
2691
3287
 
2692
- # If too many tasks are failing rapidly, also halt to prevent infinite
2693
- # loops
2694
3288
  if len(self._pending_tasks) > MAX_PENDING_TASKS_LIMIT:
2695
3289
  logger.error(
2696
3290
  f"Too many pending tasks ({len(self._pending_tasks)} > "
@@ -2698,18 +3292,24 @@ class Workforce(BaseNode):
2698
3292
  f"explosion. Last failed task: {task.id}"
2699
3293
  )
2700
3294
  self._cleanup_task_tracking(task.id)
2701
- # Mark task as completed for dependency tracking before halting
2702
3295
  self._completed_tasks.append(task)
2703
3296
  if task.id in self._assignees:
2704
3297
  await self._channel.archive_task(task.id)
2705
3298
  return True
2706
3299
 
2707
3300
  # Use intelligent failure analysis to decide recovery strategy
2708
- recovery_decision = self._analyze_failure(task, detailed_error)
3301
+ recovery_decision = self._analyze_task(
3302
+ task, for_failure=True, error_message=detailed_error
3303
+ )
2709
3304
 
3305
+ strategy_str = (
3306
+ recovery_decision.recovery_strategy.value
3307
+ if recovery_decision.recovery_strategy
3308
+ else "none"
3309
+ )
2710
3310
  logger.info(
2711
3311
  f"Task {task.id} failure "
2712
- f"analysis: {recovery_decision.strategy.value} - "
3312
+ f"analysis: {strategy_str} - "
2713
3313
  f"{recovery_decision.reasoning}"
2714
3314
  )
2715
3315
 
@@ -2718,105 +3318,23 @@ class Workforce(BaseNode):
2718
3318
  await self._channel.archive_task(task.id)
2719
3319
  self._cleanup_task_tracking(task.id)
2720
3320
 
3321
+ # Apply recovery strategy
2721
3322
  try:
2722
- if recovery_decision.strategy == RecoveryStrategy.RETRY:
2723
- # Simply retry the task by reposting it
2724
- if task.id in self._assignees:
2725
- assignee_id = self._assignees[task.id]
2726
- await self._post_task(task, assignee_id)
2727
- action_taken = f"retried with same worker {assignee_id}"
2728
- else:
2729
- # Find a new assignee and retry
2730
- batch_result = await self._find_assignee([task])
2731
- assignment = batch_result.assignments[0]
2732
- self._assignees[task.id] = assignment.assignee_id
2733
- await self._post_task(task, assignment.assignee_id)
2734
- action_taken = (
2735
- f"retried with new worker {assignment.assignee_id}"
2736
- )
2737
-
2738
- elif recovery_decision.strategy == RecoveryStrategy.REPLAN:
2739
- # Modify the task content and retry
2740
- if recovery_decision.modified_task_content:
2741
- task.content = recovery_decision.modified_task_content
2742
- logger.info(f"Task {task.id} content modified for replan")
2743
-
2744
- # Repost the modified task
2745
- if task.id in self._assignees:
2746
- assignee_id = self._assignees[task.id]
2747
- await self._post_task(task, assignee_id)
2748
- action_taken = (
2749
- f"replanned and retried with worker {assignee_id}"
2750
- )
2751
- else:
2752
- # Find a new assignee for the replanned task
2753
- batch_result = await self._find_assignee([task])
2754
- assignment = batch_result.assignments[0]
2755
- self._assignees[task.id] = assignment.assignee_id
2756
- await self._post_task(task, assignment.assignee_id)
2757
- action_taken = (
2758
- f"replanned and assigned to "
2759
- f"worker {assignment.assignee_id}"
2760
- )
2761
-
2762
- elif recovery_decision.strategy == RecoveryStrategy.DECOMPOSE:
2763
- # Decompose the task into subtasks
2764
- subtasks_result = self._decompose_task(task)
2765
-
2766
- # Handle both streaming and non-streaming results
2767
- if isinstance(subtasks_result, Generator):
2768
- # This is a generator (streaming mode)
2769
- subtasks = []
2770
- for new_tasks in subtasks_result:
2771
- subtasks.extend(new_tasks)
2772
- else:
2773
- # This is a regular list (non-streaming mode)
2774
- subtasks = subtasks_result
2775
- if self.metrics_logger and subtasks:
2776
- self.metrics_logger.log_task_decomposed(
2777
- parent_task_id=task.id,
2778
- subtask_ids=[st.id for st in subtasks],
2779
- )
2780
- for subtask in subtasks:
2781
- self.metrics_logger.log_task_created(
2782
- task_id=subtask.id,
2783
- description=subtask.content,
2784
- parent_task_id=task.id,
2785
- task_type=subtask.type,
2786
- metadata=subtask.additional_info,
2787
- )
2788
- # Insert packets at the head of the queue
2789
- self._pending_tasks.extendleft(reversed(subtasks))
2790
-
2791
- await self._post_ready_tasks()
2792
- action_taken = f"decomposed into {len(subtasks)} subtasks"
2793
-
2794
- logger.debug(
2795
- f"Task {task.id} failed and was {action_taken}. "
2796
- f"Dependencies updated for subtasks."
2797
- )
2798
-
2799
- # Sync shared memory after task decomposition
2800
- if self.share_memory:
2801
- logger.info(
2802
- f"Syncing shared memory after "
2803
- f"task {task.id} decomposition"
2804
- )
2805
- self._sync_shared_memory()
3323
+ is_decompose = await self._apply_recovery_strategy(
3324
+ task, recovery_decision
3325
+ )
2806
3326
 
2807
- # Check if any pending tasks are now ready to execute
2808
- await self._post_ready_tasks()
3327
+ # For decompose, we handle it specially
3328
+ if is_decompose:
3329
+ # Task was decomposed, add to completed tasks
3330
+ self._completed_tasks.append(task)
2809
3331
  return False
2810
3332
 
2811
- elif recovery_decision.strategy == RecoveryStrategy.CREATE_WORKER:
2812
- assignee = await self._create_worker_node_for_task(task)
2813
- await self._post_task(task, assignee.node_id)
2814
- action_taken = (
2815
- f"created new worker {assignee.node_id} and assigned "
2816
- f"task {task.id} to it"
2817
- )
2818
3333
  except Exception as e:
2819
- logger.error(f"Recovery strategy failed for task {task.id}: {e}")
3334
+ logger.error(
3335
+ f"Recovery strategy failed for task {task.id}: {e}",
3336
+ exc_info=True,
3337
+ )
2820
3338
  # If max retries reached, halt the workforce
2821
3339
  if task.failure_count >= MAX_TASK_RETRIES:
2822
3340
  self._completed_tasks.append(task)
@@ -2824,18 +3342,17 @@ class Workforce(BaseNode):
2824
3342
  self._completed_tasks.append(task)
2825
3343
  return False
2826
3344
 
3345
+ # Task is being retried - don't add to completed tasks
3346
+ # It will be added when it actually completes or permanently fails
2827
3347
  logger.debug(
2828
- f"Task {task.id} failed and was {action_taken}. "
2829
- f"Updating dependency state."
3348
+ f"Task {task.id} is being retried (strategy: "
3349
+ f"{recovery_decision.recovery_strategy}). "
3350
+ f"Not adding to completed tasks until final outcome."
2830
3351
  )
2831
- # Mark task as completed for dependency tracking
2832
- self._completed_tasks.append(task)
2833
3352
 
2834
- # Sync shared memory after task completion to share knowledge
3353
+ # Sync shared memory after task recovery
2835
3354
  if self.share_memory:
2836
- logger.info(
2837
- f"Syncing shared memory after task {task.id} completion"
2838
- )
3355
+ logger.info(f"Syncing shared memory after task {task.id} recovery")
2839
3356
  self._sync_shared_memory()
2840
3357
 
2841
3358
  # Check if any pending tasks are now ready to execute
@@ -3345,11 +3862,88 @@ class Workforce(BaseNode):
3345
3862
  )
3346
3863
  continue
3347
3864
  else:
3348
- print(
3349
- f"{Fore.CYAN}🎯 Task {returned_task.id} completed "
3350
- f"successfully.{Fore.RESET}"
3865
+ quality_eval = self._analyze_task(
3866
+ returned_task, for_failure=False
3351
3867
  )
3352
- await self._handle_completed_task(returned_task)
3868
+
3869
+ if not quality_eval.quality_sufficient:
3870
+ logger.info(
3871
+ f"Task {returned_task.id} quality check: "
3872
+ f"score={quality_eval.quality_score}, "
3873
+ f"issues={quality_eval.issues}, "
3874
+ f"strategy={quality_eval.recovery_strategy}"
3875
+ )
3876
+
3877
+ # Check retry limit before attempting recovery
3878
+ if returned_task.failure_count >= 2:
3879
+ print(
3880
+ f"{Fore.YELLOW}Task {returned_task.id} "
3881
+ f"completed with low quality score: "
3882
+ f"{quality_eval.quality_score} "
3883
+ f"(retry limit reached){Fore.RESET}"
3884
+ )
3885
+ await self._handle_completed_task(
3886
+ returned_task
3887
+ )
3888
+ continue
3889
+
3890
+ # Print visual feedback for quality-failed tasks
3891
+ # with recovery strategy
3892
+ recovery_action = (
3893
+ quality_eval.recovery_strategy.value
3894
+ if quality_eval.recovery_strategy
3895
+ else ""
3896
+ )
3897
+ print(
3898
+ f"{Fore.YELLOW}⚠️ Task {returned_task.id} "
3899
+ f"failed quality check (score: "
3900
+ f"{quality_eval.quality_score}). "
3901
+ f"Issues: {', '.join(quality_eval.issues)}. "
3902
+ f"Recovery: {recovery_action}{Fore.RESET}"
3903
+ )
3904
+
3905
+ # Mark as failed for recovery
3906
+ returned_task.failure_count += 1
3907
+ returned_task.state = TaskState.FAILED
3908
+ returned_task.result = (
3909
+ f"Quality insufficient (score: "
3910
+ f"{quality_eval.quality_score}). "
3911
+ f"Issues: {', '.join(quality_eval.issues)}"
3912
+ )
3913
+
3914
+ # Clean up tracking before attempting recovery
3915
+ if returned_task.id in self._assignees:
3916
+ await self._channel.archive_task(
3917
+ returned_task.id
3918
+ )
3919
+ self._cleanup_task_tracking(returned_task.id)
3920
+
3921
+ # Apply LLM-recommended recovery strategy
3922
+ try:
3923
+ is_decompose = (
3924
+ await self._apply_recovery_strategy(
3925
+ returned_task, quality_eval
3926
+ )
3927
+ )
3928
+
3929
+ # For decompose, cleanup happens in the method
3930
+ if is_decompose:
3931
+ continue
3932
+
3933
+ except Exception as e:
3934
+ logger.error(
3935
+ f"Error handling quality-failed task "
3936
+ f"{returned_task.id}: {e}",
3937
+ exc_info=True,
3938
+ )
3939
+ continue
3940
+ else:
3941
+ print(
3942
+ f"{Fore.CYAN}Task {returned_task.id} "
3943
+ f"completed successfully (quality score: "
3944
+ f"{quality_eval.quality_score}).{Fore.RESET}"
3945
+ )
3946
+ await self._handle_completed_task(returned_task)
3353
3947
  elif returned_task.state == TaskState.FAILED:
3354
3948
  try:
3355
3949
  halt = await self._handle_failed_task(returned_task)