camel-ai 0.2.71a4__py3-none-any.whl → 0.2.71a6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai has been flagged as potentially problematic; see the registry's advisory page for more details.

Files changed (36):
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +1533 -135
  3. camel/agents/repo_agent.py +2 -1
  4. camel/benchmarks/browsecomp.py +6 -6
  5. camel/logger.py +1 -1
  6. camel/messages/base.py +12 -1
  7. camel/models/azure_openai_model.py +96 -7
  8. camel/models/base_model.py +68 -10
  9. camel/models/deepseek_model.py +5 -0
  10. camel/models/gemini_model.py +5 -0
  11. camel/models/litellm_model.py +48 -16
  12. camel/models/model_manager.py +24 -6
  13. camel/models/openai_compatible_model.py +109 -5
  14. camel/models/openai_model.py +117 -8
  15. camel/societies/workforce/prompts.py +68 -5
  16. camel/societies/workforce/role_playing_worker.py +65 -7
  17. camel/societies/workforce/single_agent_worker.py +72 -18
  18. camel/societies/workforce/structured_output_handler.py +500 -0
  19. camel/societies/workforce/utils.py +67 -2
  20. camel/societies/workforce/workforce.py +527 -114
  21. camel/societies/workforce/workforce_logger.py +0 -8
  22. camel/tasks/task.py +3 -1
  23. camel/toolkits/__init__.py +2 -0
  24. camel/toolkits/file_write_toolkit.py +526 -121
  25. camel/toolkits/hybrid_browser_toolkit/actions.py +235 -60
  26. camel/toolkits/hybrid_browser_toolkit/agent.py +25 -8
  27. camel/toolkits/hybrid_browser_toolkit/browser_session.py +574 -164
  28. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +996 -126
  29. camel/toolkits/hybrid_browser_toolkit/stealth_config.py +116 -0
  30. camel/toolkits/hybrid_browser_toolkit/stealth_script.js +0 -0
  31. camel/toolkits/message_agent_toolkit.py +608 -0
  32. camel/toolkits/note_taking_toolkit.py +7 -13
  33. {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a6.dist-info}/METADATA +6 -4
  34. {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a6.dist-info}/RECORD +36 -32
  35. {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a6.dist-info}/WHEEL +0 -0
  36. {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a6.dist-info}/licenses/LICENSE +0 -0
@@ -14,6 +14,7 @@
14
14
  from __future__ import annotations
15
15
 
16
16
  import asyncio
17
+ import concurrent.futures
17
18
  import json
18
19
  import time
19
20
  import uuid
@@ -28,6 +29,7 @@ from typing import (
28
29
  Optional,
29
30
  Set,
30
31
  Tuple,
32
+ Union,
31
33
  )
32
34
 
33
35
  from colorama import Fore
@@ -40,12 +42,19 @@ from camel.societies.workforce.base import BaseNode
40
42
  from camel.societies.workforce.prompts import (
41
43
  ASSIGN_TASK_PROMPT,
42
44
  CREATE_NODE_PROMPT,
45
+ FAILURE_ANALYSIS_PROMPT,
43
46
  WF_TASK_DECOMPOSE_PROMPT,
44
47
  )
45
48
  from camel.societies.workforce.role_playing_worker import RolePlayingWorker
46
49
  from camel.societies.workforce.single_agent_worker import SingleAgentWorker
50
+ from camel.societies.workforce.structured_output_handler import (
51
+ StructuredOutputHandler,
52
+ )
47
53
  from camel.societies.workforce.task_channel import TaskChannel
48
54
  from camel.societies.workforce.utils import (
55
+ FailureContext,
56
+ RecoveryDecision,
57
+ RecoveryStrategy,
49
58
  TaskAssignment,
50
59
  TaskAssignResult,
51
60
  WorkerConf,
@@ -162,6 +171,14 @@ class Workforce(BaseNode):
162
171
  SingleAgentWorker instances; RolePlayingWorker and nested
163
172
  Workforce instances do not participate in memory sharing.
164
173
  (default: :obj:`False`)
174
+ use_structured_output_handler (bool, optional): Whether to use the
175
+ structured output handler instead of native structured output.
176
+ When enabled, the workforce will use prompts with structured
177
+ output instructions and regex extraction to parse responses.
178
+ This ensures compatibility with agents that don't reliably
179
+ support native structured output. When disabled, the workforce
180
+ uses the native response_format parameter.
181
+ (default: :obj:`True`)
165
182
 
166
183
  Example:
167
184
  >>> import asyncio
@@ -212,13 +229,19 @@ class Workforce(BaseNode):
212
229
  new_worker_agent: Optional[ChatAgent] = None,
213
230
  graceful_shutdown_timeout: float = 15.0,
214
231
  share_memory: bool = False,
232
+ use_structured_output_handler: bool = True,
215
233
  ) -> None:
216
234
  super().__init__(description)
217
- self._child_listening_tasks: Deque[asyncio.Task] = deque()
235
+ self._child_listening_tasks: Deque[
236
+ Union[asyncio.Task, concurrent.futures.Future]
237
+ ] = deque()
218
238
  self._children = children or []
219
239
  self.new_worker_agent = new_worker_agent
220
240
  self.graceful_shutdown_timeout = graceful_shutdown_timeout
221
241
  self.share_memory = share_memory
242
+ self.use_structured_output_handler = use_structured_output_handler
243
+ if self.use_structured_output_handler:
244
+ self.structured_handler = StructuredOutputHandler()
222
245
  self.metrics_logger = WorkforceLogger(workforce_id=self.node_id)
223
246
  self._task: Optional[Task] = None
224
247
  self._pending_tasks: Deque[Task] = deque()
@@ -611,6 +634,31 @@ class Workforce(BaseNode):
611
634
  # Remove original task dependencies as it's now decomposed
612
635
  del self._task_dependencies[original_task_id]
613
636
 
637
+ def _increment_in_flight_tasks(self, task_id: str) -> None:
638
+ r"""Safely increment the in-flight tasks counter with logging."""
639
+ self._in_flight_tasks += 1
640
+ logger.debug(
641
+ f"Incremented in-flight tasks for {task_id}. "
642
+ f"Count: {self._in_flight_tasks}"
643
+ )
644
+
645
+ def _decrement_in_flight_tasks(
646
+ self, task_id: str, context: str = ""
647
+ ) -> None:
648
+ r"""Safely decrement the in-flight tasks counter with safety checks."""
649
+ if self._in_flight_tasks > 0:
650
+ self._in_flight_tasks -= 1
651
+ logger.debug(
652
+ f"Decremented in-flight tasks for {task_id} ({context}). "
653
+ f"Count: {self._in_flight_tasks}"
654
+ )
655
+ else:
656
+ logger.debug(
657
+ f"Attempted to decrement in-flight tasks for {task_id} "
658
+ f"({context}) but counter is already 0. "
659
+ f"Counter: {self._in_flight_tasks}"
660
+ )
661
+
614
662
  def _cleanup_task_tracking(self, task_id: str) -> None:
615
663
  r"""Clean up tracking data for a task to prevent memory leaks.
616
664
 
@@ -634,9 +682,6 @@ class Workforce(BaseNode):
634
682
  )
635
683
  self.task_agent.reset()
636
684
  subtasks = task.decompose(self.task_agent, decompose_prompt)
637
- task.subtasks = subtasks
638
- for subtask in subtasks:
639
- subtask.parent = task
640
685
 
641
686
  # Update dependency tracking for decomposed task
642
687
  if subtasks:
@@ -644,6 +689,122 @@ class Workforce(BaseNode):
644
689
 
645
690
  return subtasks
646
691
 
692
+ def _analyze_failure(
693
+ self, task: Task, error_message: str
694
+ ) -> RecoveryDecision:
695
+ r"""Analyze a task failure and decide on the best recovery strategy.
696
+
697
+ Args:
698
+ task (Task): The failed task
699
+ error_message (str): The error message from the failure
700
+
701
+ Returns:
702
+ RecoveryDecision: The decided recovery strategy with reasoning
703
+ """
704
+ # First, do a quick smart analysis based on error patterns
705
+ error_msg_lower = error_message.lower()
706
+ if any(
707
+ keyword in error_msg_lower
708
+ for keyword in [
709
+ 'connection',
710
+ 'network',
711
+ 'server disconnected',
712
+ 'timeout',
713
+ 'apiconnectionerror',
714
+ ]
715
+ ):
716
+ return RecoveryDecision(
717
+ strategy=RecoveryStrategy.RETRY,
718
+ reasoning="Network/connection error detected, retrying task",
719
+ modified_task_content=None,
720
+ )
721
+
722
+ # Create failure context
723
+ failure_context = FailureContext(
724
+ task_id=task.id,
725
+ task_content=task.content,
726
+ failure_count=task.failure_count,
727
+ error_message=error_message,
728
+ worker_id=task.assigned_worker_id,
729
+ task_depth=task.get_depth(),
730
+ additional_info=str(task.additional_info)
731
+ if task.additional_info
732
+ else None,
733
+ )
734
+
735
+ # Format the analysis prompt
736
+ analysis_prompt = FAILURE_ANALYSIS_PROMPT.format(
737
+ task_id=failure_context.task_id,
738
+ task_content=failure_context.task_content,
739
+ failure_count=failure_context.failure_count,
740
+ error_message=failure_context.error_message,
741
+ worker_id=failure_context.worker_id or "unknown",
742
+ task_depth=failure_context.task_depth,
743
+ additional_info=failure_context.additional_info or "None",
744
+ )
745
+
746
+ try:
747
+ # Check if we should use structured handler
748
+ if self.use_structured_output_handler:
749
+ # Use structured handler
750
+ enhanced_prompt = (
751
+ self.structured_handler.generate_structured_prompt(
752
+ base_prompt=analysis_prompt,
753
+ schema=RecoveryDecision,
754
+ examples=[
755
+ {
756
+ "strategy": "RETRY",
757
+ "reasoning": "Temporary network error, "
758
+ "worth retrying",
759
+ "modified_task_content": None,
760
+ }
761
+ ],
762
+ )
763
+ )
764
+
765
+ self.task_agent.reset()
766
+ response = self.task_agent.step(enhanced_prompt)
767
+
768
+ result = self.structured_handler.parse_structured_response(
769
+ response.msg.content if response.msg else "",
770
+ schema=RecoveryDecision,
771
+ fallback_values={
772
+ "strategy": RecoveryStrategy.RETRY,
773
+ "reasoning": "Defaulting to retry due to parsing "
774
+ "issues",
775
+ "modified_task_content": None,
776
+ },
777
+ )
778
+ # Ensure we return a RecoveryDecision instance
779
+ if isinstance(result, RecoveryDecision):
780
+ return result
781
+ elif isinstance(result, dict):
782
+ return RecoveryDecision(**result)
783
+ else:
784
+ return RecoveryDecision(
785
+ strategy=RecoveryStrategy.RETRY,
786
+ reasoning="Failed to parse recovery decision",
787
+ modified_task_content=None,
788
+ )
789
+ else:
790
+ # Use existing native structured output code
791
+ self.task_agent.reset()
792
+ response = self.task_agent.step(
793
+ analysis_prompt, response_format=RecoveryDecision
794
+ )
795
+ return response.msg.parsed
796
+
797
+ except Exception as e:
798
+ logger.warning(
799
+ f"Error during failure analysis: {e}, defaulting to RETRY"
800
+ )
801
+ return RecoveryDecision(
802
+ strategy=RecoveryStrategy.RETRY,
803
+ reasoning=f"Analysis failed due to error: {e!s}, "
804
+ f"defaulting to retry",
805
+ modified_task_content=None,
806
+ )
807
+
647
808
  # Human intervention methods
648
809
  async def _async_pause(self) -> None:
649
810
  r"""Async implementation of pause to run on the event loop."""
@@ -1029,9 +1190,6 @@ class Workforce(BaseNode):
1029
1190
  needed
1030
1191
  >>> print(result.result)
1031
1192
  """
1032
- import asyncio
1033
- import concurrent.futures
1034
-
1035
1193
  # Check if we're already in an event loop
1036
1194
  try:
1037
1195
  current_loop = asyncio.get_running_loop()
@@ -1206,7 +1364,42 @@ class Workforce(BaseNode):
1206
1364
 
1207
1365
  return self._task
1208
1366
 
1209
- @check_if_running(False)
1367
+ def _start_child_node_when_paused(
1368
+ self, start_coroutine: Coroutine
1369
+ ) -> None:
1370
+ r"""Helper to start a child node when workforce is paused.
1371
+
1372
+ Args:
1373
+ start_coroutine: The coroutine to start (e.g., worker_node.start())
1374
+ """
1375
+ if self._state == WorkforceState.PAUSED and hasattr(
1376
+ self, '_child_listening_tasks'
1377
+ ):
1378
+ if self._loop and not self._loop.is_closed():
1379
+ # Use thread-safe coroutine execution for dynamic addition
1380
+ child_task: Union[asyncio.Task, concurrent.futures.Future]
1381
+ try:
1382
+ # Check if we're in the same thread as the loop
1383
+ current_loop = asyncio.get_running_loop()
1384
+ if current_loop is self._loop:
1385
+ # Same loop context - use create_task
1386
+ child_task = self._loop.create_task(start_coroutine)
1387
+ else:
1388
+ # Different loop context - use thread-safe approach
1389
+ child_task = asyncio.run_coroutine_threadsafe(
1390
+ start_coroutine, self._loop
1391
+ )
1392
+ except RuntimeError:
1393
+ # No running loop in current thread - use thread-safe
1394
+ # approach
1395
+ child_task = asyncio.run_coroutine_threadsafe(
1396
+ start_coroutine, self._loop
1397
+ )
1398
+ self._child_listening_tasks.append(child_task)
1399
+ else:
1400
+ # Close the coroutine to prevent RuntimeWarning
1401
+ start_coroutine.close()
1402
+
1210
1403
  def add_single_agent_worker(
1211
1404
  self,
1212
1405
  description: str,
@@ -1214,6 +1407,7 @@ class Workforce(BaseNode):
1214
1407
  pool_max_size: int = DEFAULT_WORKER_POOL_SIZE,
1215
1408
  ) -> Workforce:
1216
1409
  r"""Add a worker node to the workforce that uses a single agent.
1410
+ Can be called when workforce is paused to dynamically add workers.
1217
1411
 
1218
1412
  Args:
1219
1413
  description (str): Description of the worker node.
@@ -1223,7 +1417,15 @@ class Workforce(BaseNode):
1223
1417
 
1224
1418
  Returns:
1225
1419
  Workforce: The workforce node itself.
1420
+
1421
+ Raises:
1422
+ RuntimeError: If called while workforce is running (not paused).
1226
1423
  """
1424
+ if self._state == WorkforceState.RUNNING:
1425
+ raise RuntimeError(
1426
+ "Cannot add workers while workforce is running. "
1427
+ "Pause the workforce first."
1428
+ )
1227
1429
  # Ensure the worker agent shares this workforce's pause control
1228
1430
  self._attach_pause_event_to_agent(worker)
1229
1431
 
@@ -1231,8 +1433,17 @@ class Workforce(BaseNode):
1231
1433
  description=description,
1232
1434
  worker=worker,
1233
1435
  pool_max_size=pool_max_size,
1436
+ use_structured_output_handler=self.use_structured_output_handler,
1234
1437
  )
1235
1438
  self._children.append(worker_node)
1439
+
1440
+ # If we have a channel set up, set it for the new worker
1441
+ if hasattr(self, '_channel') and self._channel is not None:
1442
+ worker_node.set_channel(self._channel)
1443
+
1444
+ # If workforce is paused, start the worker's listening task
1445
+ self._start_child_node_when_paused(worker_node.start())
1446
+
1236
1447
  if self.metrics_logger:
1237
1448
  self.metrics_logger.log_worker_created(
1238
1449
  worker_id=worker_node.node_id,
@@ -1241,7 +1452,6 @@ class Workforce(BaseNode):
1241
1452
  )
1242
1453
  return self
1243
1454
 
1244
- @check_if_running(False)
1245
1455
  def add_role_playing_worker(
1246
1456
  self,
1247
1457
  description: str,
@@ -1253,6 +1463,7 @@ class Workforce(BaseNode):
1253
1463
  chat_turn_limit: int = 3,
1254
1464
  ) -> Workforce:
1255
1465
  r"""Add a worker node to the workforce that uses `RolePlaying` system.
1466
+ Can be called when workforce is paused to dynamically add workers.
1256
1467
 
1257
1468
  Args:
1258
1469
  description (str): Description of the node.
@@ -1272,7 +1483,15 @@ class Workforce(BaseNode):
1272
1483
 
1273
1484
  Returns:
1274
1485
  Workforce: The workforce node itself.
1486
+
1487
+ Raises:
1488
+ RuntimeError: If called while workforce is running (not paused).
1275
1489
  """
1490
+ if self._state == WorkforceState.RUNNING:
1491
+ raise RuntimeError(
1492
+ "Cannot add workers while workforce is running. "
1493
+ "Pause the workforce first."
1494
+ )
1276
1495
  # Ensure provided kwargs carry pause_event so that internally created
1277
1496
  # ChatAgents (assistant/user/summarizer) inherit it.
1278
1497
  assistant_agent_kwargs = self._ensure_pause_event_in_kwargs(
@@ -1293,8 +1512,17 @@ class Workforce(BaseNode):
1293
1512
  user_agent_kwargs=user_agent_kwargs,
1294
1513
  summarize_agent_kwargs=summarize_agent_kwargs,
1295
1514
  chat_turn_limit=chat_turn_limit,
1515
+ use_structured_output_handler=self.use_structured_output_handler,
1296
1516
  )
1297
1517
  self._children.append(worker_node)
1518
+
1519
+ # If we have a channel set up, set it for the new worker
1520
+ if hasattr(self, '_channel') and self._channel is not None:
1521
+ worker_node.set_channel(self._channel)
1522
+
1523
+ # If workforce is paused, start the worker's listening task
1524
+ self._start_child_node_when_paused(worker_node.start())
1525
+
1298
1526
  if self.metrics_logger:
1299
1527
  self.metrics_logger.log_worker_created(
1300
1528
  worker_id=worker_node.node_id,
@@ -1303,20 +1531,35 @@ class Workforce(BaseNode):
1303
1531
  )
1304
1532
  return self
1305
1533
 
1306
- @check_if_running(False)
1307
1534
  def add_workforce(self, workforce: Workforce) -> Workforce:
1308
1535
  r"""Add a workforce node to the workforce.
1536
+ Can be called when workforce is paused to dynamically add workers.
1309
1537
 
1310
1538
  Args:
1311
1539
  workforce (Workforce): The workforce node to be added.
1312
1540
 
1313
1541
  Returns:
1314
1542
  Workforce: The workforce node itself.
1543
+
1544
+ Raises:
1545
+ RuntimeError: If called while workforce is running (not paused).
1315
1546
  """
1547
+ if self._state == WorkforceState.RUNNING:
1548
+ raise RuntimeError(
1549
+ "Cannot add workers while workforce is running. "
1550
+ "Pause the workforce first."
1551
+ )
1316
1552
  # Align child workforce's pause_event with this one for unified
1317
1553
  # control of worker agents only.
1318
1554
  workforce._pause_event = self._pause_event
1319
1555
  self._children.append(workforce)
1556
+
1557
+ # If we have a channel set up, set it for the new workforce
1558
+ if hasattr(self, '_channel') and self._channel is not None:
1559
+ workforce.set_channel(self._channel)
1560
+
1561
+ # If workforce is paused, start the child workforce's listening task
1562
+ self._start_child_node_when_paused(workforce.start())
1320
1563
  return self
1321
1564
 
1322
1565
  async def _async_reset(self) -> None:
@@ -1443,26 +1686,73 @@ class Workforce(BaseNode):
1443
1686
  )
1444
1687
  prompt = prompt + f"\n\n{feedback}"
1445
1688
 
1446
- response = self.coordinator_agent.step(
1447
- prompt, response_format=TaskAssignResult
1448
- )
1449
-
1450
- if response.msg is None or response.msg.content is None:
1451
- logger.error(
1452
- "Coordinator agent returned empty response for task assignment"
1689
+ # Check if we should use structured handler
1690
+ if self.use_structured_output_handler:
1691
+ # Use structured handler for prompt-based extraction
1692
+ enhanced_prompt = (
1693
+ self.structured_handler.generate_structured_prompt(
1694
+ base_prompt=prompt,
1695
+ schema=TaskAssignResult,
1696
+ examples=[
1697
+ {
1698
+ "assignments": [
1699
+ {
1700
+ "task_id": "task_1",
1701
+ "assignee_id": "worker_123",
1702
+ "dependencies": [],
1703
+ }
1704
+ ]
1705
+ }
1706
+ ],
1707
+ )
1453
1708
  )
1454
- return TaskAssignResult(assignments=[])
1455
1709
 
1456
- try:
1457
- result_dict = json.loads(response.msg.content, parse_int=str)
1458
- return TaskAssignResult(**result_dict)
1459
- except json.JSONDecodeError as e:
1460
- logger.error(
1461
- f"JSON parsing error in task assignment: Invalid response "
1462
- f"format - {e}. Response content: "
1463
- f"{response.msg.content[:50]}..."
1710
+ # Get response without structured format
1711
+ response = self.coordinator_agent.step(enhanced_prompt)
1712
+
1713
+ if response.msg is None or response.msg.content is None:
1714
+ logger.error(
1715
+ "Coordinator agent returned empty response for "
1716
+ "task assignment"
1717
+ )
1718
+ return TaskAssignResult(assignments=[])
1719
+
1720
+ # Parse with structured handler
1721
+ result = self.structured_handler.parse_structured_response(
1722
+ response.msg.content,
1723
+ schema=TaskAssignResult,
1724
+ fallback_values={"assignments": []},
1725
+ )
1726
+ # Ensure we return a TaskAssignResult instance
1727
+ if isinstance(result, TaskAssignResult):
1728
+ return result
1729
+ elif isinstance(result, dict):
1730
+ return TaskAssignResult(**result)
1731
+ else:
1732
+ return TaskAssignResult(assignments=[])
1733
+ else:
1734
+ # Use existing native structured output code
1735
+ response = self.coordinator_agent.step(
1736
+ prompt, response_format=TaskAssignResult
1464
1737
  )
1465
- return TaskAssignResult(assignments=[])
1738
+
1739
+ if response.msg is None or response.msg.content is None:
1740
+ logger.error(
1741
+ "Coordinator agent returned empty response for "
1742
+ "task assignment"
1743
+ )
1744
+ return TaskAssignResult(assignments=[])
1745
+
1746
+ try:
1747
+ result_dict = json.loads(response.msg.content, parse_int=str)
1748
+ return TaskAssignResult(**result_dict)
1749
+ except json.JSONDecodeError as e:
1750
+ logger.error(
1751
+ f"JSON parsing error in task assignment: Invalid response "
1752
+ f"format - {e}. Response content: "
1753
+ f"{response.msg.content[:50]}..."
1754
+ )
1755
+ return TaskAssignResult(assignments=[])
1466
1756
 
1467
1757
  def _validate_assignments(
1468
1758
  self, assignments: List[TaskAssignment], valid_ids: Set[str]
@@ -1654,18 +1944,20 @@ class Workforce(BaseNode):
1654
1944
  )
1655
1945
 
1656
1946
  try:
1657
- self._in_flight_tasks += 1
1658
1947
  await self._channel.post_task(task, self.node_id, assignee_id)
1948
+ self._increment_in_flight_tasks(task.id)
1659
1949
  logger.debug(
1660
1950
  f"Posted task {task.id} to {assignee_id}. "
1661
1951
  f"In-flight tasks: {self._in_flight_tasks}"
1662
1952
  )
1663
1953
  except Exception as e:
1664
- # Decrement counter if posting failed
1665
- self._in_flight_tasks -= 1
1666
1954
  logger.error(
1667
1955
  f"Failed to post task {task.id} to {assignee_id}: {e}"
1668
1956
  )
1957
+ print(
1958
+ f"{Fore.RED}Failed to post task {task.id} to {assignee_id}: "
1959
+ f"{e}{Fore.RESET}"
1960
+ )
1669
1961
 
1670
1962
  async def _post_dependency(self, dependency: Task) -> None:
1671
1963
  await self._channel.post_dependency(dependency, self.node_id)
@@ -1686,35 +1978,92 @@ class Workforce(BaseNode):
1686
1978
  child_nodes_info=self._get_child_nodes_info(),
1687
1979
  additional_info=task.additional_info,
1688
1980
  )
1689
- response = self.coordinator_agent.step(
1690
- prompt, response_format=WorkerConf
1691
- )
1692
- if response.msg is None or response.msg.content is None:
1693
- logger.error(
1694
- "Coordinator agent returned empty response for worker creation"
1695
- )
1696
- # Create a fallback worker configuration
1697
- new_node_conf = WorkerConf(
1698
- description=f"Fallback worker for "
1699
- f"task: {task.content[:50]}...",
1700
- role="General Assistant",
1701
- sys_msg="You are a general assistant that can help "
1702
- "with various tasks.",
1981
+ # Check if we should use structured handler
1982
+ if self.use_structured_output_handler:
1983
+ # Use structured handler
1984
+ enhanced_prompt = (
1985
+ self.structured_handler.generate_structured_prompt(
1986
+ base_prompt=prompt,
1987
+ schema=WorkerConf,
1988
+ examples=[
1989
+ {
1990
+ "description": "Data analysis specialist",
1991
+ "role": "Data Analyst",
1992
+ "sys_msg": "You are an expert data analyst.",
1993
+ }
1994
+ ],
1995
+ )
1703
1996
  )
1997
+
1998
+ response = self.coordinator_agent.step(enhanced_prompt)
1999
+
2000
+ if response.msg is None or response.msg.content is None:
2001
+ logger.error(
2002
+ "Coordinator agent returned empty response for "
2003
+ "worker creation"
2004
+ )
2005
+ new_node_conf = WorkerConf(
2006
+ description=f"Fallback worker for task: "
2007
+ f"{task.content[:50]}...",
2008
+ role="General Assistant",
2009
+ sys_msg="You are a general assistant that can help "
2010
+ "with various tasks.",
2011
+ )
2012
+ else:
2013
+ result = self.structured_handler.parse_structured_response(
2014
+ response.msg.content,
2015
+ schema=WorkerConf,
2016
+ fallback_values={
2017
+ "description": f"Worker for task: "
2018
+ f"{task.content[:50]}...",
2019
+ "role": "Task Specialist",
2020
+ "sys_msg": f"You are a specialist for: {task.content}",
2021
+ },
2022
+ )
2023
+ # Ensure we have a WorkerConf instance
2024
+ if isinstance(result, WorkerConf):
2025
+ new_node_conf = result
2026
+ elif isinstance(result, dict):
2027
+ new_node_conf = WorkerConf(**result)
2028
+ else:
2029
+ new_node_conf = WorkerConf(
2030
+ description=f"Worker for task: {task.content[:50]}...",
2031
+ role="Task Specialist",
2032
+ sys_msg=f"You are a specialist for: {task.content}",
2033
+ )
1704
2034
  else:
1705
- try:
1706
- result_dict = json.loads(response.msg.content)
1707
- new_node_conf = WorkerConf(**result_dict)
1708
- except json.JSONDecodeError as e:
2035
+ # Use existing native structured output code
2036
+ response = self.coordinator_agent.step(
2037
+ prompt, response_format=WorkerConf
2038
+ )
2039
+ if response.msg is None or response.msg.content is None:
1709
2040
  logger.error(
1710
- f"JSON parsing error in worker creation: Invalid response "
1711
- f"format - {e}. Response content: "
1712
- f"{response.msg.content[:100]}..."
2041
+ "Coordinator agent returned empty response for "
2042
+ "worker creation"
1713
2043
  )
1714
- raise RuntimeError(
1715
- f"Failed to create worker for task {task.id}: "
1716
- f"Coordinator agent returned malformed JSON response. "
2044
+ # Create a fallback worker configuration
2045
+ new_node_conf = WorkerConf(
2046
+ description=f"Fallback worker for "
2047
+ f"task: {task.content[:50]}...",
2048
+ role="General Assistant",
2049
+ sys_msg="You are a general assistant that can help "
2050
+ "with various tasks.",
1717
2051
  )
2052
+ else:
2053
+ try:
2054
+ result_dict = json.loads(response.msg.content)
2055
+ new_node_conf = WorkerConf(**result_dict)
2056
+ except json.JSONDecodeError as e:
2057
+ logger.error(
2058
+ f"JSON parsing error in worker creation: Invalid "
2059
+ f"response format - {e}. Response content: "
2060
+ f"format - {e}. Response content: "
2061
+ f"{response.msg.content[:100]}..."
2062
+ )
2063
+ raise RuntimeError(
2064
+ f"Failed to create worker for task {task.id}: "
2065
+ f"Coordinator agent returned malformed JSON response. "
2066
+ ) from e
1718
2067
 
1719
2068
  new_agent = await self._create_new_agent(
1720
2069
  new_node_conf.role,
@@ -1725,6 +2074,7 @@ class Workforce(BaseNode):
1725
2074
  description=new_node_conf.description,
1726
2075
  worker=new_agent,
1727
2076
  pool_max_size=DEFAULT_WORKER_POOL_SIZE,
2077
+ use_structured_output_handler=self.use_structured_output_handler,
1728
2078
  )
1729
2079
  new_node.set_channel(self._channel)
1730
2080
 
@@ -1789,10 +2139,6 @@ class Workforce(BaseNode):
1789
2139
  timeout=TASK_TIMEOUT_SECONDS,
1790
2140
  )
1791
2141
  except Exception as e:
1792
- # Decrement in-flight counter to prevent hanging
1793
- if self._in_flight_tasks > 0:
1794
- self._in_flight_tasks -= 1
1795
-
1796
2142
  error_msg = (
1797
2143
  f"Error getting returned task {e} in "
1798
2144
  f"workforce {self.node_id}. "
@@ -1804,8 +2150,11 @@ class Workforce(BaseNode):
1804
2150
  if self._pending_tasks and self._assignees:
1805
2151
  for task in self._pending_tasks:
1806
2152
  if task.id in self._assignees:
1807
- # Mark this real task as failed
2153
+ # Mark task as failed and decrement counter
1808
2154
  task.set_state(TaskState.FAILED)
2155
+ self._decrement_in_flight_tasks(
2156
+ task.id, "timeout/error in _get_returned_task"
2157
+ )
1809
2158
  return task
1810
2159
  return None
1811
2160
 
@@ -1905,7 +2254,6 @@ class Workforce(BaseNode):
1905
2254
  task_id=task.id,
1906
2255
  worker_id=worker_id,
1907
2256
  error_message=detailed_error,
1908
- error_type="TaskFailure",
1909
2257
  metadata={
1910
2258
  'failure_count': task.failure_count,
1911
2259
  'task_content': task.content,
@@ -1944,67 +2292,116 @@ class Workforce(BaseNode):
1944
2292
  await self._channel.archive_task(task.id)
1945
2293
  return True
1946
2294
 
1947
- if task.get_depth() > 3:
1948
- # Create a new worker node and reassign
1949
- assignee = await self._create_worker_node_for_task(task)
2295
+ # Use intelligent failure analysis to decide recovery strategy
2296
+ recovery_decision = self._analyze_failure(task, detailed_error)
1950
2297
 
1951
- # Sync shared memory after creating new worker to provide context
1952
- if self.share_memory:
1953
- logger.info(
1954
- f"Syncing shared memory after creating new worker "
1955
- f"{assignee.node_id} for failed task {task.id}"
1956
- )
1957
- self._sync_shared_memory()
2298
+ logger.info(
2299
+ f"Task {task.id} failure "
2300
+ f"analysis: {recovery_decision.strategy.value} - "
2301
+ f"{recovery_decision.reasoning}"
2302
+ )
1958
2303
 
1959
- await self._post_task(task, assignee.node_id)
1960
- action_taken = f"reassigned to new worker {assignee.node_id}"
1961
- else:
1962
- subtasks = self._decompose_task(task)
1963
- if self.metrics_logger and subtasks:
1964
- self.metrics_logger.log_task_decomposed(
1965
- parent_task_id=task.id,
1966
- subtask_ids=[st.id for st in subtasks],
1967
- )
1968
- for subtask in subtasks:
1969
- self.metrics_logger.log_task_created(
1970
- task_id=subtask.id,
1971
- description=subtask.content,
1972
- parent_task_id=task.id,
1973
- task_type=subtask.type,
1974
- metadata=subtask.additional_info,
2304
+ # Clean up tracking before attempting recovery
2305
+ if task.id in self._assignees:
2306
+ await self._channel.archive_task(task.id)
2307
+ self._cleanup_task_tracking(task.id)
2308
+
2309
+ try:
2310
+ if recovery_decision.strategy == RecoveryStrategy.RETRY:
2311
+ # Simply retry the task by reposting it
2312
+ if task.id in self._assignees:
2313
+ assignee_id = self._assignees[task.id]
2314
+ await self._post_task(task, assignee_id)
2315
+ action_taken = f"retried with same worker {assignee_id}"
2316
+ else:
2317
+ # Find a new assignee and retry
2318
+ batch_result = await self._find_assignee([task])
2319
+ assignment = batch_result.assignments[0]
2320
+ self._assignees[task.id] = assignment.assignee_id
2321
+ await self._post_task(task, assignment.assignee_id)
2322
+ action_taken = (
2323
+ f"retried with new worker {assignment.assignee_id}"
1975
2324
  )
1976
- # Insert packets at the head of the queue
1977
- self._pending_tasks.extendleft(reversed(subtasks))
1978
2325
 
1979
- await self._post_ready_tasks()
1980
- action_taken = f"decomposed into {len(subtasks)} subtasks"
2326
+ elif recovery_decision.strategy == RecoveryStrategy.REPLAN:
2327
+ # Modify the task content and retry
2328
+ if recovery_decision.modified_task_content:
2329
+ task.content = recovery_decision.modified_task_content
2330
+ logger.info(f"Task {task.id} content modified for replan")
1981
2331
 
1982
- # Handle task completion differently for decomposed tasks
1983
- if task.id in self._assignees:
1984
- await self._channel.archive_task(task.id)
2332
+ # Repost the modified task
2333
+ if task.id in self._assignees:
2334
+ assignee_id = self._assignees[task.id]
2335
+ await self._post_task(task, assignee_id)
2336
+ action_taken = (
2337
+ f"replanned and retried with worker {assignee_id}"
2338
+ )
2339
+ else:
2340
+ # Find a new assignee for the replanned task
2341
+ batch_result = await self._find_assignee([task])
2342
+ assignment = batch_result.assignments[0]
2343
+ self._assignees[task.id] = assignment.assignee_id
2344
+ await self._post_task(task, assignment.assignee_id)
2345
+ action_taken = (
2346
+ f"replanned and assigned to "
2347
+ f"worker {assignment.assignee_id}"
2348
+ )
1985
2349
 
1986
- self._cleanup_task_tracking(task.id)
1987
- logger.debug(
1988
- f"Task {task.id} failed and was {action_taken}. "
1989
- f"Dependencies updated for subtasks."
1990
- )
2350
+ elif recovery_decision.strategy == RecoveryStrategy.DECOMPOSE:
2351
+ # Decompose the task into subtasks
2352
+ subtasks = self._decompose_task(task)
2353
+ if self.metrics_logger and subtasks:
2354
+ self.metrics_logger.log_task_decomposed(
2355
+ parent_task_id=task.id,
2356
+ subtask_ids=[st.id for st in subtasks],
2357
+ )
2358
+ for subtask in subtasks:
2359
+ self.metrics_logger.log_task_created(
2360
+ task_id=subtask.id,
2361
+ description=subtask.content,
2362
+ parent_task_id=task.id,
2363
+ task_type=subtask.type,
2364
+ metadata=subtask.additional_info,
2365
+ )
2366
+ # Insert packets at the head of the queue
2367
+ self._pending_tasks.extendleft(reversed(subtasks))
1991
2368
 
1992
- # Sync shared memory after task decomposition
1993
- if self.share_memory:
1994
- logger.info(
1995
- f"Syncing shared memory after task {task.id} decomposition"
2369
+ await self._post_ready_tasks()
2370
+ action_taken = f"decomposed into {len(subtasks)} subtasks"
2371
+
2372
+ logger.debug(
2373
+ f"Task {task.id} failed and was {action_taken}. "
2374
+ f"Dependencies updated for subtasks."
1996
2375
  )
1997
- self._sync_shared_memory()
1998
2376
 
1999
- # Check if any pending tasks are now ready to execute
2000
- await self._post_ready_tasks()
2001
- return False
2377
+ # Sync shared memory after task decomposition
2378
+ if self.share_memory:
2379
+ logger.info(
2380
+ f"Syncing shared memory after "
2381
+ f"task {task.id} decomposition"
2382
+ )
2383
+ self._sync_shared_memory()
2002
2384
 
2003
- # For reassigned tasks (depth > 3), handle normally
2004
- if task.id in self._assignees:
2005
- await self._channel.archive_task(task.id)
2385
+ # Check if any pending tasks are now ready to execute
2386
+ await self._post_ready_tasks()
2387
+ return False
2388
+
2389
+ elif recovery_decision.strategy == RecoveryStrategy.CREATE_WORKER:
2390
+ assignee = await self._create_worker_node_for_task(task)
2391
+ await self._post_task(task, assignee.node_id)
2392
+ action_taken = (
2393
+ f"created new worker {assignee.node_id} and assigned "
2394
+ f"task {task.id} to it"
2395
+ )
2396
+ except Exception as e:
2397
+ logger.error(f"Recovery strategy failed for task {task.id}: {e}")
2398
+ # If max retries reached, halt the workforce
2399
+ if task.failure_count >= MAX_TASK_RETRIES:
2400
+ self._completed_tasks.append(task)
2401
+ return True
2402
+ self._completed_tasks.append(task)
2403
+ return False
2006
2404
 
2007
- self._cleanup_task_tracking(task.id)
2008
2405
  logger.debug(
2009
2406
  f"Task {task.id} failed and was {action_taken}. "
2010
2407
  f"Updating dependency state."
@@ -2275,7 +2672,9 @@ class Workforce(BaseNode):
2275
2672
  await self._post_ready_tasks()
2276
2673
  continue
2277
2674
 
2278
- self._in_flight_tasks -= 1
2675
+ self._decrement_in_flight_tasks(
2676
+ returned_task.id, "task returned successfully"
2677
+ )
2279
2678
 
2280
2679
  # Check for stop request after getting task
2281
2680
  if self._stop_requested:
@@ -2360,8 +2759,9 @@ class Workforce(BaseNode):
2360
2759
 
2361
2760
  except Exception as e:
2362
2761
  # Decrement in-flight counter to prevent hanging
2363
- if self._in_flight_tasks > 0:
2364
- self._in_flight_tasks -= 1
2762
+ self._decrement_in_flight_tasks(
2763
+ "unknown", "exception in task processing loop"
2764
+ )
2365
2765
 
2366
2766
  logger.error(
2367
2767
  f"Error processing task in workforce {self.node_id}: {e}"
@@ -2440,8 +2840,20 @@ class Workforce(BaseNode):
2440
2840
  for task in self._child_listening_tasks:
2441
2841
  if not task.done():
2442
2842
  task.cancel()
2843
+
2844
+ # Handle both asyncio.Task and concurrent.futures.
2845
+ # Future
2846
+ awaitables = []
2847
+ for task in self._child_listening_tasks:
2848
+ if isinstance(task, concurrent.futures.Future):
2849
+ # Convert Future to awaitable
2850
+ awaitables.append(asyncio.wrap_future(task))
2851
+ else:
2852
+ # Already an asyncio.Task
2853
+ awaitables.append(task)
2854
+
2443
2855
  await asyncio.gather(
2444
- *self._child_listening_tasks,
2856
+ *awaitables,
2445
2857
  return_exceptions=True,
2446
2858
  )
2447
2859
 
@@ -2482,6 +2894,7 @@ class Workforce(BaseNode):
2482
2894
  else None,
2483
2895
  graceful_shutdown_timeout=self.graceful_shutdown_timeout,
2484
2896
  share_memory=self.share_memory,
2897
+ use_structured_output_handler=self.use_structured_output_handler,
2485
2898
  )
2486
2899
 
2487
2900
  for child in self._children: