camel-ai 0.2.78__py3-none-any.whl → 0.2.79a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic; see the registry's advisory page for more details.

Files changed (39)
  1. camel/__init__.py +1 -1
  2. camel/agents/_utils.py +38 -0
  3. camel/agents/chat_agent.py +1112 -287
  4. camel/datasets/base_generator.py +39 -10
  5. camel/environments/single_step.py +28 -3
  6. camel/memories/__init__.py +1 -2
  7. camel/memories/agent_memories.py +34 -0
  8. camel/memories/base.py +26 -0
  9. camel/memories/blocks/chat_history_block.py +117 -17
  10. camel/memories/context_creators/score_based.py +25 -384
  11. camel/messages/base.py +26 -0
  12. camel/models/aws_bedrock_model.py +1 -17
  13. camel/models/azure_openai_model.py +113 -67
  14. camel/models/model_factory.py +17 -1
  15. camel/models/moonshot_model.py +102 -5
  16. camel/models/openai_compatible_model.py +62 -32
  17. camel/models/openai_model.py +61 -35
  18. camel/models/samba_model.py +34 -15
  19. camel/models/sglang_model.py +41 -11
  20. camel/societies/workforce/__init__.py +2 -0
  21. camel/societies/workforce/events.py +122 -0
  22. camel/societies/workforce/role_playing_worker.py +15 -11
  23. camel/societies/workforce/single_agent_worker.py +143 -291
  24. camel/societies/workforce/utils.py +2 -1
  25. camel/societies/workforce/workflow_memory_manager.py +772 -0
  26. camel/societies/workforce/workforce.py +513 -188
  27. camel/societies/workforce/workforce_callback.py +74 -0
  28. camel/societies/workforce/workforce_logger.py +144 -140
  29. camel/societies/workforce/workforce_metrics.py +33 -0
  30. camel/storages/vectordb_storages/oceanbase.py +5 -4
  31. camel/toolkits/file_toolkit.py +166 -0
  32. camel/toolkits/message_integration.py +15 -13
  33. camel/toolkits/terminal_toolkit/terminal_toolkit.py +112 -79
  34. camel/types/enums.py +1 -0
  35. camel/utils/context_utils.py +201 -2
  36. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/METADATA +14 -13
  37. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/RECORD +39 -35
  38. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/WHEEL +0 -0
  39. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/licenses/LICENSE +0 -0
@@ -37,6 +37,9 @@ from typing import (
37
37
  cast,
38
38
  )
39
39
 
40
+ from .workforce_callback import WorkforceCallback
41
+ from .workforce_metrics import WorkforceMetrics
42
+
40
43
  if TYPE_CHECKING:
41
44
  from camel.utils.context_utils import ContextUtility
42
45
 
@@ -89,6 +92,16 @@ from camel.toolkits import (
89
92
  from camel.types import ModelPlatformType, ModelType
90
93
  from camel.utils import dependencies_required
91
94
 
95
+ from .events import (
96
+ AllTasksCompletedEvent,
97
+ TaskAssignedEvent,
98
+ TaskCompletedEvent,
99
+ TaskCreatedEvent,
100
+ TaskDecomposedEvent,
101
+ TaskFailedEvent,
102
+ TaskStartedEvent,
103
+ WorkerCreatedEvent,
104
+ )
92
105
  from .workforce_logger import WorkforceLogger
93
106
 
94
107
  if os.environ.get("TRACEROOT_ENABLED", "False").lower() == "true":
@@ -205,6 +218,17 @@ class Workforce(BaseNode):
205
218
  support native structured output. When disabled, the workforce
206
219
  uses the native response_format parameter.
207
220
  (default: :obj:`True`)
221
+ callbacks (Optional[List[WorkforceCallback]], optional): A list of
222
+ callback handlers to observe and record workforce lifecycle events
223
+ and metrics (e.g., task creation/assignment/start/completion/
224
+ failure, worker creation/deletion, all-tasks-completed). All
225
+ items must be instances of :class:`WorkforceCallback`, otherwise
226
+ a :class:`ValueError` is raised. If none of the provided
227
+ callbacks implement :class:`WorkforceMetrics`, a built-in
228
+ :class:`WorkforceLogger` (implements both callback and metrics)
229
+ is added automatically. If at least one provided callback
230
+ implements :class:`WorkforceMetrics`, no default logger is added.
231
+ (default: :obj:`None`)
208
232
 
209
233
  Example:
210
234
  >>> import asyncio
@@ -257,6 +281,7 @@ class Workforce(BaseNode):
257
281
  share_memory: bool = False,
258
282
  use_structured_output_handler: bool = True,
259
283
  task_timeout_seconds: Optional[float] = None,
284
+ callbacks: Optional[List[WorkforceCallback]] = None,
260
285
  ) -> None:
261
286
  super().__init__(description)
262
287
  self._child_listening_tasks: Deque[
@@ -272,7 +297,6 @@ class Workforce(BaseNode):
272
297
  )
273
298
  if self.use_structured_output_handler:
274
299
  self.structured_handler = StructuredOutputHandler()
275
- self.metrics_logger = WorkforceLogger(workforce_id=self.node_id)
276
300
  self._task: Optional[Task] = None
277
301
  self._pending_tasks: Deque[Task] = deque()
278
302
  self._task_dependencies: Dict[str, List[str]] = {}
@@ -295,15 +319,9 @@ class Workforce(BaseNode):
295
319
  self._last_snapshot_time: float = 0.0
296
320
  # Minimum seconds between automatic snapshots
297
321
  self.snapshot_interval: float = 30.0
298
- if self.metrics_logger:
299
- for child in self._children:
300
- worker_type = type(child).__name__
301
- role_or_desc = child.description
302
- self.metrics_logger.log_worker_created(
303
- worker_id=child.node_id,
304
- worker_type=worker_type,
305
- role=role_or_desc,
306
- )
322
+ # Shared memory UUID tracking to prevent re-sharing duplicates
323
+ self._shared_memory_uuids: Set[str] = set()
324
+ self._initialize_callbacks(callbacks)
307
325
 
308
326
  # Set up coordinator agent with default system message
309
327
  coord_agent_sys_msg = BaseMessage.make_assistant_message(
@@ -463,20 +481,75 @@ class Workforce(BaseNode):
463
481
  # Helper for propagating pause control to externally supplied agents
464
482
  # ------------------------------------------------------------------
465
483
 
466
- def _get_or_create_shared_context_utility(self) -> "ContextUtility":
484
+ def _initialize_callbacks(
485
+ self, callbacks: Optional[List[WorkforceCallback]]
486
+ ) -> None:
487
+ r"""Validate, register, and prime workforce callbacks."""
488
+ self._callbacks: List[WorkforceCallback] = []
489
+
490
+ if callbacks:
491
+ for cb in callbacks:
492
+ if isinstance(cb, WorkforceCallback):
493
+ self._callbacks.append(cb)
494
+ else:
495
+ raise ValueError(
496
+ "All callbacks must be instances of WorkforceCallback"
497
+ )
498
+
499
+ has_metrics_callback = any(
500
+ isinstance(cb, WorkforceMetrics) for cb in self._callbacks
501
+ )
502
+
503
+ if not has_metrics_callback:
504
+ self._callbacks.append(WorkforceLogger(workforce_id=self.node_id))
505
+ else:
506
+ logger.info(
507
+ "WorkforceMetrics implementation detected. Skipping default "
508
+ "WorkforceLogger addition."
509
+ )
510
+
511
+ for child in self._children:
512
+ self._notify_worker_created(child)
513
+
514
+ def _notify_worker_created(
515
+ self,
516
+ worker_node: BaseNode,
517
+ *,
518
+ worker_type: Optional[str] = None,
519
+ role: Optional[str] = None,
520
+ metadata: Optional[Dict[str, Any]] = None,
521
+ ) -> None:
522
+ r"""Emit a worker-created event to all registered callbacks."""
523
+ event = WorkerCreatedEvent(
524
+ worker_id=worker_node.node_id,
525
+ worker_type=worker_type or type(worker_node).__name__,
526
+ role=role or worker_node.description,
527
+ metadata=metadata,
528
+ )
529
+ for cb in self._callbacks:
530
+ cb.log_worker_created(event)
531
+
532
+ def _get_or_create_shared_context_utility(
533
+ self,
534
+ session_id: Optional[str] = None,
535
+ ) -> "ContextUtility":
467
536
  r"""Get or create the shared context utility for workflow management.
468
537
 
469
538
  This method creates the context utility only when needed, avoiding
470
539
  unnecessary session folder creation during initialization.
471
540
 
541
+ Args:
542
+ session_id (Optional[str]): Custom session ID to use. If None,
543
+ auto-generates a timestamped session ID. (default: :obj:`None`)
544
+
472
545
  Returns:
473
546
  ContextUtility: The shared context utility instance.
474
547
  """
475
548
  if self._shared_context_utility is None:
476
549
  from camel.utils.context_utils import ContextUtility
477
550
 
478
- self._shared_context_utility = (
479
- ContextUtility.get_workforce_shared()
551
+ self._shared_context_utility = ContextUtility.get_workforce_shared(
552
+ session_id=session_id
480
553
  )
481
554
  return self._shared_context_utility
482
555
 
@@ -644,14 +717,29 @@ class Workforce(BaseNode):
644
717
  )
645
718
  return
646
719
 
647
- # Share with coordinator agent
720
+ # Filter out already-shared records to prevent re-sharing
721
+ # This prevents exponential growth of duplicate records
722
+ new_records = []
648
723
  for record in memory_records:
724
+ record_uuid = str(record.uuid)
725
+ if record_uuid not in self._shared_memory_uuids:
726
+ new_records.append(record)
727
+ self._shared_memory_uuids.add(record_uuid)
728
+
729
+ if not new_records:
730
+ logger.debug(
731
+ "No new records to share (all were already shared)"
732
+ )
733
+ return
734
+
735
+ # Share with coordinator agent
736
+ for record in new_records:
649
737
  # Only add records from other agents to avoid duplication
650
738
  if record.agent_id != self.coordinator_agent.agent_id:
651
739
  self.coordinator_agent.memory.write_record(record)
652
740
 
653
741
  # Share with task agent
654
- for record in memory_records:
742
+ for record in new_records:
655
743
  if record.agent_id != self.task_agent.agent_id:
656
744
  self.task_agent.memory.write_record(record)
657
745
 
@@ -663,12 +751,12 @@ class Workforce(BaseNode):
663
751
  ]
664
752
 
665
753
  for worker in single_agent_workers:
666
- for record in memory_records:
754
+ for record in new_records:
667
755
  if record.agent_id != worker.worker.agent_id:
668
756
  worker.worker.memory.write_record(record)
669
757
 
670
758
  logger.info(
671
- f"Shared {len(memory_records)} memory records across "
759
+ f"Shared {len(new_records)} new memory records across "
672
760
  f"{len(single_agent_workers) + 2} agents in workforce "
673
761
  f"{self.node_id}"
674
762
  )
@@ -775,10 +863,12 @@ class Workforce(BaseNode):
775
863
  Union[List[Task], Generator[List[Task], None, None]]:
776
864
  The subtasks or generator of subtasks.
777
865
  """
778
- decompose_prompt = TASK_DECOMPOSE_PROMPT.format(
779
- content=task.content,
780
- child_nodes_info=self._get_child_nodes_info(),
781
- additional_info=task.additional_info,
866
+ decompose_prompt = str(
867
+ TASK_DECOMPOSE_PROMPT.format(
868
+ content=task.content,
869
+ child_nodes_info=self._get_child_nodes_info(),
870
+ additional_info=task.additional_info,
871
+ )
782
872
  )
783
873
  self.task_agent.reset()
784
874
  result = task.decompose(self.task_agent, decompose_prompt)
@@ -905,16 +995,18 @@ class Workforce(BaseNode):
905
995
  ]
906
996
 
907
997
  # Format the unified analysis prompt
908
- analysis_prompt = TASK_ANALYSIS_PROMPT.format(
909
- task_id=task.id,
910
- task_content=task.content,
911
- task_result=task_result,
912
- failure_count=task.failure_count,
913
- task_depth=task.get_depth(),
914
- assigned_worker=task.assigned_worker_id or "unknown",
915
- issue_type=issue_type,
916
- issue_specific_analysis=issue_analysis,
917
- response_format=response_format,
998
+ analysis_prompt = str(
999
+ TASK_ANALYSIS_PROMPT.format(
1000
+ task_id=task.id,
1001
+ task_content=task.content,
1002
+ task_result=task_result,
1003
+ failure_count=task.failure_count,
1004
+ task_depth=task.get_depth(),
1005
+ assigned_worker=task.assigned_worker_id or "unknown",
1006
+ issue_type=issue_type,
1007
+ issue_specific_analysis=issue_analysis,
1008
+ response_format=response_format,
1009
+ )
918
1010
  )
919
1011
 
920
1012
  try:
@@ -1091,19 +1183,23 @@ class Workforce(BaseNode):
1091
1183
  else:
1092
1184
  subtasks = subtasks_result
1093
1185
 
1094
- if self.metrics_logger and subtasks:
1095
- self.metrics_logger.log_task_decomposed(
1186
+ if subtasks:
1187
+ task_decomposed_event = TaskDecomposedEvent(
1096
1188
  parent_task_id=task.id,
1097
1189
  subtask_ids=[st.id for st in subtasks],
1098
1190
  )
1191
+ for cb in self._callbacks:
1192
+ cb.log_task_decomposed(task_decomposed_event)
1099
1193
  for subtask in subtasks:
1100
- self.metrics_logger.log_task_created(
1194
+ task_created_event = TaskCreatedEvent(
1101
1195
  task_id=subtask.id,
1102
1196
  description=subtask.content,
1103
1197
  parent_task_id=task.id,
1104
1198
  task_type=subtask.type,
1105
1199
  metadata=subtask.additional_info,
1106
1200
  )
1201
+ for cb in self._callbacks:
1202
+ cb.log_task_created(task_created_event)
1107
1203
 
1108
1204
  # Insert subtasks at the head of the queue
1109
1205
  self._pending_tasks.extendleft(reversed(subtasks))
@@ -1323,6 +1419,20 @@ class Workforce(BaseNode):
1323
1419
  logger.warning(f"Task {task_id} not found in pending tasks.")
1324
1420
  return False
1325
1421
 
1422
+ def get_main_task_queue(self) -> List[Task]:
1423
+ r"""Get current main task queue for human review.
1424
+ Returns:
1425
+ List[Task]: List of main tasks waiting to be decomposed
1426
+ and executed.
1427
+ """
1428
+ # Return tasks from pending queue that need decomposition
1429
+ return [
1430
+ t
1431
+ for t in self._pending_tasks
1432
+ if t.additional_info
1433
+ and t.additional_info.get('_needs_decomposition')
1434
+ ]
1435
+
1326
1436
  def add_task(
1327
1437
  self,
1328
1438
  content: str,
@@ -1616,13 +1726,15 @@ class Workforce(BaseNode):
1616
1726
  self._task = task
1617
1727
  task.state = TaskState.FAILED
1618
1728
 
1619
- if self.metrics_logger:
1620
- self.metrics_logger.log_task_created(
1621
- task_id=task.id,
1622
- description=task.content,
1623
- task_type=task.type,
1624
- metadata=task.additional_info,
1625
- )
1729
+ task_created_event = TaskCreatedEvent(
1730
+ task_id=task.id,
1731
+ description=task.content,
1732
+ task_type=task.type,
1733
+ metadata=task.additional_info,
1734
+ )
1735
+ for cb in self._callbacks:
1736
+ cb.log_task_created(task_created_event)
1737
+
1626
1738
  # The agent tend to be overconfident on the whole task, so we
1627
1739
  # decompose the task into subtasks first
1628
1740
  subtasks_result = self._decompose_task(task)
@@ -1636,18 +1748,23 @@ class Workforce(BaseNode):
1636
1748
  else:
1637
1749
  # This is a regular list (non-streaming mode)
1638
1750
  subtasks = subtasks_result
1639
- if self.metrics_logger and subtasks:
1640
- self.metrics_logger.log_task_decomposed(
1641
- parent_task_id=task.id, subtask_ids=[st.id for st in subtasks]
1751
+ if subtasks:
1752
+ task_decomposed_event = TaskDecomposedEvent(
1753
+ parent_task_id=task.id,
1754
+ subtask_ids=[st.id for st in subtasks],
1642
1755
  )
1756
+ for cb in self._callbacks:
1757
+ cb.log_task_decomposed(task_decomposed_event)
1643
1758
  for subtask in subtasks:
1644
- self.metrics_logger.log_task_created(
1759
+ task_created_event = TaskCreatedEvent(
1645
1760
  task_id=subtask.id,
1646
1761
  description=subtask.content,
1647
1762
  parent_task_id=task.id,
1648
1763
  task_type=subtask.type,
1649
1764
  metadata=subtask.additional_info,
1650
1765
  )
1766
+ for cb in self._callbacks:
1767
+ cb.log_task_created(task_created_event)
1651
1768
 
1652
1769
  if subtasks:
1653
1770
  # _pending_tasks will contain both undecomposed
@@ -1966,12 +2083,10 @@ class Workforce(BaseNode):
1966
2083
  # If workforce is paused, start the worker's listening task
1967
2084
  self._start_child_node_when_paused(worker_node.start())
1968
2085
 
1969
- if self.metrics_logger:
1970
- self.metrics_logger.log_worker_created(
1971
- worker_id=worker_node.node_id,
1972
- worker_type='SingleAgentWorker',
1973
- role=worker_node.description,
1974
- )
2086
+ self._notify_worker_created(
2087
+ worker_node,
2088
+ worker_type='SingleAgentWorker',
2089
+ )
1975
2090
  return self
1976
2091
 
1977
2092
  def add_role_playing_worker(
@@ -2045,12 +2160,10 @@ class Workforce(BaseNode):
2045
2160
  # If workforce is paused, start the worker's listening task
2046
2161
  self._start_child_node_when_paused(worker_node.start())
2047
2162
 
2048
- if self.metrics_logger:
2049
- self.metrics_logger.log_worker_created(
2050
- worker_id=worker_node.node_id,
2051
- worker_type='RolePlayingWorker',
2052
- role=worker_node.description,
2053
- )
2163
+ self._notify_worker_created(
2164
+ worker_node,
2165
+ worker_type='RolePlayingWorker',
2166
+ )
2054
2167
  return self
2055
2168
 
2056
2169
  def add_workforce(self, workforce: Workforce) -> Workforce:
@@ -2127,21 +2240,36 @@ class Workforce(BaseNode):
2127
2240
  # No active loop, directly set the event
2128
2241
  self._pause_event.set()
2129
2242
 
2130
- if hasattr(self, 'metrics_logger') and self.metrics_logger is not None:
2131
- self.metrics_logger.reset_task_data()
2132
- else:
2133
- self.metrics_logger = WorkforceLogger(workforce_id=self.node_id)
2243
+ for cb in self._callbacks:
2244
+ if isinstance(cb, WorkforceMetrics):
2245
+ cb.reset_task_data()
2134
2246
 
2135
- def save_workflow_memories(self) -> Dict[str, str]:
2247
+ def save_workflow_memories(
2248
+ self,
2249
+ session_id: Optional[str] = None,
2250
+ ) -> Dict[str, str]:
2136
2251
  r"""Save workflow memories for all SingleAgentWorker instances in the
2137
2252
  workforce.
2138
2253
 
2254
+ .. deprecated:: 0.2.80
2255
+ This synchronous method processes workers sequentially, which can
2256
+ be slow for multiple agents. Use
2257
+ :meth:`save_workflow_memories_async`
2258
+ instead for parallel processing and significantly better
2259
+ performance.
2260
+
2139
2261
  This method iterates through all child workers and triggers workflow
2140
2262
  saving for SingleAgentWorker instances using their
2141
2263
  save_workflow_memories()
2142
2264
  method.
2143
2265
  Other worker types are skipped.
2144
2266
 
2267
+ Args:
2268
+ session_id (Optional[str]): Custom session ID to use for saving
2269
+ workflows. If None, auto-generates a timestamped session ID.
2270
+ Useful for organizing workflows by project or context.
2271
+ (default: :obj:`None`)
2272
+
2145
2273
  Returns:
2146
2274
  Dict[str, str]: Dictionary mapping worker node IDs to save results.
2147
2275
  Values are either file paths (success) or error messages
@@ -2150,15 +2278,41 @@ class Workforce(BaseNode):
2150
2278
  Example:
2151
2279
  >>> workforce = Workforce("My Team")
2152
2280
  >>> # ... add workers and process tasks ...
2153
- >>> results = workforce.save_workflows()
2281
+ >>> # save with auto-generated session id
2282
+ >>> results = workforce.save_workflow_memories()
2154
2283
  >>> print(results)
2155
- {'worker_123': '/path/to/data_analyst_workflow_20250122.md',
2284
+ {'worker_123': '/path/to/developer_agent_workflow.md',
2156
2285
  'worker_456': 'error: No conversation context available'}
2286
+ >>> # save with custom project id
2287
+ >>> results = workforce.save_workflow_memories(
2288
+ ... session_id="project_123"
2289
+ ... )
2290
+
2291
+ Note:
2292
+ For better performance with multiple workers, use the async
2293
+ version::
2294
+
2295
+ results = await workforce.save_workflow_memories_async()
2296
+
2297
+ See Also:
2298
+ :meth:`save_workflow_memories_async`: Async version with parallel
2299
+ processing for significantly better performance.
2157
2300
  """
2301
+ import warnings
2302
+
2303
+ warnings.warn(
2304
+ "save_workflow_memories() is slow for multiple workers. "
2305
+ "Consider using save_workflow_memories_async() for parallel "
2306
+ "processing and ~4x faster performance.",
2307
+ DeprecationWarning,
2308
+ stacklevel=2,
2309
+ )
2158
2310
  results = {}
2159
2311
 
2160
2312
  # Get or create shared context utility for this save operation
2161
- shared_context_utility = self._get_or_create_shared_context_utility()
2313
+ shared_context_utility = self._get_or_create_shared_context_utility(
2314
+ session_id=session_id
2315
+ )
2162
2316
 
2163
2317
  for child in self._children:
2164
2318
  if isinstance(child, SingleAgentWorker):
@@ -2191,10 +2345,122 @@ class Workforce(BaseNode):
2191
2345
  logger.info(f"Workflow save completed for {len(results)} workers")
2192
2346
  return results
2193
2347
 
2348
+ async def save_workflow_memories_async(
2349
+ self,
2350
+ session_id: Optional[str] = None,
2351
+ ) -> Dict[str, str]:
2352
+ r"""Asynchronously save workflow memories for all SingleAgentWorker
2353
+ instances in the workforce.
2354
+
2355
+ This is the async version of save_workflow_memories() that parallelizes
2356
+ LLM summarization calls across all workers using asyncio.gather(),
2357
+ significantly reducing total save time.
2358
+
2359
+ This method iterates through all child workers and triggers workflow
2360
+ saving for SingleAgentWorker instances using their
2361
+ save_workflow_memories_async() method in parallel.
2362
+ Other worker types are skipped.
2363
+
2364
+ Args:
2365
+ session_id (Optional[str]): Custom session ID to use for saving
2366
+ workflows. If None, auto-generates a timestamped session ID.
2367
+ Useful for organizing workflows by project or context.
2368
+ (default: :obj:`None`)
2369
+
2370
+ Returns:
2371
+ Dict[str, str]: Dictionary mapping worker node IDs to save results.
2372
+ Values are either file paths (success) or error messages
2373
+ (failure).
2374
+
2375
+ Example:
2376
+ >>> workforce = Workforce("My Team")
2377
+ >>> # ... add workers and process tasks ...
2378
+ >>> # save with parallel summarization (faster)
2379
+ >>> results = await workforce.save_workflow_memories_async()
2380
+ >>> print(results)
2381
+ {'worker_123': '/path/to/developer_agent_workflow.md',
2382
+ 'worker_456': '/path/to/search_agent_workflow.md',
2383
+ 'worker_789': '/path/to/document_agent_workflow.md'}
2384
+ """
2385
+ import asyncio
2386
+
2387
+ results = {}
2388
+
2389
+ # Get or create shared context utility for this save operation
2390
+ shared_context_utility = self._get_or_create_shared_context_utility(
2391
+ session_id=session_id
2392
+ )
2393
+
2394
+ # Prepare tasks for parallel execution
2395
+ async def save_single_worker(
2396
+ child: BaseNode,
2397
+ ) -> tuple[str, str]:
2398
+ """Save workflow for a single worker, then return (node_id,
2399
+ result)."""
2400
+ if isinstance(child, SingleAgentWorker):
2401
+ try:
2402
+ # Set the shared context utility for this operation
2403
+ child._shared_context_utility = shared_context_utility
2404
+ child.worker.set_context_utility(shared_context_utility)
2405
+
2406
+ result = await child.save_workflow_memories_async()
2407
+ if result.get("status") == "success":
2408
+ return (
2409
+ child.node_id,
2410
+ result.get("file_path", "unknown_path"),
2411
+ )
2412
+ else:
2413
+ # Error: check if there's a separate message field,
2414
+ # otherwise use the status itself
2415
+ error_msg = result.get(
2416
+ "message", result.get("status", "Unknown error")
2417
+ )
2418
+ return (child.node_id, f"error: {error_msg}")
2419
+
2420
+ except Exception as e:
2421
+ return (child.node_id, f"error: {e!s}")
2422
+ else:
2423
+ # Skip non-SingleAgentWorker types
2424
+ return (
2425
+ child.node_id,
2426
+ f"skipped: {type(child).__name__} not supported",
2427
+ )
2428
+
2429
+ # Create tasks for all workers
2430
+ tasks = [save_single_worker(child) for child in self._children]
2431
+
2432
+ # Execute all tasks in parallel using asyncio.gather()
2433
+ parallel_results = await asyncio.gather(*tasks, return_exceptions=True)
2434
+
2435
+ # Process results
2436
+ for result in parallel_results:
2437
+ if isinstance(result, Exception):
2438
+ # Handle any unexpected exceptions
2439
+ logger.error(
2440
+ f"Unexpected error during workflow save: {result}"
2441
+ )
2442
+ results["unknown"] = f"error: {result!s}"
2443
+ elif isinstance(result, tuple) and len(result) == 2:
2444
+ # Successfully got (node_id, save_result) tuple
2445
+ node_id, save_result = result
2446
+ results[node_id] = save_result
2447
+ else:
2448
+ # Unexpected result format
2449
+ logger.error(f"Unexpected result format: {result}")
2450
+ results["unknown"] = "error: unexpected result format"
2451
+
2452
+ logger.info(
2453
+ f"Workflow save completed for {len(results)} workers "
2454
+ f"(parallelized)"
2455
+ )
2456
+ return results
2457
+
2194
2458
  def load_workflow_memories(
2195
2459
  self,
2196
- max_files_to_load: int = 3,
2197
2460
  session_id: Optional[str] = None,
2461
+ worker_max_workflows: int = 3,
2462
+ coordinator_max_workflows: int = 5,
2463
+ task_agent_max_workflows: int = 3,
2198
2464
  ) -> Dict[str, bool]:
2199
2465
  r"""Load workflow memories for all SingleAgentWorker instances in the
2200
2466
  workforce.
@@ -2205,11 +2471,15 @@ class Workforce(BaseNode):
2205
2471
  method. Workers match files based on their description names.
2206
2472
 
2207
2473
  Args:
2208
- max_files_to_load (int): Maximum number of workflow files to load
2209
- per worker. (default: :obj:`3`)
2210
2474
  session_id (Optional[str]): Specific workforce session ID to load
2211
2475
  from. If None, searches across all sessions.
2212
2476
  (default: :obj:`None`)
2477
+ worker_max_workflows (int): Maximum number of workflow files to
2478
+ load per worker agent. (default: :obj:`3`)
2479
+ coordinator_max_workflows (int): Maximum number of workflow files
2480
+ to load for the coordinator agent. (default: :obj:`5`)
2481
+ task_agent_max_workflows (int): Maximum number of workflow files
2482
+ to load for the task planning agent. (default: :obj:`3`)
2213
2483
 
2214
2484
  Returns:
2215
2485
  Dict[str, bool]: Dictionary mapping worker node IDs to load
@@ -2221,7 +2491,11 @@ class Workforce(BaseNode):
2221
2491
  >>> workforce.add_single_agent_worker(
2222
2492
  ... "data_analyst", analyst_agent
2223
2493
  ... )
2224
- >>> success_status = workforce.load_workflows()
2494
+ >>> success_status = workforce.load_workflow_memories(
2495
+ ... worker_max_workflows=5,
2496
+ ... coordinator_max_workflows=10,
2497
+ ... task_agent_max_workflows=5
2498
+ ... )
2225
2499
  >>> print(success_status)
2226
2500
  {'worker_123': True} # Successfully loaded workflows for
2227
2501
  # data_analyst
@@ -2239,7 +2513,7 @@ class Workforce(BaseNode):
2239
2513
  # For loading, don't set shared context utility
2240
2514
  # Let each worker search across existing sessions
2241
2515
  success = child.load_workflow_memories(
2242
- max_files_to_load=max_files_to_load,
2516
+ max_workflows=worker_max_workflows,
2243
2517
  session_id=session_id,
2244
2518
  )
2245
2519
  results[child.node_id] = success
@@ -2254,13 +2528,18 @@ class Workforce(BaseNode):
2254
2528
  results[child.node_id] = False
2255
2529
 
2256
2530
  # Load aggregated workflow summaries for coordinator and task agents
2257
- self._load_management_agent_workflows(max_files_to_load, session_id)
2531
+ self._load_management_agent_workflows(
2532
+ coordinator_max_workflows, task_agent_max_workflows, session_id
2533
+ )
2258
2534
 
2259
2535
  logger.info(f"Workflow load completed for {len(results)} workers")
2260
2536
  return results
2261
2537
 
2262
2538
  def _load_management_agent_workflows(
2263
- self, max_files_to_load: int, session_id: Optional[str] = None
2539
+ self,
2540
+ coordinator_max_workflows: int,
2541
+ task_agent_max_workflows: int,
2542
+ session_id: Optional[str] = None,
2264
2543
  ) -> None:
2265
2544
  r"""Load workflow summaries for coordinator and task planning agents.
2266
2545
 
@@ -2271,7 +2550,10 @@ class Workforce(BaseNode):
2271
2550
  successful strategies
2272
2551
 
2273
2552
  Args:
2274
- max_files_to_load (int): Maximum number of workflow files to load.
2553
+ coordinator_max_workflows (int): Maximum number of workflow files
2554
+ to load for the coordinator agent.
2555
+ task_agent_max_workflows (int): Maximum number of workflow files
2556
+ to load for the task planning agent.
2275
2557
  session_id (Optional[str]): Specific session ID to load from.
2276
2558
  If None, searches across all sessions.
2277
2559
  """
@@ -2309,9 +2591,9 @@ class Workforce(BaseNode):
2309
2591
  key=lambda x: os.path.getmtime(x), reverse=True
2310
2592
  )
2311
2593
 
2312
- # Load workflows for coordinator agent (up to 5 most recent)
2594
+ # Load workflows for coordinator agent
2313
2595
  coordinator_loaded = 0
2314
- for file_path in workflow_files[:max_files_to_load]:
2596
+ for file_path in workflow_files[:coordinator_max_workflows]:
2315
2597
  try:
2316
2598
  filename = os.path.basename(file_path).replace('.md', '')
2317
2599
  session_dir = os.path.dirname(file_path)
@@ -2332,9 +2614,9 @@ class Workforce(BaseNode):
2332
2614
  f"Failed to load coordinator workflow {file_path}: {e}"
2333
2615
  )
2334
2616
 
2335
- # Load workflows for task agent (up to 3 most recent)
2617
+ # Load workflows for task agent
2336
2618
  task_agent_loaded = 0
2337
- for file_path in workflow_files[:max_files_to_load]:
2619
+ for file_path in workflow_files[:task_agent_max_workflows]:
2338
2620
  try:
2339
2621
  filename = os.path.basename(file_path).replace('.md', '')
2340
2622
  session_dir = os.path.dirname(file_path)
@@ -2807,10 +3089,11 @@ class Workforce(BaseNode):
2807
3089
 
2808
3090
  task.assigned_worker_id = assignee_id
2809
3091
 
2810
- if self.metrics_logger:
2811
- self.metrics_logger.log_task_started(
2812
- task_id=task.id, worker_id=assignee_id
2813
- )
3092
+ task_started_event = TaskStartedEvent(
3093
+ task_id=task.id, worker_id=assignee_id
3094
+ )
3095
+ for cb in self._callbacks:
3096
+ cb.log_task_started(task_started_event)
2814
3097
 
2815
3098
  try:
2816
3099
  await self._channel.post_task(task, self.node_id, assignee_id)
@@ -2842,10 +3125,12 @@ class Workforce(BaseNode):
2842
3125
  Returns:
2843
3126
  Worker: The created worker node.
2844
3127
  """
2845
- prompt = CREATE_NODE_PROMPT.format(
2846
- content=task.content,
2847
- child_nodes_info=self._get_child_nodes_info(),
2848
- additional_info=task.additional_info,
3128
+ prompt = str(
3129
+ CREATE_NODE_PROMPT.format(
3130
+ content=task.content,
3131
+ child_nodes_info=self._get_child_nodes_info(),
3132
+ additional_info=task.additional_info,
3133
+ )
2849
3134
  )
2850
3135
  # Check if we should use structured handler
2851
3136
  if self.use_structured_output_handler:
@@ -2954,13 +3239,13 @@ class Workforce(BaseNode):
2954
3239
  print(f"{Fore.CYAN}{new_node} created.{Fore.RESET}")
2955
3240
 
2956
3241
  self._children.append(new_node)
2957
- if self.metrics_logger:
2958
- self.metrics_logger.log_worker_created(
2959
- worker_id=new_node.node_id,
2960
- worker_type='SingleAgentWorker',
2961
- role=new_node_conf.role,
2962
- metadata={'description': new_node_conf.description},
2963
- )
3242
+
3243
+ self._notify_worker_created(
3244
+ new_node,
3245
+ worker_type='SingleAgentWorker',
3246
+ role=new_node_conf.role,
3247
+ metadata={'description': new_node_conf.description},
3248
+ )
2964
3249
  self._child_listening_tasks.append(
2965
3250
  asyncio.create_task(new_node.start())
2966
3251
  )
@@ -3061,22 +3346,24 @@ class Workforce(BaseNode):
3061
3346
  batch_result = await self._find_assignee(tasks_to_assign)
3062
3347
  logger.debug(
3063
3348
  f"Coordinator returned assignments:\n"
3064
- f"{json.dumps(batch_result.dict(), indent=2)}"
3349
+ f"{json.dumps(batch_result.model_dump(), indent=2)}"
3065
3350
  )
3066
3351
  for assignment in batch_result.assignments:
3067
3352
  self._task_dependencies[assignment.task_id] = (
3068
3353
  assignment.dependencies
3069
3354
  )
3070
3355
  self._assignees[assignment.task_id] = assignment.assignee_id
3071
- if self.metrics_logger:
3356
+
3357
+ task_assigned_event = TaskAssignedEvent(
3358
+ task_id=assignment.task_id,
3359
+ worker_id=assignment.assignee_id,
3360
+ dependencies=assignment.dependencies,
3361
+ queue_time_seconds=None,
3362
+ )
3363
+ for cb in self._callbacks:
3072
3364
  # queue_time_seconds can be derived by logger if task
3073
3365
  # creation time is logged
3074
- self.metrics_logger.log_task_assigned(
3075
- task_id=assignment.task_id,
3076
- worker_id=assignment.assignee_id,
3077
- dependencies=assignment.dependencies,
3078
- queue_time_seconds=None,
3079
- )
3366
+ cb.log_task_assigned(task_assigned_event)
3080
3367
 
3081
3368
  # Step 2: Iterate through all pending tasks and post those that are
3082
3369
  # ready
@@ -3193,21 +3480,19 @@ class Workforce(BaseNode):
3193
3480
  )
3194
3481
 
3195
3482
  # Log the failure to metrics
3196
- if self.metrics_logger:
3197
- self.metrics_logger.log_task_failed(
3198
- task_id=task.id,
3199
- worker_id=task.assigned_worker_id
3200
- or "unknown",
3201
- error_message=task.result,
3202
- metadata={
3203
- 'failure_reason': (
3204
- 'dependency_failure'
3205
- ),
3206
- 'failed_dependencies': (
3207
- permanently_failed_deps
3208
- ),
3209
- },
3210
- )
3483
+ task_failed_event = TaskFailedEvent(
3484
+ task_id=task.id,
3485
+ worker_id=task.assigned_worker_id or "unknown",
3486
+ error_message=task.result,
3487
+ metadata={
3488
+ 'failure_reason': 'dependency_failure',
3489
+ 'failed_dependencies': (
3490
+ permanently_failed_deps
3491
+ ),
3492
+ },
3493
+ )
3494
+ for cb in self._callbacks:
3495
+ cb.log_task_failed(task_failed_event)
3211
3496
 
3212
3497
  self._completed_tasks.append(task)
3213
3498
  self._cleanup_task_tracking(task.id)
@@ -3259,17 +3544,18 @@ class Workforce(BaseNode):
3259
3544
  f"{failure_reason}{Fore.RESET}"
3260
3545
  )
3261
3546
 
3262
- if self.metrics_logger:
3263
- self.metrics_logger.log_task_failed(
3264
- task_id=task.id,
3265
- worker_id=worker_id,
3266
- error_message=detailed_error,
3267
- metadata={
3268
- 'failure_count': task.failure_count,
3269
- 'task_content': task.content,
3270
- 'result_length': len(task.result) if task.result else 0,
3271
- },
3272
- )
3547
+ task_failed_event = TaskFailedEvent(
3548
+ task_id=task.id,
3549
+ worker_id=worker_id,
3550
+ error_message=detailed_error,
3551
+ metadata={
3552
+ 'failure_count': task.failure_count,
3553
+ 'task_content': task.content,
3554
+ 'result_length': len(task.result) if task.result else 0,
3555
+ },
3556
+ )
3557
+ for cb in self._callbacks:
3558
+ cb.log_task_failed(task_failed_event)
3273
3559
 
3274
3560
  # Check for immediate halt conditions
3275
3561
  if task.failure_count >= MAX_TASK_RETRIES:
@@ -3360,61 +3646,60 @@ class Workforce(BaseNode):
3360
3646
  return False
3361
3647
 
3362
3648
  async def _handle_completed_task(self, task: Task) -> None:
3363
- if self.metrics_logger:
3364
- worker_id = task.assigned_worker_id or "unknown"
3365
- processing_time_seconds = None
3366
- token_usage = None
3367
-
3368
- # Get processing time from task start time or additional info
3369
- if task.id in self._task_start_times:
3370
- processing_time_seconds = (
3371
- time.time() - self._task_start_times[task.id]
3372
- )
3373
- self._cleanup_task_tracking(task.id)
3374
- elif (
3375
- task.additional_info is not None
3376
- and 'processing_time_seconds' in task.additional_info
3377
- ):
3378
- processing_time_seconds = task.additional_info[
3379
- 'processing_time_seconds'
3380
- ]
3649
+ worker_id = task.assigned_worker_id or "unknown"
3650
+ processing_time_seconds = None
3651
+ token_usage = None
3381
3652
 
3382
- # Get token usage from task additional info (preferred - actual
3383
- # usage)
3384
- if (
3385
- task.additional_info is not None
3386
- and 'token_usage' in task.additional_info
3387
- ):
3388
- token_usage = task.additional_info['token_usage']
3389
- else:
3390
- # Fallback: Try to get token usage from SingleAgentWorker
3391
- # memory
3392
- assignee_node = next(
3393
- (
3394
- child
3395
- for child in self._children
3396
- if child.node_id == worker_id
3397
- ),
3398
- None,
3399
- )
3400
- if isinstance(assignee_node, SingleAgentWorker):
3401
- try:
3402
- _, total_tokens = (
3403
- assignee_node.worker.memory.get_context()
3404
- )
3405
- token_usage = {'total_tokens': total_tokens}
3406
- except Exception:
3407
- token_usage = None
3653
+ # Get processing time from task start time or additional info
3654
+ if task.id in self._task_start_times:
3655
+ processing_time_seconds = (
3656
+ time.time() - self._task_start_times[task.id]
3657
+ )
3658
+ self._cleanup_task_tracking(task.id)
3659
+ elif (
3660
+ task.additional_info is not None
3661
+ and 'processing_time_seconds' in task.additional_info
3662
+ ):
3663
+ processing_time_seconds = task.additional_info[
3664
+ 'processing_time_seconds'
3665
+ ]
3408
3666
 
3409
- # Log the completed task
3410
- self.metrics_logger.log_task_completed(
3411
- task_id=task.id,
3412
- worker_id=worker_id,
3413
- result_summary=task.result if task.result else "Completed",
3414
- processing_time_seconds=processing_time_seconds,
3415
- token_usage=token_usage,
3416
- metadata={'current_state': task.state.value},
3667
+ # Get token usage from task additional info (preferred - actual
3668
+ # usage)
3669
+ if (
3670
+ task.additional_info is not None
3671
+ and 'token_usage' in task.additional_info
3672
+ ):
3673
+ token_usage = task.additional_info['token_usage']
3674
+ else:
3675
+ # Fallback: Try to get token usage from SingleAgentWorker
3676
+ # memory
3677
+ assignee_node = next(
3678
+ (
3679
+ child
3680
+ for child in self._children
3681
+ if child.node_id == worker_id
3682
+ ),
3683
+ None,
3417
3684
  )
3685
+ if isinstance(assignee_node, SingleAgentWorker):
3686
+ try:
3687
+ _, total_tokens = assignee_node.worker.memory.get_context()
3688
+ token_usage = {'total_tokens': total_tokens}
3689
+ except Exception:
3690
+ token_usage = None
3691
+
3692
+ # Log the completed task
3693
+ task_completed_event = TaskCompletedEvent(
3694
+ task_id=task.id,
3695
+ worker_id=worker_id,
3696
+ result_summary=task.result if task.result else "Completed",
3697
+ processing_time_seconds=processing_time_seconds,
3698
+ token_usage=token_usage,
3699
+ metadata={'current_state': task.state.value},
3700
+ )
3701
+ for cb in self._callbacks:
3702
+ cb.log_task_completed(task_completed_event)
3418
3703
 
3419
3704
  # Find and remove the completed task from pending tasks
3420
3705
  tasks_list = list(self._pending_tasks)
@@ -3534,15 +3819,23 @@ class Workforce(BaseNode):
3534
3819
  r"""Returns an ASCII tree representation of the task hierarchy and
3535
3820
  worker status.
3536
3821
  """
3537
- if not self.metrics_logger:
3538
- return "Logger not initialized."
3539
- return self.metrics_logger.get_ascii_tree_representation()
3822
+ metrics_cb: List[WorkforceMetrics] = [
3823
+ cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
3824
+ ]
3825
+ if len(metrics_cb) == 0:
3826
+ return "Metrics Callback not initialized."
3827
+ else:
3828
+ return metrics_cb[0].get_ascii_tree_representation()
3540
3829
 
3541
3830
  def get_workforce_kpis(self) -> Dict[str, Any]:
3542
3831
  r"""Returns a dictionary of key performance indicators."""
3543
- if not self.metrics_logger:
3544
- return {"error": "Logger not initialized."}
3545
- return self.metrics_logger.get_kpis()
3832
+ metrics_cb: List[WorkforceMetrics] = [
3833
+ cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
3834
+ ]
3835
+ if len(metrics_cb) == 0:
3836
+ return {"error": "Metrics Callback not initialized."}
3837
+ else:
3838
+ return metrics_cb[0].get_kpis()
3546
3839
 
3547
3840
  def dump_workforce_logs(self, file_path: str) -> None:
3548
3841
  r"""Dumps all collected logs to a JSON file.
@@ -3550,10 +3843,13 @@ class Workforce(BaseNode):
3550
3843
  Args:
3551
3844
  file_path (str): The path to the JSON file.
3552
3845
  """
3553
- if not self.metrics_logger:
3846
+ metrics_cb: List[WorkforceMetrics] = [
3847
+ cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
3848
+ ]
3849
+ if len(metrics_cb) == 0:
3554
3850
  print("Logger not initialized. Cannot dump logs.")
3555
3851
  return
3556
- self.metrics_logger.dump_to_json(file_path)
3852
+ metrics_cb[0].dump_to_json(file_path)
3557
3853
  # Use logger.info or print, consistent with existing style
3558
3854
  logger.info(f"Workforce logs dumped to {file_path}")
3559
3855
 
@@ -3634,8 +3930,7 @@ class Workforce(BaseNode):
3634
3930
  # Reset in-flight counter to prevent hanging
3635
3931
  self._in_flight_tasks = 0
3636
3932
 
3637
- # Check if there are any pending tasks (including those needing
3638
- # decomposition)
3933
+ # Check if there are any main pending tasks after filtering
3639
3934
  if self._pending_tasks:
3640
3935
  # Check if the first pending task needs decomposition
3641
3936
  next_task = self._pending_tasks[0]
@@ -3844,6 +4139,20 @@ class Workforce(BaseNode):
3844
4139
  )
3845
4140
  if not halt:
3846
4141
  continue
4142
+
4143
+ # Do not halt if we have main tasks in queue
4144
+ if len(self.get_main_task_queue()) > 0:
4145
+ print(
4146
+ f"{Fore.RED}Task {returned_task.id} has "
4147
+ f"failed for {MAX_TASK_RETRIES} times "
4148
+ f"after insufficient results, skipping "
4149
+ f"that task. Final error: "
4150
+ f"{returned_task.result or 'Unknown err'}"
4151
+ f"{Fore.RESET}"
4152
+ )
4153
+ self._skip_requested = True
4154
+ continue
4155
+
3847
4156
  print(
3848
4157
  f"{Fore.RED}Task {returned_task.id} has "
3849
4158
  f"failed for {MAX_TASK_RETRIES} times after "
@@ -3949,6 +4258,19 @@ class Workforce(BaseNode):
3949
4258
  halt = await self._handle_failed_task(returned_task)
3950
4259
  if not halt:
3951
4260
  continue
4261
+
4262
+ # Do not halt if we have main tasks in queue
4263
+ if len(self.get_main_task_queue()) > 0:
4264
+ print(
4265
+ f"{Fore.RED}Task {returned_task.id} has "
4266
+ f"failed for {MAX_TASK_RETRIES} times, "
4267
+ f"skipping that task. Final error: "
4268
+ f"{returned_task.result or 'Unknown error'}"
4269
+ f"{Fore.RESET}"
4270
+ )
4271
+ self._skip_requested = True
4272
+ continue
4273
+
3952
4274
  print(
3953
4275
  f"{Fore.RED}Task {returned_task.id} has failed "
3954
4276
  f"for {MAX_TASK_RETRIES} times, halting "
@@ -4001,6 +4323,9 @@ class Workforce(BaseNode):
4001
4323
  elif not self._pending_tasks and self._in_flight_tasks == 0:
4002
4324
  self._state = WorkforceState.IDLE
4003
4325
  logger.info("All tasks completed.")
4326
+ all_tasks_completed_event = AllTasksCompletedEvent()
4327
+ for cb in self._callbacks:
4328
+ cb.log_all_tasks_completed(all_tasks_completed_event)
4004
4329
 
4005
4330
  # shut down the whole workforce tree
4006
4331
  self.stop()