camel-ai 0.2.71a4__py3-none-any.whl → 0.2.71a5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +1482 -134
- camel/agents/repo_agent.py +2 -1
- camel/benchmarks/browsecomp.py +6 -6
- camel/logger.py +1 -1
- camel/messages/base.py +12 -1
- camel/models/azure_openai_model.py +96 -7
- camel/models/base_model.py +68 -10
- camel/models/deepseek_model.py +5 -0
- camel/models/gemini_model.py +5 -0
- camel/models/litellm_model.py +48 -16
- camel/models/model_manager.py +24 -6
- camel/models/openai_compatible_model.py +109 -5
- camel/models/openai_model.py +117 -8
- camel/societies/workforce/prompts.py +68 -5
- camel/societies/workforce/role_playing_worker.py +1 -0
- camel/societies/workforce/single_agent_worker.py +1 -0
- camel/societies/workforce/utils.py +67 -2
- camel/societies/workforce/workforce.py +270 -36
- camel/societies/workforce/workforce_logger.py +0 -8
- camel/tasks/task.py +2 -0
- camel/toolkits/__init__.py +2 -0
- camel/toolkits/file_write_toolkit.py +526 -121
- camel/toolkits/message_agent_toolkit.py +608 -0
- {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a5.dist-info}/METADATA +6 -4
- {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a5.dist-info}/RECORD +28 -27
- {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a5.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.71a4.dist-info → camel_ai-0.2.71a5.dist-info}/licenses/LICENSE +0 -0
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
16
|
import asyncio
|
|
17
|
+
import concurrent.futures
|
|
17
18
|
import json
|
|
18
19
|
import time
|
|
19
20
|
import uuid
|
|
@@ -28,6 +29,7 @@ from typing import (
|
|
|
28
29
|
Optional,
|
|
29
30
|
Set,
|
|
30
31
|
Tuple,
|
|
32
|
+
Union,
|
|
31
33
|
)
|
|
32
34
|
|
|
33
35
|
from colorama import Fore
|
|
@@ -40,12 +42,16 @@ from camel.societies.workforce.base import BaseNode
|
|
|
40
42
|
from camel.societies.workforce.prompts import (
|
|
41
43
|
ASSIGN_TASK_PROMPT,
|
|
42
44
|
CREATE_NODE_PROMPT,
|
|
45
|
+
FAILURE_ANALYSIS_PROMPT,
|
|
43
46
|
WF_TASK_DECOMPOSE_PROMPT,
|
|
44
47
|
)
|
|
45
48
|
from camel.societies.workforce.role_playing_worker import RolePlayingWorker
|
|
46
49
|
from camel.societies.workforce.single_agent_worker import SingleAgentWorker
|
|
47
50
|
from camel.societies.workforce.task_channel import TaskChannel
|
|
48
51
|
from camel.societies.workforce.utils import (
|
|
52
|
+
FailureContext,
|
|
53
|
+
RecoveryDecision,
|
|
54
|
+
RecoveryStrategy,
|
|
49
55
|
TaskAssignment,
|
|
50
56
|
TaskAssignResult,
|
|
51
57
|
WorkerConf,
|
|
@@ -214,7 +220,9 @@ class Workforce(BaseNode):
|
|
|
214
220
|
share_memory: bool = False,
|
|
215
221
|
) -> None:
|
|
216
222
|
super().__init__(description)
|
|
217
|
-
self._child_listening_tasks: Deque[
|
|
223
|
+
self._child_listening_tasks: Deque[
|
|
224
|
+
Union[asyncio.Task, concurrent.futures.Future]
|
|
225
|
+
] = deque()
|
|
218
226
|
self._children = children or []
|
|
219
227
|
self.new_worker_agent = new_worker_agent
|
|
220
228
|
self.graceful_shutdown_timeout = graceful_shutdown_timeout
|
|
@@ -611,6 +619,31 @@ class Workforce(BaseNode):
|
|
|
611
619
|
# Remove original task dependencies as it's now decomposed
|
|
612
620
|
del self._task_dependencies[original_task_id]
|
|
613
621
|
|
|
622
|
+
def _increment_in_flight_tasks(self, task_id: str) -> None:
    r"""Raise the in-flight task counter by one and log the new value.

    Args:
        task_id (str): ID of the task being counted; it is only used in
            the debug log message.
    """
    self._in_flight_tasks = self._in_flight_tasks + 1
    current_count = self._in_flight_tasks
    logger.debug(
        f"Incremented in-flight tasks for {task_id}. "
        f"Count: {current_count}"
    )
|
|
629
|
+
|
|
630
|
+
def _decrement_in_flight_tasks(
    self, task_id: str, context: str = ""
) -> None:
    r"""Lower the in-flight task counter by one when it is positive.

    If the counter is not positive it is left untouched and the attempt
    is only logged at debug level.

    Args:
        task_id (str): ID of the task being released; it is only used in
            the debug log message.
        context (str, optional): Short note describing why the counter is
            being decremented, included in the log message.
            (default: :obj:`""`)
    """
    if self._in_flight_tasks <= 0:
        # Nothing left to release: record the attempt and keep the value.
        logger.debug(
            f"Attempted to decrement in-flight tasks for {task_id} "
            f"({context}) but counter is already 0. "
            f"Counter: {self._in_flight_tasks}"
        )
        return

    self._in_flight_tasks = self._in_flight_tasks - 1
    logger.debug(
        f"Decremented in-flight tasks for {task_id} ({context}). "
        f"Count: {self._in_flight_tasks}"
    )
|
|
646
|
+
|
|
614
647
|
def _cleanup_task_tracking(self, task_id: str) -> None:
|
|
615
648
|
r"""Clean up tracking data for a task to prevent memory leaks.
|
|
616
649
|
|
|
@@ -634,9 +667,6 @@ class Workforce(BaseNode):
|
|
|
634
667
|
)
|
|
635
668
|
self.task_agent.reset()
|
|
636
669
|
subtasks = task.decompose(self.task_agent, decompose_prompt)
|
|
637
|
-
task.subtasks = subtasks
|
|
638
|
-
for subtask in subtasks:
|
|
639
|
-
subtask.parent = task
|
|
640
670
|
|
|
641
671
|
# Update dependency tracking for decomposed task
|
|
642
672
|
if subtasks:
|
|
@@ -644,6 +674,79 @@ class Workforce(BaseNode):
|
|
|
644
674
|
|
|
645
675
|
return subtasks
|
|
646
676
|
|
|
677
|
+
def _analyze_failure(
    self, task: Task, error_message: str
) -> RecoveryDecision:
    r"""Analyze a task failure and decide on the best recovery strategy.

    Errors whose message contains a network/connection keyword are
    answered with a RETRY decision without consulting the task agent.
    Otherwise the task agent is asked for a structured
    :obj:`RecoveryDecision`. Any error while asking, or a response that
    does not carry a usable decision, falls back to RETRY so that the
    caller always receives a :obj:`RecoveryDecision` instance.

    Args:
        task (Task): The failed task
        error_message (str): The error message from the failure

    Returns:
        RecoveryDecision: The decided recovery strategy with reasoning
    """
    # First, do a quick smart analysis based on error patterns
    transient_keywords = (
        'connection',
        'network',
        'server disconnected',
        'timeout',
        'apiconnectionerror',
    )
    error_msg_lower = error_message.lower()
    if any(keyword in error_msg_lower for keyword in transient_keywords):
        return RecoveryDecision(
            strategy=RecoveryStrategy.RETRY,
            reasoning="Network/connection error detected, retrying task",
            modified_task_content=None,
        )

    # Create failure context
    failure_context = FailureContext(
        task_id=task.id,
        task_content=task.content,
        failure_count=task.failure_count,
        error_message=error_message,
        worker_id=task.assigned_worker_id,
        task_depth=task.get_depth(),
        additional_info=str(task.additional_info)
        if task.additional_info
        else None,
    )

    # Format the analysis prompt
    analysis_prompt = FAILURE_ANALYSIS_PROMPT.format(
        task_id=failure_context.task_id,
        task_content=failure_context.task_content,
        failure_count=failure_context.failure_count,
        error_message=failure_context.error_message,
        worker_id=failure_context.worker_id or "unknown",
        task_depth=failure_context.task_depth,
        additional_info=failure_context.additional_info or "None",
    )

    try:
        # Get decision from task agent
        self.task_agent.reset()
        response = self.task_agent.step(
            analysis_prompt, response_format=RecoveryDecision
        )
        parsed = response.msg.parsed

        # The caller dereferences `.strategy` / `.reasoning` on the result,
        # so never hand back None or a raw mapping.
        if isinstance(parsed, RecoveryDecision):
            return parsed
        if isinstance(parsed, dict):
            return RecoveryDecision(**parsed)
        raise ValueError(
            f"unexpected analysis result of type {type(parsed).__name__}"
        )

    except Exception as e:
        logger.warning(
            f"Error during failure analysis: {e}, defaulting to RETRY"
        )
        return RecoveryDecision(
            strategy=RecoveryStrategy.RETRY,
            reasoning=f"Analysis failed due to error: {e!s}, "
            f"defaulting to retry",
            modified_task_content=None,
        )
|
|
749
|
+
|
|
647
750
|
# Human intervention methods
|
|
648
751
|
async def _async_pause(self) -> None:
|
|
649
752
|
r"""Async implementation of pause to run on the event loop."""
|
|
@@ -1029,9 +1132,6 @@ class Workforce(BaseNode):
|
|
|
1029
1132
|
needed
|
|
1030
1133
|
>>> print(result.result)
|
|
1031
1134
|
"""
|
|
1032
|
-
import asyncio
|
|
1033
|
-
import concurrent.futures
|
|
1034
|
-
|
|
1035
1135
|
# Check if we're already in an event loop
|
|
1036
1136
|
try:
|
|
1037
1137
|
current_loop = asyncio.get_running_loop()
|
|
@@ -1206,7 +1306,39 @@ class Workforce(BaseNode):
|
|
|
1206
1306
|
|
|
1207
1307
|
return self._task
|
|
1208
1308
|
|
|
1209
|
-
|
|
1309
|
+
def _start_child_node_when_paused(
    self, start_coroutine: Coroutine
) -> None:
    r"""Helper to start a child node when workforce is paused.

    The coroutine is scheduled on ``self._loop`` and the resulting handle
    is appended to ``self._child_listening_tasks``. If the workforce is
    not paused, or there is no usable loop, the coroutine is closed
    instead of being dropped, so Python does not emit a
    "coroutine ... was never awaited" warning.

    Args:
        start_coroutine: The coroutine to start (e.g., worker_node.start())
    """
    can_schedule = (
        self._state == WorkforceState.PAUSED
        and hasattr(self, '_child_listening_tasks')
        and self._loop
        and not self._loop.is_closed()
    )
    if not can_schedule:
        # Callers create the coroutine eagerly; release it when unused.
        start_coroutine.close()
        return

    # Check if we're in the same thread as the loop
    try:
        current_loop = asyncio.get_running_loop()
    except RuntimeError:
        # No running loop in current thread
        current_loop = None

    child_task: Union[asyncio.Task, concurrent.futures.Future]
    if current_loop is self._loop:
        # Same loop context - use create_task
        child_task = self._loop.create_task(start_coroutine)
    else:
        # Different loop or no loop - use thread-safe approach
        child_task = asyncio.run_coroutine_threadsafe(
            start_coroutine, self._loop
        )
    self._child_listening_tasks.append(child_task)
|
|
1341
|
+
|
|
1210
1342
|
def add_single_agent_worker(
|
|
1211
1343
|
self,
|
|
1212
1344
|
description: str,
|
|
@@ -1214,6 +1346,7 @@ class Workforce(BaseNode):
|
|
|
1214
1346
|
pool_max_size: int = DEFAULT_WORKER_POOL_SIZE,
|
|
1215
1347
|
) -> Workforce:
|
|
1216
1348
|
r"""Add a worker node to the workforce that uses a single agent.
|
|
1349
|
+
Can be called when workforce is paused to dynamically add workers.
|
|
1217
1350
|
|
|
1218
1351
|
Args:
|
|
1219
1352
|
description (str): Description of the worker node.
|
|
@@ -1223,7 +1356,15 @@ class Workforce(BaseNode):
|
|
|
1223
1356
|
|
|
1224
1357
|
Returns:
|
|
1225
1358
|
Workforce: The workforce node itself.
|
|
1359
|
+
|
|
1360
|
+
Raises:
|
|
1361
|
+
RuntimeError: If called while workforce is running (not paused).
|
|
1226
1362
|
"""
|
|
1363
|
+
if self._state == WorkforceState.RUNNING:
|
|
1364
|
+
raise RuntimeError(
|
|
1365
|
+
"Cannot add workers while workforce is running. "
|
|
1366
|
+
"Pause the workforce first."
|
|
1367
|
+
)
|
|
1227
1368
|
# Ensure the worker agent shares this workforce's pause control
|
|
1228
1369
|
self._attach_pause_event_to_agent(worker)
|
|
1229
1370
|
|
|
@@ -1233,6 +1374,14 @@ class Workforce(BaseNode):
|
|
|
1233
1374
|
pool_max_size=pool_max_size,
|
|
1234
1375
|
)
|
|
1235
1376
|
self._children.append(worker_node)
|
|
1377
|
+
|
|
1378
|
+
# If we have a channel set up, set it for the new worker
|
|
1379
|
+
if hasattr(self, '_channel') and self._channel is not None:
|
|
1380
|
+
worker_node.set_channel(self._channel)
|
|
1381
|
+
|
|
1382
|
+
# If workforce is paused, start the worker's listening task
|
|
1383
|
+
self._start_child_node_when_paused(worker_node.start())
|
|
1384
|
+
|
|
1236
1385
|
if self.metrics_logger:
|
|
1237
1386
|
self.metrics_logger.log_worker_created(
|
|
1238
1387
|
worker_id=worker_node.node_id,
|
|
@@ -1241,7 +1390,6 @@ class Workforce(BaseNode):
|
|
|
1241
1390
|
)
|
|
1242
1391
|
return self
|
|
1243
1392
|
|
|
1244
|
-
@check_if_running(False)
|
|
1245
1393
|
def add_role_playing_worker(
|
|
1246
1394
|
self,
|
|
1247
1395
|
description: str,
|
|
@@ -1253,6 +1401,7 @@ class Workforce(BaseNode):
|
|
|
1253
1401
|
chat_turn_limit: int = 3,
|
|
1254
1402
|
) -> Workforce:
|
|
1255
1403
|
r"""Add a worker node to the workforce that uses `RolePlaying` system.
|
|
1404
|
+
Can be called when workforce is paused to dynamically add workers.
|
|
1256
1405
|
|
|
1257
1406
|
Args:
|
|
1258
1407
|
description (str): Description of the node.
|
|
@@ -1272,7 +1421,15 @@ class Workforce(BaseNode):
|
|
|
1272
1421
|
|
|
1273
1422
|
Returns:
|
|
1274
1423
|
Workforce: The workforce node itself.
|
|
1424
|
+
|
|
1425
|
+
Raises:
|
|
1426
|
+
RuntimeError: If called while workforce is running (not paused).
|
|
1275
1427
|
"""
|
|
1428
|
+
if self._state == WorkforceState.RUNNING:
|
|
1429
|
+
raise RuntimeError(
|
|
1430
|
+
"Cannot add workers while workforce is running. "
|
|
1431
|
+
"Pause the workforce first."
|
|
1432
|
+
)
|
|
1276
1433
|
# Ensure provided kwargs carry pause_event so that internally created
|
|
1277
1434
|
# ChatAgents (assistant/user/summarizer) inherit it.
|
|
1278
1435
|
assistant_agent_kwargs = self._ensure_pause_event_in_kwargs(
|
|
@@ -1295,6 +1452,14 @@ class Workforce(BaseNode):
|
|
|
1295
1452
|
chat_turn_limit=chat_turn_limit,
|
|
1296
1453
|
)
|
|
1297
1454
|
self._children.append(worker_node)
|
|
1455
|
+
|
|
1456
|
+
# If we have a channel set up, set it for the new worker
|
|
1457
|
+
if hasattr(self, '_channel') and self._channel is not None:
|
|
1458
|
+
worker_node.set_channel(self._channel)
|
|
1459
|
+
|
|
1460
|
+
# If workforce is paused, start the worker's listening task
|
|
1461
|
+
self._start_child_node_when_paused(worker_node.start())
|
|
1462
|
+
|
|
1298
1463
|
if self.metrics_logger:
|
|
1299
1464
|
self.metrics_logger.log_worker_created(
|
|
1300
1465
|
worker_id=worker_node.node_id,
|
|
@@ -1303,20 +1468,35 @@ class Workforce(BaseNode):
|
|
|
1303
1468
|
)
|
|
1304
1469
|
return self
|
|
1305
1470
|
|
|
1306
|
-
@check_if_running(False)
|
|
1307
1471
|
def add_workforce(self, workforce: Workforce) -> Workforce:
|
|
1308
1472
|
r"""Add a workforce node to the workforce.
|
|
1473
|
+
Can be called when workforce is paused to dynamically add workers.
|
|
1309
1474
|
|
|
1310
1475
|
Args:
|
|
1311
1476
|
workforce (Workforce): The workforce node to be added.
|
|
1312
1477
|
|
|
1313
1478
|
Returns:
|
|
1314
1479
|
Workforce: The workforce node itself.
|
|
1480
|
+
|
|
1481
|
+
Raises:
|
|
1482
|
+
RuntimeError: If called while workforce is running (not paused).
|
|
1315
1483
|
"""
|
|
1484
|
+
if self._state == WorkforceState.RUNNING:
|
|
1485
|
+
raise RuntimeError(
|
|
1486
|
+
"Cannot add workers while workforce is running. "
|
|
1487
|
+
"Pause the workforce first."
|
|
1488
|
+
)
|
|
1316
1489
|
# Align child workforce's pause_event with this one for unified
|
|
1317
1490
|
# control of worker agents only.
|
|
1318
1491
|
workforce._pause_event = self._pause_event
|
|
1319
1492
|
self._children.append(workforce)
|
|
1493
|
+
|
|
1494
|
+
# If we have a channel set up, set it for the new workforce
|
|
1495
|
+
if hasattr(self, '_channel') and self._channel is not None:
|
|
1496
|
+
workforce.set_channel(self._channel)
|
|
1497
|
+
|
|
1498
|
+
# If workforce is paused, start the child workforce's listening task
|
|
1499
|
+
self._start_child_node_when_paused(workforce.start())
|
|
1320
1500
|
return self
|
|
1321
1501
|
|
|
1322
1502
|
async def _async_reset(self) -> None:
|
|
@@ -1654,15 +1834,13 @@ class Workforce(BaseNode):
|
|
|
1654
1834
|
)
|
|
1655
1835
|
|
|
1656
1836
|
try:
|
|
1657
|
-
self._in_flight_tasks += 1
|
|
1658
1837
|
await self._channel.post_task(task, self.node_id, assignee_id)
|
|
1838
|
+
self._increment_in_flight_tasks(task.id)
|
|
1659
1839
|
logger.debug(
|
|
1660
1840
|
f"Posted task {task.id} to {assignee_id}. "
|
|
1661
1841
|
f"In-flight tasks: {self._in_flight_tasks}"
|
|
1662
1842
|
)
|
|
1663
1843
|
except Exception as e:
|
|
1664
|
-
# Decrement counter if posting failed
|
|
1665
|
-
self._in_flight_tasks -= 1
|
|
1666
1844
|
logger.error(
|
|
1667
1845
|
f"Failed to post task {task.id} to {assignee_id}: {e}"
|
|
1668
1846
|
)
|
|
@@ -1789,10 +1967,6 @@ class Workforce(BaseNode):
|
|
|
1789
1967
|
timeout=TASK_TIMEOUT_SECONDS,
|
|
1790
1968
|
)
|
|
1791
1969
|
except Exception as e:
|
|
1792
|
-
# Decrement in-flight counter to prevent hanging
|
|
1793
|
-
if self._in_flight_tasks > 0:
|
|
1794
|
-
self._in_flight_tasks -= 1
|
|
1795
|
-
|
|
1796
1970
|
error_msg = (
|
|
1797
1971
|
f"Error getting returned task {e} in "
|
|
1798
1972
|
f"workforce {self.node_id}. "
|
|
@@ -1804,8 +1978,11 @@ class Workforce(BaseNode):
|
|
|
1804
1978
|
if self._pending_tasks and self._assignees:
|
|
1805
1979
|
for task in self._pending_tasks:
|
|
1806
1980
|
if task.id in self._assignees:
|
|
1807
|
-
# Mark
|
|
1981
|
+
# Mark task as failed and decrement counter
|
|
1808
1982
|
task.set_state(TaskState.FAILED)
|
|
1983
|
+
self._decrement_in_flight_tasks(
|
|
1984
|
+
task.id, "timeout/error in _get_returned_task"
|
|
1985
|
+
)
|
|
1809
1986
|
return task
|
|
1810
1987
|
return None
|
|
1811
1988
|
|
|
@@ -1905,7 +2082,6 @@ class Workforce(BaseNode):
|
|
|
1905
2082
|
task_id=task.id,
|
|
1906
2083
|
worker_id=worker_id,
|
|
1907
2084
|
error_message=detailed_error,
|
|
1908
|
-
error_type="TaskFailure",
|
|
1909
2085
|
metadata={
|
|
1910
2086
|
'failure_count': task.failure_count,
|
|
1911
2087
|
'task_content': task.content,
|
|
@@ -1944,21 +2120,57 @@ class Workforce(BaseNode):
|
|
|
1944
2120
|
await self._channel.archive_task(task.id)
|
|
1945
2121
|
return True
|
|
1946
2122
|
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
assignee = await self._create_worker_node_for_task(task)
|
|
2123
|
+
# Use intelligent failure analysis to decide recovery strategy
|
|
2124
|
+
recovery_decision = self._analyze_failure(task, detailed_error)
|
|
1950
2125
|
|
|
1951
|
-
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
1955
|
-
|
|
2126
|
+
logger.info(
|
|
2127
|
+
f"Task {task.id} failure "
|
|
2128
|
+
f"analysis: {recovery_decision.strategy.value} - "
|
|
2129
|
+
f"{recovery_decision.reasoning}"
|
|
2130
|
+
)
|
|
2131
|
+
|
|
2132
|
+
if recovery_decision.strategy == RecoveryStrategy.RETRY:
|
|
2133
|
+
# Simply retry the task by reposting it
|
|
2134
|
+
if task.id in self._assignees:
|
|
2135
|
+
assignee_id = self._assignees[task.id]
|
|
2136
|
+
await self._post_task(task, assignee_id)
|
|
2137
|
+
action_taken = f"retried with same worker {assignee_id}"
|
|
2138
|
+
else:
|
|
2139
|
+
# Find a new assignee and retry
|
|
2140
|
+
batch_result = await self._find_assignee([task])
|
|
2141
|
+
assignment = batch_result.assignments[0]
|
|
2142
|
+
self._assignees[task.id] = assignment.assignee_id
|
|
2143
|
+
await self._post_task(task, assignment.assignee_id)
|
|
2144
|
+
action_taken = (
|
|
2145
|
+
f"retried with new worker {assignment.assignee_id}"
|
|
1956
2146
|
)
|
|
1957
|
-
self._sync_shared_memory()
|
|
1958
2147
|
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
2148
|
+
elif recovery_decision.strategy == RecoveryStrategy.REPLAN:
|
|
2149
|
+
# Modify the task content and retry
|
|
2150
|
+
if recovery_decision.modified_task_content:
|
|
2151
|
+
task.content = recovery_decision.modified_task_content
|
|
2152
|
+
logger.info(f"Task {task.id} content modified for replan")
|
|
2153
|
+
|
|
2154
|
+
# Repost the modified task
|
|
2155
|
+
if task.id in self._assignees:
|
|
2156
|
+
assignee_id = self._assignees[task.id]
|
|
2157
|
+
await self._post_task(task, assignee_id)
|
|
2158
|
+
action_taken = (
|
|
2159
|
+
f"replanned and retried with worker {assignee_id}"
|
|
2160
|
+
)
|
|
2161
|
+
else:
|
|
2162
|
+
# Find a new assignee for the replanned task
|
|
2163
|
+
batch_result = await self._find_assignee([task])
|
|
2164
|
+
assignment = batch_result.assignments[0]
|
|
2165
|
+
self._assignees[task.id] = assignment.assignee_id
|
|
2166
|
+
await self._post_task(task, assignment.assignee_id)
|
|
2167
|
+
action_taken = (
|
|
2168
|
+
f"replanned and assigned to "
|
|
2169
|
+
f"worker {assignment.assignee_id}"
|
|
2170
|
+
)
|
|
2171
|
+
|
|
2172
|
+
elif recovery_decision.strategy == RecoveryStrategy.DECOMPOSE:
|
|
2173
|
+
# Decompose the task into subtasks
|
|
1962
2174
|
subtasks = self._decompose_task(task)
|
|
1963
2175
|
if self.metrics_logger and subtasks:
|
|
1964
2176
|
self.metrics_logger.log_task_decomposed(
|
|
@@ -2000,7 +2212,14 @@ class Workforce(BaseNode):
|
|
|
2000
2212
|
await self._post_ready_tasks()
|
|
2001
2213
|
return False
|
|
2002
2214
|
|
|
2003
|
-
|
|
2215
|
+
elif recovery_decision.strategy == RecoveryStrategy.CREATE_WORKER:
|
|
2216
|
+
assignee = await self._create_worker_node_for_task(task)
|
|
2217
|
+
await self._post_task(task, assignee.node_id)
|
|
2218
|
+
action_taken = (
|
|
2219
|
+
f"created new worker {assignee.node_id} and assigned "
|
|
2220
|
+
f"task {task.id} to it"
|
|
2221
|
+
)
|
|
2222
|
+
|
|
2004
2223
|
if task.id in self._assignees:
|
|
2005
2224
|
await self._channel.archive_task(task.id)
|
|
2006
2225
|
|
|
@@ -2275,7 +2494,9 @@ class Workforce(BaseNode):
|
|
|
2275
2494
|
await self._post_ready_tasks()
|
|
2276
2495
|
continue
|
|
2277
2496
|
|
|
2278
|
-
self.
|
|
2497
|
+
self._decrement_in_flight_tasks(
|
|
2498
|
+
returned_task.id, "task returned successfully"
|
|
2499
|
+
)
|
|
2279
2500
|
|
|
2280
2501
|
# Check for stop request after getting task
|
|
2281
2502
|
if self._stop_requested:
|
|
@@ -2360,8 +2581,9 @@ class Workforce(BaseNode):
|
|
|
2360
2581
|
|
|
2361
2582
|
except Exception as e:
|
|
2362
2583
|
# Decrement in-flight counter to prevent hanging
|
|
2363
|
-
|
|
2364
|
-
|
|
2584
|
+
self._decrement_in_flight_tasks(
|
|
2585
|
+
"unknown", "exception in task processing loop"
|
|
2586
|
+
)
|
|
2365
2587
|
|
|
2366
2588
|
logger.error(
|
|
2367
2589
|
f"Error processing task in workforce {self.node_id}: {e}"
|
|
@@ -2440,8 +2662,20 @@ class Workforce(BaseNode):
|
|
|
2440
2662
|
for task in self._child_listening_tasks:
|
|
2441
2663
|
if not task.done():
|
|
2442
2664
|
task.cancel()
|
|
2665
|
+
|
|
2666
|
+
# Handle both asyncio.Task and concurrent.futures.
|
|
2667
|
+
# Future
|
|
2668
|
+
awaitables = []
|
|
2669
|
+
for task in self._child_listening_tasks:
|
|
2670
|
+
if isinstance(task, concurrent.futures.Future):
|
|
2671
|
+
# Convert Future to awaitable
|
|
2672
|
+
awaitables.append(asyncio.wrap_future(task))
|
|
2673
|
+
else:
|
|
2674
|
+
# Already an asyncio.Task
|
|
2675
|
+
awaitables.append(task)
|
|
2676
|
+
|
|
2443
2677
|
await asyncio.gather(
|
|
2444
|
-
*
|
|
2678
|
+
*awaitables,
|
|
2445
2679
|
return_exceptions=True,
|
|
2446
2680
|
)
|
|
2447
2681
|
|
|
@@ -180,7 +180,6 @@ class WorkforceLogger:
|
|
|
180
180
|
self,
|
|
181
181
|
task_id: str,
|
|
182
182
|
error_message: str,
|
|
183
|
-
error_type: str,
|
|
184
183
|
worker_id: Optional[str] = None,
|
|
185
184
|
metadata: Optional[Dict[str, Any]] = None,
|
|
186
185
|
) -> None:
|
|
@@ -190,7 +189,6 @@ class WorkforceLogger:
|
|
|
190
189
|
task_id=task_id,
|
|
191
190
|
worker_id=worker_id,
|
|
192
191
|
error_message=error_message,
|
|
193
|
-
error_type=error_type,
|
|
194
192
|
metadata=metadata or {},
|
|
195
193
|
)
|
|
196
194
|
if task_id in self._task_hierarchy:
|
|
@@ -484,7 +482,6 @@ class WorkforceLogger:
|
|
|
484
482
|
'total_tasks_created': 0,
|
|
485
483
|
'total_tasks_completed': 0,
|
|
486
484
|
'total_tasks_failed': 0,
|
|
487
|
-
'error_types_count': {},
|
|
488
485
|
'worker_utilization': {},
|
|
489
486
|
'current_pending_tasks': 0,
|
|
490
487
|
'total_workforce_running_time_seconds': 0.0,
|
|
@@ -560,11 +557,6 @@ class WorkforceLogger:
|
|
|
560
557
|
tasks_handled_by_worker[worker_id] = (
|
|
561
558
|
tasks_handled_by_worker.get(worker_id, 0) + 1
|
|
562
559
|
)
|
|
563
|
-
error_type = entry['error_type']
|
|
564
|
-
kpis['error_types_count'][error_type] = (
|
|
565
|
-
kpis['error_types_count'].get(error_type, 0) + 1
|
|
566
|
-
)
|
|
567
|
-
|
|
568
560
|
elif event_type == 'queue_status':
|
|
569
561
|
pass # Placeholder for now
|
|
570
562
|
|
camel/tasks/task.py
CHANGED
camel/toolkits/__init__.py
CHANGED
|
@@ -83,6 +83,7 @@ from .google_drive_mcp_toolkit import GoogleDriveMCPToolkit
|
|
|
83
83
|
from .craw4ai_toolkit import Crawl4AIToolkit
|
|
84
84
|
from .markitdown_toolkit import MarkItDownToolkit
|
|
85
85
|
from .note_taking_toolkit import NoteTakingToolkit
|
|
86
|
+
from .message_agent_toolkit import AgentCommunicationToolkit
|
|
86
87
|
|
|
87
88
|
__all__ = [
|
|
88
89
|
'BaseToolkit',
|
|
@@ -154,4 +155,5 @@ __all__ = [
|
|
|
154
155
|
'Crawl4AIToolkit',
|
|
155
156
|
'MarkItDownToolkit',
|
|
156
157
|
'NoteTakingToolkit',
|
|
158
|
+
'AgentCommunicationToolkit',
|
|
157
159
|
]
|