camel-ai 0.2.73a4__py3-none-any.whl → 0.2.80a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/agents/_utils.py +38 -0
- camel/agents/chat_agent.py +2217 -519
- camel/agents/mcp_agent.py +30 -27
- camel/configs/__init__.py +15 -0
- camel/configs/aihubmix_config.py +88 -0
- camel/configs/amd_config.py +70 -0
- camel/configs/cometapi_config.py +104 -0
- camel/configs/minimax_config.py +93 -0
- camel/configs/nebius_config.py +103 -0
- camel/data_collectors/alpaca_collector.py +15 -6
- camel/datasets/base_generator.py +39 -10
- camel/environments/single_step.py +28 -3
- camel/environments/tic_tac_toe.py +1 -1
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/docker/Dockerfile +3 -12
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/loaders/__init__.py +11 -2
- camel/loaders/chunkr_reader.py +9 -0
- camel/memories/agent_memories.py +48 -4
- camel/memories/base.py +26 -0
- camel/memories/blocks/chat_history_block.py +122 -4
- camel/memories/context_creators/score_based.py +25 -384
- camel/memories/records.py +88 -8
- camel/messages/base.py +153 -34
- camel/models/__init__.py +10 -0
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +1 -16
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +6 -19
- camel/models/aws_bedrock_model.py +2 -33
- camel/models/azure_openai_model.py +114 -89
- camel/models/base_audio_model.py +3 -1
- camel/models/base_model.py +32 -14
- camel/models/cohere_model.py +1 -16
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +1 -16
- camel/models/deepseek_model.py +1 -16
- camel/models/fish_audio_model.py +6 -0
- camel/models/gemini_model.py +36 -18
- camel/models/groq_model.py +1 -17
- camel/models/internlm_model.py +1 -16
- camel/models/litellm_model.py +1 -16
- camel/models/lmstudio_model.py +1 -17
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +1 -16
- camel/models/model_factory.py +27 -1
- camel/models/modelscope_model.py +1 -16
- camel/models/moonshot_model.py +105 -24
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +0 -5
- camel/models/netmind_model.py +1 -16
- camel/models/novita_model.py +1 -16
- camel/models/nvidia_model.py +1 -16
- camel/models/ollama_model.py +4 -19
- camel/models/openai_compatible_model.py +62 -41
- camel/models/openai_model.py +62 -57
- camel/models/openrouter_model.py +1 -17
- camel/models/ppio_model.py +1 -16
- camel/models/qianfan_model.py +1 -16
- camel/models/qwen_model.py +1 -16
- camel/models/reka_model.py +1 -16
- camel/models/samba_model.py +34 -47
- camel/models/sglang_model.py +64 -31
- camel/models/siliconflow_model.py +1 -16
- camel/models/stub_model.py +0 -4
- camel/models/togetherai_model.py +1 -16
- camel/models/vllm_model.py +1 -16
- camel/models/volcano_model.py +0 -17
- camel/models/watsonx_model.py +1 -16
- camel/models/yi_model.py +1 -16
- camel/models/zhipuai_model.py +60 -16
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/retrievers/auto_retriever.py +1 -0
- camel/runtimes/daytona_runtime.py +11 -12
- camel/societies/__init__.py +2 -0
- camel/societies/workforce/__init__.py +2 -0
- camel/societies/workforce/events.py +122 -0
- camel/societies/workforce/prompts.py +146 -66
- camel/societies/workforce/role_playing_worker.py +15 -11
- camel/societies/workforce/single_agent_worker.py +302 -65
- camel/societies/workforce/structured_output_handler.py +30 -18
- camel/societies/workforce/task_channel.py +163 -27
- camel/societies/workforce/utils.py +107 -13
- camel/societies/workforce/workflow_memory_manager.py +772 -0
- camel/societies/workforce/workforce.py +1949 -579
- camel/societies/workforce/workforce_callback.py +74 -0
- camel/societies/workforce/workforce_logger.py +168 -145
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/key_value_storages/json.py +15 -2
- camel/storages/key_value_storages/mem0_cloud.py +48 -47
- camel/storages/object_storages/google_cloud.py +1 -1
- camel/storages/vectordb_storages/oceanbase.py +13 -13
- camel/storages/vectordb_storages/qdrant.py +3 -3
- camel/storages/vectordb_storages/tidb.py +8 -6
- camel/tasks/task.py +4 -3
- camel/toolkits/__init__.py +20 -7
- camel/toolkits/aci_toolkit.py +45 -0
- camel/toolkits/base.py +6 -4
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/dappier_toolkit.py +5 -1
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
- camel/toolkits/excel_toolkit.py +1 -1
- camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +430 -36
- camel/toolkits/function_tool.py +13 -3
- camel/toolkits/github_toolkit.py +104 -17
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +38 -4
- camel/toolkits/google_drive_mcp_toolkit.py +12 -31
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +15 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +77 -8
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +884 -88
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +959 -89
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +9 -2
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +281 -213
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +23 -3
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +72 -7
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +582 -132
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +321 -8
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
- camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +151 -53
- camel/toolkits/klavis_toolkit.py +5 -1
- camel/toolkits/markitdown_toolkit.py +27 -1
- camel/toolkits/math_toolkit.py +64 -10
- camel/toolkits/mcp_toolkit.py +366 -71
- camel/toolkits/memory_toolkit.py +5 -1
- camel/toolkits/message_integration.py +18 -13
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/note_taking_toolkit.py +19 -10
- camel/toolkits/notion_mcp_toolkit.py +16 -26
- camel/toolkits/openbb_toolkit.py +5 -1
- camel/toolkits/origene_mcp_toolkit.py +8 -49
- camel/toolkits/playwright_mcp_toolkit.py +12 -31
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/search_toolkit.py +264 -91
- camel/toolkits/slack_toolkit.py +64 -10
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +17 -11
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/zapier_toolkit.py +5 -1
- camel/types/__init__.py +2 -2
- camel/types/enums.py +274 -7
- camel/types/openai_types.py +2 -2
- camel/types/unified_model_type.py +15 -0
- camel/utils/commons.py +36 -5
- camel/utils/constants.py +3 -0
- camel/utils/context_utils.py +1003 -0
- camel/utils/mcp.py +138 -4
- camel/utils/token_counting.py +43 -20
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +223 -83
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +170 -141
- camel/loaders/pandas_reader.py +0 -368
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1550
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
camel/societies/workforce/workforce.py
@@ -16,12 +16,15 @@ from __future__ import annotations
 import asyncio
 import concurrent.futures
 import json
+import os
 import time
 import uuid
 from collections import deque
 from enum import Enum
 from typing import (
+    TYPE_CHECKING,
     Any,
+    Callable,
     Coroutine,
     Deque,
     Dict,
@@ -31,8 +34,15 @@ from typing import (
     Set,
     Tuple,
     Union,
+    cast,
 )
 
+from .workforce_callback import WorkforceCallback
+from .workforce_metrics import WorkforceMetrics
+
+if TYPE_CHECKING:
+    from camel.utils.context_utils import ContextUtility
+
 from colorama import Fore
 
 from camel.agents import ChatAgent
@@ -43,19 +53,23 @@ from camel.societies.workforce.base import BaseNode
 from camel.societies.workforce.prompts import (
     ASSIGN_TASK_PROMPT,
     CREATE_NODE_PROMPT,
-    FAILURE_ANALYSIS_PROMPT,
+    FAILURE_ANALYSIS_RESPONSE_FORMAT,
+    QUALITY_EVALUATION_RESPONSE_FORMAT,
+    TASK_AGENT_SYSTEM_MESSAGE,
+    TASK_ANALYSIS_PROMPT,
     TASK_DECOMPOSE_PROMPT,
 )
 from camel.societies.workforce.role_playing_worker import RolePlayingWorker
-from camel.societies.workforce.single_agent_worker import SingleAgentWorker
+from camel.societies.workforce.single_agent_worker import (
+    SingleAgentWorker,
+)
 from camel.societies.workforce.structured_output_handler import (
     StructuredOutputHandler,
 )
 from camel.societies.workforce.task_channel import TaskChannel
 from camel.societies.workforce.utils import (
-    FailureContext,
-    RecoveryDecision,
     RecoveryStrategy,
+    TaskAnalysisResult,
     TaskAssignment,
     TaskAssignResult,
     WorkerConf,
@@ -70,21 +84,39 @@ from camel.tasks.task import (
 )
 from camel.toolkits import (
     CodeExecutionToolkit,
+    FunctionTool,
     SearchToolkit,
-    TaskPlanningToolkit,
     ThinkingToolkit,
 )
 from camel.types import ModelPlatformType, ModelType
 from camel.utils import dependencies_required
 
+from .events import (
+    AllTasksCompletedEvent,
+    TaskAssignedEvent,
+    TaskCompletedEvent,
+    TaskCreatedEvent,
+    TaskDecomposedEvent,
+    TaskFailedEvent,
+    TaskStartedEvent,
+    WorkerCreatedEvent,
+)
 from .workforce_logger import WorkforceLogger
 
-
+if os.environ.get("TRACEROOT_ENABLED", "False").lower() == "true":
+    try:
+        import traceroot  # type: ignore[import]
+
+        logger = traceroot.get_logger('camel')
+    except ImportError:
+        logger = get_logger(__name__)
+else:
+    logger = get_logger(__name__)
 
 # Constants for configuration values
 MAX_TASK_RETRIES = 3
 MAX_PENDING_TASKS_LIMIT = 20
-TASK_TIMEOUT_SECONDS =
+TASK_TIMEOUT_SECONDS = 600.0
 DEFAULT_WORKER_POOL_SIZE = 10
 
 
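
The TRACEROOT_ENABLED gate above is evaluated once, at module import time. A minimal sketch of opting in, assuming the optional traceroot dependency is installed and that camel.societies.workforce re-exports Workforce:

    import os

    # Must be set before camel.societies.workforce is first imported,
    # because the logger selection above runs at import time.
    os.environ["TRACEROOT_ENABLED"] = "true"

    from camel.societies.workforce import Workforce  # noqa: E402
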
@@ -151,9 +183,9 @@ class Workforce(BaseNode):
         task_agent (Optional[ChatAgent], optional): A custom task planning
             agent instance for task decomposition and composition. If
             provided, the workforce will create a new agent using this agent's
-            model configuration but with the required system message
-
-
+            model configuration but with the required system message. If None,
+            a default agent will be created using DEFAULT model settings.
+            (default: :obj:`None`)
         new_worker_agent (Optional[ChatAgent], optional): A template agent for
             workers created dynamically at runtime when existing workers cannot
             handle failed tasks. If None, workers will be created with default
@@ -163,6 +195,11 @@ class Workforce(BaseNode):
             for graceful shutdown when a task fails 3 times. During this
             period, the workforce remains active for debugging.
             Set to 0 for immediate shutdown. (default: :obj:`15.0`)
+        task_timeout_seconds (Optional[float], optional): The timeout in
+            seconds for waiting for tasks to be returned by workers. If None,
+            uses the global TASK_TIMEOUT_SECONDS value (600.0 seconds).
+            Increase this value for tasks that require more processing time.
+            (default: :obj:`None`)
         share_memory (bool, optional): Whether to enable shared memory across
             SingleAgentWorker instances in the workforce. When enabled, all
             SingleAgentWorker instances, coordinator agent, and task planning
@@ -180,6 +217,17 @@ class Workforce(BaseNode):
             support native structured output. When disabled, the workforce
             uses the native response_format parameter.
             (default: :obj:`True`)
+        callbacks (Optional[List[WorkforceCallback]], optional): A list of
+            callback handlers to observe and record workforce lifecycle events
+            and metrics (e.g., task creation/assignment/start/completion/
+            failure, worker creation/deletion, all-tasks-completed). All
+            items must be instances of :class:`WorkforceCallback`, otherwise
+            a :class:`ValueError` is raised. If none of the provided
+            callbacks implement :class:`WorkforceMetrics`, a built-in
+            :class:`WorkforceLogger` (implements both callback and metrics)
+            is added automatically. If at least one provided callback
+            implements :class:`WorkforceMetrics`, no default logger is added.
+            (default: :obj:`None`)
 
     Example:
         >>> import asyncio
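
A minimal sketch of the callbacks parameter documented above, assuming WorkforceCallback leaves unused hooks as no-ops; the hook names are the ones invoked elsewhere in this diff (log_task_created, log_task_decomposed, log_worker_created):

    from camel.societies.workforce import Workforce
    from camel.societies.workforce.workforce_callback import WorkforceCallback


    class PrintingCallback(WorkforceCallback):
        # Only the hooks exercised in this diff are overridden here.
        def log_task_created(self, event):
            print(f"task created: {event.task_id}")

        def log_worker_created(self, event):
            print(f"worker created: {event.worker_id} ({event.role})")


    # PrintingCallback does not implement WorkforceMetrics, so a default
    # WorkforceLogger is appended automatically, as documented above.
    workforce = Workforce(
        "demo workforce",
        callbacks=[PrintingCallback()],
        task_timeout_seconds=900.0,
    )
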
@@ -231,6 +279,8 @@ class Workforce(BaseNode):
         graceful_shutdown_timeout: float = 15.0,
         share_memory: bool = False,
         use_structured_output_handler: bool = True,
+        task_timeout_seconds: Optional[float] = None,
+        callbacks: Optional[List[WorkforceCallback]] = None,
     ) -> None:
         super().__init__(description)
         self._child_listening_tasks: Deque[
@@ -241,9 +291,11 @@ class Workforce(BaseNode):
         self.graceful_shutdown_timeout = graceful_shutdown_timeout
         self.share_memory = share_memory
         self.use_structured_output_handler = use_structured_output_handler
+        self.task_timeout_seconds = (
+            task_timeout_seconds or TASK_TIMEOUT_SECONDS
+        )
         if self.use_structured_output_handler:
             self.structured_handler = StructuredOutputHandler()
-        self.metrics_logger = WorkforceLogger(workforce_id=self.node_id)
         self._task: Optional[Task] = None
         self._pending_tasks: Deque[Task] = deque()
         self._task_dependencies: Dict[str, List[str]] = {}
@@ -256,6 +308,7 @@ class Workforce(BaseNode):
         self._pause_event = asyncio.Event()
         self._pause_event.set()  # Initially not paused
         self._stop_requested = False
+        self._skip_requested = False
         self._snapshots: List[WorkforceSnapshot] = []
         self._completed_tasks: List[Task] = []
         self._loop: Optional[asyncio.AbstractEventLoop] = None
@@ -265,15 +318,9 @@ class Workforce(BaseNode):
         self._last_snapshot_time: float = 0.0
         # Minimum seconds between automatic snapshots
         self.snapshot_interval: float = 30.0
-
-
-
-            role_or_desc = child.description
-            self.metrics_logger.log_worker_created(
-                worker_id=child.node_id,
-                worker_type=worker_type,
-                role=role_or_desc,
-            )
+        # Shared memory UUID tracking to prevent re-sharing duplicates
+        self._shared_memory_uuids: Set[str] = set()
+        self._initialize_callbacks(callbacks)
 
         # Set up coordinator agent with default system message
         coord_agent_sys_msg = BaseMessage.make_assistant_message(
@@ -302,8 +349,7 @@ class Workforce(BaseNode):
         if coordinator_agent.system_message is not None:
             user_sys_msg_content = coordinator_agent.system_message.content
             combined_content = (
-                f"{user_sys_msg_content}\n\n"
-                f"{coord_agent_sys_msg.content}"
+                f"{user_sys_msg_content}\n\n{coord_agent_sys_msg.content}"
             )
             combined_sys_msg = BaseMessage.make_assistant_message(
                 role_name=coordinator_agent.system_message.role_name,
@@ -327,10 +373,7 @@ class Workforce(BaseNode):
                 None,
             ),
             output_language=coordinator_agent.output_language,
-            tools=[
-                tool.func
-                for tool in coordinator_agent._internal_tools.values()
-            ],
+            tools=list(coordinator_agent._internal_tools.values()),
             external_tools=[
                 schema
                 for schema in coordinator_agent._external_tool_schemas.values()  # noqa: E501
@@ -340,28 +383,20 @@ class Workforce(BaseNode):
             stop_event=coordinator_agent.stop_event,
         )
 
-        # Set up task agent with default system message
+        # Set up task agent with default system message
         task_sys_msg = BaseMessage.make_assistant_message(
             role_name="Task Planner",
-            content=
-            "tasks that are sequential and require the same type of "
-            "agent together in one agent process. Only decompose tasks "
-            "that can be handled in parallel and require different types "
-            "of agents. This ensures efficient execution by minimizing "
-            "context switching between agents.",
+            content=TASK_AGENT_SYSTEM_MESSAGE,
         )
-        task_planning_tools = TaskPlanningToolkit().get_tools()
 
         if task_agent is None:
             logger.warning(
                 "No task_agent provided. Using default ChatAgent "
                 "settings (ModelPlatformType.DEFAULT, ModelType.DEFAULT) "
-                "with default system message
+                "with default system message."
             )
-            task_tools = TaskPlanningToolkit().get_tools()
             self.task_agent = ChatAgent(
                 task_sys_msg,
-                tools=task_tools,  # type: ignore[arg-type]
             )
         else:
             logger.info(
@@ -373,8 +408,7 @@ class Workforce(BaseNode):
             if task_agent.system_message is not None:
                 user_task_sys_msg_content = task_agent.system_message.content
                 combined_task_content = (
-                    f"{user_task_sys_msg_content}\n\n"
-                    f"{task_sys_msg.content}"
+                    f"{user_task_sys_msg_content}\n\n{task_sys_msg.content}"
                 )
                 combined_task_sys_msg = BaseMessage.make_assistant_message(
                     role_name=task_agent.system_message.role_name,
@@ -385,9 +419,10 @@ class Workforce(BaseNode):
 
             # Since ChatAgent constructor uses a dictionary with
             # function names as keys, we don't need to manually deduplicate.
-            combined_tools =
-
-
+            combined_tools: List[Union[FunctionTool, Callable]] = cast(
+                List[Union[FunctionTool, Callable]],
+                list(task_agent._internal_tools.values()),
+            )
 
             # Create a new agent with the provided agent's configuration
             # but with the combined system message and tools
@@ -434,10 +469,85 @@ class Workforce(BaseNode):
                 "better context continuity during task handoffs."
             )
 
+        # Shared context utility for workflow management (created lazily)
+        self._shared_context_utility: Optional["ContextUtility"] = None
+
         # ------------------------------------------------------------------
         # Helper for propagating pause control to externally supplied agents
         # ------------------------------------------------------------------
 
+    def _initialize_callbacks(
+        self, callbacks: Optional[List[WorkforceCallback]]
+    ) -> None:
+        r"""Validate, register, and prime workforce callbacks."""
+        self._callbacks: List[WorkforceCallback] = []
+
+        if callbacks:
+            for cb in callbacks:
+                if isinstance(cb, WorkforceCallback):
+                    self._callbacks.append(cb)
+                else:
+                    raise ValueError(
+                        "All callbacks must be instances of WorkforceCallback"
+                    )
+
+        has_metrics_callback = any(
+            isinstance(cb, WorkforceMetrics) for cb in self._callbacks
+        )
+
+        if not has_metrics_callback:
+            self._callbacks.append(WorkforceLogger(workforce_id=self.node_id))
+        else:
+            logger.info(
+                "WorkforceMetrics implementation detected. Skipping default "
+                "WorkforceLogger addition."
+            )
+
+        for child in self._children:
+            self._notify_worker_created(child)
+
+    def _notify_worker_created(
+        self,
+        worker_node: BaseNode,
+        *,
+        worker_type: Optional[str] = None,
+        role: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        r"""Emit a worker-created event to all registered callbacks."""
+        event = WorkerCreatedEvent(
+            worker_id=worker_node.node_id,
+            worker_type=worker_type or type(worker_node).__name__,
+            role=role or worker_node.description,
+            metadata=metadata,
+        )
+        for cb in self._callbacks:
+            cb.log_worker_created(event)
+
+    def _get_or_create_shared_context_utility(
+        self,
+        session_id: Optional[str] = None,
+    ) -> "ContextUtility":
+        r"""Get or create the shared context utility for workflow management.
+
+        This method creates the context utility only when needed, avoiding
+        unnecessary session folder creation during initialization.
+
+        Args:
+            session_id (Optional[str]): Custom session ID to use. If None,
+                auto-generates a timestamped session ID. (default: :obj:`None`)
+
+        Returns:
+            ContextUtility: The shared context utility instance.
+        """
+        if self._shared_context_utility is None:
+            from camel.utils.context_utils import ContextUtility
+
+            self._shared_context_utility = ContextUtility.get_workforce_shared(
+                session_id=session_id
+            )
+        return self._shared_context_utility
+
     def _validate_agent_compatibility(
         self, agent: ChatAgent, agent_context: str = "agent"
     ) -> None:
@@ -474,6 +584,9 @@ class Workforce(BaseNode):
                 "the Workforce."
             )
 
+    # ------------------------------------------------------------------
+    # Helper for propagating pause control to externally supplied agents
+    # ------------------------------------------------------------------
     def _attach_pause_event_to_agent(self, agent: ChatAgent) -> None:
         r"""Ensure the given ChatAgent shares this workforce's pause_event.
 
@@ -599,14 +712,29 @@ class Workforce(BaseNode):
             )
             return
 
-        #
+        # Filter out already-shared records to prevent re-sharing
+        # This prevents exponential growth of duplicate records
+        new_records = []
         for record in memory_records:
+            record_uuid = str(record.uuid)
+            if record_uuid not in self._shared_memory_uuids:
+                new_records.append(record)
+                self._shared_memory_uuids.add(record_uuid)
+
+        if not new_records:
+            logger.debug(
+                "No new records to share (all were already shared)"
+            )
+            return
+
+        # Share with coordinator agent
+        for record in new_records:
             # Only add records from other agents to avoid duplication
             if record.agent_id != self.coordinator_agent.agent_id:
                 self.coordinator_agent.memory.write_record(record)
 
         # Share with task agent
-        for record in memory_records:
+        for record in new_records:
             if record.agent_id != self.task_agent.agent_id:
                 self.task_agent.memory.write_record(record)
 
@@ -618,12 +746,12 @@ class Workforce(BaseNode):
         ]
 
         for worker in single_agent_workers:
-            for record in memory_records:
+            for record in new_records:
                 if record.agent_id != worker.worker.agent_id:
                     worker.worker.memory.write_record(record)
 
         logger.info(
-            f"Shared {len(
+            f"Shared {len(new_records)} new memory records across "
             f"{len(single_agent_workers) + 2} agents in workforce "
             f"{self.node_id}"
         )
@@ -730,10 +858,12 @@ class Workforce(BaseNode):
             Union[List[Task], Generator[List[Task], None, None]]:
                 The subtasks or generator of subtasks.
         """
-        decompose_prompt =
-
-
-
+        decompose_prompt = str(
+            TASK_DECOMPOSE_PROMPT.format(
+                content=task.content,
+                child_nodes_info=self._get_child_nodes_info(),
+                additional_info=task.additional_info,
+            )
         )
         self.task_agent.reset()
         result = task.decompose(self.task_agent, decompose_prompt)
@@ -761,76 +891,126 @@ class Workforce(BaseNode):
         self._update_dependencies_for_decomposition(task, subtasks)
         return subtasks
 
-    def
-        self,
-
-
+    def _analyze_task(
+        self,
+        task: Task,
+        *,
+        for_failure: bool,
+        error_message: Optional[str] = None,
+    ) -> TaskAnalysisResult:
+        r"""Unified task analysis for both failures and quality evaluation.
+
+        This method consolidates the logic for analyzing task failures and
+        evaluating task quality, using the unified TASK_ANALYSIS_PROMPT.
 
         Args:
-            task (Task): The
-
+            task (Task): The task to analyze
+            for_failure (bool): True for failure analysis, False for quality
+                evaluation
+            error_message (Optional[str]): Error message, required when
+                for_failure=True
 
         Returns:
-
+            TaskAnalysisResult: Unified analysis result with recovery strategy
+                and optional quality metrics
+
+        Raises:
+            ValueError: If for_failure=True but error_message is None
         """
-        #
-
-
-
-
-
-
-
-
-
+        # Validate required parameters
+        if for_failure and error_message is None:
+            raise ValueError("error_message is required when for_failure=True")
+
+        # Determine task result and issue-specific analysis based on context
+        if for_failure:
+            task_result = "N/A (task failed)"
+            issue_type = "Task Failure"
+            issue_analysis = f"**Error Message:** {error_message}"
+            response_format = FAILURE_ANALYSIS_RESPONSE_FORMAT
+            result_schema = TaskAnalysisResult
+            fallback_values: Dict[str, Any] = {
+                "reasoning": "Defaulting to retry due to parsing error",
+                "recovery_strategy": RecoveryStrategy.RETRY,
+                "modified_task_content": None,
+                "issues": [error_message] if error_message else [],
+            }
+            examples: List[Dict[str, Any]] = [
+                {
+                    "reasoning": "Temporary network error, worth retrying",
+                    "recovery_strategy": "retry",
+                    "modified_task_content": None,
+                    "issues": ["Network timeout"],
+                }
             ]
-
-
-
-
+        else:
+            # Quality evaluation
+            task_result = task.result or "No result available"
+            issue_type = "Quality Evaluation"
+            issue_analysis = (
+                "Provide a quality score (0-100) and list any specific "
+                "issues found."
             )
+            response_format = QUALITY_EVALUATION_RESPONSE_FORMAT
+            result_schema = TaskAnalysisResult
+            fallback_values = {
+                "reasoning": (
+                    "Defaulting to acceptable quality due to parsing error"
+                ),
+                "issues": [],
+                "recovery_strategy": None,
+                "modified_task_content": None,
+                "quality_score": 80,
+            }
+            examples = [
+                {
+                    "reasoning": (
+                        "Excellent implementation with comprehensive tests"
+                    ),
+                    "issues": [],
+                    "recovery_strategy": None,
+                    "modified_task_content": None,
+                    "quality_score": 98,
+                },
+                {
+                    "reasoning": (
+                        "Implementation incomplete with missing features"
+                    ),
+                    "issues": [
+                        "Incomplete implementation",
+                        "Missing error handling",
+                    ],
+                    "recovery_strategy": "replan",
+                    "modified_task_content": (
+                        "Previous attempt was incomplete. "
+                        "Please implement with: 1) Full feature "
+                        "coverage, 2) Proper error handling"
+                    ),
+                    "quality_score": 45,
+                },
+            ]
 
-        #
-
-
-
-
-
-
-
-
-
-
-
-        # Format the analysis prompt
-        analysis_prompt = FAILURE_ANALYSIS_PROMPT.format(
-            task_id=failure_context.task_id,
-            task_content=failure_context.task_content,
-            failure_count=failure_context.failure_count,
-            error_message=failure_context.error_message,
-            worker_id=failure_context.worker_id or "unknown",
-            task_depth=failure_context.task_depth,
-            additional_info=failure_context.additional_info or "None",
+        # Format the unified analysis prompt
+        analysis_prompt = str(
+            TASK_ANALYSIS_PROMPT.format(
+                task_id=task.id,
+                task_content=task.content,
+                task_result=task_result,
+                failure_count=task.failure_count,
+                task_depth=task.get_depth(),
+                assigned_worker=task.assigned_worker_id or "unknown",
+                issue_type=issue_type,
+                issue_specific_analysis=issue_analysis,
+                response_format=response_format,
+            )
         )
 
         try:
-            # Check if we should use structured handler
             if self.use_structured_output_handler:
-                # Use structured handler
                 enhanced_prompt = (
                     self.structured_handler.generate_structured_prompt(
                         base_prompt=analysis_prompt,
-                        schema=
-                        examples=
-                            {
-                                "strategy": "RETRY",
-                                "reasoning": "Temporary network error, "
-                                "worth retrying",
-                                "modified_task_content": None,
-                            }
-                        ],
+                        schema=result_schema,
+                        examples=examples,
                     )
                 )
 
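
For orientation, the fallback dictionaries and examples above imply a TaskAnalysisResult shape roughly like the following sketch; the real model lives in camel/societies/workforce/utils.py, which this diff does not show, so names and types here are inferred and may differ:

    from typing import Any, List, Optional

    from pydantic import BaseModel


    class TaskAnalysisResultSketch(BaseModel):
        # Fields inferred from fallback_values/examples in the hunk above.
        reasoning: str
        issues: List[str] = []
        recovery_strategy: Optional[Any] = None  # RecoveryStrategy or str
        modified_task_content: Optional[str] = None
        quality_score: Optional[int] = None
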
@@ -839,43 +1019,224 @@ class Workforce(BaseNode):
 
                 result = self.structured_handler.parse_structured_response(
                     response.msg.content if response.msg else "",
-                    schema=
-                    fallback_values=
-                        "strategy": RecoveryStrategy.RETRY,
-                        "reasoning": "Defaulting to retry due to parsing "
-                        "issues",
-                        "modified_task_content": None,
-                    },
+                    schema=result_schema,
+                    fallback_values=fallback_values,
                 )
-
-                if isinstance(result,
+
+                if isinstance(result, TaskAnalysisResult):
                     return result
                 elif isinstance(result, dict):
-                    return
+                    return result_schema(**result)
                 else:
-
-
-                        reasoning="Failed to parse recovery decision",
-                        modified_task_content=None,
-                    )
+                    # Fallback based on context
+                    return TaskAnalysisResult(**fallback_values)
             else:
-                # Use existing native structured output code
                 self.task_agent.reset()
                 response = self.task_agent.step(
-                    analysis_prompt, response_format=
+                    analysis_prompt, response_format=result_schema
                 )
                 return response.msg.parsed
 
         except Exception as e:
             logger.warning(
-                f"Error during
+                f"Error during task analysis "
+                f"({'failure' if for_failure else 'quality'}): {e}, "
+                f"using fallback"
             )
-            return
-
-
-
-
+            return TaskAnalysisResult(**fallback_values)
+
+    async def _apply_recovery_strategy(
+        self,
+        task: Task,
+        recovery_decision: TaskAnalysisResult,
+    ) -> bool:
+        r"""Apply the recovery strategy from a task analysis result.
+
+        This method centralizes the recovery logic for both execution failures
+        and quality-based failures.
+
+        Args:
+            task (Task): The task that needs recovery
+            recovery_decision (TaskAnalysisResult): The analysis result with
+                recovery strategy
+
+        Returns:
+            bool: True if workforce should halt (e.g., decompose needs
+                different handling), False otherwise
+        """
+        strategy = (
+            recovery_decision.recovery_strategy or RecoveryStrategy.RETRY
+        )
+        action_taken = ""
+
+        try:
+            if strategy == RecoveryStrategy.RETRY:
+                # Simply retry the task by reposting it to the same worker
+                # Check both _assignees dict and task.assigned_worker_id
+                assignee_id = (
+                    self._assignees.get(task.id) or task.assigned_worker_id
+                )
+
+                if assignee_id:
+                    # Retry with the same worker - no coordinator call needed
+                    await self._post_task(task, assignee_id)
+                    action_taken = f"retried with same worker {assignee_id}"
+                    logger.info(
+                        f"Task {task.id} retrying with same worker "
+                        f"{assignee_id} (no coordinator call)"
+                    )
+                else:
+                    # No previous assignment exists - find a new assignee
+                    logger.info(
+                        f"Task {task.id} has no previous assignee, "
+                        f"calling coordinator"
+                    )
+                    batch_result = await self._find_assignee([task])
+                    assignment = batch_result.assignments[0]
+                    self._assignees[task.id] = assignment.assignee_id
+                    await self._post_task(task, assignment.assignee_id)
+                    action_taken = (
+                        f"retried with new worker {assignment.assignee_id}"
+                    )
+
+            elif strategy == RecoveryStrategy.REPLAN:
+                # Modify the task content and retry
+                if recovery_decision.modified_task_content:
+                    task.content = recovery_decision.modified_task_content
+                    logger.info(f"Task {task.id} content modified for replan")
+
+                # Repost the modified task
+                if task.id in self._assignees:
+                    assignee_id = self._assignees[task.id]
+                    await self._post_task(task, assignee_id)
+                    action_taken = (
+                        f"replanned and retried with worker {assignee_id}"
+                    )
+                else:
+                    # Find a new assignee for the replanned task
+                    batch_result = await self._find_assignee([task])
+                    assignment = batch_result.assignments[0]
+                    self._assignees[task.id] = assignment.assignee_id
+                    await self._post_task(task, assignment.assignee_id)
+                    action_taken = (
+                        f"replanned and assigned to "
+                        f"worker {assignment.assignee_id}"
+                    )
+
+            elif strategy == RecoveryStrategy.REASSIGN:
+                # Reassign to a different worker
+                old_worker = task.assigned_worker_id
+                logger.info(
+                    f"Task {task.id} will be reassigned from worker "
+                    f"{old_worker}"
+                )
+
+                # Find a different worker
+                batch_result = await self._find_assignee([task])
+                assignment = batch_result.assignments[0]
+                new_worker = assignment.assignee_id
+
+                # If same worker, force find another
+                if new_worker == old_worker and len(self._children) > 1:
+                    logger.info("Same worker selected, finding alternative")
+                    # Try to find different worker by adding note to
+                    # task content
+                    task.content = (
+                        f"{task.content}\n\n"
+                        f"Note: Previous worker {old_worker} had quality "
+                        f"issues. Needs different approach."
+                    )
+                    batch_result = await self._find_assignee([task])
+                    assignment = batch_result.assignments[0]
+                    new_worker = assignment.assignee_id
+
+                self._assignees[task.id] = new_worker
+                await self._post_task(task, new_worker)
+                action_taken = f"reassigned from {old_worker} to {new_worker}"
+                logger.info(
+                    f"Task {task.id} reassigned from {old_worker} to "
+                    f"{new_worker}"
+                )
+
+            elif strategy == RecoveryStrategy.DECOMPOSE:
+                # Decompose the task into subtasks
+                reason = (
+                    "failure"
+                    if not recovery_decision.is_quality_evaluation
+                    else "quality issues"
+                )
+                logger.info(
+                    f"Task {task.id} will be decomposed due to {reason}"
+                )
+                subtasks_result = self._decompose_task(task)
+
+                # Handle both streaming and non-streaming results
+                if isinstance(subtasks_result, Generator):
+                    subtasks = []
+                    for new_tasks in subtasks_result:
+                        subtasks.extend(new_tasks)
+                else:
+                    subtasks = subtasks_result
+
+                if subtasks:
+                    task_decomposed_event = TaskDecomposedEvent(
+                        parent_task_id=task.id,
+                        subtask_ids=[st.id for st in subtasks],
+                    )
+                    for cb in self._callbacks:
+                        cb.log_task_decomposed(task_decomposed_event)
+                    for subtask in subtasks:
+                        task_created_event = TaskCreatedEvent(
+                            task_id=subtask.id,
+                            description=subtask.content,
+                            parent_task_id=task.id,
+                            task_type=subtask.type,
+                            metadata=subtask.additional_info,
+                        )
+                        for cb in self._callbacks:
+                            cb.log_task_created(task_created_event)
+
+                # Insert subtasks at the head of the queue
+                self._pending_tasks.extendleft(reversed(subtasks))
+                await self._post_ready_tasks()
+                action_taken = f"decomposed into {len(subtasks)} subtasks"
+
+                logger.info(
+                    f"Task {task.id} decomposed into {len(subtasks)} subtasks"
+                )
+
+                # Sync shared memory after task decomposition
+                if self.share_memory:
+                    logger.info(
+                        f"Syncing shared memory after task {task.id} "
+                        f"decomposition"
+                    )
+                    self._sync_shared_memory()
+
+                # For decompose, we return early with special handling
+                return True
+
+            elif strategy == RecoveryStrategy.CREATE_WORKER:
+                assignee = await self._create_worker_node_for_task(task)
+                await self._post_task(task, assignee.node_id)
+                action_taken = (
+                    f"created new worker {assignee.node_id} and assigned "
+                    f"task {task.id} to it"
+                )
+
+        except Exception as e:
+            logger.error(
+                f"Recovery strategy {strategy} failed for task {task.id}: {e}",
+                exc_info=True,
             )
+            raise
+
+        logger.debug(
+            f"Task {task.id} recovery: {action_taken}. "
+            f"Strategy: {strategy.value}"
+        )
+
+        return False
 
     # Human intervention methods
     async def _async_pause(self) -> None:
@@ -966,6 +1327,39 @@ class Workforce(BaseNode):
                 f"(event-loop not yet started)."
             )
 
+    async def _async_skip_gracefully(self) -> None:
+        r"""Async implementation of skip_gracefully to run on the event
+        loop.
+        """
+        self._skip_requested = True
+        if self._pause_event.is_set() is False:
+            self._pause_event.set()  # Resume if paused to process skip
+        logger.info(f"Workforce {self.node_id} skip requested.")
+
+    def skip_gracefully(self) -> None:
+        r"""Request workforce to skip current pending tasks and move to next
+        main task from the queue. If no main tasks exist, acts like
+        stop_gracefully.
+
+        This method clears the current pending subtasks and moves to the next
+        main task in the queue if available. Works both when the internal
+        event-loop is alive and when it has not yet been started.
+        """
+
+        if self._loop and not self._loop.is_closed():
+            self._submit_coro_to_loop(self._async_skip_gracefully())
+        else:
+            # Loop not yet created, set the flag synchronously so later
+            # startup will respect it.
+            self._skip_requested = True
+            # Ensure any pending pause is released so that when the loop does
+            # start it can see the skip request and exit.
+            self._pause_event.set()
+            logger.info(
+                f"Workforce {self.node_id} skip requested "
+                f"(event-loop not yet started)."
+            )
+
     def save_snapshot(self, description: str = "") -> None:
         r"""Save current state as a snapshot."""
         snapshot = WorkforceSnapshot(
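
A short sketch of driving the new skip control from outside the event loop, reusing the workforce object from the callbacks sketch above:

    import threading
    import time


    def skip_after(wf, seconds):
        # skip_gracefully is documented as safe both while the loop is
        # alive and before it starts (the flag is set synchronously).
        time.sleep(seconds)
        wf.skip_gracefully()


    threading.Thread(
        target=skip_after, args=(workforce, 30.0), daemon=True
    ).start()
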
@@ -1020,67 +1414,193 @@ class Workforce(BaseNode):
             logger.warning(f"Task {task_id} not found in pending tasks.")
             return False
 
+    def get_main_task_queue(self) -> List[Task]:
+        r"""Get current main task queue for human review.
+        Returns:
+            List[Task]: List of main tasks waiting to be decomposed
+                and executed.
+        """
+        # Return tasks from pending queue that need decomposition
+        return [
+            t
+            for t in self._pending_tasks
+            if t.additional_info
+            and t.additional_info.get('_needs_decomposition')
+        ]
+
     def add_task(
         self,
         content: str,
         task_id: Optional[str] = None,
         additional_info: Optional[Dict[str, Any]] = None,
+        as_subtask: bool = False,
         insert_position: int = -1,
     ) -> Task:
-        r"""Add a new task to the
-        new_task = Task(
-            content=content,
-            id=task_id or f"human_added_{len(self._pending_tasks)}",
-            additional_info=additional_info,
-        )
-        if insert_position == -1:
-            self._pending_tasks.append(new_task)
-        else:
-            # Convert deque to list, insert, then back to deque
-            tasks_list = list(self._pending_tasks)
-            tasks_list.insert(insert_position, new_task)
-            self._pending_tasks = deque(tasks_list)
+        r"""Add a new task to the workforce.
 
-
-
+        By default, this method adds a main task that will be decomposed into
+        subtasks. Set `as_subtask=True` to add a task directly to the pending
+        subtask queue without decomposition.
 
-
-
-
-
-
-
-
-
-
-
-
-
+        Args:
+            content (str): The content of the task.
+            task_id (Optional[str], optional): Optional ID for the task.
+                If not provided, a unique ID will be generated.
+            additional_info (Optional[Dict[str, Any]], optional): Optional
+                additional metadata for the task.
+            as_subtask (bool, optional): If True, adds the task directly to
+                the pending subtask queue. If False, adds as a main task that
+                will be decomposed. Defaults to False.
+            insert_position (int, optional): Position to insert the task in
+                the pending queue. Only applies when as_subtask=True.
+                Defaults to -1 (append to end).
 
-
-
-
-
+        Returns:
+            Task: The created task object.
+        """
+        if as_subtask:
+            new_task = Task(
+                content=content,
+                id=task_id or f"human_added_{len(self._pending_tasks)}",
+                additional_info=additional_info,
+            )
 
-
-
-
-
-
-
-
+            # Add directly to current pending subtasks
+            if insert_position == -1:
+                self._pending_tasks.append(new_task)
+            else:
+                # Convert deque to list, insert, then back to deque
+                tasks_list = list(self._pending_tasks)
+                tasks_list.insert(insert_position, new_task)
+                self._pending_tasks = deque(tasks_list)
+
+            logger.info(f"New subtask added to pending queue: {new_task.id}")
+            return new_task
+        else:
+            # Add as main task that needs decomposition
+            # Use additional_info to mark this task needs decomposition
+            # Make a copy to avoid modifying user's dict
+            info = additional_info.copy() if additional_info else {}
+            info['_needs_decomposition'] = True
+
+            task_count = sum(
+                1
+                for t in self._pending_tasks
+                if t.additional_info
+                and t.additional_info.get('_needs_decomposition')
             )
-            return False
 
-
-
-
+            new_task = Task(
+                content=content,
+                id=task_id or f"main_task_{task_count}",
+                additional_info=info,
             )
-            return False
 
-
-
+            self._pending_tasks.append(new_task)
+            logger.info(f"New main task added to pending queue: {new_task.id}")
+            return new_task
+
+    def add_main_task(
+        self,
+        content: str,
+        task_id: Optional[str] = None,
+        additional_info: Optional[Dict[str, Any]] = None,
+    ) -> Task:
+        r"""Add a new main task that will be decomposed into subtasks.
+
+        This is an alias for :meth:`add_task` with `as_subtask=False`.
+
+        Args:
+            content (str): The content of the main task.
+            task_id (Optional[str], optional): Optional ID for the task.
+            additional_info (Optional[Dict[str, Any]], optional): Optional
+                additional metadata.
+
+        Returns:
+            Task: The created main task object.
+        """
+        return self.add_task(
+            content=content,
+            task_id=task_id,
+            additional_info=additional_info,
+            as_subtask=False,
+        )
+
+    def add_subtask(
+        self,
+        content: str,
+        task_id: Optional[str] = None,
+        additional_info: Optional[Dict[str, Any]] = None,
+        insert_position: int = -1,
+    ) -> Task:
+        r"""Add a new subtask to the current pending queue.
+
+        This is an alias for :meth:`add_task` with `as_subtask=True`.
+
+        Args:
+            content (str): The content of the subtask.
+            task_id (Optional[str], optional): Optional ID for the task.
+            additional_info (Optional[Dict[str, Any]], optional): Optional
+                additional metadata.
+            insert_position (int, optional): Position to insert the task.
+                Defaults to -1 (append to end).
+
+        Returns:
+            Task: The created subtask object.
+        """
+        return self.add_task(
+            content=content,
+            task_id=task_id,
+            additional_info=additional_info,
+            as_subtask=True,
+            insert_position=insert_position,
+        )
+
+    def remove_task(self, task_id: str) -> bool:
+        r"""Remove a task from the pending queue or main task queue.
+
+        Args:
+            task_id (str): The ID of the task to remove.
+
+        Returns:
+            bool: True if task was found and removed, False otherwise.
+        """
+        # Check main task queue first
+        pending_tasks_list = list(self._pending_tasks)
+        for i, task in enumerate(pending_tasks_list):
+            if task.id == task_id:
+                pending_tasks_list.pop(i)
+                self._pending_tasks = deque(pending_tasks_list)
+                logger.info(f"Task {task_id} removed from pending queue.")
+                return True
+
+        logger.warning(f"Task {task_id} not found in any task queue.")
+        return False
+
+    def reorder_tasks(self, task_ids: List[str]) -> bool:
+        r"""Reorder pending tasks according to the provided task IDs list."""
+        # Create a mapping of task_id to task
+        tasks_dict = {task.id: task for task in self._pending_tasks}
+
+        # Check if all provided IDs exist
+        invalid_ids = [
+            task_id for task_id in task_ids if task_id not in tasks_dict
+        ]
+        if invalid_ids:
+            logger.warning(
+                f"Task IDs not found in pending tasks: {invalid_ids}"
+            )
+            return False
+
+        # Check if we have the same number of tasks
+        if len(task_ids) != len(self._pending_tasks):
+            logger.warning(
+                "Number of task IDs doesn't match pending tasks count."
+            )
+            return False
+
+        # Reorder tasks
+        reordered_tasks = deque([tasks_dict[task_id] for task_id in task_ids])
         self._pending_tasks = reordered_tasks
 
         logger.info("Tasks reordered successfully.")
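
A sketch of the queue-management API added above, again reusing the workforce object from the earlier sketches:

    # Main tasks are decomposed later; subtasks go straight to the queue.
    main = workforce.add_main_task("Summarize the Q3 report")
    front = workforce.add_subtask("Collect the raw figures", insert_position=0)

    # Only tasks still marked _needs_decomposition are reported here.
    assert main in workforce.get_main_task_queue()

    # Removal works on either kind of pending task.
    workforce.remove_task(front.id)
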
@@ -1169,26 +1689,21 @@ class Workforce(BaseNode):
             "main_task_id": self._task.id if self._task else None,
         }
 
-
-
-
-
-
+    async def handle_decompose_append_task(
+        self, task: Task, reset: bool = True
+    ) -> List[Task]:
+        r"""Handle task decomposition and validation with
+        workforce environment functions. Then append to
+        pending tasks if decomposition happened.
 
         Args:
             task (Task): The task to be processed.
-
-
-                runs the task in a blocking one-shot manner.
+            reset (Bool): Should trigger workforce reset (Workforce must not
+                be running). Default: True
 
         Returns:
-            Task: The
+            List[Task]: The decomposed subtasks or the original task.
         """
-        # Delegate to intervention pipeline when requested to keep
-        # backward-compat.
-        if interactive:
-            return await self._process_task_with_snapshot(task)
-
         if not validate_task_content(task.content, task.id):
             task.state = TaskState.FAILED
             task.result = "Task failed: Invalid or empty content provided"
@@ -1196,18 +1711,25 @@ class Workforce(BaseNode):
                 f"Task {task.id} rejected: Invalid or empty content. "
                 f"Content preview: '{task.content}'"
             )
-            return task
+            return [task]
 
-        self.
+        if reset and self._state != WorkforceState.RUNNING:
+            self.reset()
+            logger.info("Workforce reset before handling task.")
+
+        # Focus on the new task
         self._task = task
-        if self.metrics_logger:
-            self.metrics_logger.log_task_created(
-                task_id=task.id,
-                description=task.content,
-                task_type=task.type,
-                metadata=task.additional_info,
-            )
         task.state = TaskState.FAILED
+
+        task_created_event = TaskCreatedEvent(
+            task_id=task.id,
+            description=task.content,
+            task_type=task.type,
+            metadata=task.additional_info,
+        )
+        for cb in self._callbacks:
+            cb.log_task_created(task_created_event)
+
         # The agent tend to be overconfident on the whole task, so we
         # decompose the task into subtasks first
         subtasks_result = self._decompose_task(task)
@@ -1221,26 +1743,57 @@ class Workforce(BaseNode):
         else:
             # This is a regular list (non-streaming mode)
             subtasks = subtasks_result
-        if
-
-            parent_task_id=task.id,
+        if subtasks:
+            task_decomposed_event = TaskDecomposedEvent(
+                parent_task_id=task.id,
+                subtask_ids=[st.id for st in subtasks],
             )
+            for cb in self._callbacks:
+                cb.log_task_decomposed(task_decomposed_event)
             for subtask in subtasks:
-
+                task_created_event = TaskCreatedEvent(
                     task_id=subtask.id,
                     description=subtask.content,
                     parent_task_id=task.id,
                     task_type=subtask.type,
                     metadata=subtask.additional_info,
                 )
+                for cb in self._callbacks:
+                    cb.log_task_created(task_created_event)
+
         if subtasks:
-            #
-            #
+            # _pending_tasks will contain both undecomposed
+            # and decomposed tasks, so we use additional_info
+            # to mark the tasks that need decomposition instead
             self._pending_tasks.extendleft(reversed(subtasks))
         else:
             # If no decomposition, execute the original task.
             self._pending_tasks.append(task)

+        return subtasks
+
+    @check_if_running(False)
+    async def process_task_async(
+        self, task: Task, interactive: bool = False
+    ) -> Task:
+        r"""Main entry point to process a task asynchronously.
+
+        Args:
+            task (Task): The task to be processed.
+            interactive (bool, optional): If True, enables human-intervention
+                workflow (pause/resume/snapshot). Defaults to False, which
+                runs the task in a blocking one-shot manner.
+
+        Returns:
+            Task: The updated task.
+        """
+        # Delegate to intervention pipeline when requested to keep
+        # backward-compat.
+        if interactive:
+            return await self._process_task_with_snapshot(task)
+
+        subtasks = await self.handle_decompose_append_task(task)
+
         self.set_channel(TaskChannel())

         await self.start()
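The hunk above splits the old monolithic entry point in two: `handle_decompose_append_task` validates, decomposes, and queues a task, while `process_task_async` becomes a thin wrapper around it. A minimal usage sketch, assuming workers have already been attached and with an illustrative task content::

    import asyncio

    from camel.societies.workforce import Workforce
    from camel.tasks import Task

    async def main() -> None:
        workforce = Workforce("Demo Team")
        # ... add workers here before processing ...
        task = Task(content="Summarize the quarterly report", id="task_0")
        # interactive=False (the default) runs the blocking one-shot path;
        # decomposition and queueing happen in handle_decompose_append_task().
        done = await workforce.process_task_async(task)
        print(done.result)

    asyncio.run(main())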
@@ -1322,39 +1875,8 @@ class Workforce(BaseNode):
             Task: The updated task.
         """

-
-        task.state = TaskState.FAILED
-        task.result = "Task failed: Invalid or empty content provided"
-        logger.warning(
-            f"Task {task.id} rejected: Invalid or empty content. "
-            f"Content preview: '{task.content}'"
-        )
-        return task
-
-        self.reset()
-        self._task = task
-        self._state = WorkforceState.RUNNING
-        task.state = TaskState.FAILED  # TODO: Add logic for OPEN
-
-        # Decompose the task into subtasks first
-        subtasks_result = self._decompose_task(task)
+        await self.handle_decompose_append_task(task)

-        # Handle both streaming and non-streaming results
-        if isinstance(subtasks_result, Generator):
-            # This is a generator (streaming mode)
-            subtasks = []
-            for new_tasks in subtasks_result:
-                subtasks.extend(new_tasks)
-        else:
-            # This is a regular list (non-streaming mode)
-            subtasks = subtasks_result
-        if subtasks:
-            # If decomposition happened, the original task becomes a container.
-            # We only execute its subtasks.
-            self._pending_tasks.extendleft(reversed(subtasks))
-        else:
-            # If no decomposition, execute the original task.
-            self._pending_tasks.append(task)
         self.set_channel(TaskChannel())

         # Save initial snapshot
@@ -1493,6 +2015,9 @@ class Workforce(BaseNode):
                     start_coroutine, self._loop
                 )
                 self._child_listening_tasks.append(child_task)
+            else:
+                # Close the coroutine to prevent RuntimeWarning
+                start_coroutine.close()
         else:
             # Close the coroutine to prevent RuntimeWarning
             start_coroutine.close()
@@ -1502,6 +2027,7 @@ class Workforce(BaseNode):
         description: str,
         worker: ChatAgent,
         pool_max_size: int = DEFAULT_WORKER_POOL_SIZE,
+        enable_workflow_memory: bool = False,
     ) -> Workforce:
         r"""Add a worker node to the workforce that uses a single agent.
         Can be called when workforce is paused to dynamically add workers.
@@ -1511,6 +2037,9 @@ class Workforce(BaseNode):
             worker (ChatAgent): The agent to be added.
             pool_max_size (int): Maximum size of the agent pool.
                 (default: :obj:`10`)
+            enable_workflow_memory (bool): Whether to enable workflow memory
+                accumulation. Set to True if you plan to call
+                save_workflow_memories(). (default: :obj:`False`)

         Returns:
             Workforce: The workforce node itself.
@@ -1537,6 +2066,8 @@ class Workforce(BaseNode):
             worker=worker,
             pool_max_size=pool_max_size,
             use_structured_output_handler=self.use_structured_output_handler,
+            context_utility=None,  # Will be set during save/load operations
+            enable_workflow_memory=enable_workflow_memory,
         )
         self._children.append(worker_node)
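The two hunks above thread the new `enable_workflow_memory` flag from `add_single_agent_worker` down into the `SingleAgentWorker` constructor. A sketch of opting in (the agent construction is schematic; only the keyword itself comes from this diff)::

    from camel.agents import ChatAgent
    from camel.societies.workforce import Workforce

    workforce = Workforce("Research Team")
    analyst_agent = ChatAgent("You are a data analyst.")

    # Without enable_workflow_memory=True, a later call to
    # save_workflow_memories() has nothing to persist for this worker.
    workforce.add_single_agent_worker(
        description="data_analyst",
        worker=analyst_agent,
        enable_workflow_memory=True,
    )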
@@ -1547,12 +2078,10 @@ class Workforce(BaseNode):
         # If workforce is paused, start the worker's listening task
         self._start_child_node_when_paused(worker_node.start())

-
-
-
-
-            role=worker_node.description,
-        )
+        self._notify_worker_created(
+            worker_node,
+            worker_type='SingleAgentWorker',
+        )
         return self

     def add_role_playing_worker(
@@ -1626,12 +2155,10 @@ class Workforce(BaseNode):
         # If workforce is paused, start the worker's listening task
         self._start_child_node_when_paused(worker_node.start())

-
-
-
-
-            role=worker_node.description,
-        )
+        self._notify_worker_created(
+            worker_node,
+            worker_type='RolePlayingWorker',
+        )
         return self

     def add_workforce(self, workforce: Workforce) -> Workforce:
@@ -1692,6 +2219,7 @@ class Workforce(BaseNode):
         # Reset intervention state
         self._state = WorkforceState.IDLE
         self._stop_requested = False
+        self._skip_requested = False
         # Handle asyncio.Event in a thread-safe way
         if self._loop and not self._loop.is_closed():
             # If we have a loop, use it to set the event safely
@@ -1707,118 +2235,520 @@ class Workforce(BaseNode):
             # No active loop, directly set the event
             self._pause_event.set()

-
-
-
-        self.metrics_logger = WorkforceLogger(workforce_id=self.node_id)
-
-    @check_if_running(False)
-    def set_channel(self, channel: TaskChannel) -> None:
-        r"""Set the channel for the node and all the child nodes under it."""
-        self._channel = channel
-        for child in self._children:
-            child.set_channel(channel)
+        for cb in self._callbacks:
+            if isinstance(cb, WorkforceMetrics):
+                cb.reset_task_data()

-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def save_workflow_memories(
+        self,
+        session_id: Optional[str] = None,
+    ) -> Dict[str, str]:
+        r"""Save workflow memories for all SingleAgentWorker instances in the
+        workforce.
+
+        .. deprecated:: 0.2.80
+            This synchronous method processes workers sequentially, which can
+            be slow for multiple agents. Use
+            :meth:`save_workflow_memories_async`
+            instead for parallel processing and significantly better
+            performance.
+
+        This method iterates through all child workers and triggers workflow
+        saving for SingleAgentWorker instances using their
+        save_workflow_memories()
+        method.
+        Other worker types are skipped.

-
-
-
+        Args:
+            session_id (Optional[str]): Custom session ID to use for saving
+                workflows. If None, auto-generates a timestamped session ID.
+                Useful for organizing workflows by project or context.
+                (default: :obj:`None`)

-
-
+        Returns:
+            Dict[str, str]: Dictionary mapping worker node IDs to save results.
+                Values are either file paths (success) or error messages
+                (failure).

-
-
-
-
+        Example:
+            >>> workforce = Workforce("My Team")
+            >>> # ... add workers and process tasks ...
+            >>> # save with auto-generated session id
+            >>> results = workforce.save_workflow_memories()
+            >>> print(results)
+            {'worker_123': '/path/to/developer_agent_workflow.md',
+             'worker_456': 'error: No conversation context available'}
+            >>> # save with custom project id
+            >>> results = workforce.save_workflow_memories(
+            ...     session_id="project_123"
+            ... )
+
+        Note:
+            For better performance with multiple workers, use the async
+            version::
+
+                results = await workforce.save_workflow_memories_async()
+
+        See Also:
+            :meth:`save_workflow_memories_async`: Async version with parallel
+                processing for significantly better performance.
+        """
+        import warnings
+
+        warnings.warn(
+            "save_workflow_memories() is slow for multiple workers. "
+            "Consider using save_workflow_memories_async() for parallel "
+            "processing and ~4x faster performance.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        results = {}

-
+        # Get or create shared context utility for this save operation
+        shared_context_utility = self._get_or_create_shared_context_utility(
+            session_id=session_id
+        )

-
-
-
+        for child in self._children:
+            if isinstance(child, SingleAgentWorker):
+                try:
+                    # Set the shared context utility for this operation
+                    child._shared_context_utility = shared_context_utility
+                    child.worker.set_context_utility(shared_context_utility)
+
+                    result = child.save_workflow_memories()
+                    if result.get("status") == "success":
+                        results[child.node_id] = result.get(
+                            "file_path", "unknown_path"
+                        )
+                    else:
+                        # Error: check if there's a separate message field,
+                        # otherwise use the status itself
+                        error_msg = result.get(
+                            "message", result.get("status", "Unknown error")
+                        )
+                        results[child.node_id] = f"error: {error_msg}"

-
-
-                toolkit_name = tool.func.__self__.__class__.__name__
+                except Exception as e:
+                    results[child.node_id] = f"error: {e!s}"
             else:
-
-
-
-
-                toolkit_tools[toolkit_name].append(tool_name)
+                # Skip non-SingleAgentWorker types
+                results[child.node_id] = (
+                    f"skipped: {type(child).__name__} not supported"
+                )

-
+        logger.info(f"Workflow save completed for {len(results)} workers")
+        return results

-    def
-
+    async def save_workflow_memories_async(
+        self,
+        session_id: Optional[str] = None,
+    ) -> Dict[str, str]:
+        r"""Asynchronously save workflow memories for all SingleAgentWorker
+        instances in the workforce.

-
-
-
-        valid_worker_ids = {child.node_id for child in self._children}
-        return valid_worker_ids
+        This is the async version of save_workflow_memories() that parallelizes
+        LLM summarization calls across all workers using asyncio.gather(),
+        significantly reducing total save time.

-
-
-
-
-        feedback in the case of invalid worker IDs.
+        This method iterates through all child workers and triggers workflow
+        saving for SingleAgentWorker instances using their
+        save_workflow_memories_async() method in parallel.
+        Other worker types are skipped.

         Args:
-
-
-
+            session_id (Optional[str]): Custom session ID to use for saving
+                workflows. If None, auto-generates a timestamped session ID.
+                Useful for organizing workflows by project or context.
+                (default: :obj:`None`)

         Returns:
-
+            Dict[str, str]: Dictionary mapping worker node IDs to save results.
+                Values are either file paths (success) or error messages
+                (failure).
+
+        Example:
+            >>> workforce = Workforce("My Team")
+            >>> # ... add workers and process tasks ...
+            >>> # save with parallel summarization (faster)
+            >>> results = await workforce.save_workflow_memories_async()
+            >>> print(results)
+            {'worker_123': '/path/to/developer_agent_workflow.md',
+             'worker_456': '/path/to/search_agent_workflow.md',
+             'worker_789': '/path/to/document_agent_workflow.md'}
         """
-
-        tasks_info = ""
-        for task in tasks:
-            tasks_info += f"Task ID: {task.id}\n"
-            tasks_info += f"Content: {task.content}\n"
-            if task.additional_info:
-                tasks_info += f"Additional Info: {task.additional_info}\n"
-            tasks_info += "---\n"
+        import asyncio

-
-
-
-
-
+        results = {}
+
+        # Get or create shared context utility for this save operation
+        shared_context_utility = self._get_or_create_shared_context_utility(
+            session_id=session_id
         )

-        #
-
-
-
-
-
-
-
-
-
+        # Prepare tasks for parallel execution
+        async def save_single_worker(
+            child: BaseNode,
+        ) -> tuple[str, str]:
+            """Save workflow for a single worker, then return (node_id,
+            result)."""
+            if isinstance(child, SingleAgentWorker):
+                try:
+                    # Set the shared context utility for this operation
+                    child._shared_context_utility = shared_context_utility
+                    child.worker.set_context_utility(shared_context_utility)
+
+                    result = await child.save_workflow_memories_async()
+                    if result.get("status") == "success":
+                        return (
+                            child.node_id,
+                            result.get("file_path", "unknown_path"),
+                        )
+                    else:
+                        # Error: check if there's a separate message field,
+                        # otherwise use the status itself
+                        error_msg = result.get(
+                            "message", result.get("status", "Unknown error")
+                        )
+                        return (child.node_id, f"error: {error_msg}")

-
-
+                except Exception as e:
+                    return (child.node_id, f"error: {e!s}")
+            else:
+                # Skip non-SingleAgentWorker types
+                return (
+                    child.node_id,
+                    f"skipped: {type(child).__name__} not supported",
+                )
+
+        # Create tasks for all workers
+        tasks = [save_single_worker(child) for child in self._children]
+
+        # Execute all tasks in parallel using asyncio.gather()
+        parallel_results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        # Process results
+        for result in parallel_results:
+            if isinstance(result, Exception):
+                # Handle any unexpected exceptions
+                logger.error(
+                    f"Unexpected error during workflow save: {result}"
+                )
+                results["unknown"] = f"error: {result!s}"
+            elif isinstance(result, tuple) and len(result) == 2:
+                # Successfully got (node_id, save_result) tuple
+                node_id, save_result = result
+                results[node_id] = save_result
+            else:
+                # Unexpected result format
+                logger.error(f"Unexpected result format: {result}")
+                results["unknown"] = "error: unexpected result format"
+
+        logger.info(
+            f"Workflow save completed for {len(results)} workers "
+            f"(parallelized)"
+        )
+        return results
+
+    def load_workflow_memories(
+        self,
+        session_id: Optional[str] = None,
+        worker_max_workflows: int = 3,
+        coordinator_max_workflows: int = 5,
+        task_agent_max_workflows: int = 3,
+    ) -> Dict[str, bool]:
+        r"""Load workflow memories for all SingleAgentWorker instances in the
+        workforce.
+
+        This method iterates through all child workers and loads relevant
+        workflow files for SingleAgentWorker instances using their
+        load_workflow_memories()
+        method. Workers match files based on their description names.
+
+        Args:
+            session_id (Optional[str]): Specific workforce session ID to load
+                from. If None, searches across all sessions.
+                (default: :obj:`None`)
+            worker_max_workflows (int): Maximum number of workflow files to
+                load per worker agent. (default: :obj:`3`)
+            coordinator_max_workflows (int): Maximum number of workflow files
+                to load for the coordinator agent. (default: :obj:`5`)
+            task_agent_max_workflows (int): Maximum number of workflow files
+                to load for the task planning agent. (default: :obj:`3`)
+
+        Returns:
+            Dict[str, bool]: Dictionary mapping worker node IDs to load
+                success status.
+                True indicates successful loading, False indicates failure.
+
+        Example:
+            >>> workforce = Workforce("My Team")
+            >>> workforce.add_single_agent_worker(
+            ...     "data_analyst", analyst_agent
+            ... )
+            >>> success_status = workforce.load_workflow_memories(
+            ...     worker_max_workflows=5,
+            ...     coordinator_max_workflows=10,
+            ...     task_agent_max_workflows=5
+            ... )
+            >>> print(success_status)
+            {'worker_123': True}  # Successfully loaded workflows for
+                                  # data_analyst
+        """
+        results = {}
+
+        # For loading, we don't create a new session - instead we search
+        # existing ones
+        # Each worker will search independently across all existing sessions
+
+        # First, load workflows for SingleAgentWorker instances
+        for child in self._children:
+            if isinstance(child, SingleAgentWorker):
+                try:
+                    # For loading, don't set shared context utility
+                    # Let each worker search across existing sessions
+                    success = child.load_workflow_memories(
+                        max_workflows=worker_max_workflows,
+                        session_id=session_id,
+                    )
+                    results[child.node_id] = success
+
+                except Exception as e:
+                    logger.error(
+                        f"Failed to load workflow for {child.node_id}: {e!s}"
+                    )
+                    results[child.node_id] = False
+            else:
+                # Skip non-SingleAgentWorker types
+                results[child.node_id] = False
+
+        # Load aggregated workflow summaries for coordinator and task agents
+        self._load_management_agent_workflows(
+            coordinator_max_workflows, task_agent_max_workflows, session_id
+        )
+
+        logger.info(f"Workflow load completed for {len(results)} workers")
+        return results
+
+    def _load_management_agent_workflows(
+        self,
+        coordinator_max_workflows: int,
+        task_agent_max_workflows: int,
+        session_id: Optional[str] = None,
+    ) -> None:
+        r"""Load workflow summaries for coordinator and task planning agents.
+
+        This method loads aggregated workflow summaries to help:
+        - Coordinator agent: understand task assignment patterns and worker
+          capabilities
+        - Task agent: understand task decomposition patterns and
+          successful strategies
+
+        Args:
+            coordinator_max_workflows (int): Maximum number of workflow files
+                to load for the coordinator agent.
+            task_agent_max_workflows (int): Maximum number of workflow files
+                to load for the task planning agent.
+            session_id (Optional[str]): Specific session ID to load from.
+                If None, searches across all sessions.
+        """
+        try:
+            import glob
+            import os
+            from pathlib import Path
+
+            from camel.utils.context_utils import ContextUtility
+
+            # For loading management workflows, search across all sessions
+            camel_workdir = os.environ.get("CAMEL_WORKDIR")
+            if camel_workdir:
+                base_dir = os.path.join(camel_workdir, "workforce_workflows")
+            else:
+                base_dir = "workforce_workflows"
+
+            # Search for workflow files in specified or all session directories
+            if session_id:
+                search_path = str(
+                    Path(base_dir) / session_id / "*_workflow*.md"
+                )
+            else:
+                search_path = str(Path(base_dir) / "*" / "*_workflow*.md")
+            workflow_files = glob.glob(search_path)
+
+            if not workflow_files:
+                logger.info(
+                    "No workflow files found for management agent context"
+                )
+                return
+
+            # Sort by modification time (most recent first)
+            workflow_files.sort(
+                key=lambda x: os.path.getmtime(x), reverse=True
+            )
+
+            # Load workflows for coordinator agent
+            coordinator_loaded = 0
+            for file_path in workflow_files[:coordinator_max_workflows]:
+                try:
+                    filename = os.path.basename(file_path).replace('.md', '')
+                    session_dir = os.path.dirname(file_path)
+                    session_id = os.path.basename(session_dir)
+
+                    # Use shared context utility with specific session
+                    temp_utility = ContextUtility.get_workforce_shared(
+                        session_id
+                    )
+
+                    status = temp_utility.load_markdown_context_to_memory(
+                        self.coordinator_agent, filename
+                    )
+                    if "Context appended" in status:
+                        coordinator_loaded += 1
+                except Exception as e:
+                    logger.warning(
+                        f"Failed to load coordinator workflow {file_path}: {e}"
+                    )
+
+            # Load workflows for task agent
+            task_agent_loaded = 0
+            for file_path in workflow_files[:task_agent_max_workflows]:
+                try:
+                    filename = os.path.basename(file_path).replace('.md', '')
+                    session_dir = os.path.dirname(file_path)
+                    session_id = os.path.basename(session_dir)
+
+                    # Use shared context utility with specific session
+                    temp_utility = ContextUtility.get_workforce_shared(
+                        session_id
+                    )
+
+                    status = temp_utility.load_markdown_context_to_memory(
+                        self.task_agent, filename
+                    )
+                    if "Context appended" in status:
+                        task_agent_loaded += 1
+                except Exception as e:
+                    logger.warning(
+                        f"Failed to load task agent workflow {file_path}: {e}"
+                    )
+
+            logger.info(
+                f"Loaded {coordinator_loaded} workflows for coordinator, "
+                f"{task_agent_loaded} workflows for task agent"
+            )
+
+        except Exception as e:
+            logger.error(f"Error loading management agent workflows: {e}")
+
+    @check_if_running(False)
+    def set_channel(self, channel: TaskChannel) -> None:
+        r"""Set the channel for the node and all the child nodes under it."""
+        self._channel = channel
+        for child in self._children:
+            child.set_channel(channel)
+
+    def _get_child_nodes_info(self) -> str:
+        r"""Get the information of all the child nodes under this node."""
+        return "".join(
+            f"<{child.node_id}>:<{child.description}>:<{self._get_node_info(child)}>\n"
+            for child in self._children
+        )
+
+    def _get_node_info(self, node) -> str:
+        r"""Get descriptive information for a specific node type."""
+        if isinstance(node, Workforce):
+            return "A Workforce node"
+        elif isinstance(node, SingleAgentWorker):
+            return self._get_single_agent_toolkit_info(node)
+        elif isinstance(node, RolePlayingWorker):
+            return "A Role playing node"
+        else:
+            return "Unknown node"
+
+    def _get_single_agent_toolkit_info(
+        self, worker: 'SingleAgentWorker'
+    ) -> str:
+        r"""Get formatted information for a SingleAgentWorker node."""
+        toolkit_tools = self._group_tools_by_toolkit(worker.worker.tool_dict)
+
+        if not toolkit_tools:
+            return ""
+
+        toolkit_info = []
+        for toolkit_name, tools in sorted(toolkit_tools.items()):
+            tools_str = ', '.join(sorted(tools))
+            toolkit_info.append(f"{toolkit_name}({tools_str})")
+
+        return ", ".join(toolkit_info)
+
+    def _group_tools_by_toolkit(self, tool_dict: dict) -> dict[str, list[str]]:
+        r"""Group tools by their parent toolkit class names."""
+        toolkit_tools: dict[str, list[str]] = {}
+
+        for tool_name, tool in tool_dict.items():
+            if hasattr(tool.func, '__self__'):
+                toolkit_name = tool.func.__self__.__class__.__name__
+            else:
+                toolkit_name = "Standalone"
+
+            if toolkit_name not in toolkit_tools:
+                toolkit_tools[toolkit_name] = []
+            toolkit_tools[toolkit_name].append(tool_name)
+
+        return toolkit_tools
+
+    def _get_valid_worker_ids(self) -> set:
+        r"""Get all valid worker IDs from child nodes.
+
+        Returns:
+            set: Set of valid worker IDs that can be assigned tasks.
+        """
+        valid_worker_ids = {child.node_id for child in self._children}
+        return valid_worker_ids
+
+    def _call_coordinator_for_assignment(
+        self, tasks: List[Task], invalid_ids: Optional[List[str]] = None
+    ) -> TaskAssignResult:
+        r"""Call coordinator agent to assign tasks with optional validation
+        feedback in the case of invalid worker IDs.
+
+        Args:
+            tasks (List[Task]): Tasks to assign.
+            invalid_ids (List[str], optional): Invalid worker IDs from previous
+                attempt (if any).
+
+        Returns:
+            TaskAssignResult: Assignment result from coordinator.
+        """
+        # format tasks information for the prompt
+        tasks_info = ""
+        for task in tasks:
+            tasks_info += f"Task ID: {task.id}\n"
+            tasks_info += f"Content: {task.content}\n"
+            if task.additional_info:
+                tasks_info += f"Additional Info: {task.additional_info}\n"
+            tasks_info += "---\n"
+
+        prompt = str(
+            ASSIGN_TASK_PROMPT.format(
+                tasks_info=tasks_info,
+                child_nodes_info=self._get_child_nodes_info(),
+            )
+        )
+
+        # add feedback if this is a retry
+        if invalid_ids:
+            valid_worker_ids = list(self._get_valid_worker_ids())
+            feedback = (
+                f"VALIDATION ERROR: The following worker IDs are invalid: "
+                f"{invalid_ids}. "
+                f"VALID WORKER IDS: {valid_worker_ids}. "
+                f"Please reassign ONLY the above tasks using these valid IDs."
+            )
+            prompt = prompt + f"\n\n{feedback}"
+
+        # Check if we should use structured handler
+        if self.use_structured_output_handler:
             # Use structured handler for prompt-based extraction
             enhanced_prompt = (
                 self.structured_handler.generate_structured_prompt(
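Taken together, the methods added above form a save/load round trip for workflow memories. A sketch under stated assumptions (an event loop is running, workers were added with `enable_workflow_memory=True`, and the session ID is arbitrary)::

    async def persist_and_restore(workforce) -> None:
        # Parallel save: one summarization per SingleAgentWorker, gathered
        # concurrently; values are file paths or "error: ..." strings.
        results = await workforce.save_workflow_memories_async(
            session_id="project_123"
        )
        for node_id, outcome in results.items():
            print(node_id, outcome)

        # Later (possibly a fresh process): reload the same session so the
        # workers, coordinator, and task agent regain prior workflow context.
        status = workforce.load_workflow_memories(
            session_id="project_123",
            worker_max_workflows=5,
        )
        print(status)  # {worker_node_id: True or False}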
@@ -2057,8 +2987,40 @@ class Workforce(BaseNode):
             TaskAssignResult: Assignment result containing task assignments
                 with their dependencies.
         """
+        # Wait for workers to be ready before assignment with exponential
+        # backoff
+        worker_readiness_timeout = 2.0  # Maximum wait time in seconds
+        worker_readiness_check_interval = 0.05  # Initial check interval
+        start_time = time.time()
+        check_interval = worker_readiness_check_interval
+        backoff_multiplier = 1.5  # Exponential backoff factor
+        max_interval = 0.5  # Cap the maximum interval
+
+        while (time.time() - start_time) < worker_readiness_timeout:
+            valid_worker_ids = self._get_valid_worker_ids()
+            if len(valid_worker_ids) > 0:
+                elapsed = time.time() - start_time
+                logger.debug(
+                    f"Workers ready after {elapsed:.3f}s: "
+                    f"{len(valid_worker_ids)} workers available"
+                )
+                break
+
+            await asyncio.sleep(check_interval)
+            # Exponential backoff with cap
+            check_interval = min(
+                check_interval * backoff_multiplier, max_interval
+            )
+        else:
+            # Timeout reached, log warning but continue
+            logger.warning(
+                f"Worker readiness timeout after "
+                f"{worker_readiness_timeout}s, "
+                f"proceeding with {len(self._children)} children"
+            )
+            valid_worker_ids = self._get_valid_worker_ids()
+
         self.coordinator_agent.reset()
-        valid_worker_ids = self._get_valid_worker_ids()

         logger.debug(
             f"Sending batch assignment request to coordinator "
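The readiness wait above relies on Python's `while ... else`, whose `else` arm runs only when the loop ends without `break`. The same backoff shape, isolated into a reusable helper (a sketch, not part of the package)::

    import asyncio
    import time

    async def wait_until(ready, timeout=2.0, initial=0.05, factor=1.5, cap=0.5):
        # Poll ready() with exponentially growing sleeps, capped at `cap`,
        # mirroring the worker-readiness wait in the hunk above.
        start = time.time()
        interval = initial
        while (time.time() - start) < timeout:
            if ready():
                return True
            await asyncio.sleep(interval)
            interval = min(interval * factor, cap)
        return False  # the original logs a warning here and proceeds anyway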
@@ -2092,7 +3054,24 @@ class Workforce(BaseNode):
                 invalid_assignments, tasks, valid_worker_ids
             )
         )
-
+
+        # Combine assignments with deduplication, prioritizing retry results
+        assignment_map = {a.task_id: a for a in valid_assignments}
+        assignment_map.update(
+            {a.task_id: a for a in retry_and_fallback_assignments}
+        )
+        all_assignments = list(assignment_map.values())
+
+        # Log any overwrites for debugging
+        valid_task_ids = {a.task_id for a in valid_assignments}
+        retry_task_ids = {a.task_id for a in retry_and_fallback_assignments}
+        overlap_task_ids = valid_task_ids & retry_task_ids
+
+        if overlap_task_ids:
+            logger.warning(
+                f"Retry assignments overrode {len(overlap_task_ids)} "
+                f"valid assignments for tasks: {sorted(overlap_task_ids)}"
+            )

         # Update Task.dependencies for all final assignments
         self._update_task_dependencies_from_assignments(all_assignments, tasks)
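The deduplication above exploits dict-key overwriting: the valid assignments are inserted first, then `update()` with the retry/fallback assignments makes the retry result win whenever both cover the same task. In miniature::

    valid = {"t1": "worker_a", "t2": "worker_b"}
    retry = {"t2": "worker_c"}  # reassignment produced on retry

    assignment_map = dict(valid)
    assignment_map.update(retry)  # later entries override earlier ones

    assert assignment_map == {"t1": "worker_a", "t2": "worker_c"}
    overlap = valid.keys() & retry.keys()  # {"t2"}, which gets logged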
@@ -2105,10 +3084,11 @@ class Workforce(BaseNode):

         task.assigned_worker_id = assignee_id

-
-
-
-
+        task_started_event = TaskStartedEvent(
+            task_id=task.id, worker_id=assignee_id
+        )
+        for cb in self._callbacks:
+            cb.log_task_started(task_started_event)

         try:
             await self._channel.post_task(task, self.node_id, assignee_id)
@@ -2140,10 +3120,12 @@ class Workforce(BaseNode):
         Returns:
             Worker: The created worker node.
         """
-        prompt =
-
-
-
+        prompt = str(
+            CREATE_NODE_PROMPT.format(
+                content=task.content,
+                child_nodes_info=self._get_child_nodes_info(),
+                additional_info=task.additional_info,
+            )
         )
         # Check if we should use structured handler
         if self.use_structured_output_handler:
@@ -2170,8 +3152,7 @@ class Workforce(BaseNode):
                 "worker creation"
             )
             new_node_conf = WorkerConf(
-                description=f"Fallback worker for task: "
-                f"{task.content}",
+                description=f"Fallback worker for task: {task.content}",
                 role="General Assistant",
                 sys_msg="You are a general assistant that can help "
                 "with various tasks.",
@@ -2181,7 +3162,7 @@ class Workforce(BaseNode):
                 response.msg.content,
                 schema=WorkerConf,
                 fallback_values={
-                    "description": f"Worker for task:
+                    "description": f"Worker for task: {task.content}",
                     "role": "Task Specialist",
                     "sys_msg": f"You are a specialist for: {task.content}",
                 },
@@ -2209,8 +3190,7 @@ class Workforce(BaseNode):
             )
             # Create a fallback worker configuration
             new_node_conf = WorkerConf(
-                description=f"Fallback worker for "
-                f"task: {task.content}",
+                description=f"Fallback worker for task: {task.content}",
                 role="General Assistant",
                 sys_msg="You are a general assistant that can help "
                 "with various tasks.",
@@ -2254,13 +3234,13 @@ class Workforce(BaseNode):
         print(f"{Fore.CYAN}{new_node} created.{Fore.RESET}")

         self._children.append(new_node)
-
-
-
-
-
-
-
+
+        self._notify_worker_created(
+            new_node,
+            worker_type='SingleAgentWorker',
+            role=new_node_conf.role,
+            metadata={'description': new_node_conf.description},
+        )
         self._child_listening_tasks.append(
             asyncio.create_task(new_node.start())
         )
@@ -2304,13 +3284,27 @@ class Workforce(BaseNode):
         r"""Get the task that's published by this node and just get returned
         from the assignee. Includes timeout handling to prevent indefinite
         waiting.
+
+        Raises:
+            asyncio.TimeoutError: If waiting for task exceeds timeout
         """
         try:
             # Add timeout to prevent indefinite waiting
             return await asyncio.wait_for(
                 self._channel.get_returned_task_by_publisher(self.node_id),
-                timeout=
+                timeout=self.task_timeout_seconds,
             )
+        except asyncio.TimeoutError:
+            # Re-raise timeout errors to be handled by caller
+            # This prevents hanging when tasks are stuck
+            logger.warning(
+                f"Timeout waiting for task return in workforce "
+                f"{self.node_id}. "
+                f"Timeout: {self.task_timeout_seconds}s, "
+                f"Pending tasks: {len(self._pending_tasks)}, "
+                f"In-flight tasks: {self._in_flight_tasks}"
+            )
+            raise
         except Exception as e:
             error_msg = (
                 f"Error getting returned task {e} in "
@@ -2329,7 +3323,15 @@ class Workforce(BaseNode):
         tasks_to_assign = [
             task
             for task in self._pending_tasks
-            if
+            if (
+                task.id not in self._task_dependencies
+                and (
+                    task.additional_info is None
+                    or not task.additional_info.get(
+                        "_needs_decomposition", False
+                    )
+                )
+            )
         ]
         if tasks_to_assign:
             logger.debug(
@@ -2339,22 +3341,24 @@ class Workforce(BaseNode):
             batch_result = await self._find_assignee(tasks_to_assign)
             logger.debug(
                 f"Coordinator returned assignments:\n"
-                f"{json.dumps(batch_result.
+                f"{json.dumps(batch_result.model_dump(), indent=2)}"
             )
             for assignment in batch_result.assignments:
                 self._task_dependencies[assignment.task_id] = (
                     assignment.dependencies
                 )
                 self._assignees[assignment.task_id] = assignment.assignee_id
-
+
+                task_assigned_event = TaskAssignedEvent(
+                    task_id=assignment.task_id,
+                    worker_id=assignment.assignee_id,
+                    dependencies=assignment.dependencies,
+                    queue_time_seconds=None,
+                )
+                for cb in self._callbacks:
                     # queue_time_seconds can be derived by logger if task
                     # creation time is logged
-
-                    task_id=assignment.task_id,
-                    worker_id=assignment.assignee_id,
-                    dependencies=assignment.dependencies,
-                    queue_time_seconds=None,
-                )
+                    cb.log_task_assigned(task_assigned_event)

         # Step 2: Iterate through all pending tasks and post those that are
         # ready
@@ -2365,21 +3369,139 @@ class Workforce(BaseNode):
         for task in self._pending_tasks:
             # A task must be assigned to be considered for posting
             if task.id in self._task_dependencies:
+                # Skip if task has already been posted to prevent duplicates
+                try:
+                    task_from_channel = await self._channel.get_task_by_id(
+                        task.id
+                    )
+                    # Check if task is already assigned to a worker
+                    if (
+                        task_from_channel
+                        and task_from_channel.assigned_worker_id
+                    ):
+                        logger.debug(
+                            f"Task {task.id} already assigned to "
+                            f"{task_from_channel.assigned_worker_id}, "
+                            f"skipping to prevent duplicate"
+                        )
+                        continue
+                except Exception as e:
+                    logger.info(
+                        f"Task {task.id} non existent in channel. "
+                        f"Assigning task: {e}"
+                    )
                 dependencies = self._task_dependencies[task.id]
-
-                #
-
-                    dep_id in completed_tasks_info
-
-
-
-
-
-
+
+                # Check if all dependencies are in completed state
+                all_deps_completed = all(
+                    dep_id in completed_tasks_info for dep_id in dependencies
+                )
+
+                # Only proceed with dependency checks if all deps are completed
+                if all_deps_completed:
+                    # Check if all dependencies succeeded (state is DONE)
+                    all_deps_done = all(
+                        completed_tasks_info[dep_id] == TaskState.DONE
+                        for dep_id in dependencies
                     )
-
-
+
+                    # Check if any dependency failed
+                    any_dep_failed = any(
+                        completed_tasks_info[dep_id] == TaskState.FAILED
+                        for dep_id in dependencies
+                    )
+
+                    if all_deps_done:
+                        # All dependencies completed successfully - post the
+                        # task
+                        assignee_id = self._assignees[task.id]
+                        logger.debug(
+                            f"Posting task {task.id} to "
+                            f"assignee {assignee_id}. "
+                            f"Dependencies met."
+                        )
+                        await self._post_task(task, assignee_id)
+                        posted_tasks.append(task)
+                    elif any_dep_failed:
+                        # Check if any failed dependencies can still be retried
+                        failed_deps = [
+                            dep_id
+                            for dep_id in dependencies
+                            if completed_tasks_info[dep_id] == TaskState.FAILED
+                        ]
+
+                        # Check if any failed dependency is still retryable
+                        failed_tasks_with_retry_potential = []
+                        permanently_failed_deps = []
+
+                        for dep_id in failed_deps:
+                            # Find the failed dependency task
+                            failed_task = next(
+                                (
+                                    t
+                                    for t in self._completed_tasks
+                                    if t.id == dep_id
+                                ),
+                                None,
+                            )
+                            if (
+                                failed_task
+                                and failed_task.failure_count
+                                < MAX_TASK_RETRIES
+                            ):
+                                failed_tasks_with_retry_potential.append(
+                                    dep_id
+                                )
+                            else:
+                                permanently_failed_deps.append(dep_id)
+
+                        # Only fail the task if ALL dependencies are
+                        # permanently failed
+                        if (
+                            permanently_failed_deps
+                            and not failed_tasks_with_retry_potential
+                        ):
+                            logger.error(
+                                f"Task {task.id} cannot proceed: dependencies "
+                                f"{permanently_failed_deps} have "
+                                f"permanently failed. "
+                                f"Marking task as failed."
+                            )
+                            task.state = TaskState.FAILED
+                            task.result = (
+                                f"Task failed due to permanently "
+                                f"failed dependencies: "
+                                f"{permanently_failed_deps}"
+                            )
+
+                            # Log the failure to metrics
+                            task_failed_event = TaskFailedEvent(
+                                task_id=task.id,
+                                worker_id=task.assigned_worker_id or "unknown",
+                                error_message=task.result,
+                                metadata={
+                                    'failure_reason': 'dependency_failure',
+                                    'failed_dependencies': (
+                                        permanently_failed_deps
+                                    ),
+                                },
+                            )
+                            for cb in self._callbacks:
+                                cb.log_task_failed(task_failed_event)
+
+                            self._completed_tasks.append(task)
+                            self._cleanup_task_tracking(task.id)
+                            posted_tasks.append(task)  # Remove from pending
+                        else:
+                            # Some dependencies may still be retried, keep
+                            # task pending
+                            logger.debug(
+                                f"Task {task.id} waiting: dependencies "
+                                f"{failed_tasks_with_retry_potential} "
+                                f"failed but may be retried "
+                                f"(attempt < {MAX_TASK_RETRIES})"
+                            )
+                # else: Not all dependencies completed yet, skip this task

         # Step 3: Remove the posted tasks from the pending list
         for task in posted_tasks:
@@ -2391,53 +3513,59 @@ class Workforce(BaseNode):
             pass

     async def _handle_failed_task(self, task: Task) -> bool:
+        r"""Handle a task that failed during execution.
+
+        Args:
+            task (Task): The failed task
+
+        Returns:
+            bool: True if workforce should halt, False otherwise
+        """
         task.failure_count += 1

         # Determine detailed failure information
-        # Use the actual error/result stored in task.result
         failure_reason = task.result or "Unknown error"
-
-        # Add context about the worker and task
         worker_id = task.assigned_worker_id or "unknown"
-
-
-        detailed_error = f"{failure_reason}{worker_info}"
+        detailed_error = f"{failure_reason} (assigned to worker: {worker_id})"

         logger.error(
             f"Task {task.id} failed (attempt "
-            f"{task.failure_count}/
+            f"{task.failure_count}/{MAX_TASK_RETRIES}): {detailed_error}"
         )

-
-
-
-
-
-
-
-
-
-
-
+        print(
+            f"{Fore.RED}❌ Task {task.id} failed "
+            f"(attempt {task.failure_count}/{MAX_TASK_RETRIES}): "
+            f"{failure_reason}{Fore.RESET}"
+        )
+
+        task_failed_event = TaskFailedEvent(
+            task_id=task.id,
+            worker_id=worker_id,
+            error_message=detailed_error,
+            metadata={
+                'failure_count': task.failure_count,
+                'task_content': task.content,
+                'result_length': len(task.result) if task.result else 0,
+            },
+        )
+        for cb in self._callbacks:
+            cb.log_task_failed(task_failed_event)

-        # Check for immediate halt conditions
-        # should halt
+        # Check for immediate halt conditions
         if task.failure_count >= MAX_TASK_RETRIES:
             logger.error(
                 f"Task {task.id} has exceeded maximum retry attempts "
-                f"({MAX_TASK_RETRIES}). Final failure "
-                f"
+                f"({MAX_TASK_RETRIES}). Final failure reason: "
+                f"{detailed_error}. "
                 f"Task content: '{task.content}'"
             )
             self._cleanup_task_tracking(task.id)
-            # Mark task as completed for dependency tracking before halting
             self._completed_tasks.append(task)
             if task.id in self._assignees:
                 await self._channel.archive_task(task.id)
             return True

-        # If too many tasks are failing rapidly, also halt to prevent infinite
-        # loops
         if len(self._pending_tasks) > MAX_PENDING_TASKS_LIMIT:
             logger.error(
                 f"Too many pending tasks ({len(self._pending_tasks)} > "
@@ -2445,18 +3573,24 @@ class Workforce(BaseNode):
                 f"explosion. Last failed task: {task.id}"
             )
             self._cleanup_task_tracking(task.id)
-            # Mark task as completed for dependency tracking before halting
             self._completed_tasks.append(task)
             if task.id in self._assignees:
                 await self._channel.archive_task(task.id)
             return True

         # Use intelligent failure analysis to decide recovery strategy
-        recovery_decision = self.
+        recovery_decision = self._analyze_task(
+            task, for_failure=True, error_message=detailed_error
+        )

+        strategy_str = (
+            recovery_decision.recovery_strategy.value
+            if recovery_decision.recovery_strategy
+            else "none"
+        )
         logger.info(
             f"Task {task.id} failure "
-            f"analysis: {
+            f"analysis: {strategy_str} - "
             f"{recovery_decision.reasoning}"
         )
@@ -2465,105 +3599,23 @@ class Workforce(BaseNode):
             await self._channel.archive_task(task.id)
             self._cleanup_task_tracking(task.id)

+        # Apply recovery strategy
         try:
-
-
-
-                assignee_id = self._assignees[task.id]
-                await self._post_task(task, assignee_id)
-                action_taken = f"retried with same worker {assignee_id}"
-            else:
-                # Find a new assignee and retry
-                batch_result = await self._find_assignee([task])
-                assignment = batch_result.assignments[0]
-                self._assignees[task.id] = assignment.assignee_id
-                await self._post_task(task, assignment.assignee_id)
-                action_taken = (
-                    f"retried with new worker {assignment.assignee_id}"
-                )
-
-            elif recovery_decision.strategy == RecoveryStrategy.REPLAN:
-                # Modify the task content and retry
-                if recovery_decision.modified_task_content:
-                    task.content = recovery_decision.modified_task_content
-                    logger.info(f"Task {task.id} content modified for replan")
-
-                # Repost the modified task
-                if task.id in self._assignees:
-                    assignee_id = self._assignees[task.id]
-                    await self._post_task(task, assignee_id)
-                    action_taken = (
-                        f"replanned and retried with worker {assignee_id}"
-                    )
-                else:
-                    # Find a new assignee for the replanned task
-                    batch_result = await self._find_assignee([task])
-                    assignment = batch_result.assignments[0]
-                    self._assignees[task.id] = assignment.assignee_id
-                    await self._post_task(task, assignment.assignee_id)
-                    action_taken = (
-                        f"replanned and assigned to "
-                        f"worker {assignment.assignee_id}"
-                    )
-
-            elif recovery_decision.strategy == RecoveryStrategy.DECOMPOSE:
-                # Decompose the task into subtasks
-                subtasks_result = self._decompose_task(task)
-
-                # Handle both streaming and non-streaming results
-                if isinstance(subtasks_result, Generator):
-                    # This is a generator (streaming mode)
-                    subtasks = []
-                    for new_tasks in subtasks_result:
-                        subtasks.extend(new_tasks)
-                else:
-                    # This is a regular list (non-streaming mode)
-                    subtasks = subtasks_result
-                if self.metrics_logger and subtasks:
-                    self.metrics_logger.log_task_decomposed(
-                        parent_task_id=task.id,
-                        subtask_ids=[st.id for st in subtasks],
-                    )
-                    for subtask in subtasks:
-                        self.metrics_logger.log_task_created(
-                            task_id=subtask.id,
-                            description=subtask.content,
-                            parent_task_id=task.id,
-                            task_type=subtask.type,
-                            metadata=subtask.additional_info,
-                        )
-                # Insert packets at the head of the queue
-                self._pending_tasks.extendleft(reversed(subtasks))
-
-                await self._post_ready_tasks()
-                action_taken = f"decomposed into {len(subtasks)} subtasks"
-
-                logger.debug(
-                    f"Task {task.id} failed and was {action_taken}. "
-                    f"Dependencies updated for subtasks."
-                )
-
-                # Sync shared memory after task decomposition
-                if self.share_memory:
-                    logger.info(
-                        f"Syncing shared memory after "
-                        f"task {task.id} decomposition"
-                    )
-                    self._sync_shared_memory()
+            is_decompose = await self._apply_recovery_strategy(
+                task, recovery_decision
+            )

-
-
+            # For decompose, we handle it specially
+            if is_decompose:
+                # Task was decomposed, add to completed tasks
+                self._completed_tasks.append(task)
                 return False

-            elif recovery_decision.strategy == RecoveryStrategy.CREATE_WORKER:
-                assignee = await self._create_worker_node_for_task(task)
-                await self._post_task(task, assignee.node_id)
-                action_taken = (
-                    f"created new worker {assignee.node_id} and assigned "
-                    f"task {task.id} to it"
-                )
         except Exception as e:
-            logger.error(
+            logger.error(
+                f"Recovery strategy failed for task {task.id}: {e}",
+                exc_info=True,
+            )
             # If max retries reached, halt the workforce
             if task.failure_count >= MAX_TASK_RETRIES:
                 self._completed_tasks.append(task)
@@ -2571,18 +3623,17 @@ class Workforce(BaseNode):
             self._completed_tasks.append(task)
             return False

+        # Task is being retried - don't add to completed tasks
+        # It will be added when it actually completes or permanently fails
         logger.debug(
-            f"Task {task.id}
-            f"
+            f"Task {task.id} is being retried (strategy: "
+            f"{recovery_decision.recovery_strategy}). "
+            f"Not adding to completed tasks until final outcome."
         )
-        # Mark task as completed for dependency tracking
-        self._completed_tasks.append(task)

-        # Sync shared memory after task
+        # Sync shared memory after task recovery
         if self.share_memory:
-            logger.info(
-                f"Syncing shared memory after task {task.id} completion"
-            )
+            logger.info(f"Syncing shared memory after task {task.id} recovery")
             self._sync_shared_memory()

         # Check if any pending tasks are now ready to execute
```diff
@@ -2590,61 +3641,60 @@ class Workforce(BaseNode):
         return False
 
     async def _handle_completed_task(self, task: Task) -> None:
-
-
-
-        token_usage = None
-
-        # Get processing time from task start time or additional info
-        if task.id in self._task_start_times:
-            processing_time_seconds = (
-                time.time() - self._task_start_times[task.id]
-            )
-            self._cleanup_task_tracking(task.id)
-        elif (
-            task.additional_info is not None
-            and 'processing_time_seconds' in task.additional_info
-        ):
-            processing_time_seconds = task.additional_info[
-                'processing_time_seconds'
-            ]
+        worker_id = task.assigned_worker_id or "unknown"
+        processing_time_seconds = None
+        token_usage = None
 
-
-
-
-
-
-        )
-
-
-
-
-
-
-                for child in self._children
-                if child.node_id == worker_id
-            ),
-            None,
-        )
-        if isinstance(assignee_node, SingleAgentWorker):
-            try:
-                _, total_tokens = (
-                    assignee_node.worker.memory.get_context()
-                )
-                token_usage = {'total_tokens': total_tokens}
-            except Exception:
-                token_usage = None
+        # Get processing time from task start time or additional info
+        if task.id in self._task_start_times:
+            processing_time_seconds = (
+                time.time() - self._task_start_times[task.id]
+            )
+            self._cleanup_task_tracking(task.id)
+        elif (
+            task.additional_info is not None
+            and 'processing_time_seconds' in task.additional_info
+        ):
+            processing_time_seconds = task.additional_info[
+                'processing_time_seconds'
+            ]
 
-
-
-
-
-
-
-
-
+        # Get token usage from task additional info (preferred - actual
+        # usage)
+        if (
+            task.additional_info is not None
+            and 'token_usage' in task.additional_info
+        ):
+            token_usage = task.additional_info['token_usage']
+        else:
+            # Fallback: Try to get token usage from SingleAgentWorker
+            # memory
+            assignee_node = next(
+                (
+                    child
+                    for child in self._children
+                    if child.node_id == worker_id
+                ),
+                None,
             )
+            if isinstance(assignee_node, SingleAgentWorker):
+                try:
+                    _, total_tokens = assignee_node.worker.memory.get_context()
+                    token_usage = {'total_tokens': total_tokens}
+                except Exception:
+                    token_usage = None
+
+        # Log the completed task
+        task_completed_event = TaskCompletedEvent(
+            task_id=task.id,
+            worker_id=worker_id,
+            result_summary=task.result if task.result else "Completed",
+            processing_time_seconds=processing_time_seconds,
+            token_usage=token_usage,
+            metadata={'current_state': task.state.value},
+        )
+        for cb in self._callbacks:
+            cb.log_task_completed(task_completed_event)
 
         # Find and remove the completed task from pending tasks
         tasks_list = list(self._pending_tasks)
```
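`_handle_completed_task` now collects timing and token usage, wraps them in a `TaskCompletedEvent`, and fans the event out to every registered callback. A sketch of a consumer; the event fields mirror the constructor call in the hunk, while the callback registration mechanism is assumed:

```python
# Sketch of a TaskCompletedEvent consumer; assumes an object with this
# method can be registered as a workforce callback.
import logging

logger = logging.getLogger(__name__)


class TimingCallback:
    """Records per-task wall-clock durations from completion events."""

    def __init__(self) -> None:
        self.durations: dict = {}

    def log_task_completed(self, event) -> None:
        if event.processing_time_seconds is not None:
            self.durations[event.task_id] = event.processing_time_seconds
        logger.info(
            "task %s finished on worker %s (tokens=%s)",
            event.task_id,
            event.worker_id,
            event.token_usage,
        )
```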
```diff
@@ -2764,15 +3814,23 @@ class Workforce(BaseNode):
         r"""Returns an ASCII tree representation of the task hierarchy and
         worker status.
         """
-
-
-
+        metrics_cb: List[WorkforceMetrics] = [
+            cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
+        ]
+        if len(metrics_cb) == 0:
+            return "Metrics Callback not initialized."
+        else:
+            return metrics_cb[0].get_ascii_tree_representation()
 
     def get_workforce_kpis(self) -> Dict[str, Any]:
         r"""Returns a dictionary of key performance indicators."""
-
-
-
+        metrics_cb: List[WorkforceMetrics] = [
+            cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
+        ]
+        if len(metrics_cb) == 0:
+            return {"error": "Metrics Callback not initialized."}
+        else:
+            return metrics_cb[0].get_kpis()
 
     def dump_workforce_logs(self, file_path: str) -> None:
         r"""Dumps all collected logs to a JSON file.
```
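Both accessors in this hunk (and `dump_workforce_logs` in the next one) share a single idiom: filter `self._callbacks` for a `WorkforceMetrics` instance and delegate to the first match, degrading gracefully when none is registered. The idiom in isolation; the helper name is hypothetical:

```python
# The isinstance-filter idiom behind get_workforce_kpis and friends;
# the helper name find_callback is invented for illustration.
from typing import Any, List, Optional, Type


def find_callback(callbacks: List[Any], cb_type: Type) -> Optional[Any]:
    matches = [cb for cb in callbacks if isinstance(cb, cb_type)]
    return matches[0] if matches else None
```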
```diff
@@ -2780,13 +3838,133 @@ class Workforce(BaseNode):
         Args:
             file_path (str): The path to the JSON file.
         """
-
+        metrics_cb: List[WorkforceMetrics] = [
+            cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
+        ]
+        if len(metrics_cb) == 0:
             print("Logger not initialized. Cannot dump logs.")
             return
-
+        metrics_cb[0].dump_to_json(file_path)
         # Use logger.info or print, consistent with existing style
         logger.info(f"Workforce logs dumped to {file_path}")
 
+    async def _handle_skip_task(self) -> bool:
+        r"""Handle skip request by marking pending and in-flight tasks
+        as completed.
+
+        Returns:
+            bool: True if workforce should stop (no independent tasks),
+                False to continue.
+        """
+        logger.info("Skip requested, processing skip logic.")
+
+        # Mark all pending tasks as completed instead of just clearing
+        pending_tasks_to_complete = list(self._pending_tasks)
+        if pending_tasks_to_complete:
+            logger.info(
+                f"Marking {len(pending_tasks_to_complete)} pending tasks "
+                f"as completed."
+            )
+            for task in pending_tasks_to_complete:
+                # Don't remove tasks that need decomposition
+                if task.additional_info and task.additional_info.get(
+                    '_needs_decomposition', False
+                ):
+                    continue
+                # Set task state to DONE and add a completion message
+                task.state = TaskState.DONE
+                task.result = "Task marked as completed due to skip request"
+
+                # Use the existing handle completed task function
+                await self._handle_completed_task(task)
+
+        # Handle in-flight tasks if they exist
+        if self._in_flight_tasks > 0:
+            logger.info(
+                f"Found {self._in_flight_tasks} in-flight tasks. "
+                f"Retrieving and completing them."
+            )
+            try:
+                # Get all in-flight tasks for this publisher from the channel
+                in_flight_tasks = await self._channel.get_in_flight_tasks(
+                    self.node_id
+                )
+                logger.info(
+                    f"Retrieved {len(in_flight_tasks)} in-flight "
+                    f"tasks from channel."
+                )
+
+                for task in in_flight_tasks:
+                    # Set task state to DONE and add a completion message
+                    task.state = TaskState.DONE
+                    task.result = (
+                        "Task marked as completed due to skip request"
+                    )
+
+                    # Remove the task from the channel to avoid hanging
+                    await self._channel.remove_task(task.id)
+
+                    # Decrement in-flight counter
+                    self._decrement_in_flight_tasks(
+                        task.id, "skip request - removed from channel"
+                    )
+
+                    # Handle as completed task to update dependencies
+                    await self._handle_completed_task(task)
+
+                    logger.info(
+                        f"Completed in-flight task {task.id} due "
+                        f"to skip request."
+                    )
+
+            except Exception as e:
+                logger.error(
+                    f"Error handling in-flight tasks during skip: {e}",
+                    exc_info=True,
+                )
+                # Reset in-flight counter to prevent hanging
+                self._in_flight_tasks = 0
+
+        # Check if there are any main pending tasks after filtering
+        if self._pending_tasks:
+            # Check if the first pending task needs decomposition
+            next_task = self._pending_tasks[0]
+            if next_task.additional_info and next_task.additional_info.get(
+                '_needs_decomposition'
+            ):
+                logger.info(
+                    f"Decomposing main task {next_task.id} after skip request."
+                )
+                try:
+                    # Remove the decomposition flag to avoid re-decomposition
+                    next_task.additional_info['_needs_decomposition'] = False
+
+                    # Decompose the task and append subtasks to _pending_tasks
+                    await self.handle_decompose_append_task(
+                        next_task, reset=False
+                    )
+
+                    # Mark the main task as completed and remove from pending
+                    await self._handle_completed_task(next_task)
+                    logger.info(
+                        f"Main task {next_task.id} decomposed after "
+                        f"skip request"
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"Error decomposing main task {next_task.id} "
+                        f"after skip: {e}",
+                        exc_info=True,
+                    )
+
+            logger.info("Pending tasks available after skip, continuing.")
+            await self._post_ready_tasks()
+            return False  # Continue processing
+        else:
+            # No pending tasks available, act like stop
+            logger.info("No pending tasks available, acting like stop.")
+            return True  # Stop processing
+
     @check_if_running(False)
     async def _listen_to_channel(self) -> None:
         r"""Continuously listen to the channel, post task to the channel and
```
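`_handle_skip_task` above drains the queue by force-completing tasks rather than discarding them, so dependency bookkeeping stays consistent. A condensed sketch of the core loop, under simplified assumptions (no channel; a plain `needs_decomposition` attribute and a `complete_one` coroutine stand in for the real flag and `_handle_completed_task`):

```python
# Condensed skip flow; 'needs_decomposition' and 'complete_one' are
# simplified stand-ins, not the package's real names.
from collections import deque


async def handle_skip(pending: deque, complete_one) -> bool:
    """Force-complete skippable tasks; True means nothing runnable is
    left and the caller should stop."""
    for task in list(pending):
        if getattr(task, "needs_decomposition", False):
            continue  # keep main tasks that still need decomposition
        task.state = "DONE"
        task.result = "Task marked as completed due to skip request"
        await complete_one(task)  # releases dependents, removes from queue
    return len(pending) == 0
```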
```diff
@@ -2815,6 +3993,75 @@ class Workforce(BaseNode):
                     logger.info("Stop requested, breaking execution loop.")
                     break
 
+                # Check for skip request after potential pause
+                if self._skip_requested:
+                    should_stop = await self._handle_skip_task()
+                    if should_stop:
+                        self._stop_requested = True
+                        break
+
+                    # Reset skip flag
+                    self._skip_requested = False
+                    continue
+
+                # Check if we should decompose a main task
+                # Only decompose when no tasks are in flight and pending queue
+                # is empty
+                if not self._pending_tasks and self._in_flight_tasks == 0:
+                    # All tasks completed, will exit loop
+                    break
+
+                # Check if the first pending task needs decomposition
+                # This happens when add_task(as_subtask=False) was called
+                if self._pending_tasks and self._in_flight_tasks == 0:
+                    next_task = self._pending_tasks[0]
+                    if (
+                        next_task.additional_info
+                        and next_task.additional_info.get(
+                            '_needs_decomposition'
+                        )
+                    ):
+                        logger.info(f"Decomposing main task: {next_task.id}")
+                        try:
+                            # Remove the decomposition flag to avoid
+                            # re-decomposition
+                            next_task.additional_info[
+                                '_needs_decomposition'
+                            ] = False
+
+                            # Decompose the task and append subtasks to
+                            # _pending_tasks
+                            await self.handle_decompose_append_task(
+                                next_task, reset=False
+                            )
+
+                            # Mark the main task as completed (decomposition
+                            # successful) and Remove it from pending tasks
+                            await self._handle_completed_task(next_task)
+                            logger.info(
+                                f"Main task {next_task.id} decomposed and "
+                                f"ready for processing"
+                            )
+                        except Exception as e:
+                            logger.error(
+                                f"Error decomposing main task {next_task.id}: "
+                                f"{e}",
+                                exc_info=True,
+                            )
+                            # Revert back to the queue for retry later if
+                            # decomposition failed
+                            if not self._pending_tasks:
+                                self._pending_tasks.appendleft(next_task)
+                            else:
+                                logger.warning(
+                                    "Pending tasks exist after decomposition "
+                                    "error."
+                                )
+
+                        # Immediately assign and post the transferred tasks
+                        await self._post_ready_tasks()
+                        continue
+
                 # Save snapshot before processing next task
                 if self._pending_tasks:
                     current_task = self._pending_tasks[0]
```
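The loop above decomposes a main task lazily: only when it sits at the head of an otherwise quiet queue does the `_needs_decomposition` flag trigger decomposition, and the flag is cleared up front so a failure cannot re-trigger it. A runnable sketch of the flag's life cycle (`SimpleNamespace` stands in for the real `Task`):

```python
# Life cycle of the '_needs_decomposition' flag, per the diff comments:
# set by add_task(as_subtask=False), consumed once in the listen loop.
from types import SimpleNamespace

task = SimpleNamespace(
    id="main-1", additional_info={"_needs_decomposition": True}
)

# In the loop, with nothing in flight and this task at the queue head:
if task.additional_info and task.additional_info.get("_needs_decomposition"):
    # Clear the flag first so a decomposition failure cannot loop forever.
    task.additional_info["_needs_decomposition"] = False
    # ...decompose, append subtasks, then mark the parent completed...
```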
```diff
@@ -2829,9 +4076,24 @@ class Workforce(BaseNode):
                 self._last_snapshot_time = time.time()
 
             # Get returned task
-
+            try:
+                returned_task = await self._get_returned_task()
+            except asyncio.TimeoutError:
+                # Handle timeout - check if we have tasks stuck in flight
+                if self._in_flight_tasks > 0:
+                    logger.warning(
+                        f"Timeout waiting for {self._in_flight_tasks} "
+                        f"in-flight tasks. Breaking to prevent hanging."
+                    )
+                    # Break the loop to prevent indefinite hanging
+                    # The finally block will handle cleanup
+                    break
+                else:
+                    # No tasks in flight, safe to continue
+                    await self._post_ready_tasks()
+                    continue
 
-            # If no task was returned, continue
+            # If no task was returned (other errors), continue
             if returned_task is None:
                 logger.debug(
                     f"No task returned in workforce {self.node_id}. "
```
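The new `except asyncio.TimeoutError` path distinguishes a timeout while work is still in flight (break, and let the `finally` cleanup run) from an idle timeout (repost ready tasks and keep looping). The underlying idiom, assuming `_get_returned_task` ultimately wraps a queue read in a timeout:

```python
# The asyncio timeout idiom assumed behind _get_returned_task.
import asyncio


async def get_with_timeout(queue: asyncio.Queue, seconds: float):
    """Return the next finished task, or raise asyncio.TimeoutError so
    the caller can choose between breaking and continuing."""
    return await asyncio.wait_for(queue.get(), timeout=seconds)
```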
```diff
@@ -2872,6 +4134,20 @@ class Workforce(BaseNode):
                         )
                         if not halt:
                             continue
+
+                        # Do not halt if we have main tasks in queue
+                        if len(self.get_main_task_queue()) > 0:
+                            print(
+                                f"{Fore.RED}Task {returned_task.id} has "
+                                f"failed for {MAX_TASK_RETRIES} times "
+                                f"after insufficient results, skipping "
+                                f"that task. Final error: "
+                                f"{returned_task.result or 'Unknown err'}"
+                                f"{Fore.RESET}"
+                            )
+                            self._skip_requested = True
+                            continue
+
                         print(
                             f"{Fore.RED}Task {returned_task.id} has "
                             f"failed for {MAX_TASK_RETRIES} times after "
@@ -2890,16 +4166,106 @@ class Workforce(BaseNode):
                         )
                         continue
                     else:
-
-
-                        f"successfully.{Fore.RESET}"
+                        quality_eval = self._analyze_task(
+                            returned_task, for_failure=False
                         )
-
+
+                        if not quality_eval.quality_sufficient:
+                            logger.info(
+                                f"Task {returned_task.id} quality check: "
+                                f"score={quality_eval.quality_score}, "
+                                f"issues={quality_eval.issues}, "
+                                f"strategy={quality_eval.recovery_strategy}"
+                            )
+
+                            # Check retry limit before attempting recovery
+                            if returned_task.failure_count >= 2:
+                                print(
+                                    f"{Fore.YELLOW}Task {returned_task.id} "
+                                    f"completed with low quality score: "
+                                    f"{quality_eval.quality_score} "
+                                    f"(retry limit reached){Fore.RESET}"
+                                )
+                                await self._handle_completed_task(
+                                    returned_task
+                                )
+                                continue
+
+                            # Print visual feedback for quality-failed tasks
+                            # with recovery strategy
+                            recovery_action = (
+                                quality_eval.recovery_strategy.value
+                                if quality_eval.recovery_strategy
+                                else ""
+                            )
+                            print(
+                                f"{Fore.YELLOW}⚠️ Task {returned_task.id} "
+                                f"failed quality check (score: "
+                                f"{quality_eval.quality_score}). "
+                                f"Issues: {', '.join(quality_eval.issues)}. "
+                                f"Recovery: {recovery_action}{Fore.RESET}"
+                            )
+
+                            # Mark as failed for recovery
+                            returned_task.failure_count += 1
+                            returned_task.state = TaskState.FAILED
+                            returned_task.result = (
+                                f"Quality insufficient (score: "
+                                f"{quality_eval.quality_score}). "
+                                f"Issues: {', '.join(quality_eval.issues)}"
+                            )
+
+                            # Clean up tracking before attempting recovery
+                            if returned_task.id in self._assignees:
+                                await self._channel.archive_task(
+                                    returned_task.id
+                                )
+                            self._cleanup_task_tracking(returned_task.id)
+
+                            # Apply LLM-recommended recovery strategy
+                            try:
+                                is_decompose = (
+                                    await self._apply_recovery_strategy(
+                                        returned_task, quality_eval
+                                    )
+                                )
+
+                                # For decompose, cleanup happens in the method
+                                if is_decompose:
+                                    continue
+
+                            except Exception as e:
+                                logger.error(
+                                    f"Error handling quality-failed task "
+                                    f"{returned_task.id}: {e}",
+                                    exc_info=True,
+                                )
+                                continue
+                        else:
+                            print(
+                                f"{Fore.CYAN}Task {returned_task.id} "
+                                f"completed successfully (quality score: "
+                                f"{quality_eval.quality_score}).{Fore.RESET}"
+                            )
+                            await self._handle_completed_task(returned_task)
                 elif returned_task.state == TaskState.FAILED:
                     try:
                         halt = await self._handle_failed_task(returned_task)
                         if not halt:
                             continue
+
+                        # Do not halt if we have main tasks in queue
+                        if len(self.get_main_task_queue()) > 0:
+                            print(
+                                f"{Fore.RED}Task {returned_task.id} has "
+                                f"failed for {MAX_TASK_RETRIES} times, "
+                                f"skipping that task. Final error: "
+                                f"{returned_task.result or 'Unknown error'}"
+                                f"{Fore.RESET}"
+                            )
+                            self._skip_requested = True
+                            continue
+
                         print(
                             f"{Fore.RED}Task {returned_task.id} has failed "
                             f"for {MAX_TASK_RETRIES} times, halting "
```
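For tasks that come back `DONE`, the loop now runs a quality gate: `_analyze_task(..., for_failure=False)` scores the result, low scores are routed back through the same recovery machinery as hard failures, and quality retries stop once `failure_count` reaches 2. A condensed sketch of that decision; the `quality_eval` field names follow the diff, while the evaluator and the two helpers are assumed:

```python
# Condensed quality gate; quality_eval exposes quality_sufficient,
# quality_score and issues as in the diff; recover/complete are assumed
# coroutines standing in for _apply_recovery_strategy and
# _handle_completed_task.
MAX_QUALITY_RETRIES = 2  # the diff checks failure_count >= 2


async def gate_completed_task(task, quality_eval, recover, complete) -> None:
    if quality_eval.quality_sufficient:
        await complete(task)  # genuinely done
        return
    if task.failure_count >= MAX_QUALITY_RETRIES:
        await complete(task)  # accept the low-quality result, stop retrying
        return
    task.failure_count += 1
    task.state = "FAILED"
    task.result = (
        f"Quality insufficient (score: {quality_eval.quality_score}). "
        f"Issues: {', '.join(quality_eval.issues)}"
    )
    await recover(task, quality_eval)  # LLM-recommended recovery strategy
```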
```diff
@@ -2952,6 +4318,9 @@ class Workforce(BaseNode):
             elif not self._pending_tasks and self._in_flight_tasks == 0:
                 self._state = WorkforceState.IDLE
                 logger.info("All tasks completed.")
+                all_tasks_completed_event = AllTasksCompletedEvent()
+                for cb in self._callbacks:
+                    cb.log_all_tasks_completed(all_tasks_completed_event)
 
                 # shut down the whole workforce tree
                 self.stop()
@@ -3064,6 +4433,7 @@ class Workforce(BaseNode):
             graceful_shutdown_timeout=self.graceful_shutdown_timeout,
             share_memory=self.share_memory,
             use_structured_output_handler=self.use_structured_output_handler,
+            task_timeout_seconds=self.task_timeout_seconds,
         )
 
         for child in self._children:
```