camel-ai 0.2.67__py3-none-any.whl → 0.2.80a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/agents/_types.py +6 -2
- camel/agents/_utils.py +38 -0
- camel/agents/chat_agent.py +4014 -410
- camel/agents/mcp_agent.py +30 -27
- camel/agents/repo_agent.py +2 -1
- camel/benchmarks/browsecomp.py +6 -6
- camel/configs/__init__.py +15 -0
- camel/configs/aihubmix_config.py +88 -0
- camel/configs/amd_config.py +70 -0
- camel/configs/cometapi_config.py +104 -0
- camel/configs/minimax_config.py +93 -0
- camel/configs/nebius_config.py +103 -0
- camel/configs/vllm_config.py +2 -0
- camel/data_collectors/alpaca_collector.py +15 -6
- camel/datagen/self_improving_cot.py +1 -1
- camel/datasets/base_generator.py +39 -10
- camel/environments/__init__.py +12 -0
- camel/environments/rlcards_env.py +860 -0
- camel/environments/single_step.py +28 -3
- camel/environments/tic_tac_toe.py +1 -1
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/docker/Dockerfile +4 -16
- camel/interpreters/docker_interpreter.py +3 -2
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/interpreters/internal_python_interpreter.py +51 -2
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/loaders/__init__.py +11 -2
- camel/loaders/base_loader.py +85 -0
- camel/loaders/chunkr_reader.py +9 -0
- camel/loaders/firecrawl_reader.py +4 -4
- camel/logger.py +1 -1
- camel/memories/agent_memories.py +84 -1
- camel/memories/base.py +34 -0
- camel/memories/blocks/chat_history_block.py +122 -4
- camel/memories/blocks/vectordb_block.py +8 -1
- camel/memories/context_creators/score_based.py +29 -237
- camel/memories/records.py +88 -8
- camel/messages/base.py +166 -40
- camel/messages/func_message.py +32 -5
- camel/models/__init__.py +10 -0
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +1 -16
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +117 -18
- camel/models/aws_bedrock_model.py +2 -33
- camel/models/azure_openai_model.py +205 -91
- camel/models/base_audio_model.py +3 -1
- camel/models/base_model.py +189 -24
- camel/models/cohere_model.py +5 -17
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +1 -16
- camel/models/deepseek_model.py +6 -16
- camel/models/fish_audio_model.py +6 -0
- camel/models/gemini_model.py +71 -20
- camel/models/groq_model.py +1 -17
- camel/models/internlm_model.py +1 -16
- camel/models/litellm_model.py +49 -32
- camel/models/lmstudio_model.py +1 -17
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +1 -16
- camel/models/model_factory.py +27 -1
- camel/models/model_manager.py +24 -6
- camel/models/modelscope_model.py +1 -16
- camel/models/moonshot_model.py +185 -19
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +0 -5
- camel/models/netmind_model.py +1 -16
- camel/models/novita_model.py +1 -16
- camel/models/nvidia_model.py +1 -16
- camel/models/ollama_model.py +4 -19
- camel/models/openai_compatible_model.py +171 -46
- camel/models/openai_model.py +205 -77
- camel/models/openrouter_model.py +1 -17
- camel/models/ppio_model.py +1 -16
- camel/models/qianfan_model.py +1 -16
- camel/models/qwen_model.py +1 -16
- camel/models/reka_model.py +1 -16
- camel/models/samba_model.py +34 -47
- camel/models/sglang_model.py +64 -31
- camel/models/siliconflow_model.py +1 -16
- camel/models/stub_model.py +0 -4
- camel/models/togetherai_model.py +1 -16
- camel/models/vllm_model.py +1 -16
- camel/models/volcano_model.py +0 -17
- camel/models/watsonx_model.py +1 -16
- camel/models/yi_model.py +1 -16
- camel/models/zhipuai_model.py +60 -16
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/retrievers/auto_retriever.py +1 -0
- camel/runtimes/configs.py +11 -11
- camel/runtimes/daytona_runtime.py +15 -16
- camel/runtimes/docker_runtime.py +6 -6
- camel/runtimes/remote_http_runtime.py +5 -5
- camel/services/agent_openapi_server.py +380 -0
- camel/societies/__init__.py +2 -0
- camel/societies/role_playing.py +26 -28
- camel/societies/workforce/__init__.py +2 -0
- camel/societies/workforce/events.py +122 -0
- camel/societies/workforce/prompts.py +249 -38
- camel/societies/workforce/role_playing_worker.py +82 -20
- camel/societies/workforce/single_agent_worker.py +634 -34
- camel/societies/workforce/structured_output_handler.py +512 -0
- camel/societies/workforce/task_channel.py +169 -23
- camel/societies/workforce/utils.py +176 -9
- camel/societies/workforce/worker.py +77 -23
- camel/societies/workforce/workflow_memory_manager.py +772 -0
- camel/societies/workforce/workforce.py +3168 -478
- camel/societies/workforce/workforce_callback.py +74 -0
- camel/societies/workforce/workforce_logger.py +203 -175
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/__init__.py +4 -0
- camel/storages/key_value_storages/json.py +15 -2
- camel/storages/key_value_storages/mem0_cloud.py +48 -47
- camel/storages/object_storages/google_cloud.py +1 -1
- camel/storages/vectordb_storages/__init__.py +6 -0
- camel/storages/vectordb_storages/chroma.py +731 -0
- camel/storages/vectordb_storages/oceanbase.py +13 -13
- camel/storages/vectordb_storages/pgvector.py +349 -0
- camel/storages/vectordb_storages/qdrant.py +3 -3
- camel/storages/vectordb_storages/surreal.py +365 -0
- camel/storages/vectordb_storages/tidb.py +8 -6
- camel/tasks/task.py +244 -27
- camel/toolkits/__init__.py +46 -8
- camel/toolkits/aci_toolkit.py +64 -19
- camel/toolkits/arxiv_toolkit.py +6 -6
- camel/toolkits/base.py +63 -5
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/craw4ai_toolkit.py +93 -0
- camel/toolkits/dappier_toolkit.py +10 -6
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
- camel/toolkits/excel_toolkit.py +901 -67
- camel/toolkits/file_toolkit.py +1402 -0
- camel/toolkits/function_tool.py +30 -6
- camel/toolkits/github_toolkit.py +107 -20
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +38 -4
- camel/toolkits/google_drive_mcp_toolkit.py +54 -0
- camel/toolkits/human_toolkit.py +34 -10
- camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +3749 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +32 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1815 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +590 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +130 -0
- camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +26 -0
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1032 -0
- camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
- camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
- camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
- camel/toolkits/image_generation_toolkit.py +390 -0
- camel/toolkits/jina_reranker_toolkit.py +3 -4
- camel/toolkits/klavis_toolkit.py +5 -1
- camel/toolkits/markitdown_toolkit.py +104 -0
- camel/toolkits/math_toolkit.py +64 -10
- camel/toolkits/mcp_toolkit.py +370 -45
- camel/toolkits/memory_toolkit.py +5 -1
- camel/toolkits/message_agent_toolkit.py +608 -0
- camel/toolkits/message_integration.py +724 -0
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/note_taking_toolkit.py +277 -0
- camel/toolkits/notion_mcp_toolkit.py +224 -0
- camel/toolkits/openbb_toolkit.py +5 -1
- camel/toolkits/origene_mcp_toolkit.py +56 -0
- camel/toolkits/playwright_mcp_toolkit.py +12 -31
- camel/toolkits/pptx_toolkit.py +25 -12
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/screenshot_toolkit.py +213 -0
- camel/toolkits/search_toolkit.py +437 -142
- camel/toolkits/slack_toolkit.py +104 -50
- camel/toolkits/sympy_toolkit.py +1 -1
- camel/toolkits/task_planning_toolkit.py +3 -3
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/thinking_toolkit.py +1 -1
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +106 -26
- camel/toolkits/video_download_toolkit.py +17 -14
- camel/toolkits/web_deploy_toolkit.py +1219 -0
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/zapier_toolkit.py +5 -1
- camel/types/__init__.py +2 -2
- camel/types/agents/tool_calling_record.py +4 -1
- camel/types/enums.py +316 -40
- camel/types/openai_types.py +2 -2
- camel/types/unified_model_type.py +31 -4
- camel/utils/commons.py +36 -5
- camel/utils/constants.py +3 -0
- camel/utils/context_utils.py +1003 -0
- camel/utils/mcp.py +138 -4
- camel/utils/mcp_client.py +45 -1
- camel/utils/message_summarizer.py +148 -0
- camel/utils/token_counting.py +43 -20
- camel/utils/tool_result.py +44 -0
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +296 -85
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +219 -146
- camel/loaders/pandas_reader.py +0 -368
- camel/toolkits/dalle_toolkit.py +0 -175
- camel/toolkits/file_write_toolkit.py +0 -444
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1037
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
camel/societies/workforce/workforce.py (excerpt):

```diff
@@ -14,12 +14,34 @@
 from __future__ import annotations
 
 import asyncio
+import concurrent.futures
 import json
+import os
 import time
 import uuid
 from collections import deque
 from enum import Enum
-from typing import
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Coroutine,
+    Deque,
+    Dict,
+    Generator,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+    cast,
+)
+
+from .workforce_callback import WorkforceCallback
+from .workforce_metrics import WorkforceMetrics
+
+if TYPE_CHECKING:
+    from camel.utils.context_utils import ContextUtility
 
 from colorama import Fore
 
```
```diff
@@ -31,30 +53,71 @@ from camel.societies.workforce.base import BaseNode
 from camel.societies.workforce.prompts import (
     ASSIGN_TASK_PROMPT,
     CREATE_NODE_PROMPT,
-
+    FAILURE_ANALYSIS_RESPONSE_FORMAT,
+    QUALITY_EVALUATION_RESPONSE_FORMAT,
+    TASK_AGENT_SYSTEM_MESSAGE,
+    TASK_ANALYSIS_PROMPT,
+    TASK_DECOMPOSE_PROMPT,
 )
 from camel.societies.workforce.role_playing_worker import RolePlayingWorker
-from camel.societies.workforce.single_agent_worker import
+from camel.societies.workforce.single_agent_worker import (
+    SingleAgentWorker,
+)
+from camel.societies.workforce.structured_output_handler import (
+    StructuredOutputHandler,
+)
 from camel.societies.workforce.task_channel import TaskChannel
 from camel.societies.workforce.utils import (
+    RecoveryStrategy,
+    TaskAnalysisResult,
+    TaskAssignment,
     TaskAssignResult,
     WorkerConf,
     check_if_running,
 )
 from camel.societies.workforce.worker import Worker
-from camel.tasks.task import
+from camel.tasks.task import (
+    Task,
+    TaskState,
+    is_task_result_insufficient,
+    validate_task_content,
+)
 from camel.toolkits import (
     CodeExecutionToolkit,
+    FunctionTool,
     SearchToolkit,
-    TaskPlanningToolkit,
     ThinkingToolkit,
 )
 from camel.types import ModelPlatformType, ModelType
 from camel.utils import dependencies_required
 
+from .events import (
+    AllTasksCompletedEvent,
+    TaskAssignedEvent,
+    TaskCompletedEvent,
+    TaskCreatedEvent,
+    TaskDecomposedEvent,
+    TaskFailedEvent,
+    TaskStartedEvent,
+    WorkerCreatedEvent,
+)
 from .workforce_logger import WorkforceLogger
 
-
+if os.environ.get("TRACEROOT_ENABLED", "False").lower() == "true":
+    try:
+        import traceroot  # type: ignore[import]
+
+        logger = traceroot.get_logger('camel')
+    except ImportError:
+        logger = get_logger(__name__)
+else:
+    logger = get_logger(__name__)
+
+# Constants for configuration values
+MAX_TASK_RETRIES = 3
+MAX_PENDING_TASKS_LIMIT = 20
+TASK_TIMEOUT_SECONDS = 600.0
+DEFAULT_WORKER_POOL_SIZE = 10
 
 
 class WorkforceState(Enum):
```
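The import hunk above gates an optional traceroot-backed logger behind the `TRACEROOT_ENABLED` environment variable, falling back to camel's `get_logger` when the import fails or the flag is unset. A minimal sketch of flipping that gate; note the check runs at module import time, and the distribution name for `traceroot` is an assumption not confirmed by this diff:

```python
# Sketch: enable the optional traceroot logger added in the hunk above.
# The env var must be set before the workforce module is imported, since
# the TRACEROOT_ENABLED check happens once at import time.
import os

os.environ["TRACEROOT_ENABLED"] = "true"

from camel.societies.workforce import Workforce  # noqa: E402  (import after env setup)
```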
```diff
@@ -110,31 +173,33 @@ class Workforce(BaseNode):
         children (Optional[List[BaseNode]], optional): List of child nodes
             under this node. Each child node can be a worker node or
             another workforce node. (default: :obj:`None`)
-
-
-
-
-
-
-
-
-
-
-
-
-            (default: :obj:`None`
-
-
-
-
-
-            available parameters.
-            (default: :obj:`None` - creates workers with SearchToolkit,
-            CodeExecutionToolkit, and ThinkingToolkit)
+        coordinator_agent (Optional[ChatAgent], optional): A custom coordinator
+            agent instance for task assignment and worker creation. If
+            provided, the workforce will create a new agent using this agent's
+            model configuration but with the required system message and
+            functionality.
+            If None, a default agent will be created using DEFAULT model
+            settings. (default: :obj:`None`)
+        task_agent (Optional[ChatAgent], optional): A custom task planning
+            agent instance for task decomposition and composition. If
+            provided, the workforce will create a new agent using this agent's
+            model configuration but with the required system message. If None,
+            a default agent will be created using DEFAULT model settings.
+            (default: :obj:`None`)
+        new_worker_agent (Optional[ChatAgent], optional): A template agent for
+            workers created dynamically at runtime when existing workers cannot
+            handle failed tasks. If None, workers will be created with default
+            settings including SearchToolkit, CodeExecutionToolkit, and
+            ThinkingToolkit. (default: :obj:`None`)
         graceful_shutdown_timeout (float, optional): The timeout in seconds
             for graceful shutdown when a task fails 3 times. During this
             period, the workforce remains active for debugging.
             Set to 0 for immediate shutdown. (default: :obj:`15.0`)
+        task_timeout_seconds (Optional[float], optional): The timeout in
+            seconds for waiting for tasks to be returned by workers. If None,
+            uses the global TASK_TIMEOUT_SECONDS value (600.0 seconds).
+            Increase this value for tasks that require more processing time.
+            (default: :obj:`None`)
         share_memory (bool, optional): Whether to enable shared memory across
             SingleAgentWorker instances in the workforce. When enabled, all
             SingleAgentWorker instances, coordinator agent, and task planning
```
```diff
@@ -144,45 +209,93 @@ class Workforce(BaseNode):
             SingleAgentWorker instances; RolePlayingWorker and nested
             Workforce instances do not participate in memory sharing.
             (default: :obj:`False`)
+        use_structured_output_handler (bool, optional): Whether to use the
+            structured output handler instead of native structured output.
+            When enabled, the workforce will use prompts with structured
+            output instructions and regex extraction to parse responses.
+            This ensures compatibility with agents that don't reliably
+            support native structured output. When disabled, the workforce
+            uses the native response_format parameter.
+            (default: :obj:`True`)
+        callbacks (Optional[List[WorkforceCallback]], optional): A list of
+            callback handlers to observe and record workforce lifecycle events
+            and metrics (e.g., task creation/assignment/start/completion/
+            failure, worker creation/deletion, all-tasks-completed). All
+            items must be instances of :class:`WorkforceCallback`, otherwise
+            a :class:`ValueError` is raised. If none of the provided
+            callbacks implement :class:`WorkforceMetrics`, a built-in
+            :class:`WorkforceLogger` (implements both callback and metrics)
+            is added automatically. If at least one provided callback
+            implements :class:`WorkforceMetrics`, no default logger is added.
+            (default: :obj:`None`)
 
     Example:
-        >>> # Configure with custom model and shared memory
         >>> import asyncio
+        >>> from camel.agents import ChatAgent
+        >>> from camel.models import ModelFactory
+        >>> from camel.types import ModelPlatformType, ModelType
+        >>> from camel.tasks import Task
+        >>>
+        >>> # Simple workforce with default agents
+        >>> workforce = Workforce("Research Team")
+        >>>
+        >>> # Workforce with custom model configuration
        >>> model = ModelFactory.create(
-        ...     ModelPlatformType.OPENAI, ModelType.GPT_4O
+        ...     ModelPlatformType.OPENAI, model_type=ModelType.GPT_4O
         ... )
+        >>> coordinator_agent = ChatAgent(model=model)
+        >>> task_agent = ChatAgent(model=model)
+        >>>
         >>> workforce = Workforce(
         ...     "Research Team",
-        ...
-        ...
-        ...     share_memory=True  # Enable shared memory
+        ...     coordinator_agent=coordinator_agent,
+        ...     task_agent=task_agent,
         ... )
         >>>
         >>> # Process a task
         >>> async def main():
         ...     task = Task(content="Research AI trends", id="1")
-        ...     result = workforce.
+        ...     result = await workforce.process_task_async(task)
         ...     return result
-        >>>
+        >>>
+        >>> result_task = asyncio.run(main())
+
+    Note:
+        When custom coordinator_agent or task_agent are provided, the workforce
+        will preserve the user's system message and append the required
+        workforce coordination or task planning instructions to it. This
+        ensures both the user's intent is preserved and proper workforce
+        functionality is maintained. All other agent configurations (model,
+        memory, tools, etc.) will also be preserved.
     """
 
     def __init__(
         self,
         description: str,
         children: Optional[List[BaseNode]] = None,
-
-
-
+        coordinator_agent: Optional[ChatAgent] = None,
+        task_agent: Optional[ChatAgent] = None,
+        new_worker_agent: Optional[ChatAgent] = None,
         graceful_shutdown_timeout: float = 15.0,
         share_memory: bool = False,
+        use_structured_output_handler: bool = True,
+        task_timeout_seconds: Optional[float] = None,
+        callbacks: Optional[List[WorkforceCallback]] = None,
     ) -> None:
         super().__init__(description)
-        self._child_listening_tasks: Deque[
+        self._child_listening_tasks: Deque[
+            Union[asyncio.Task, concurrent.futures.Future]
+        ] = deque()
         self._children = children or []
-        self.
+        self.new_worker_agent = new_worker_agent
         self.graceful_shutdown_timeout = graceful_shutdown_timeout
         self.share_memory = share_memory
-        self.
+        self.use_structured_output_handler = use_structured_output_handler
+        self.task_timeout_seconds = (
+            task_timeout_seconds or TASK_TIMEOUT_SECONDS
+        )
+        if self.use_structured_output_handler:
+            self.structured_handler = StructuredOutputHandler()
         self._task: Optional[Task] = None
         self._pending_tasks: Deque[Task] = deque()
         self._task_dependencies: Dict[str, List[str]] = {}
```
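Taken together with the docstring hunks above, the new constructor surface can be exercised as below. A sketch only, assuming the usual public import paths; `task_timeout_seconds=900.0` and the GPT_4O model choice are illustrative, not prescriptive:

```python
# Sketch based on the docstring diff above.
import asyncio

from camel.agents import ChatAgent
from camel.models import ModelFactory
from camel.societies.workforce import Workforce
from camel.tasks import Task
from camel.types import ModelPlatformType, ModelType

model = ModelFactory.create(
    ModelPlatformType.OPENAI, model_type=ModelType.GPT_4O
)
workforce = Workforce(
    "Research Team",
    coordinator_agent=ChatAgent(model=model),
    task_agent=ChatAgent(model=model),
    task_timeout_seconds=900.0,  # overrides the 600.0 s TASK_TIMEOUT_SECONDS default
)

task = Task(content="Research AI trends", id="1")
result_task = asyncio.run(workforce.process_task_async(task))
print(result_task.result)
```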
```diff
@@ -195,54 +308,158 @@ class Workforce(BaseNode):
         self._pause_event = asyncio.Event()
         self._pause_event.set()  # Initially not paused
         self._stop_requested = False
+        self._skip_requested = False
         self._snapshots: List[WorkforceSnapshot] = []
         self._completed_tasks: List[Task] = []
         self._loop: Optional[asyncio.AbstractEventLoop] = None
         self._main_task_future: Optional[asyncio.Future] = None
+        self._cleanup_task: Optional[asyncio.Task] = None
         # Snapshot throttle support
         self._last_snapshot_time: float = 0.0
         # Minimum seconds between automatic snapshots
         self.snapshot_interval: float = 30.0
-
-
-
-            role_or_desc = child.description
-            self.metrics_logger.log_worker_created(
-                worker_id=child.node_id,
-                worker_type=worker_type,
-                role=role_or_desc,
-            )
+        # Shared memory UUID tracking to prevent re-sharing duplicates
+        self._shared_memory_uuids: Set[str] = set()
+        self._initialize_callbacks(callbacks)
 
-        #
-
+        # Set up coordinator agent with default system message
+        coord_agent_sys_msg = BaseMessage.make_assistant_message(
+            role_name="Workforce Manager",
+            content="You are coordinating a group of workers. A worker "
+            "can be a group of agents or a single agent. Each worker is "
+            "created to solve a specific kind of task. Your job "
+            "includes assigning tasks to a existing worker, creating "
+            "a new worker for a task, etc.",
+        )
+
+        if coordinator_agent is None:
             logger.warning(
-                "No
+                "No coordinator_agent provided. Using default "
                 "ChatAgent settings (ModelPlatformType.DEFAULT, "
-                "ModelType.DEFAULT)
-                "that assigns tasks and handles failures, pass a dictionary "
-                "with ChatAgent parameters, e.g.: {'model': your_model, "
-                "'tools': your_tools, 'token_limit': 8000}. See ChatAgent "
-                "documentation for all available options."
+                "ModelType.DEFAULT) with default system message."
             )
-
-
-
-                "
-                "
-                "
-
-
-
-
-
+            self.coordinator_agent = ChatAgent(coord_agent_sys_msg)
+        else:
+            logger.info(
+                "Custom coordinator_agent provided. Preserving user's "
+                "system message and appending workforce coordination "
+                "instructions to ensure proper functionality."
+            )
+
+            if coordinator_agent.system_message is not None:
+                user_sys_msg_content = coordinator_agent.system_message.content
+                combined_content = (
+                    f"{user_sys_msg_content}\n\n{coord_agent_sys_msg.content}"
+                )
+                combined_sys_msg = BaseMessage.make_assistant_message(
+                    role_name=coordinator_agent.system_message.role_name,
+                    content=combined_content,
+                )
+            else:
+                combined_sys_msg = coord_agent_sys_msg
+
+            # Create a new agent with the provided agent's configuration
+            # but with the combined system message
+            self.coordinator_agent = ChatAgent(
+                system_message=combined_sys_msg,
+                model=coordinator_agent.model_backend,
+                memory=coordinator_agent.memory,
+                message_window_size=getattr(
+                    coordinator_agent.memory, "window_size", None
+                ),
+                token_limit=getattr(
+                    coordinator_agent.memory.get_context_creator(),
+                    "token_limit",
+                    None,
+                ),
+                output_language=coordinator_agent.output_language,
+                tools=list(coordinator_agent._internal_tools.values()),
+                external_tools=[
+                    schema
+                    for schema in coordinator_agent._external_tool_schemas.values()  # noqa: E501
+                ],
+                response_terminators=coordinator_agent.response_terminators,
+                max_iteration=coordinator_agent.max_iteration,
+                stop_event=coordinator_agent.stop_event,
+            )
+
+        # Set up task agent with default system message
+        task_sys_msg = BaseMessage.make_assistant_message(
+            role_name="Task Planner",
+            content=TASK_AGENT_SYSTEM_MESSAGE,
+        )
+
+        if task_agent is None:
             logger.warning(
-                "No
-                "
-                "
-
-
-
-
+                "No task_agent provided. Using default ChatAgent "
+                "settings (ModelPlatformType.DEFAULT, ModelType.DEFAULT) "
+                "with default system message."
+            )
+            self.task_agent = ChatAgent(
+                task_sys_msg,
+            )
+        else:
+            logger.info(
+                "Custom task_agent provided. Preserving user's "
+                "system message and appending task planning "
+                "instructions to ensure proper functionality."
+            )
+
+            if task_agent.system_message is not None:
+                user_task_sys_msg_content = task_agent.system_message.content
+                combined_task_content = (
+                    f"{user_task_sys_msg_content}\n\n{task_sys_msg.content}"
+                )
+                combined_task_sys_msg = BaseMessage.make_assistant_message(
+                    role_name=task_agent.system_message.role_name,
+                    content=combined_task_content,
+                )
+            else:
+                combined_task_sys_msg = task_sys_msg
+
+            # Since ChatAgent constructor uses a dictionary with
+            # function names as keys, we don't need to manually deduplicate.
+            combined_tools: List[Union[FunctionTool, Callable]] = cast(
+                List[Union[FunctionTool, Callable]],
+                list(task_agent._internal_tools.values()),
+            )
+
+            # Create a new agent with the provided agent's configuration
+            # but with the combined system message and tools
+            self.task_agent = ChatAgent(
+                system_message=combined_task_sys_msg,
+                model=task_agent.model_backend,
+                memory=task_agent.memory,
+                message_window_size=getattr(
+                    task_agent.memory, "window_size", None
+                ),
+                token_limit=getattr(
+                    task_agent.memory.get_context_creator(),
+                    "token_limit",
+                    None,
+                ),
+                output_language=task_agent.output_language,
+                tools=combined_tools,
+                external_tools=[
+                    schema
+                    for schema in task_agent._external_tool_schemas.values()
+                ],
+                response_terminators=task_agent.response_terminators,
+                max_iteration=task_agent.max_iteration,
+                stop_event=task_agent.stop_event,
+            )
+
+        if new_worker_agent is None:
+            logger.info(
+                "No new_worker_agent provided. Workers created at runtime "
+                "will use default ChatAgent settings with SearchToolkit, "
+                "CodeExecutionToolkit, and ThinkingToolkit. To customize "
+                "runtime worker creation, pass a ChatAgent instance."
+            )
+        else:
+            # Validate new_worker_agent if provided
+            self._validate_agent_compatibility(
+                new_worker_agent, "new_worker_agent"
             )
 
         if self.share_memory:
```
```diff
@@ -252,32 +469,153 @@ class Workforce(BaseNode):
                 "better context continuity during task handoffs."
             )
 
-
-
-
-
-
-
-
+        # Shared context utility for workflow management (created lazily)
+        self._shared_context_utility: Optional["ContextUtility"] = None
+
+    # ------------------------------------------------------------------
+    # Helper for propagating pause control to externally supplied agents
+    # ------------------------------------------------------------------
+
+    def _initialize_callbacks(
+        self, callbacks: Optional[List[WorkforceCallback]]
+    ) -> None:
+        r"""Validate, register, and prime workforce callbacks."""
+        self._callbacks: List[WorkforceCallback] = []
+
+        if callbacks:
+            for cb in callbacks:
+                if isinstance(cb, WorkforceCallback):
+                    self._callbacks.append(cb)
+                else:
+                    raise ValueError(
+                        "All callbacks must be instances of WorkforceCallback"
+                    )
+
+        has_metrics_callback = any(
+            isinstance(cb, WorkforceMetrics) for cb in self._callbacks
         )
-
-
+
+        if not has_metrics_callback:
+            self._callbacks.append(WorkforceLogger(workforce_id=self.node_id))
+        else:
+            logger.info(
+                "WorkforceMetrics implementation detected. Skipping default "
+                "WorkforceLogger addition."
+            )
+
+        for child in self._children:
+            self._notify_worker_created(child)
+
+    def _notify_worker_created(
+        self,
+        worker_node: BaseNode,
+        *,
+        worker_type: Optional[str] = None,
+        role: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        r"""Emit a worker-created event to all registered callbacks."""
+        event = WorkerCreatedEvent(
+            worker_id=worker_node.node_id,
+            worker_type=worker_type or type(worker_node).__name__,
+            role=role or worker_node.description,
+            metadata=metadata,
         )
+        for cb in self._callbacks:
+            cb.log_worker_created(event)
 
-
-
-
-
-
-
-
-
+    def _get_or_create_shared_context_utility(
+        self,
+        session_id: Optional[str] = None,
+    ) -> "ContextUtility":
+        r"""Get or create the shared context utility for workflow management.
+
+        This method creates the context utility only when needed, avoiding
+        unnecessary session folder creation during initialization.
+
+        Args:
+            session_id (Optional[str]): Custom session ID to use. If None,
+                auto-generates a timestamped session ID. (default: :obj:`None`)
+
+        Returns:
+            ContextUtility: The shared context utility instance.
+        """
+        if self._shared_context_utility is None:
+            from camel.utils.context_utils import ContextUtility
+
+            self._shared_context_utility = ContextUtility.get_workforce_shared(
+                session_id=session_id
+            )
+        return self._shared_context_utility
+
+    def _validate_agent_compatibility(
+        self, agent: ChatAgent, agent_context: str = "agent"
+    ) -> None:
+        r"""Validate that agent configuration is compatible with workforce
+        settings.
+
+        Args:
+            agent (ChatAgent): The agent to validate.
+            agent_context (str): Context description for error messages.
+
+        Raises:
+            ValueError: If agent has tools and stream mode enabled but
+                use_structured_output_handler is False.
+        """
+        agent_has_tools = (
+            bool(agent.tool_dict) if hasattr(agent, 'tool_dict') else False
+        )
+        agent_stream_mode = (
+            getattr(agent.model_backend, 'stream', False)
+            if hasattr(agent, 'model_backend')
+            else False
         )
-
-
-
+
+        if (
+            agent_has_tools
+            and agent_stream_mode
+            and not self.use_structured_output_handler
+        ):
+            raise ValueError(
+                f"{agent_context} has tools and stream mode enabled, but "
+                "use_structured_output_handler is False. Native structured "
+                "output doesn't work with tool calls in stream mode. "
+                "Please set use_structured_output_handler=True when creating "
+                "the Workforce."
+            )
+
+    # ------------------------------------------------------------------
+    # Helper for propagating pause control to externally supplied agents
+    # ------------------------------------------------------------------
+    def _attach_pause_event_to_agent(self, agent: ChatAgent) -> None:
+        r"""Ensure the given ChatAgent shares this workforce's pause_event.
+
+        If the agent already has a different pause_event we overwrite it and
+        emit a debug log (it is unlikely an agent needs multiple independent
+        pause controls once managed by this workforce)."""
+        try:
+            existing_pause_event = getattr(agent, "pause_event", None)
+            if existing_pause_event is not self._pause_event:
+                if existing_pause_event is not None:
+                    logger.debug(
+                        f"Overriding pause_event for agent {agent.agent_id} "
+                        f"(had different pause_event: "
+                        f"{id(existing_pause_event)} "
+                        f"-> {id(self._pause_event)})"
+                    )
+                agent.pause_event = self._pause_event
+        except AttributeError:
+            # Should not happen, but guard against unexpected objects
+            logger.warning(
+                f"Cannot attach pause_event to object {type(agent)} - "
+                f"missing pause_event attribute"
+            )
+
+    def _ensure_pause_event_in_kwargs(self, kwargs: Optional[Dict]) -> Dict:
+        r"""Insert pause_event into kwargs dict for ChatAgent construction."""
+        new_kwargs = dict(kwargs) if kwargs else {}
+        new_kwargs.setdefault("pause_event", self._pause_event)
+        return new_kwargs
 
     def __repr__(self):
         return (
```
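The hunk above wires user callbacks in at construction: invalid entries raise `ValueError`, and a default `WorkforceLogger` is appended only when no callback implements `WorkforceMetrics`. A hedged sketch of a custom callback against the hooks this diff actually dispatches (`log_worker_created` here, `log_task_created`/`log_task_decomposed` in the recovery hunk further down); the import paths mirror the new module files in the listing above, and it is an assumption that `WorkforceCallback` can be subclassed with only the hooks you need rather than requiring every abstract method:

```python
# Hedged sketch: module paths follow workforce_callback.py and events.py from
# the file listing; if WorkforceCallback declares more abstract hooks, they
# must be implemented as well.
from camel.societies.workforce import Workforce
from camel.societies.workforce.events import (
    TaskCreatedEvent,
    WorkerCreatedEvent,
)
from camel.societies.workforce.workforce_callback import WorkforceCallback


class PrintingCallback(WorkforceCallback):
    """Logs lifecycle events to stdout; it implements no WorkforceMetrics,
    so the constructor still appends the default WorkforceLogger."""

    def log_worker_created(self, event: WorkerCreatedEvent) -> None:
        print(f"worker {event.worker_id} ({event.worker_type}): {event.role}")

    def log_task_created(self, event: TaskCreatedEvent) -> None:
        print(f"task {event.task_id} created (parent={event.parent_task_id})")


workforce = Workforce("Team", callbacks=[PrintingCallback()])
```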
```diff
@@ -368,16 +706,35 @@ class Workforce(BaseNode):
                 continue
 
             if not memory_records:
+                logger.warning(
+                    "No valid memory records could be reconstructed "
+                    "for sharing"
+                )
                 return
 
-            #
+            # Filter out already-shared records to prevent re-sharing
+            # This prevents exponential growth of duplicate records
+            new_records = []
             for record in memory_records:
+                record_uuid = str(record.uuid)
+                if record_uuid not in self._shared_memory_uuids:
+                    new_records.append(record)
+                    self._shared_memory_uuids.add(record_uuid)
+
+            if not new_records:
+                logger.debug(
+                    "No new records to share (all were already shared)"
+                )
+                return
+
+            # Share with coordinator agent
+            for record in new_records:
                 # Only add records from other agents to avoid duplication
                 if record.agent_id != self.coordinator_agent.agent_id:
                     self.coordinator_agent.memory.write_record(record)
 
             # Share with task agent
-            for record in
+            for record in new_records:
                 if record.agent_id != self.task_agent.agent_id:
                     self.task_agent.memory.write_record(record)
 
```
```diff
@@ -389,12 +746,12 @@ class Workforce(BaseNode):
             ]
 
             for worker in single_agent_workers:
-                for record in
+                for record in new_records:
                     if record.agent_id != worker.worker.agent_id:
                         worker.worker.memory.write_record(record)
 
             logger.info(
-                f"Shared {len(
+                f"Shared {len(new_records)} new memory records across "
                 f"{len(single_agent_workers) + 2} agents in workforce "
                 f"{self.node_id}"
             )
```
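The two hunks above change every fan-out loop to iterate only `new_records`, the subset surviving the UUID filter, so repeated syncs no longer re-share (and exponentially duplicate) records. The dedup pattern in isolation, as a self-contained sketch:

```python
# Standalone illustration of the UUID-based dedup used above: a persistent
# set of already-shared IDs filters each incoming batch down to fresh items.
from typing import List, Set

shared: Set[str] = set()


def filter_new(record_uuids: List[str]) -> List[str]:
    fresh = [u for u in record_uuids if u not in shared]
    shared.update(fresh)
    return fresh


assert filter_new(["a", "b"]) == ["a", "b"]
assert filter_new(["b", "c"]) == ["c"]  # "b" was already shared
```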
@@ -413,25 +770,473 @@ class Workforce(BaseNode):
|
|
|
413
770
|
except Exception as e:
|
|
414
771
|
logger.warning(f"Error synchronizing shared memory: {e}")
|
|
415
772
|
|
|
416
|
-
def
|
|
773
|
+
def _update_dependencies_for_decomposition(
|
|
774
|
+
self, original_task: Task, subtasks: List[Task]
|
|
775
|
+
) -> None:
|
|
776
|
+
r"""Update dependency tracking when a task is decomposed into subtasks.
|
|
777
|
+
Tasks that depended on the original task should now depend on all
|
|
778
|
+
subtasks. The last subtask inherits the original task's dependencies.
|
|
779
|
+
"""
|
|
780
|
+
if not subtasks:
|
|
781
|
+
return
|
|
782
|
+
|
|
783
|
+
original_task_id = original_task.id
|
|
784
|
+
subtask_ids = [subtask.id for subtask in subtasks]
|
|
785
|
+
|
|
786
|
+
# Find tasks that depend on the original task
|
|
787
|
+
dependent_task_ids = [
|
|
788
|
+
task_id
|
|
789
|
+
for task_id, deps in self._task_dependencies.items()
|
|
790
|
+
if original_task_id in deps
|
|
791
|
+
]
|
|
792
|
+
|
|
793
|
+
# Update dependent tasks to depend on all subtasks
|
|
794
|
+
for task_id in dependent_task_ids:
|
|
795
|
+
dependencies = self._task_dependencies[task_id]
|
|
796
|
+
dependencies.remove(original_task_id)
|
|
797
|
+
dependencies.extend(subtask_ids)
|
|
798
|
+
|
|
799
|
+
# The last subtask inherits original task's dependencies (if any)
|
|
800
|
+
if original_task_id in self._task_dependencies:
|
|
801
|
+
original_dependencies = self._task_dependencies[original_task_id]
|
|
802
|
+
if original_dependencies:
|
|
803
|
+
# Set dependencies for the last subtask to maintain execution
|
|
804
|
+
# order
|
|
805
|
+
self._task_dependencies[subtask_ids[-1]] = (
|
|
806
|
+
original_dependencies.copy()
|
|
807
|
+
)
|
|
808
|
+
# Remove original task dependencies as it's now decomposed
|
|
809
|
+
del self._task_dependencies[original_task_id]
|
|
810
|
+
|
|
811
|
+
def _increment_in_flight_tasks(self, task_id: str) -> None:
|
|
812
|
+
r"""Safely increment the in-flight tasks counter with logging."""
|
|
813
|
+
self._in_flight_tasks += 1
|
|
814
|
+
logger.debug(
|
|
815
|
+
f"Incremented in-flight tasks for {task_id}. "
|
|
816
|
+
f"Count: {self._in_flight_tasks}"
|
|
817
|
+
)
|
|
818
|
+
|
|
819
|
+
def _decrement_in_flight_tasks(
|
|
820
|
+
self, task_id: str, context: str = ""
|
|
821
|
+
) -> None:
|
|
822
|
+
r"""Safely decrement the in-flight tasks counter with safety checks."""
|
|
823
|
+
if self._in_flight_tasks > 0:
|
|
824
|
+
self._in_flight_tasks -= 1
|
|
825
|
+
logger.debug(
|
|
826
|
+
f"Decremented in-flight tasks for {task_id} ({context}). "
|
|
827
|
+
f"Count: {self._in_flight_tasks}"
|
|
828
|
+
)
|
|
829
|
+
else:
|
|
830
|
+
logger.debug(
|
|
831
|
+
f"Attempted to decrement in-flight tasks for {task_id} "
|
|
832
|
+
f"({context}) but counter is already 0. "
|
|
833
|
+
f"Counter: {self._in_flight_tasks}"
|
|
834
|
+
)
|
|
835
|
+
|
|
836
|
+
def _cleanup_task_tracking(self, task_id: str) -> None:
|
|
837
|
+
r"""Clean up tracking data for a task to prevent memory leaks.
|
|
838
|
+
|
|
839
|
+
Args:
|
|
840
|
+
task_id (str): The ID of the task to clean up.
|
|
841
|
+
"""
|
|
842
|
+
if task_id in self._task_start_times:
|
|
843
|
+
del self._task_start_times[task_id]
|
|
844
|
+
|
|
845
|
+
if task_id in self._task_dependencies:
|
|
846
|
+
del self._task_dependencies[task_id]
|
|
847
|
+
|
|
848
|
+
if task_id in self._assignees:
|
|
849
|
+
del self._assignees[task_id]
|
|
850
|
+
|
|
851
|
+
def _decompose_task(
|
|
852
|
+
self, task: Task
|
|
853
|
+
) -> Union[List[Task], Generator[List[Task], None, None]]:
|
|
417
854
|
r"""Decompose the task into subtasks. This method will also set the
|
|
418
855
|
relationship between the task and its subtasks.
|
|
419
856
|
|
|
420
857
|
Returns:
|
|
421
|
-
List[Task]
|
|
858
|
+
Union[List[Task], Generator[List[Task], None, None]]:
|
|
859
|
+
The subtasks or generator of subtasks.
|
|
422
860
|
"""
|
|
423
|
-
decompose_prompt =
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
861
|
+
decompose_prompt = str(
|
|
862
|
+
TASK_DECOMPOSE_PROMPT.format(
|
|
863
|
+
content=task.content,
|
|
864
|
+
child_nodes_info=self._get_child_nodes_info(),
|
|
865
|
+
additional_info=task.additional_info,
|
|
866
|
+
)
|
|
427
867
|
)
|
|
428
868
|
self.task_agent.reset()
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
869
|
+
result = task.decompose(self.task_agent, decompose_prompt)
|
|
870
|
+
|
|
871
|
+
# Handle both streaming and non-streaming results
|
|
872
|
+
if isinstance(result, Generator):
|
|
873
|
+
# This is a generator (streaming mode)
|
|
874
|
+
def streaming_with_dependencies():
|
|
875
|
+
all_subtasks = []
|
|
876
|
+
for new_tasks in result:
|
|
877
|
+
all_subtasks.extend(new_tasks)
|
|
878
|
+
# Update dependency tracking for each batch of new tasks
|
|
879
|
+
if new_tasks:
|
|
880
|
+
self._update_dependencies_for_decomposition(
|
|
881
|
+
task, all_subtasks
|
|
882
|
+
)
|
|
883
|
+
yield new_tasks
|
|
433
884
|
|
|
434
|
-
|
|
885
|
+
return streaming_with_dependencies()
|
|
886
|
+
else:
|
|
887
|
+
# This is a regular list (non-streaming mode)
|
|
888
|
+
subtasks = result
|
|
889
|
+
# Update dependency tracking for decomposed task
|
|
890
|
+
if subtasks:
|
|
891
|
+
self._update_dependencies_for_decomposition(task, subtasks)
|
|
892
|
+
return subtasks
|
|
893
|
+
|
|
894
|
+
def _analyze_task(
|
|
895
|
+
self,
|
|
896
|
+
task: Task,
|
|
897
|
+
*,
|
|
898
|
+
for_failure: bool,
|
|
899
|
+
error_message: Optional[str] = None,
|
|
900
|
+
) -> TaskAnalysisResult:
|
|
901
|
+
r"""Unified task analysis for both failures and quality evaluation.
|
|
902
|
+
|
|
903
|
+
This method consolidates the logic for analyzing task failures and
|
|
904
|
+
evaluating task quality, using the unified TASK_ANALYSIS_PROMPT.
|
|
905
|
+
|
|
906
|
+
Args:
|
|
907
|
+
task (Task): The task to analyze
|
|
908
|
+
for_failure (bool): True for failure analysis, False for quality
|
|
909
|
+
evaluation
|
|
910
|
+
error_message (Optional[str]): Error message, required when
|
|
911
|
+
for_failure=True
|
|
912
|
+
|
|
913
|
+
Returns:
|
|
914
|
+
TaskAnalysisResult: Unified analysis result with recovery strategy
|
|
915
|
+
and optional quality metrics
|
|
916
|
+
|
|
917
|
+
Raises:
|
|
918
|
+
ValueError: If for_failure=True but error_message is None
|
|
919
|
+
"""
|
|
920
|
+
# Validate required parameters
|
|
921
|
+
if for_failure and error_message is None:
|
|
922
|
+
raise ValueError("error_message is required when for_failure=True")
|
|
923
|
+
|
|
924
|
+
# Determine task result and issue-specific analysis based on context
|
|
925
|
+
if for_failure:
|
|
926
|
+
task_result = "N/A (task failed)"
|
|
927
|
+
issue_type = "Task Failure"
|
|
928
|
+
issue_analysis = f"**Error Message:** {error_message}"
|
|
929
|
+
response_format = FAILURE_ANALYSIS_RESPONSE_FORMAT
|
|
930
|
+
result_schema = TaskAnalysisResult
|
|
931
|
+
fallback_values: Dict[str, Any] = {
|
|
932
|
+
"reasoning": "Defaulting to retry due to parsing error",
|
|
933
|
+
"recovery_strategy": RecoveryStrategy.RETRY,
|
|
934
|
+
"modified_task_content": None,
|
|
935
|
+
"issues": [error_message] if error_message else [],
|
|
936
|
+
}
|
|
937
|
+
examples: List[Dict[str, Any]] = [
|
|
938
|
+
{
|
|
939
|
+
"reasoning": "Temporary network error, worth retrying",
|
|
940
|
+
"recovery_strategy": "retry",
|
|
941
|
+
"modified_task_content": None,
|
|
942
|
+
"issues": ["Network timeout"],
|
|
943
|
+
}
|
|
944
|
+
]
|
|
945
|
+
else:
|
|
946
|
+
# Quality evaluation
|
|
947
|
+
task_result = task.result or "No result available"
|
|
948
|
+
issue_type = "Quality Evaluation"
|
|
949
|
+
issue_analysis = (
|
|
950
|
+
"Provide a quality score (0-100) and list any specific "
|
|
951
|
+
"issues found."
|
|
952
|
+
)
|
|
953
|
+
response_format = QUALITY_EVALUATION_RESPONSE_FORMAT
|
|
954
|
+
result_schema = TaskAnalysisResult
|
|
955
|
+
fallback_values = {
|
|
956
|
+
"reasoning": (
|
|
957
|
+
"Defaulting to acceptable quality due to parsing error"
|
|
958
|
+
),
|
|
959
|
+
"issues": [],
|
|
960
|
+
"recovery_strategy": None,
|
|
961
|
+
"modified_task_content": None,
|
|
962
|
+
"quality_score": 80,
|
|
963
|
+
}
|
|
964
|
+
examples = [
|
|
965
|
+
{
|
|
966
|
+
"reasoning": (
|
|
967
|
+
"Excellent implementation with comprehensive tests"
|
|
968
|
+
),
|
|
969
|
+
"issues": [],
|
|
970
|
+
"recovery_strategy": None,
|
|
971
|
+
"modified_task_content": None,
|
|
972
|
+
"quality_score": 98,
|
|
973
|
+
},
|
|
974
|
+
{
|
|
975
|
+
"reasoning": (
|
|
976
|
+
"Implementation incomplete with missing features"
|
|
977
|
+
),
|
|
978
|
+
"issues": [
|
|
979
|
+
"Incomplete implementation",
|
|
980
|
+
"Missing error handling",
|
|
981
|
+
],
|
|
982
|
+
"recovery_strategy": "replan",
|
|
983
|
+
"modified_task_content": (
|
|
984
|
+
"Previous attempt was incomplete. "
|
|
985
|
+
"Please implement with: 1) Full feature "
|
|
986
|
+
"coverage, 2) Proper error handling"
|
|
987
|
+
),
|
|
988
|
+
"quality_score": 45,
|
|
989
|
+
},
|
|
990
|
+
]
|
|
991
|
+
|
|
992
|
+
# Format the unified analysis prompt
|
|
993
|
+
analysis_prompt = str(
|
|
994
|
+
TASK_ANALYSIS_PROMPT.format(
|
|
995
|
+
task_id=task.id,
|
|
996
|
+
task_content=task.content,
|
|
997
|
+
task_result=task_result,
|
|
998
|
+
failure_count=task.failure_count,
|
|
999
|
+
task_depth=task.get_depth(),
|
|
1000
|
+
assigned_worker=task.assigned_worker_id or "unknown",
|
|
1001
|
+
issue_type=issue_type,
|
|
1002
|
+
issue_specific_analysis=issue_analysis,
|
|
1003
|
+
response_format=response_format,
|
|
1004
|
+
)
|
|
1005
|
+
)
|
|
1006
|
+
|
|
1007
|
+
try:
|
|
1008
|
+
if self.use_structured_output_handler:
|
|
1009
|
+
enhanced_prompt = (
|
|
1010
|
+
self.structured_handler.generate_structured_prompt(
|
|
1011
|
+
base_prompt=analysis_prompt,
|
|
1012
|
+
schema=result_schema,
|
|
1013
|
+
examples=examples,
|
|
1014
|
+
)
|
|
1015
|
+
)
|
|
1016
|
+
|
|
1017
|
+
self.task_agent.reset()
|
|
1018
|
+
response = self.task_agent.step(enhanced_prompt)
|
|
1019
|
+
|
|
1020
|
+
result = self.structured_handler.parse_structured_response(
|
|
1021
|
+
response.msg.content if response.msg else "",
|
|
1022
|
+
schema=result_schema,
|
|
1023
|
+
fallback_values=fallback_values,
|
|
1024
|
+
)
|
|
1025
|
+
|
|
1026
|
+
if isinstance(result, TaskAnalysisResult):
|
|
1027
|
+
return result
|
|
1028
|
+
elif isinstance(result, dict):
|
|
1029
|
+
return result_schema(**result)
|
|
1030
|
+
else:
|
|
1031
|
+
# Fallback based on context
|
|
1032
|
+
return TaskAnalysisResult(**fallback_values)
|
|
1033
|
+
else:
|
|
1034
|
+
self.task_agent.reset()
|
|
1035
|
+
response = self.task_agent.step(
|
|
1036
|
+
analysis_prompt, response_format=result_schema
|
|
1037
|
+
)
|
|
1038
|
+
return response.msg.parsed
|
|
1039
|
+
|
|
1040
|
+
except Exception as e:
|
|
1041
|
+
logger.warning(
|
|
1042
|
+
f"Error during task analysis "
|
|
1043
|
+
f"({'failure' if for_failure else 'quality'}): {e}, "
|
|
1044
|
+
f"using fallback"
|
|
1045
|
+
)
|
|
1046
|
+
return TaskAnalysisResult(**fallback_values)
|
|
1047
|
+
|
|
1048
|
+
async def _apply_recovery_strategy(
|
|
1049
|
+
self,
|
|
1050
|
+
task: Task,
|
|
1051
|
+
recovery_decision: TaskAnalysisResult,
|
|
1052
|
+
) -> bool:
|
|
1053
|
+
r"""Apply the recovery strategy from a task analysis result.
|
|
1054
|
+
|
|
1055
|
+
This method centralizes the recovery logic for both execution failures
|
|
1056
|
+
and quality-based failures.
|
|
1057
|
+
|
|
1058
|
+
Args:
|
|
1059
|
+
task (Task): The task that needs recovery
|
|
1060
|
+
recovery_decision (TaskAnalysisResult): The analysis result with
|
|
1061
|
+
recovery strategy
|
|
1062
|
+
|
|
1063
|
+
Returns:
|
|
1064
|
+
bool: True if workforce should halt (e.g., decompose needs
|
|
1065
|
+
different handling), False otherwise
|
|
1066
|
+
"""
|
|
1067
|
+
strategy = (
|
|
1068
|
+
recovery_decision.recovery_strategy or RecoveryStrategy.RETRY
|
|
1069
|
+
)
|
|
1070
|
+
action_taken = ""
|
|
1071
|
+
|
|
1072
|
+
try:
|
|
1073
|
+
if strategy == RecoveryStrategy.RETRY:
|
|
1074
|
+
# Simply retry the task by reposting it to the same worker
|
|
1075
|
+
# Check both _assignees dict and task.assigned_worker_id
|
|
1076
|
+
assignee_id = (
|
|
1077
|
+
self._assignees.get(task.id) or task.assigned_worker_id
|
|
1078
|
+
)
|
|
1079
|
+
|
|
1080
|
+
if assignee_id:
|
|
1081
|
+
# Retry with the same worker - no coordinator call needed
|
|
1082
|
+
await self._post_task(task, assignee_id)
|
|
1083
|
+
action_taken = f"retried with same worker {assignee_id}"
|
|
1084
|
+
logger.info(
|
|
1085
|
+
f"Task {task.id} retrying with same worker "
|
|
1086
|
+
f"{assignee_id} (no coordinator call)"
|
|
1087
|
+
)
|
|
1088
|
+
else:
|
|
1089
|
+
# No previous assignment exists - find a new assignee
|
|
1090
|
+
logger.info(
|
|
1091
|
+
f"Task {task.id} has no previous assignee, "
|
|
1092
|
+
f"calling coordinator"
|
|
1093
|
+
)
|
|
1094
|
+
batch_result = await self._find_assignee([task])
|
|
1095
|
+
assignment = batch_result.assignments[0]
|
|
1096
|
+
self._assignees[task.id] = assignment.assignee_id
|
|
1097
|
+
await self._post_task(task, assignment.assignee_id)
|
|
1098
|
+
action_taken = (
|
|
1099
|
+
f"retried with new worker {assignment.assignee_id}"
|
|
1100
|
+
)
|
|
1101
|
+
|
|
1102
|
+
elif strategy == RecoveryStrategy.REPLAN:
|
|
1103
|
+
# Modify the task content and retry
|
|
1104
|
+
if recovery_decision.modified_task_content:
|
|
1105
|
+
task.content = recovery_decision.modified_task_content
|
|
1106
|
+
logger.info(f"Task {task.id} content modified for replan")
|
|
1107
|
+
|
|
1108
|
+
# Repost the modified task
|
|
1109
|
+
if task.id in self._assignees:
|
|
1110
|
+
assignee_id = self._assignees[task.id]
|
|
1111
|
+
await self._post_task(task, assignee_id)
|
|
1112
|
+
action_taken = (
|
|
1113
|
+
f"replanned and retried with worker {assignee_id}"
|
|
1114
|
+
)
|
|
1115
|
+
else:
|
|
1116
|
+
# Find a new assignee for the replanned task
|
|
1117
|
+
batch_result = await self._find_assignee([task])
|
|
1118
|
+
assignment = batch_result.assignments[0]
|
|
1119
|
+
+                self._assignees[task.id] = assignment.assignee_id
+                await self._post_task(task, assignment.assignee_id)
+                action_taken = (
+                    f"replanned and assigned to "
+                    f"worker {assignment.assignee_id}"
+                )
+
+            elif strategy == RecoveryStrategy.REASSIGN:
+                # Reassign to a different worker
+                old_worker = task.assigned_worker_id
+                logger.info(
+                    f"Task {task.id} will be reassigned from worker "
+                    f"{old_worker}"
+                )
+
+                # Find a different worker
+                batch_result = await self._find_assignee([task])
+                assignment = batch_result.assignments[0]
+                new_worker = assignment.assignee_id
+
+                # If same worker, force find another
+                if new_worker == old_worker and len(self._children) > 1:
+                    logger.info("Same worker selected, finding alternative")
+                    # Try to find different worker by adding note to
+                    # task content
+                    task.content = (
+                        f"{task.content}\n\n"
+                        f"Note: Previous worker {old_worker} had quality "
+                        f"issues. Needs different approach."
+                    )
+                    batch_result = await self._find_assignee([task])
+                    assignment = batch_result.assignments[0]
+                    new_worker = assignment.assignee_id
+
+                self._assignees[task.id] = new_worker
+                await self._post_task(task, new_worker)
+                action_taken = f"reassigned from {old_worker} to {new_worker}"
+                logger.info(
+                    f"Task {task.id} reassigned from {old_worker} to "
+                    f"{new_worker}"
+                )
+
+            elif strategy == RecoveryStrategy.DECOMPOSE:
+                # Decompose the task into subtasks
+                reason = (
+                    "failure"
+                    if not recovery_decision.is_quality_evaluation
+                    else "quality issues"
+                )
+                logger.info(
+                    f"Task {task.id} will be decomposed due to {reason}"
+                )
+                subtasks_result = self._decompose_task(task)
+
+                # Handle both streaming and non-streaming results
+                if isinstance(subtasks_result, Generator):
+                    subtasks = []
+                    for new_tasks in subtasks_result:
+                        subtasks.extend(new_tasks)
+                else:
+                    subtasks = subtasks_result
+
+                if subtasks:
+                    task_decomposed_event = TaskDecomposedEvent(
+                        parent_task_id=task.id,
+                        subtask_ids=[st.id for st in subtasks],
+                    )
+                    for cb in self._callbacks:
+                        cb.log_task_decomposed(task_decomposed_event)
+                    for subtask in subtasks:
+                        task_created_event = TaskCreatedEvent(
+                            task_id=subtask.id,
+                            description=subtask.content,
+                            parent_task_id=task.id,
+                            task_type=subtask.type,
+                            metadata=subtask.additional_info,
+                        )
+                        for cb in self._callbacks:
+                            cb.log_task_created(task_created_event)
+
+                    # Insert subtasks at the head of the queue
+                    self._pending_tasks.extendleft(reversed(subtasks))
+                    await self._post_ready_tasks()
+                    action_taken = f"decomposed into {len(subtasks)} subtasks"
+
+                    logger.info(
+                        f"Task {task.id} decomposed into {len(subtasks)} subtasks"
+                    )
+
+                    # Sync shared memory after task decomposition
+                    if self.share_memory:
+                        logger.info(
+                            f"Syncing shared memory after task {task.id} "
+                            f"decomposition"
+                        )
+                        self._sync_shared_memory()
+
+                # For decompose, we return early with special handling
+                return True
+
+            elif strategy == RecoveryStrategy.CREATE_WORKER:
+                assignee = await self._create_worker_node_for_task(task)
+                await self._post_task(task, assignee.node_id)
+                action_taken = (
+                    f"created new worker {assignee.node_id} and assigned "
+                    f"task {task.id} to it"
+                )
+
+        except Exception as e:
+            logger.error(
+                f"Recovery strategy {strategy} failed for task {task.id}: {e}",
+                exc_info=True,
+            )
+            raise
+
+        logger.debug(
+            f"Task {task.id} recovery: {action_taken}. "
+            f"Strategy: {strategy.value}"
+        )
+
+        return False
 
     # Human intervention methods
     async def _async_pause(self) -> None:
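The hunk above rounds out the failure-recovery dispatch: REASSIGN re-posts the task to a different worker (nudging the coordinator with a note appended to the task content), DECOMPOSE re-queues subtasks and returns early, and CREATE_WORKER mints a new node. A self-contained sketch of that dispatch shape; the enum member values here are illustrative, not the package's actual strings:

```python
from enum import Enum


class RecoveryStrategy(Enum):
    # Illustrative values; the real enum lives in the camel package.
    REPLAN = "replan"
    REASSIGN = "reassign"
    DECOMPOSE = "decompose"
    CREATE_WORKER = "create_worker"


def recover(strategy: RecoveryStrategy) -> bool:
    # Returns True when recovery re-queued subtasks (early exit),
    # False when the task was simply re-posted to some worker.
    if strategy == RecoveryStrategy.REASSIGN:
        print("reassigned to a different worker")
    elif strategy == RecoveryStrategy.DECOMPOSE:
        print("decomposed into subtasks")
        return True  # mirrors the early `return True` in the diff
    elif strategy == RecoveryStrategy.CREATE_WORKER:
        print("created a new worker for the task")
    return False


assert recover(RecoveryStrategy.DECOMPOSE) is True
assert recover(RecoveryStrategy.REASSIGN) is False
```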
@@ -522,12 +1327,45 @@ class Workforce(BaseNode):
                 f"(event-loop not yet started)."
             )
 
-    def save_snapshot(self, description: str = "") -> None:
-        r"""Save current state as a snapshot."""
-        snapshot = WorkforceSnapshot(
-            main_task=self._task,
-            pending_tasks=self._pending_tasks,
-            completed_tasks=self._completed_tasks,
+    async def _async_skip_gracefully(self) -> None:
+        r"""Async implementation of skip_gracefully to run on the event
+        loop.
+        """
+        self._skip_requested = True
+        if self._pause_event.is_set() is False:
+            self._pause_event.set()  # Resume if paused to process skip
+        logger.info(f"Workforce {self.node_id} skip requested.")
+
+    def skip_gracefully(self) -> None:
+        r"""Request workforce to skip current pending tasks and move to next
+        main task from the queue. If no main tasks exist, acts like
+        stop_gracefully.
+
+        This method clears the current pending subtasks and moves to the next
+        main task in the queue if available. Works both when the internal
+        event-loop is alive and when it has not yet been started.
+        """
+
+        if self._loop and not self._loop.is_closed():
+            self._submit_coro_to_loop(self._async_skip_gracefully())
+        else:
+            # Loop not yet created, set the flag synchronously so later
+            # startup will respect it.
+            self._skip_requested = True
+            # Ensure any pending pause is released so that when the loop does
+            # start it can see the skip request and exit.
+            self._pause_event.set()
+            logger.info(
+                f"Workforce {self.node_id} skip requested "
+                f"(event-loop not yet started)."
+            )
+
+    def save_snapshot(self, description: str = "") -> None:
+        r"""Save current state as a snapshot."""
+        snapshot = WorkforceSnapshot(
+            main_task=self._task,
+            pending_tasks=self._pending_tasks,
+            completed_tasks=self._completed_tasks,
             task_dependencies=self._task_dependencies,
             assignees=self._assignees,
             current_task_index=len(self._completed_tasks),
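`skip_gracefully` mirrors `stop_gracefully`: it works whether or not the internal event loop has started, either submitting the async variant to the loop or just setting the flag for later startup. A hedged usage sketch (the workforce construction and the in-flight run are abbreviated; the import path follows the camel package layout):

```python
# a minimal sketch, assuming a Workforce built elsewhere with a run in flight
from camel.societies.workforce import Workforce

wf = Workforce("demo team")
# ... workers added, a main task being processed ...

wf.save_snapshot("before skipping the current batch")
wf.skip_gracefully()  # drop pending subtasks, move to the next main task
```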
@@ -564,7 +1402,7 @@ class Workforce(BaseNode):
         if not validate_task_content(new_content, task_id):
             logger.warning(
                 f"Task {task_id} content modification rejected: "
-                f"Invalid content. Content preview: '{new_content[:50]}...'"
+                f"Invalid content. Content preview: '{new_content}'"
             )
             return False
 
@@ -576,41 +1414,167 @@ class Workforce(BaseNode):
             logger.warning(f"Task {task_id} not found in pending tasks.")
             return False
 
+    def get_main_task_queue(self) -> List[Task]:
+        r"""Get current main task queue for human review.
+        Returns:
+            List[Task]: List of main tasks waiting to be decomposed
+                and executed.
+        """
+        # Return tasks from pending queue that need decomposition
+        return [
+            t
+            for t in self._pending_tasks
+            if t.additional_info
+            and t.additional_info.get('_needs_decomposition')
+        ]
+
     def add_task(
         self,
         content: str,
         task_id: Optional[str] = None,
         additional_info: Optional[Dict[str, Any]] = None,
+        as_subtask: bool = False,
         insert_position: int = -1,
     ) -> Task:
-        r"""Add a new task to the
-
+        r"""Add a new task to the workforce.
+
+        By default, this method adds a main task that will be decomposed into
+        subtasks. Set `as_subtask=True` to add a task directly to the pending
+        subtask queue without decomposition.
+
+        Args:
+            content (str): The content of the task.
+            task_id (Optional[str], optional): Optional ID for the task.
+                If not provided, a unique ID will be generated.
+            additional_info (Optional[Dict[str, Any]], optional): Optional
+                additional metadata for the task.
+            as_subtask (bool, optional): If True, adds the task directly to
+                the pending subtask queue. If False, adds as a main task that
+                will be decomposed. Defaults to False.
+            insert_position (int, optional): Position to insert the task in
+                the pending queue. Only applies when as_subtask=True.
+                Defaults to -1 (append to end).
+
+        Returns:
+            Task: The created task object.
+        """
+        if as_subtask:
+            new_task = Task(
+                content=content,
+                id=task_id or f"human_added_{len(self._pending_tasks)}",
+                additional_info=additional_info,
+            )
+
+            # Add directly to current pending subtasks
+            if insert_position == -1:
+                self._pending_tasks.append(new_task)
+            else:
+                # Convert deque to list, insert, then back to deque
+                tasks_list = list(self._pending_tasks)
+                tasks_list.insert(insert_position, new_task)
+                self._pending_tasks = deque(tasks_list)
+
+            logger.info(f"New subtask added to pending queue: {new_task.id}")
+            return new_task
+        else:
+            # Add as main task that needs decomposition
+            # Use additional_info to mark this task needs decomposition
+            # Make a copy to avoid modifying user's dict
+            info = additional_info.copy() if additional_info else {}
+            info['_needs_decomposition'] = True
+
+            task_count = sum(
+                1
+                for t in self._pending_tasks
+                if t.additional_info
+                and t.additional_info.get('_needs_decomposition')
+            )
+
+            new_task = Task(
+                content=content,
+                id=task_id or f"main_task_{task_count}",
+                additional_info=info,
+            )
+
+            self._pending_tasks.append(new_task)
+            logger.info(f"New main task added to pending queue: {new_task.id}")
+            return new_task
+
+    def add_main_task(
+        self,
+        content: str,
+        task_id: Optional[str] = None,
+        additional_info: Optional[Dict[str, Any]] = None,
+    ) -> Task:
+        r"""Add a new main task that will be decomposed into subtasks.
+
+        This is an alias for :meth:`add_task` with `as_subtask=False`.
+
+        Args:
+            content (str): The content of the main task.
+            task_id (Optional[str], optional): Optional ID for the task.
+            additional_info (Optional[Dict[str, Any]], optional): Optional
+                additional metadata.
+
+        Returns:
+            Task: The created main task object.
+        """
+        return self.add_task(
             content=content,
-
+            task_id=task_id,
             additional_info=additional_info,
+            as_subtask=False,
         )
-        if insert_position == -1:
-            self._pending_tasks.append(new_task)
-        else:
-            # Convert deque to list, insert, then back to deque
-            tasks_list = list(self._pending_tasks)
-            tasks_list.insert(insert_position, new_task)
-            self._pending_tasks = deque(tasks_list)
 
-
-
+    def add_subtask(
+        self,
+        content: str,
+        task_id: Optional[str] = None,
+        additional_info: Optional[Dict[str, Any]] = None,
+        insert_position: int = -1,
+    ) -> Task:
+        r"""Add a new subtask to the current pending queue.
+
+        This is an alias for :meth:`add_task` with `as_subtask=True`.
+
+        Args:
+            content (str): The content of the subtask.
+            task_id (Optional[str], optional): Optional ID for the task.
+            additional_info (Optional[Dict[str, Any]], optional): Optional
+                additional metadata.
+            insert_position (int, optional): Position to insert the task.
+                Defaults to -1 (append to end).
+
+        Returns:
+            Task: The created subtask object.
+        """
+        return self.add_task(
+            content=content,
+            task_id=task_id,
+            additional_info=additional_info,
+            as_subtask=True,
+            insert_position=insert_position,
+        )
 
     def remove_task(self, task_id: str) -> bool:
-        r"""Remove a task from the pending queue.
-
-
-
+        r"""Remove a task from the pending queue or main task queue.
+
+        Args:
+            task_id (str): The ID of the task to remove.
+
+        Returns:
+            bool: True if task was found and removed, False otherwise.
+        """
+        # Check main task queue first
+        pending_tasks_list = list(self._pending_tasks)
+        for i, task in enumerate(pending_tasks_list):
             if task.id == task_id:
-
-                self._pending_tasks = deque(
-                logger.info(f"Task {task_id} removed.")
+                pending_tasks_list.pop(i)
+                self._pending_tasks = deque(pending_tasks_list)
+                logger.info(f"Task {task_id} removed from pending queue.")
                 return True
-
+
+        logger.warning(f"Task {task_id} not found in any task queue.")
         return False
 
     def reorder_tasks(self, task_ids: List[str]) -> bool:
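Taken together, `add_task`, `add_main_task`, and `add_subtask` form one small API: main tasks are tagged with `_needs_decomposition` in `additional_info`, while subtasks go straight into the pending deque. A hedged usage sketch (import path per the camel package layout; the IDs shown are the auto-generated defaults from this hunk):

```python
from camel.societies.workforce import Workforce

wf = Workforce("review team")
main = wf.add_main_task("Summarize the Q3 report")        # id: main_task_0
sub = wf.add_subtask("Extract revenue table", insert_position=0)

print([t.id for t in wf.get_main_task_queue()])           # ['main_task_0']
print(wf.remove_task(sub.id))                             # True
```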
@@ -619,8 +1583,13 @@ class Workforce(BaseNode):
         tasks_dict = {task.id: task for task in self._pending_tasks}
 
         # Check if all provided IDs exist
-
-
+        invalid_ids = [
+            task_id for task_id in task_ids if task_id not in tasks_dict
+        ]
+        if invalid_ids:
+            logger.warning(
+                f"Task IDs not found in pending tasks: {invalid_ids}"
+            )
             return False
 
         # Check if we have the same number of tasks
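The guard pattern here (collect every invalid ID first, warn once, then bail) implies the happy path: rebuild the deque in the requested order. A self-contained sketch of that validate-then-reorder flow, independent of camel types:

```python
from collections import deque

pending = deque([("t1", "draft"), ("t2", "review"), ("t3", "ship")])
tasks_dict = {tid: payload for tid, payload in pending}


def reorder(task_ids):
    # Report all bad IDs in one warning instead of failing on the first.
    invalid_ids = [tid for tid in task_ids if tid not in tasks_dict]
    if invalid_ids:
        print(f"Task IDs not found in pending tasks: {invalid_ids}")
        return False
    pending.clear()
    pending.extend((tid, tasks_dict[tid]) for tid in task_ids)
    return True


assert reorder(["t3", "t1", "t2"]) is True
assert reorder(["t9"]) is False
```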
@@ -671,8 +1640,8 @@ class Workforce(BaseNode):
         # Reset state for tasks being moved back to pending
         for task in tasks_to_move_back:
             # Handle all possible task states
-            if task.state in [TaskState.DONE, TaskState.
-                task.state = TaskState.OPEN
+            if task.state in [TaskState.DONE, TaskState.OPEN]:
+                task.state = TaskState.FAILED  # TODO: Add logic for OPEN
             # Clear result to avoid confusion
             task.result = None
             # Reset failure count to give task a fresh start
@@ -720,68 +1689,111 @@ class Workforce(BaseNode):
             "main_task_id": self._task.id if self._task else None,
         }
 
-
-
-
-
-
+    async def handle_decompose_append_task(
+        self, task: Task, reset: bool = True
+    ) -> List[Task]:
+        r"""Handle task decomposition and validation with
+        workforce environment functions. Then append to
+        pending tasks if decomposition happened.
 
         Args:
             task (Task): The task to be processed.
-
-
-                runs the task in a blocking one-shot manner.
+            reset (Bool): Should trigger workforce reset (Workforce must not
+                be running). Default: True
 
         Returns:
-            Task: The
+            List[Task]: The decomposed subtasks or the original task.
         """
-        # Delegate to intervention pipeline when requested to keep
-        # backward-compat.
-        if interactive:
-            return await self._process_task_with_snapshot(task)
-
         if not validate_task_content(task.content, task.id):
             task.state = TaskState.FAILED
             task.result = "Task failed: Invalid or empty content provided"
             logger.warning(
                 f"Task {task.id} rejected: Invalid or empty content. "
-                f"Content preview: '{task.content[:50]}...'"
+                f"Content preview: '{task.content}'"
             )
-            return task
+            return [task]
+
+        if reset and self._state != WorkforceState.RUNNING:
+            self.reset()
+            logger.info("Workforce reset before handling task.")
 
-
+        # Focus on the new task
         self._task = task
-        if self.metrics_logger:
-            self.metrics_logger.log_task_created(
-                task_id=task.id,
-                description=task.content,
-                task_type=task.type,
-                metadata=task.additional_info,
-            )
         task.state = TaskState.FAILED
+
+        task_created_event = TaskCreatedEvent(
+            task_id=task.id,
+            description=task.content,
+            task_type=task.type,
+            metadata=task.additional_info,
+        )
+        for cb in self._callbacks:
+            cb.log_task_created(task_created_event)
+
         # The agent tend to be overconfident on the whole task, so we
         # decompose the task into subtasks first
-
-
-
-
+        subtasks_result = self._decompose_task(task)
+
+        # Handle both streaming and non-streaming results
+        if isinstance(subtasks_result, Generator):
+            # This is a generator (streaming mode)
+            subtasks = []
+            for new_tasks in subtasks_result:
+                subtasks.extend(new_tasks)
+        else:
+            # This is a regular list (non-streaming mode)
+            subtasks = subtasks_result
+        if subtasks:
+            task_decomposed_event = TaskDecomposedEvent(
+                parent_task_id=task.id,
+                subtask_ids=[st.id for st in subtasks],
             )
+            for cb in self._callbacks:
+                cb.log_task_decomposed(task_decomposed_event)
         for subtask in subtasks:
-
+            task_created_event = TaskCreatedEvent(
                 task_id=subtask.id,
                 description=subtask.content,
                 parent_task_id=task.id,
                 task_type=subtask.type,
                 metadata=subtask.additional_info,
             )
+            for cb in self._callbacks:
+                cb.log_task_created(task_created_event)
+
         if subtasks:
-            #
-            #
+            # _pending_tasks will contain both undecomposed
+            # and decomposed tasks, so we use additional_info
+            # to mark the tasks that need decomposition instead
             self._pending_tasks.extendleft(reversed(subtasks))
         else:
             # If no decomposition, execute the original task.
             self._pending_tasks.append(task)
 
+        return subtasks
+
+    @check_if_running(False)
+    async def process_task_async(
+        self, task: Task, interactive: bool = False
+    ) -> Task:
+        r"""Main entry point to process a task asynchronously.
+
+        Args:
+            task (Task): The task to be processed.
+            interactive (bool, optional): If True, enables human-intervention
+                workflow (pause/resume/snapshot). Defaults to False, which
+                runs the task in a blocking one-shot manner.
+
+        Returns:
+            Task: The updated task.
+        """
+        # Delegate to intervention pipeline when requested to keep
+        # backward-compat.
+        if interactive:
+            return await self._process_task_with_snapshot(task)
+
+        subtasks = await self.handle_decompose_append_task(task)
+
         self.set_channel(TaskChannel())
 
         await self.start()
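`handle_decompose_append_task` is now the shared front half of both entry points, so callers only choose the interactive or one-shot path. A hedged usage sketch (workforce population is abbreviated; import paths follow the camel package layout):

```python
import asyncio

from camel.societies.workforce import Workforce
from camel.tasks import Task


async def main() -> None:
    wf = Workforce("demo team")
    # ... add workers here before processing ...
    task = Task(content="Compare two sorting algorithms", id="t0")
    done = await wf.process_task_async(task)  # one-shot, non-interactive
    print(done.result)


asyncio.run(main())
```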
@@ -818,14 +1830,19 @@ class Workforce(BaseNode):
             needed
         >>> print(result.result)
         """
-        import asyncio
-        import concurrent.futures
-
         # Check if we're already in an event loop
         try:
-            asyncio.get_running_loop()
+            current_loop = asyncio.get_running_loop()
+            # Store the current loop for potential reuse by async tools
+            self._loop = current_loop
+
+            logger.info(
+                "Running in active event loop context. "
+                "Consider using process_task_async() directly for better "
+                "async tool compatibility."
+            )
 
-            #
+            # Create a new thread with a fresh event loop
             def run_in_thread():
                 # Create new event loop for this thread
                 new_loop = asyncio.new_event_loop()
@@ -836,6 +1853,8 @@ class Workforce(BaseNode):
                     )
                 finally:
                     new_loop.close()
+                    # Restore original loop reference
+                    self._loop = current_loop
 
             with concurrent.futures.ThreadPoolExecutor() as executor:
                 future = executor.submit(run_in_thread)
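The pattern across these two hunks — detect a running loop, then push the blocking work onto a helper thread that owns a fresh event loop — is a standard way to expose a synchronous facade over async internals. A self-contained sketch of the same shape, with no camel types involved:

```python
import asyncio
import concurrent.futures


async def do_work() -> str:
    await asyncio.sleep(0.01)
    return "done"


def run_sync() -> str:
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running in this thread: safe to block right here.
        return asyncio.run(do_work())

    # A loop is already running: run the coroutine on a fresh loop
    # inside a worker thread so we never block the caller's loop.
    def run_in_thread() -> str:
        new_loop = asyncio.new_event_loop()
        try:
            return new_loop.run_until_complete(do_work())
        finally:
            new_loop.close()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        return executor.submit(run_in_thread).result()


print(run_sync())
```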
@@ -856,24 +1875,8 @@ class Workforce(BaseNode):
             Task: The updated task.
         """
 
-        if not validate_task_content(task.content, task.id):
-            task.state = TaskState.FAILED
-            task.result = "Task failed: Invalid or empty content provided"
-            logger.warning(
-                f"Task {task.id} rejected: Invalid or empty content. "
-                f"Content preview: '{task.content[:50]}...'"
-            )
-            return task
-
-        self.reset()
-        self._task = task
-        self._state = WorkforceState.RUNNING
-        task.state = TaskState.OPEN
-        self._pending_tasks.append(task)
+        await self.handle_decompose_append_task(task)
 
-        # Decompose the task into subtasks first
-        subtasks = self._decompose_task(task)
-        self._pending_tasks.extendleft(reversed(subtasks))
         self.set_channel(TaskChannel())
 
         # Save initial snapshot
@@ -980,30 +1983,107 @@ class Workforce(BaseNode):
 
         return self._task
 
-
+    def _start_child_node_when_paused(
+        self, start_coroutine: Coroutine
+    ) -> None:
+        r"""Helper to start a child node when workforce is paused.
+
+        Args:
+            start_coroutine: The coroutine to start (e.g., worker_node.start())
+        """
+        if self._state == WorkforceState.PAUSED and hasattr(
+            self, '_child_listening_tasks'
+        ):
+            if self._loop and not self._loop.is_closed():
+                # Use thread-safe coroutine execution for dynamic addition
+                child_task: Union[asyncio.Task, concurrent.futures.Future]
+                try:
+                    # Check if we're in the same thread as the loop
+                    current_loop = asyncio.get_running_loop()
+                    if current_loop is self._loop:
+                        # Same loop context - use create_task
+                        child_task = self._loop.create_task(start_coroutine)
+                    else:
+                        # Different loop context - use thread-safe approach
+                        child_task = asyncio.run_coroutine_threadsafe(
+                            start_coroutine, self._loop
+                        )
+                except RuntimeError:
+                    # No running loop in current thread - use thread-safe
+                    # approach
+                    child_task = asyncio.run_coroutine_threadsafe(
+                        start_coroutine, self._loop
+                    )
+                self._child_listening_tasks.append(child_task)
+            else:
+                # Close the coroutine to prevent RuntimeWarning
+                start_coroutine.close()
+        else:
+            # Close the coroutine to prevent RuntimeWarning
+            start_coroutine.close()
+
     def add_single_agent_worker(
-        self,
+        self,
+        description: str,
+        worker: ChatAgent,
+        pool_max_size: int = DEFAULT_WORKER_POOL_SIZE,
+        enable_workflow_memory: bool = False,
     ) -> Workforce:
         r"""Add a worker node to the workforce that uses a single agent.
+        Can be called when workforce is paused to dynamically add workers.
 
         Args:
             description (str): Description of the worker node.
             worker (ChatAgent): The agent to be added.
+            pool_max_size (int): Maximum size of the agent pool.
+                (default: :obj:`10`)
+            enable_workflow_memory (bool): Whether to enable workflow memory
+                accumulation. Set to True if you plan to call
+                save_workflow_memories(). (default: :obj:`False`)
 
         Returns:
             Workforce: The workforce node itself.
+
+        Raises:
+            RuntimeError: If called while workforce is running (not paused).
+            ValueError: If worker has tools and stream mode enabled but
+                use_structured_output_handler is False.
         """
-
-
-
-
-            worker_id=worker_node.node_id,
-            worker_type='SingleAgentWorker',
-            role=worker_node.description,
+        if self._state == WorkforceState.RUNNING:
+            raise RuntimeError(
+                "Cannot add workers while workforce is running. "
+                "Pause the workforce first."
             )
+
+        # Validate worker agent compatibility
+        self._validate_agent_compatibility(worker, "Worker agent")
+
+        # Ensure the worker agent shares this workforce's pause control
+        self._attach_pause_event_to_agent(worker)
+
+        worker_node = SingleAgentWorker(
+            description=description,
+            worker=worker,
+            pool_max_size=pool_max_size,
+            use_structured_output_handler=self.use_structured_output_handler,
+            context_utility=None,  # Will be set during save/load operations
+            enable_workflow_memory=enable_workflow_memory,
+        )
+        self._children.append(worker_node)
+
+        # If we have a channel set up, set it for the new worker
+        if hasattr(self, '_channel') and self._channel is not None:
+            worker_node.set_channel(self._channel)
+
+        # If workforce is paused, start the worker's listening task
+        self._start_child_node_when_paused(worker_node.start())
+
+        self._notify_worker_created(
+            worker_node,
+            worker_type='SingleAgentWorker',
+        )
         return self
 
-    @check_if_running(False)
     def add_role_playing_worker(
         self,
         description: str,
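Because adding workers is now allowed while paused (and refused while running), the dynamic-scaling flow looks roughly like the sketch below. The `ChatAgent` construction is abbreviated, and `pause()`/`resume()` are assumed to be the human-intervention methods this class exposes around these hunks:

```python
from camel.agents import ChatAgent
from camel.societies.workforce import Workforce

wf = Workforce("research team")
wf.add_single_agent_worker("searcher", ChatAgent("You search the web."))

# Later, mid-run. Adding during RUNNING raises RuntimeError, so pause first.
wf.pause()  # assumed intervention API, per the surrounding methods
wf.add_single_agent_worker(
    "writer", ChatAgent("You draft reports."), enable_workflow_memory=True
)
wf.resume()
```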
@@ -1015,6 +2095,7 @@ class Workforce(BaseNode):
         chat_turn_limit: int = 3,
     ) -> Workforce:
         r"""Add a worker node to the workforce that uses `RolePlaying` system.
+        Can be called when workforce is paused to dynamically add workers.
 
         Args:
             description (str): Description of the node.
@@ -1034,7 +2115,27 @@ class Workforce(BaseNode):
 
         Returns:
             Workforce: The workforce node itself.
+
+        Raises:
+            RuntimeError: If called while workforce is running (not paused).
         """
+        if self._state == WorkforceState.RUNNING:
+            raise RuntimeError(
+                "Cannot add workers while workforce is running. "
+                "Pause the workforce first."
+            )
+        # Ensure provided kwargs carry pause_event so that internally created
+        # ChatAgents (assistant/user/summarizer) inherit it.
+        assistant_agent_kwargs = self._ensure_pause_event_in_kwargs(
+            assistant_agent_kwargs
+        )
+        user_agent_kwargs = self._ensure_pause_event_in_kwargs(
+            user_agent_kwargs
+        )
+        summarize_agent_kwargs = self._ensure_pause_event_in_kwargs(
+            summarize_agent_kwargs
+        )
+
         worker_node = RolePlayingWorker(
             description=description,
             assistant_role_name=assistant_role_name,
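`_ensure_pause_event_in_kwargs` is not shown in this hunk, but its call sites imply the contract: copy the caller's kwargs (or start from empty) and inject the shared `pause_event` without clobbering an explicit one. A plausible sketch of such a helper, assuming the receiving agents accept a `pause_event` keyword:

```python
import asyncio
from typing import Any, Dict, Optional


def ensure_pause_event_in_kwargs(
    kwargs: Optional[Dict[str, Any]],
    pause_event: asyncio.Event,
) -> Dict[str, Any]:
    # Copy so the caller's dict is never mutated in place; setdefault
    # keeps any pause_event the caller passed explicitly.
    merged = dict(kwargs) if kwargs else {}
    merged.setdefault("pause_event", pause_event)
    return merged


shared = asyncio.Event()
print(ensure_pause_event_in_kwargs(None, shared))
print(ensure_pause_event_in_kwargs({"model": "stub"}, shared))
```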
@@ -1043,27 +2144,52 @@ class Workforce(BaseNode):
             user_agent_kwargs=user_agent_kwargs,
             summarize_agent_kwargs=summarize_agent_kwargs,
             chat_turn_limit=chat_turn_limit,
+            use_structured_output_handler=self.use_structured_output_handler,
         )
         self._children.append(worker_node)
-
-
-
-
-
-
+
+        # If we have a channel set up, set it for the new worker
+        if hasattr(self, '_channel') and self._channel is not None:
+            worker_node.set_channel(self._channel)
+
+        # If workforce is paused, start the worker's listening task
+        self._start_child_node_when_paused(worker_node.start())
+
+        self._notify_worker_created(
+            worker_node,
+            worker_type='RolePlayingWorker',
+        )
         return self
 
-    @check_if_running(False)
     def add_workforce(self, workforce: Workforce) -> Workforce:
         r"""Add a workforce node to the workforce.
+        Can be called when workforce is paused to dynamically add workers.
 
         Args:
             workforce (Workforce): The workforce node to be added.
 
         Returns:
             Workforce: The workforce node itself.
+
+        Raises:
+            RuntimeError: If called while workforce is running (not paused).
         """
+        if self._state == WorkforceState.RUNNING:
+            raise RuntimeError(
+                "Cannot add workers while workforce is running. "
+                "Pause the workforce first."
+            )
+        # Align child workforce's pause_event with this one for unified
+        # control of worker agents only.
+        workforce._pause_event = self._pause_event
         self._children.append(workforce)
+
+        # If we have a channel set up, set it for the new workforce
+        if hasattr(self, '_channel') and self._channel is not None:
+            workforce.set_channel(self._channel)
+
+        # If workforce is paused, start the child workforce's listening task
+        self._start_child_node_when_paused(workforce.start())
         return self
 
     async def _async_reset(self) -> None:
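Nesting workforces now shares one `pause_event` down the tree, so a single pause gates every worker agent. A brief usage sketch (imports per the camel layout; sub-team population abbreviated):

```python
from camel.societies.workforce import Workforce

parent = Workforce("company")
research = Workforce("research sub-team")
# ... populate `research` with workers ...
parent.add_workforce(research)  # child now shares the parent's pause_event
```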
@@ -1093,22 +2219,426 @@ class Workforce(BaseNode):
         # Reset intervention state
         self._state = WorkforceState.IDLE
         self._stop_requested = False
+        self._skip_requested = False
         # Handle asyncio.Event in a thread-safe way
         if self._loop and not self._loop.is_closed():
             # If we have a loop, use it to set the event safely
-            asyncio.run_coroutine_threadsafe(
-                self._async_reset(), self._loop
-            ).result()
-        else:
             try:
-
-
-
-
-
-
+                asyncio.run_coroutine_threadsafe(
+                    self._async_reset(), self._loop
+                ).result()
+            except RuntimeError as e:
+                logger.warning(f"Failed to reset via existing loop: {e}")
+                # Fallback to direct event manipulation
+                self._pause_event.set()
         else:
-
+            # No active loop, directly set the event
+            self._pause_event.set()
+
+        for cb in self._callbacks:
+            if isinstance(cb, WorkforceMetrics):
+                cb.reset_task_data()
+
+    def save_workflow_memories(
+        self,
+        session_id: Optional[str] = None,
+    ) -> Dict[str, str]:
+        r"""Save workflow memories for all SingleAgentWorker instances in the
+        workforce.
+
+        .. deprecated:: 0.2.80
+            This synchronous method processes workers sequentially, which can
+            be slow for multiple agents. Use
+            :meth:`save_workflow_memories_async`
+            instead for parallel processing and significantly better
+            performance.
+
+        This method iterates through all child workers and triggers workflow
+        saving for SingleAgentWorker instances using their
+        save_workflow_memories()
+        method.
+        Other worker types are skipped.
+
+        Args:
+            session_id (Optional[str]): Custom session ID to use for saving
+                workflows. If None, auto-generates a timestamped session ID.
+                Useful for organizing workflows by project or context.
+                (default: :obj:`None`)
+
+        Returns:
+            Dict[str, str]: Dictionary mapping worker node IDs to save results.
+                Values are either file paths (success) or error messages
+                (failure).
+
+        Example:
+            >>> workforce = Workforce("My Team")
+            >>> # ... add workers and process tasks ...
+            >>> # save with auto-generated session id
+            >>> results = workforce.save_workflow_memories()
+            >>> print(results)
+            {'worker_123': '/path/to/developer_agent_workflow.md',
+             'worker_456': 'error: No conversation context available'}
+            >>> # save with custom project id
+            >>> results = workforce.save_workflow_memories(
+            ...     session_id="project_123"
+            ... )
+
+        Note:
+            For better performance with multiple workers, use the async
+            version::
+
+                results = await workforce.save_workflow_memories_async()
+
+        See Also:
+            :meth:`save_workflow_memories_async`: Async version with parallel
+                processing for significantly better performance.
+        """
+        import warnings
+
+        warnings.warn(
+            "save_workflow_memories() is slow for multiple workers. "
+            "Consider using save_workflow_memories_async() for parallel "
+            "processing and ~4x faster performance.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        results = {}
+
+        # Get or create shared context utility for this save operation
+        shared_context_utility = self._get_or_create_shared_context_utility(
+            session_id=session_id
+        )
+
+        for child in self._children:
+            if isinstance(child, SingleAgentWorker):
+                try:
+                    # Set the shared context utility for this operation
+                    child._shared_context_utility = shared_context_utility
+                    child.worker.set_context_utility(shared_context_utility)
+
+                    result = child.save_workflow_memories()
+                    if result.get("status") == "success":
+                        results[child.node_id] = result.get(
+                            "file_path", "unknown_path"
+                        )
+                    else:
+                        # Error: check if there's a separate message field,
+                        # otherwise use the status itself
+                        error_msg = result.get(
+                            "message", result.get("status", "Unknown error")
+                        )
+                        results[child.node_id] = f"error: {error_msg}"
+
+                except Exception as e:
+                    results[child.node_id] = f"error: {e!s}"
+            else:
+                # Skip non-SingleAgentWorker types
+                results[child.node_id] = (
+                    f"skipped: {type(child).__name__} not supported"
+                )
+
+        logger.info(f"Workflow save completed for {len(results)} workers")
+        return results
+
+    async def save_workflow_memories_async(
+        self,
+        session_id: Optional[str] = None,
+    ) -> Dict[str, str]:
+        r"""Asynchronously save workflow memories for all SingleAgentWorker
+        instances in the workforce.
+
+        This is the async version of save_workflow_memories() that parallelizes
+        LLM summarization calls across all workers using asyncio.gather(),
+        significantly reducing total save time.
+
+        This method iterates through all child workers and triggers workflow
+        saving for SingleAgentWorker instances using their
+        save_workflow_memories_async() method in parallel.
+        Other worker types are skipped.
+
+        Args:
+            session_id (Optional[str]): Custom session ID to use for saving
+                workflows. If None, auto-generates a timestamped session ID.
+                Useful for organizing workflows by project or context.
+                (default: :obj:`None`)
+
+        Returns:
+            Dict[str, str]: Dictionary mapping worker node IDs to save results.
+                Values are either file paths (success) or error messages
+                (failure).
+
+        Example:
+            >>> workforce = Workforce("My Team")
+            >>> # ... add workers and process tasks ...
+            >>> # save with parallel summarization (faster)
+            >>> results = await workforce.save_workflow_memories_async()
+            >>> print(results)
+            {'worker_123': '/path/to/developer_agent_workflow.md',
+             'worker_456': '/path/to/search_agent_workflow.md',
+             'worker_789': '/path/to/document_agent_workflow.md'}
+        """
+        import asyncio
+
+        results = {}
+
+        # Get or create shared context utility for this save operation
+        shared_context_utility = self._get_or_create_shared_context_utility(
+            session_id=session_id
+        )
+
+        # Prepare tasks for parallel execution
+        async def save_single_worker(
+            child: BaseNode,
+        ) -> tuple[str, str]:
+            """Save workflow for a single worker, then return (node_id,
+            result)."""
+            if isinstance(child, SingleAgentWorker):
+                try:
+                    # Set the shared context utility for this operation
+                    child._shared_context_utility = shared_context_utility
+                    child.worker.set_context_utility(shared_context_utility)
+
+                    result = await child.save_workflow_memories_async()
+                    if result.get("status") == "success":
+                        return (
+                            child.node_id,
+                            result.get("file_path", "unknown_path"),
+                        )
+                    else:
+                        # Error: check if there's a separate message field,
+                        # otherwise use the status itself
+                        error_msg = result.get(
+                            "message", result.get("status", "Unknown error")
+                        )
+                        return (child.node_id, f"error: {error_msg}")
+
+                except Exception as e:
+                    return (child.node_id, f"error: {e!s}")
+            else:
+                # Skip non-SingleAgentWorker types
+                return (
+                    child.node_id,
+                    f"skipped: {type(child).__name__} not supported",
+                )
+
+        # Create tasks for all workers
+        tasks = [save_single_worker(child) for child in self._children]
+
+        # Execute all tasks in parallel using asyncio.gather()
+        parallel_results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        # Process results
+        for result in parallel_results:
+            if isinstance(result, Exception):
+                # Handle any unexpected exceptions
+                logger.error(
+                    f"Unexpected error during workflow save: {result}"
+                )
+                results["unknown"] = f"error: {result!s}"
+            elif isinstance(result, tuple) and len(result) == 2:
+                # Successfully got (node_id, save_result) tuple
+                node_id, save_result = result
+                results[node_id] = save_result
+            else:
+                # Unexpected result format
+                logger.error(f"Unexpected result format: {result}")
+                results["unknown"] = "error: unexpected result format"
+
+        logger.info(
+            f"Workflow save completed for {len(results)} workers "
+            f"(parallelized)"
+        )
+        return results
+
+    def load_workflow_memories(
+        self,
+        session_id: Optional[str] = None,
+        worker_max_workflows: int = 3,
+        coordinator_max_workflows: int = 5,
+        task_agent_max_workflows: int = 3,
+    ) -> Dict[str, bool]:
+        r"""Load workflow memories for all SingleAgentWorker instances in the
+        workforce.
+
+        This method iterates through all child workers and loads relevant
+        workflow files for SingleAgentWorker instances using their
+        load_workflow_memories()
+        method. Workers match files based on their description names.
+
+        Args:
+            session_id (Optional[str]): Specific workforce session ID to load
+                from. If None, searches across all sessions.
+                (default: :obj:`None`)
+            worker_max_workflows (int): Maximum number of workflow files to
+                load per worker agent. (default: :obj:`3`)
+            coordinator_max_workflows (int): Maximum number of workflow files
+                to load for the coordinator agent. (default: :obj:`5`)
+            task_agent_max_workflows (int): Maximum number of workflow files
+                to load for the task planning agent. (default: :obj:`3`)
+
+        Returns:
+            Dict[str, bool]: Dictionary mapping worker node IDs to load
+                success status.
+                True indicates successful loading, False indicates failure.
+
+        Example:
+            >>> workforce = Workforce("My Team")
+            >>> workforce.add_single_agent_worker(
+            ...     "data_analyst", analyst_agent
+            ... )
+            >>> success_status = workforce.load_workflow_memories(
+            ...     worker_max_workflows=5,
+            ...     coordinator_max_workflows=10,
+            ...     task_agent_max_workflows=5
+            ... )
+            >>> print(success_status)
+            {'worker_123': True}  # Successfully loaded workflows for
+            # data_analyst
+        """
+        results = {}
+
+        # For loading, we don't create a new session - instead we search
+        # existing ones
+        # Each worker will search independently across all existing sessions
+
+        # First, load workflows for SingleAgentWorker instances
+        for child in self._children:
+            if isinstance(child, SingleAgentWorker):
+                try:
+                    # For loading, don't set shared context utility
+                    # Let each worker search across existing sessions
+                    success = child.load_workflow_memories(
+                        max_workflows=worker_max_workflows,
+                        session_id=session_id,
+                    )
+                    results[child.node_id] = success
+
+                except Exception as e:
+                    logger.error(
+                        f"Failed to load workflow for {child.node_id}: {e!s}"
+                    )
+                    results[child.node_id] = False
+            else:
+                # Skip non-SingleAgentWorker types
+                results[child.node_id] = False
+
+        # Load aggregated workflow summaries for coordinator and task agents
+        self._load_management_agent_workflows(
+            coordinator_max_workflows, task_agent_max_workflows, session_id
+        )
+
+        logger.info(f"Workflow load completed for {len(results)} workers")
+        return results
+
+    def _load_management_agent_workflows(
+        self,
+        coordinator_max_workflows: int,
+        task_agent_max_workflows: int,
+        session_id: Optional[str] = None,
+    ) -> None:
+        r"""Load workflow summaries for coordinator and task planning agents.
+
+        This method loads aggregated workflow summaries to help:
+        - Coordinator agent: understand task assignment patterns and worker
+          capabilities
+        - Task agent: understand task decomposition patterns and
+          successful strategies
+
+        Args:
+            coordinator_max_workflows (int): Maximum number of workflow files
+                to load for the coordinator agent.
+            task_agent_max_workflows (int): Maximum number of workflow files
+                to load for the task planning agent.
+            session_id (Optional[str]): Specific session ID to load from.
+                If None, searches across all sessions.
+        """
+        try:
+            import glob
+            import os
+            from pathlib import Path
+
+            from camel.utils.context_utils import ContextUtility
+
+            # For loading management workflows, search across all sessions
+            camel_workdir = os.environ.get("CAMEL_WORKDIR")
+            if camel_workdir:
+                base_dir = os.path.join(camel_workdir, "workforce_workflows")
+            else:
+                base_dir = "workforce_workflows"
+
+            # Search for workflow files in specified or all session directories
+            if session_id:
+                search_path = str(
+                    Path(base_dir) / session_id / "*_workflow*.md"
+                )
+            else:
+                search_path = str(Path(base_dir) / "*" / "*_workflow*.md")
+            workflow_files = glob.glob(search_path)
+
+            if not workflow_files:
+                logger.info(
+                    "No workflow files found for management agent context"
+                )
+                return
+
+            # Sort by modification time (most recent first)
+            workflow_files.sort(
+                key=lambda x: os.path.getmtime(x), reverse=True
+            )
+
+            # Load workflows for coordinator agent
+            coordinator_loaded = 0
+            for file_path in workflow_files[:coordinator_max_workflows]:
+                try:
+                    filename = os.path.basename(file_path).replace('.md', '')
+                    session_dir = os.path.dirname(file_path)
+                    session_id = os.path.basename(session_dir)
+
+                    # Use shared context utility with specific session
+                    temp_utility = ContextUtility.get_workforce_shared(
+                        session_id
+                    )
+
+                    status = temp_utility.load_markdown_context_to_memory(
+                        self.coordinator_agent, filename
+                    )
+                    if "Context appended" in status:
+                        coordinator_loaded += 1
+                except Exception as e:
+                    logger.warning(
+                        f"Failed to load coordinator workflow {file_path}: {e}"
+                    )
+
+            # Load workflows for task agent
+            task_agent_loaded = 0
+            for file_path in workflow_files[:task_agent_max_workflows]:
+                try:
+                    filename = os.path.basename(file_path).replace('.md', '')
+                    session_dir = os.path.dirname(file_path)
+                    session_id = os.path.basename(session_dir)
+
+                    # Use shared context utility with specific session
+                    temp_utility = ContextUtility.get_workforce_shared(
+                        session_id
+                    )
+
+                    status = temp_utility.load_markdown_context_to_memory(
+                        self.task_agent, filename
+                    )
+                    if "Context appended" in status:
+                        task_agent_loaded += 1
+                except Exception as e:
+                    logger.warning(
+                        f"Failed to load task agent workflow {file_path}: {e}"
+                    )
+
+            logger.info(
+                f"Loaded {coordinator_loaded} workflows for coordinator, "
+                f"{task_agent_loaded} workflows for task agent"
+            )
+
+        except Exception as e:
+            logger.error(f"Error loading management agent workflows: {e}")
 
     @check_if_running(False)
     def set_channel(self, channel: TaskChannel) -> None:
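The save/load pair is the persistence story for workforce "experience": save writes per-worker markdown workflows under a session directory, load feeds them back to the workers plus the coordinator and task agents. A hedged end-to-end sketch, using the async save for the parallel path:

```python
import asyncio

from camel.societies.workforce import Workforce


async def persist_and_restore(wf: Workforce) -> None:
    # Parallel save: one LLM summarization per SingleAgentWorker,
    # fanned out via asyncio.gather under the hood.
    results = await wf.save_workflow_memories_async(session_id="project_123")
    print(results)  # node_id -> file path, or "error: ..." / "skipped: ..."

    # Later (possibly a fresh process): restore into workers and the
    # coordinator/task agents from the same session.
    loaded = wf.load_workflow_memories(
        session_id="project_123", worker_max_workflows=5
    )
    print(loaded)  # node_id -> bool
```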
@@ -1119,25 +2649,332 @@ class Workforce(BaseNode):
 
     def _get_child_nodes_info(self) -> str:
         r"""Get the information of all the child nodes under this node."""
-
-
-
-
-
-
-
+        return "".join(
+            f"<{child.node_id}>:<{child.description}>:<{self._get_node_info(child)}>\n"
+            for child in self._children
+        )
+
+    def _get_node_info(self, node) -> str:
+        r"""Get descriptive information for a specific node type."""
+        if isinstance(node, Workforce):
+            return "A Workforce node"
+        elif isinstance(node, SingleAgentWorker):
+            return self._get_single_agent_toolkit_info(node)
+        elif isinstance(node, RolePlayingWorker):
+            return "A Role playing node"
+        else:
+            return "Unknown node"
+
+    def _get_single_agent_toolkit_info(
+        self, worker: 'SingleAgentWorker'
+    ) -> str:
+        r"""Get formatted information for a SingleAgentWorker node."""
+        toolkit_tools = self._group_tools_by_toolkit(worker.worker.tool_dict)
+
+        if not toolkit_tools:
+            return ""
+
+        toolkit_info = []
+        for toolkit_name, tools in sorted(toolkit_tools.items()):
+            tools_str = ', '.join(sorted(tools))
+            toolkit_info.append(f"{toolkit_name}({tools_str})")
+
+        return ", ".join(toolkit_info)
+
+    def _group_tools_by_toolkit(self, tool_dict: dict) -> dict[str, list[str]]:
+        r"""Group tools by their parent toolkit class names."""
+        toolkit_tools: dict[str, list[str]] = {}
+
+        for tool_name, tool in tool_dict.items():
+            if hasattr(tool.func, '__self__'):
+                toolkit_name = tool.func.__self__.__class__.__name__
+            else:
+                toolkit_name = "Standalone"
+
+            if toolkit_name not in toolkit_tools:
+                toolkit_tools[toolkit_name] = []
+            toolkit_tools[toolkit_name].append(tool_name)
+
+        return toolkit_tools
+
+    def _get_valid_worker_ids(self) -> set:
+        r"""Get all valid worker IDs from child nodes.
+
+        Returns:
+            set: Set of valid worker IDs that can be assigned tasks.
+        """
+        valid_worker_ids = {child.node_id for child in self._children}
+        return valid_worker_ids
+
+    def _call_coordinator_for_assignment(
+        self, tasks: List[Task], invalid_ids: Optional[List[str]] = None
+    ) -> TaskAssignResult:
+        r"""Call coordinator agent to assign tasks with optional validation
+        feedback in the case of invalid worker IDs.
+
+        Args:
+            tasks (List[Task]): Tasks to assign.
+            invalid_ids (List[str], optional): Invalid worker IDs from previous
+                attempt (if any).
+
+        Returns:
+            TaskAssignResult: Assignment result from coordinator.
+        """
+        # format tasks information for the prompt
+        tasks_info = ""
+        for task in tasks:
+            tasks_info += f"Task ID: {task.id}\n"
+            tasks_info += f"Content: {task.content}\n"
+            if task.additional_info:
+                tasks_info += f"Additional Info: {task.additional_info}\n"
+            tasks_info += "---\n"
+
+        prompt = str(
+            ASSIGN_TASK_PROMPT.format(
+                tasks_info=tasks_info,
+                child_nodes_info=self._get_child_nodes_info(),
+            )
+        )
+
+        # add feedback if this is a retry
+        if invalid_ids:
+            valid_worker_ids = list(self._get_valid_worker_ids())
+            feedback = (
+                f"VALIDATION ERROR: The following worker IDs are invalid: "
+                f"{invalid_ids}. "
+                f"VALID WORKER IDS: {valid_worker_ids}. "
+                f"Please reassign ONLY the above tasks using these valid IDs."
+            )
+            prompt = prompt + f"\n\n{feedback}"
+
+        # Check if we should use structured handler
+        if self.use_structured_output_handler:
+            # Use structured handler for prompt-based extraction
+            enhanced_prompt = (
+                self.structured_handler.generate_structured_prompt(
+                    base_prompt=prompt,
+                    schema=TaskAssignResult,
+                    examples=[
+                        {
+                            "assignments": [
+                                {
+                                    "task_id": "task_1",
+                                    "assignee_id": "worker_123",
+                                    "dependencies": [],
+                                }
+                            ]
+                        }
+                    ],
                 )
-
-
+            )
+
+            # Get response without structured format
+            response = self.coordinator_agent.step(enhanced_prompt)
+
+            if response.msg is None or response.msg.content is None:
+                logger.error(
+                    "Coordinator agent returned empty response for "
+                    "task assignment"
+                )
+                return TaskAssignResult(assignments=[])
+
+            # Parse with structured handler
+            result = self.structured_handler.parse_structured_response(
+                response.msg.content,
+                schema=TaskAssignResult,
+                fallback_values={"assignments": []},
+            )
+            # Ensure we return a TaskAssignResult instance
+            if isinstance(result, TaskAssignResult):
+                return result
+            elif isinstance(result, dict):
+                return TaskAssignResult(**result)
+            else:
+                return TaskAssignResult(assignments=[])
+        else:
+            # Use existing native structured output code
+            response = self.coordinator_agent.step(
+                prompt, response_format=TaskAssignResult
+            )
+
+            if response.msg is None or response.msg.content is None:
+                logger.error(
+                    "Coordinator agent returned empty response for "
+                    "task assignment"
+                )
+                return TaskAssignResult(assignments=[])
+
+            try:
+                result_dict = json.loads(response.msg.content, parse_int=str)
+                return TaskAssignResult(**result_dict)
+            except json.JSONDecodeError as e:
+                logger.error(
+                    f"JSON parsing error in task assignment: Invalid response "
+                    f"format - {e}. Response content: "
+                    f"{response.msg.content}"
+                )
+                return TaskAssignResult(assignments=[])
+
+    def _validate_assignments(
+        self, assignments: List[TaskAssignment], valid_ids: Set[str]
+    ) -> Tuple[List[TaskAssignment], List[TaskAssignment]]:
+        r"""Validate task assignments against valid worker IDs.
+
+        Args:
+            assignments (List[TaskAssignment]): Assignments to validate.
+            valid_ids (Set[str]): Set of valid worker IDs.
+
+        Returns:
+            Tuple[List[TaskAssignment], List[TaskAssignment]]:
+                (valid_assignments, invalid_assignments)
+        """
+        valid_assignments: List[TaskAssignment] = []
+        invalid_assignments: List[TaskAssignment] = []
+
+        for assignment in assignments:
+            if assignment.assignee_id in valid_ids:
+                valid_assignments.append(assignment)
+            else:
+                invalid_assignments.append(assignment)
+
+        return valid_assignments, invalid_assignments
+
+    async def _handle_task_assignment_fallbacks(
+        self, tasks: List[Task]
+    ) -> List:
+        r"""Create new workers for unassigned tasks as fallback.
+
+        Args:
+            tasks (List[Task]): Tasks that need new workers.
+
+        Returns:
+            List[TaskAssignment]: Assignments for newly created workers.
+        """
+        fallback_assignments = []
+
+        for task in tasks:
+            logger.info(f"Creating new worker for unassigned task {task.id}")
+            new_worker = await self._create_worker_node_for_task(task)
+
+            assignment = TaskAssignment(
+                task_id=task.id,
+                assignee_id=new_worker.node_id,
+                dependencies=[],
+            )
+            fallback_assignments.append(assignment)
+
+        return fallback_assignments
+
+    async def _handle_assignment_retry_and_fallback(
+        self,
+        invalid_assignments: List[TaskAssignment],
+        tasks: List[Task],
+        valid_worker_ids: Set[str],
+    ) -> List[TaskAssignment]:
+        r"""Called if Coordinator agent fails to assign tasks to valid worker
+        IDs. Handles retry assignment and fallback worker creation for invalid
+        assignments.
+
+        Args:
+            invalid_assignments (List[TaskAssignment]): Invalid assignments to
+                retry.
+            tasks (List[Task]): Original tasks list for task lookup.
+            valid_worker_ids (set): Set of valid worker IDs.
+
+        Returns:
+            List[TaskAssignment]: Final assignments for the invalid tasks.
+        """
+        invalid_ids = [a.assignee_id for a in invalid_assignments]
+        invalid_tasks = [
+            task
+            for task in tasks
+            if any(a.task_id == task.id for a in invalid_assignments)
+        ]
+
+        # handle cases where coordinator returned no assignments at all
+        if not invalid_assignments:
+            invalid_tasks = tasks  # all tasks need assignment
+            logger.warning(
+                f"Coordinator returned no assignments. "
+                f"Retrying assignment for all {len(invalid_tasks)} tasks."
+            )
+        else:
+            logger.warning(
+                f"Invalid worker IDs detected: {invalid_ids}. "
+                f"Retrying assignment for {len(invalid_tasks)} tasks."
+            )
+
+        # retry assignment with feedback
+        retry_result = self._call_coordinator_for_assignment(
+            invalid_tasks, invalid_ids
+        )
+        final_assignments = []
+
+        if retry_result.assignments:
+            retry_valid, retry_invalid = self._validate_assignments(
+                retry_result.assignments, valid_worker_ids
+            )
+            final_assignments.extend(retry_valid)
+
+            # collect tasks that are still unassigned for fallback
+            if retry_invalid:
+                unassigned_tasks = [
+                    task
+                    for task in invalid_tasks
+                    if any(a.task_id == task.id for a in retry_invalid)
+                ]
             else:
-
-
-
-
+                unassigned_tasks = []
+        else:
+            # retry failed completely, all invalid tasks need fallback
+            logger.warning("Retry assignment failed")
+            unassigned_tasks = invalid_tasks
+
+        # handle fallback for any remaining unassigned tasks
+        if unassigned_tasks:
+            logger.warning(
+                f"Creating fallback workers for {len(unassigned_tasks)} "
+                f"unassigned tasks"
+            )
+            fallback_assignments = (
+                await self._handle_task_assignment_fallbacks(unassigned_tasks)
             )
-
+            final_assignments.extend(fallback_assignments)
+
+        return final_assignments
 
-    def
+    def _update_task_dependencies_from_assignments(
+        self, assignments: List[TaskAssignment], tasks: List[Task]
+    ) -> None:
+        r"""Update Task.dependencies with actual Task objects based on
+        assignments.
+
+        Args:
+            assignments (List[TaskAssignment]): The task assignments
+                containing dependency IDs.
+            tasks (List[Task]): The tasks that were assigned.
+        """
+        # Create a lookup map for all available tasks
+        all_tasks = {}
+        for task_list in [self._completed_tasks, self._pending_tasks, tasks]:
+            for task in task_list:
+                all_tasks[task.id] = task
+
+        # Update dependencies for each assigned task
+        for assignment in assignments:
+            if not assignment.dependencies:
+                continue
+
+            matching_tasks = [t for t in tasks if t.id == assignment.task_id]
|
|
2969
|
+
if matching_tasks:
|
|
2970
|
+
task = matching_tasks[0]
|
|
2971
|
+
task.dependencies = [
|
|
2972
|
+
all_tasks[dep_id]
|
|
2973
|
+
for dep_id in assignment.dependencies
|
|
2974
|
+
if dep_id in all_tasks
|
|
2975
|
+
]
|
|
2976
|
+
|
|
2977
|
+
async def _find_assignee(
|
|
1141
2978
|
self,
|
|
1142
2979
|
tasks: List[Task],
|
|
1143
2980
|
) -> TaskAssignResult:
|
|
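The hunk above partitions the coordinator's output into valid and invalid assignments, retries once with feedback, and only then falls back to creating new workers. A minimal, self-contained sketch of that partition-retry-fallback shape (the `Assignment` type and the `retry`/`fallback` callables are hypothetical stand-ins, not the CAMEL APIs):

```python
from dataclasses import dataclass
from typing import Callable, List, Set, Tuple


@dataclass
class Assignment:  # hypothetical stand-in for TaskAssignment
    task_id: str
    assignee_id: str


def partition(
    assignments: List[Assignment], valid_ids: Set[str]
) -> Tuple[List[Assignment], List[Assignment]]:
    # Split assignments by whether the assignee is a known worker.
    valid = [a for a in assignments if a.assignee_id in valid_ids]
    invalid = [a for a in assignments if a.assignee_id not in valid_ids]
    return valid, invalid


def assign_with_recovery(
    assignments: List[Assignment],
    valid_ids: Set[str],
    retry: Callable[[List[Assignment]], List[Assignment]],
    fallback: Callable[[List[Assignment]], List[Assignment]],
) -> List[Assignment]:
    valid, invalid = partition(assignments, valid_ids)
    if invalid:
        # One retry pass; anything still invalid gets a fallback worker.
        retried_valid, still_invalid = partition(retry(invalid), valid_ids)
        valid.extend(retried_valid)
        valid.extend(fallback(still_invalid))
    return valid
```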
@@ -1150,49 +2987,129 @@ class Workforce(BaseNode):
             TaskAssignResult: Assignment result containing task assignments
                 with their dependencies.
         """
+        # Wait for workers to be ready before assignment with exponential
+        # backoff
+        worker_readiness_timeout = 2.0  # Maximum wait time in seconds
+        worker_readiness_check_interval = 0.05  # Initial check interval
+        start_time = time.time()
+        check_interval = worker_readiness_check_interval
+        backoff_multiplier = 1.5  # Exponential backoff factor
+        max_interval = 0.5  # Cap the maximum interval
+
+        while (time.time() - start_time) < worker_readiness_timeout:
+            valid_worker_ids = self._get_valid_worker_ids()
+            if len(valid_worker_ids) > 0:
+                elapsed = time.time() - start_time
+                logger.debug(
+                    f"Workers ready after {elapsed:.3f}s: "
+                    f"{len(valid_worker_ids)} workers available"
+                )
+                break
+
+            await asyncio.sleep(check_interval)
+            # Exponential backoff with cap
+            check_interval = min(
+                check_interval * backoff_multiplier, max_interval
+            )
+        else:
+            # Timeout reached, log warning but continue
+            logger.warning(
+                f"Worker readiness timeout after "
+                f"{worker_readiness_timeout}s, "
+                f"proceeding with {len(self._children)} children"
+            )
+            valid_worker_ids = self._get_valid_worker_ids()
+
         self.coordinator_agent.reset()
 
-
-
-
-
-
-
-
-
+        logger.debug(
+            f"Sending batch assignment request to coordinator "
+            f"for {len(tasks)} tasks."
+        )
+
+        assignment_result = self._call_coordinator_for_assignment(tasks)
+
+        # validate assignments
+        valid_assignments, invalid_assignments = self._validate_assignments(
+            assignment_result.assignments, valid_worker_ids
+        )
+
+        # check if we have assignments for all tasks
+        assigned_task_ids = {
+            a.task_id for a in valid_assignments + invalid_assignments
+        }
+        unassigned_tasks = [t for t in tasks if t.id not in assigned_task_ids]
+
+        # if all assignments are valid and all tasks are assigned, return early
+        if not invalid_assignments and not unassigned_tasks:
+            self._update_task_dependencies_from_assignments(
+                valid_assignments, tasks
+            )
+            return TaskAssignResult(assignments=valid_assignments)
 
-
-
-
+        # handle retry and fallback for invalid assignments and unassigned
+        # tasks
+        retry_and_fallback_assignments = (
+            await self._handle_assignment_retry_and_fallback(
+                invalid_assignments, tasks, valid_worker_ids
+            )
         )
 
-
-
-
+        # Combine assignments with deduplication, prioritizing retry results
+        assignment_map = {a.task_id: a for a in valid_assignments}
+        assignment_map.update(
+            {a.task_id: a for a in retry_and_fallback_assignments}
         )
+        all_assignments = list(assignment_map.values())
 
-
-
-
-
-
-
+        # Log any overwrites for debugging
+        valid_task_ids = {a.task_id for a in valid_assignments}
+        retry_task_ids = {a.task_id for a in retry_and_fallback_assignments}
+        overlap_task_ids = valid_task_ids & retry_task_ids
+
+        if overlap_task_ids:
+            logger.warning(
+                f"Retry assignments overrode {len(overlap_task_ids)} "
+                f"valid assignments for tasks: {sorted(overlap_task_ids)}"
+            )
+
+        # Update Task.dependencies for all final assignments
+        self._update_task_dependencies_from_assignments(all_assignments, tasks)
+
+        return TaskAssignResult(assignments=all_assignments)
 
     async def _post_task(self, task: Task, assignee_id: str) -> None:
         # Record the start time when a task is posted
         self._task_start_times[task.id] = time.time()
 
-
-
-
+        task.assigned_worker_id = assignee_id
+
+        task_started_event = TaskStartedEvent(
+            task_id=task.id, worker_id=assignee_id
+        )
+        for cb in self._callbacks:
+            cb.log_task_started(task_started_event)
+
+        try:
+            await self._channel.post_task(task, self.node_id, assignee_id)
+            self._increment_in_flight_tasks(task.id)
+            logger.debug(
+                f"Posted task {task.id} to {assignee_id}. "
+                f"In-flight tasks: {self._in_flight_tasks}"
+            )
+        except Exception as e:
+            logger.error(
+                f"Failed to post task {task.id} to {assignee_id}: {e}"
+            )
+            print(
+                f"{Fore.RED}Failed to post task {task.id} to {assignee_id}: "
+                f"{e}{Fore.RESET}"
             )
-        self._in_flight_tasks += 1
-        await self._channel.post_task(task, self.node_id, assignee_id)
 
     async def _post_dependency(self, dependency: Task) -> None:
         await self._channel.post_dependency(dependency, self.node_id)
 
-    def _create_worker_node_for_task(self, task: Task) -> Worker:
+    async def _create_worker_node_for_task(self, task: Task) -> Worker:
         r"""Creates a new worker node for a given task and add it to the
         children list of this node. This is one of the actions that
         the coordinator can take when a task has failed.
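The readiness wait above polls with an exponentially growing interval capped at 0.5s, falling through the `while`/`else` when the timeout is hit. A minimal sketch of the same wait-with-backoff pattern in isolation (the `ready` callable is a hypothetical probe, not a CAMEL API):

```python
import asyncio
import time
from typing import Callable


async def wait_until_ready(
    ready: Callable[[], bool],  # hypothetical readiness probe
    timeout: float = 2.0,
    initial_interval: float = 0.05,
    multiplier: float = 1.5,
    max_interval: float = 0.5,
) -> bool:
    """Poll `ready()` with capped exponential backoff.

    Returns True if the probe became ready within `timeout`.
    """
    start = time.time()
    interval = initial_interval
    while (time.time() - start) < timeout:
        if ready():
            return True
        await asyncio.sleep(interval)
        # Grow the polling interval, but never beyond the cap.
        interval = min(interval * multiplier, max_interval)
    return False
```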
@@ -1203,84 +3120,200 @@ class Workforce(BaseNode):
         Returns:
             Worker: The created worker node.
         """
-        prompt =
-
-
-
-
-            prompt, response_format=WorkerConf
+        prompt = str(
+            CREATE_NODE_PROMPT.format(
+                content=task.content,
+                child_nodes_info=self._get_child_nodes_info(),
+                additional_info=task.additional_info,
+            )
         )
-
-
+        # Check if we should use structured handler
+        if self.use_structured_output_handler:
+            # Use structured handler
+            enhanced_prompt = (
+                self.structured_handler.generate_structured_prompt(
+                    base_prompt=prompt,
+                    schema=WorkerConf,
+                    examples=[
+                        {
+                            "description": "Data analysis specialist",
+                            "role": "Data Analyst",
+                            "sys_msg": "You are an expert data analyst.",
+                        }
+                    ],
+                )
+            )
+
+            response = self.coordinator_agent.step(enhanced_prompt)
+
+            if response.msg is None or response.msg.content is None:
+                logger.error(
+                    "Coordinator agent returned empty response for "
+                    "worker creation"
+                )
+                new_node_conf = WorkerConf(
+                    description=f"Fallback worker for task: {task.content}",
+                    role="General Assistant",
+                    sys_msg="You are a general assistant that can help "
+                    "with various tasks.",
+                )
+            else:
+                result = self.structured_handler.parse_structured_response(
+                    response.msg.content,
+                    schema=WorkerConf,
+                    fallback_values={
+                        "description": f"Worker for task: {task.content}",
+                        "role": "Task Specialist",
+                        "sys_msg": f"You are a specialist for: {task.content}",
+                    },
+                )
+                # Ensure we have a WorkerConf instance
+                if isinstance(result, WorkerConf):
+                    new_node_conf = result
+                elif isinstance(result, dict):
+                    new_node_conf = WorkerConf(**result)
+                else:
+                    new_node_conf = WorkerConf(
+                        description=f"Worker for task: {task.content}",
+                        role="Task Specialist",
+                        sys_msg=f"You are a specialist for: {task.content}",
+                    )
+        else:
+            # Use existing native structured output code
+            response = self.coordinator_agent.step(
+                prompt, response_format=WorkerConf
+            )
+            if response.msg is None or response.msg.content is None:
+                logger.error(
+                    "Coordinator agent returned empty response for "
+                    "worker creation"
+                )
+                # Create a fallback worker configuration
+                new_node_conf = WorkerConf(
+                    description=f"Fallback worker for task: {task.content}",
+                    role="General Assistant",
+                    sys_msg="You are a general assistant that can help "
+                    "with various tasks.",
+                )
+            else:
+                try:
+                    result_dict = json.loads(response.msg.content)
+                    new_node_conf = WorkerConf(**result_dict)
+                except json.JSONDecodeError as e:
+                    logger.error(
+                        f"JSON parsing error in worker creation: Invalid "
+                        f"response format - {e}. Response content: "
+                        f"{response.msg.content}"
+                    )
+                    raise RuntimeError(
+                        f"Failed to create worker for task {task.id}: "
+                        f"Coordinator agent returned malformed JSON response. "
+                    ) from e
 
-        new_agent = self._create_new_agent(
+        new_agent = await self._create_new_agent(
             new_node_conf.role,
             new_node_conf.sys_msg,
         )
 
+        # Validate the new agent compatibility before creating worker
+        try:
+            self._validate_agent_compatibility(
+                new_agent, f"Agent for task {task.id}"
+            )
+        except ValueError as e:
+            raise ValueError(f"Cannot create worker for task {task.id}: {e!s}")
+
         new_node = SingleAgentWorker(
             description=new_node_conf.description,
             worker=new_agent,
+            pool_max_size=DEFAULT_WORKER_POOL_SIZE,
+            use_structured_output_handler=self.use_structured_output_handler,
         )
         new_node.set_channel(self._channel)
 
         print(f"{Fore.CYAN}{new_node} created.{Fore.RESET}")
 
         self._children.append(new_node)
-
-
-
-
-
-
-
+
+        self._notify_worker_created(
+            new_node,
+            worker_type='SingleAgentWorker',
+            role=new_node_conf.role,
+            metadata={'description': new_node_conf.description},
+        )
         self._child_listening_tasks.append(
             asyncio.create_task(new_node.start())
         )
         return new_node
 
-    def _create_new_agent(self, role: str, sys_msg: str) -> ChatAgent:
+    async def _create_new_agent(self, role: str, sys_msg: str) -> ChatAgent:
         worker_sys_msg = BaseMessage.make_assistant_message(
             role_name=role,
             content=sys_msg,
         )
 
-        if self.
-
-
-
-
-
-
-
-
+        if self.new_worker_agent is not None:
+            # Clone the template agent to create an independent instance
+            cloned_agent = self.new_worker_agent.clone(with_memory=False)
+            # Update the system message for the specific role
+            cloned_agent._system_message = worker_sys_msg
+            cloned_agent.init_messages()  # Initialize with new system message
+            return cloned_agent
+        else:
+            # Default tools for a new agent
+            function_list = [
+                SearchToolkit().search_duckduckgo,
+                *CodeExecutionToolkit().get_tools(),
+                *ThinkingToolkit().get_tools(),
+            ]
 
-
-
-
-
-
+            model = ModelFactory.create(
+                model_platform=ModelPlatformType.DEFAULT,
+                model_type=ModelType.DEFAULT,
+                model_config_dict={"temperature": 0},
+            )
 
-
+            return ChatAgent(
+                system_message=worker_sys_msg,
+                model=model,
+                tools=function_list,  # type: ignore[arg-type]
+                pause_event=self._pause_event,
+            )
 
-    async def _get_returned_task(self) -> Task:
+    async def _get_returned_task(self) -> Optional[Task]:
         r"""Get the task that's published by this node and just get returned
         from the assignee. Includes timeout handling to prevent indefinite
         waiting.
+
+        Raises:
+            asyncio.TimeoutError: If waiting for task exceeds timeout
         """
         try:
             # Add timeout to prevent indefinite waiting
             return await asyncio.wait_for(
                 self._channel.get_returned_task_by_publisher(self.node_id),
-                timeout=
+                timeout=self.task_timeout_seconds,
             )
         except asyncio.TimeoutError:
+            # Re-raise timeout errors to be handled by caller
+            # This prevents hanging when tasks are stuck
             logger.warning(
-                f"Timeout waiting for
-                f"
+                f"Timeout waiting for task return in workforce "
+                f"{self.node_id}. "
+                f"Timeout: {self.task_timeout_seconds}s, "
+                f"Pending tasks: {len(self._pending_tasks)}, "
+                f"In-flight tasks: {self._in_flight_tasks}"
+            )
+            raise
+        except Exception as e:
+            error_msg = (
+                f"Error getting returned task {e} in "
+                f"workforce {self.node_id}. "
+                f"Current pending tasks: {len(self._pending_tasks)}, "
+                f"In-flight tasks: {self._in_flight_tasks}"
             )
-
+            logger.error(error_msg, exc_info=True)
+            return None
 
     async def _post_ready_tasks(self) -> None:
         r"""Checks for unassigned tasks, assigns them, and then posts any
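Both worker-creation branches above end in the same shape: parse the model's reply into a `WorkerConf`, or substitute a safe default. A minimal Pydantic sketch of that parse-or-fallback step (`WorkerSpec` and its defaults are illustrative stand-ins, not the CAMEL `WorkerConf`):

```python
import json

from pydantic import BaseModel, ValidationError


class WorkerSpec(BaseModel):  # illustrative stand-in for WorkerConf
    description: str
    role: str
    sys_msg: str


def parse_worker_spec(raw: str) -> WorkerSpec:
    """Parse an LLM reply into a WorkerSpec, falling back to a generic worker."""
    try:
        return WorkerSpec(**json.loads(raw))
    except (json.JSONDecodeError, ValidationError, TypeError):
        # Malformed or incomplete reply: degrade to a generic assistant.
        return WorkerSpec(
            description="Fallback worker",
            role="General Assistant",
            sys_msg="You are a general assistant.",
        )


print(parse_worker_spec('{"description": "d", "role": "r", "sys_msg": "s"}'))
print(parse_worker_spec("not json"))  # falls back without raising
```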
@@ -1290,53 +3323,185 @@ class Workforce(BaseNode):
         tasks_to_assign = [
             task
             for task in self._pending_tasks
-            if
+            if (
+                task.id not in self._task_dependencies
+                and (
+                    task.additional_info is None
+                    or not task.additional_info.get(
+                        "_needs_decomposition", False
+                    )
+                )
+            )
         ]
         if tasks_to_assign:
             logger.debug(
                 f"Found {len(tasks_to_assign)} new tasks. "
                 f"Requesting assignment..."
             )
-            batch_result = self._find_assignee(tasks_to_assign)
+            batch_result = await self._find_assignee(tasks_to_assign)
             logger.debug(
                 f"Coordinator returned assignments:\n"
-                f"{json.dumps(batch_result.
+                f"{json.dumps(batch_result.model_dump(), indent=2)}"
             )
             for assignment in batch_result.assignments:
                 self._task_dependencies[assignment.task_id] = (
                     assignment.dependencies
                 )
                 self._assignees[assignment.task_id] = assignment.assignee_id
-
+
+                task_assigned_event = TaskAssignedEvent(
+                    task_id=assignment.task_id,
+                    worker_id=assignment.assignee_id,
+                    dependencies=assignment.dependencies,
+                    queue_time_seconds=None,
+                )
+                for cb in self._callbacks:
                     # queue_time_seconds can be derived by logger if task
                     # creation time is logged
-
-                    task_id=assignment.task_id,
-                    worker_id=assignment.assignee_id,
-                    dependencies=assignment.dependencies,
-                    queue_time_seconds=None,
-                )
+                    cb.log_task_assigned(task_assigned_event)
 
         # Step 2: Iterate through all pending tasks and post those that are
         # ready
         posted_tasks = []
+        # Pre-compute completed task IDs and their states for O(1) lookups
+        completed_tasks_info = {t.id: t.state for t in self._completed_tasks}
+
         for task in self._pending_tasks:
             # A task must be assigned to be considered for posting
             if task.id in self._task_dependencies:
+                # Skip if task has already been posted to prevent duplicates
+                try:
+                    task_from_channel = await self._channel.get_task_by_id(
+                        task.id
+                    )
+                    # Check if task is already assigned to a worker
+                    if (
+                        task_from_channel
+                        and task_from_channel.assigned_worker_id
+                    ):
+                        logger.debug(
+                            f"Task {task.id} already assigned to "
+                            f"{task_from_channel.assigned_worker_id}, "
+                            f"skipping to prevent duplicate"
+                        )
+                        continue
+                except Exception as e:
+                    logger.info(
+                        f"Task {task.id} non existent in channel. "
+                        f"Assigning task: {e}"
+                    )
                 dependencies = self._task_dependencies[task.id]
-
-                #
-
-                    dep_id in
-
-
-
-
-
+
+                # Check if all dependencies are in completed state
+                all_deps_completed = all(
+                    dep_id in completed_tasks_info for dep_id in dependencies
+                )
+
+                # Only proceed with dependency checks if all deps are completed
+                if all_deps_completed:
+                    # Check if all dependencies succeeded (state is DONE)
+                    all_deps_done = all(
+                        completed_tasks_info[dep_id] == TaskState.DONE
+                        for dep_id in dependencies
                     )
-
-
+
+                    # Check if any dependency failed
+                    any_dep_failed = any(
+                        completed_tasks_info[dep_id] == TaskState.FAILED
+                        for dep_id in dependencies
+                    )
+
+                    if all_deps_done:
+                        # All dependencies completed successfully - post the
+                        # task
+                        assignee_id = self._assignees[task.id]
+                        logger.debug(
+                            f"Posting task {task.id} to "
+                            f"assignee {assignee_id}. "
+                            f"Dependencies met."
+                        )
+                        await self._post_task(task, assignee_id)
+                        posted_tasks.append(task)
+                    elif any_dep_failed:
+                        # Check if any failed dependencies can still be retried
+                        failed_deps = [
+                            dep_id
+                            for dep_id in dependencies
+                            if completed_tasks_info[dep_id] == TaskState.FAILED
+                        ]
+
+                        # Check if any failed dependency is still retryable
+                        failed_tasks_with_retry_potential = []
+                        permanently_failed_deps = []
+
+                        for dep_id in failed_deps:
+                            # Find the failed dependency task
+                            failed_task = next(
+                                (
+                                    t
+                                    for t in self._completed_tasks
+                                    if t.id == dep_id
+                                ),
+                                None,
+                            )
+                            if (
+                                failed_task
+                                and failed_task.failure_count
+                                < MAX_TASK_RETRIES
+                            ):
+                                failed_tasks_with_retry_potential.append(
+                                    dep_id
+                                )
+                            else:
+                                permanently_failed_deps.append(dep_id)
+
+                        # Only fail the task if ALL dependencies are
+                        # permanently failed
+                        if (
+                            permanently_failed_deps
+                            and not failed_tasks_with_retry_potential
+                        ):
+                            logger.error(
+                                f"Task {task.id} cannot proceed: dependencies "
+                                f"{permanently_failed_deps} have "
+                                f"permanently failed. "
+                                f"Marking task as failed."
+                            )
+                            task.state = TaskState.FAILED
+                            task.result = (
+                                f"Task failed due to permanently "
+                                f"failed dependencies: "
+                                f"{permanently_failed_deps}"
+                            )
+
+                            # Log the failure to metrics
+                            task_failed_event = TaskFailedEvent(
+                                task_id=task.id,
+                                worker_id=task.assigned_worker_id or "unknown",
+                                error_message=task.result,
+                                metadata={
+                                    'failure_reason': 'dependency_failure',
+                                    'failed_dependencies': (
+                                        permanently_failed_deps
+                                    ),
+                                },
+                            )
+                            for cb in self._callbacks:
+                                cb.log_task_failed(task_failed_event)
+
+                            self._completed_tasks.append(task)
+                            self._cleanup_task_tracking(task.id)
+                            posted_tasks.append(task)  # Remove from pending
+                        else:
+                            # Some dependencies may still be retried, keep
+                            # task pending
+                            logger.debug(
+                                f"Task {task.id} waiting: dependencies "
+                                f"{failed_tasks_with_retry_potential} "
+                                f"failed but may be retried "
+                                f"(attempt < {MAX_TASK_RETRIES})"
+                            )
+                # else: Not all dependencies completed yet, skip this task
 
         # Step 3: Remove the posted tasks from the pending list
         for task in posted_tasks:
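The posting loop above gates each pending task on its dependencies: post only when every dependency is DONE, fail only when every failed dependency has exhausted its retries, otherwise keep waiting. A compact sketch of that three-way gate (the types and `MAX_RETRIES` cap are illustrative):

```python
from enum import Enum
from typing import Dict, List


class State(Enum):
    DONE = "done"
    FAILED = "failed"


MAX_RETRIES = 3  # illustrative retry cap


def gate(
    dep_ids: List[str],
    completed: Dict[str, State],  # states of finished dependencies
    failures: Dict[str, int],     # failure counts per task id
) -> str:
    """Decide 'post', 'fail', or 'wait' for a task from its dependencies."""
    if not all(d in completed for d in dep_ids):
        return "wait"  # some dependency has not finished yet
    if all(completed[d] == State.DONE for d in dep_ids):
        return "post"
    # At least one dependency failed; fail only if none can be retried.
    retryable = [
        d
        for d in dep_ids
        if completed[d] == State.FAILED and failures.get(d, 0) < MAX_RETRIES
    ]
    return "wait" if retryable else "fail"


print(gate(["a", "b"], {"a": State.DONE, "b": State.DONE}, {}))  # post
print(gate(["a"], {"a": State.FAILED}, {"a": 3}))                # fail
print(gate(["a"], {"a": State.FAILED}, {"a": 1}))                # wait
```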
@@ -1348,80 +3513,127 @@ class Workforce(BaseNode):
             pass
 
     async def _handle_failed_task(self, task: Task) -> bool:
+        r"""Handle a task that failed during execution.
+
+        Args:
+            task (Task): The failed task
+
+        Returns:
+            bool: True if workforce should halt, False otherwise
+        """
         task.failure_count += 1
 
-
-
-
-
-            worker_id=worker_id,
-            error_message=task.result or "Task execution failed",
-            error_type="TaskFailure",
-            metadata={'failure_count': task.failure_count},
-        )
+        # Determine detailed failure information
+        failure_reason = task.result or "Unknown error"
+        worker_id = task.assigned_worker_id or "unknown"
+        detailed_error = f"{failure_reason} (assigned to worker: {worker_id})"
 
-
-
+        logger.error(
+            f"Task {task.id} failed (attempt "
+            f"{task.failure_count}/{MAX_TASK_RETRIES}): {detailed_error}"
+        )
 
-
-
-
+        print(
+            f"{Fore.RED}❌ Task {task.id} failed "
+            f"(attempt {task.failure_count}/{MAX_TASK_RETRIES}): "
+            f"{failure_reason}{Fore.RESET}"
+        )
 
-
-
-
-
-
-
-
+        task_failed_event = TaskFailedEvent(
+            task_id=task.id,
+            worker_id=worker_id,
+            error_message=detailed_error,
+            metadata={
+                'failure_count': task.failure_count,
+                'task_content': task.content,
+                'result_length': len(task.result) if task.result else 0,
+            },
+        )
+        for cb in self._callbacks:
+            cb.log_task_failed(task_failed_event)
+
+        # Check for immediate halt conditions
+        if task.failure_count >= MAX_TASK_RETRIES:
+            logger.error(
+                f"Task {task.id} has exceeded maximum retry attempts "
+                f"({MAX_TASK_RETRIES}). Final failure reason: "
+                f"{detailed_error}. "
+                f"Task content: '{task.content}'"
+            )
+            self._cleanup_task_tracking(task.id)
+            self._completed_tasks.append(task)
+            if task.id in self._assignees:
+                await self._channel.archive_task(task.id)
+            return True
 
-
-
-
-
-
-
-
-
-
-
-
-                task_id=subtask.id,
-                description=subtask.content,
-                parent_task_id=task.id,
-                task_type=subtask.type,
-                metadata=subtask.additional_info,
-            )
-        # Insert packets at the head of the queue
-        self._pending_tasks.extendleft(reversed(subtasks))
+        if len(self._pending_tasks) > MAX_PENDING_TASKS_LIMIT:
+            logger.error(
+                f"Too many pending tasks ({len(self._pending_tasks)} > "
+                f"{MAX_PENDING_TASKS_LIMIT}). Halting to prevent task "
+                f"explosion. Last failed task: {task.id}"
+            )
+            self._cleanup_task_tracking(task.id)
+            self._completed_tasks.append(task)
+            if task.id in self._assignees:
+                await self._channel.archive_task(task.id)
+            return True
 
-
-
-
-
-            f"task {task.id}"
-        )
-        self._sync_shared_memory()
+        # Use intelligent failure analysis to decide recovery strategy
+        recovery_decision = self._analyze_task(
+            task, for_failure=True, error_message=detailed_error
+        )
 
-
-
+        strategy_str = (
+            recovery_decision.recovery_strategy.value
+            if recovery_decision.recovery_strategy
+            else "none"
+        )
+        logger.info(
+            f"Task {task.id} failure "
+            f"analysis: {strategy_str} - "
+            f"{recovery_decision.reasoning}"
+        )
+
+        # Clean up tracking before attempting recovery
         if task.id in self._assignees:
             await self._channel.archive_task(task.id)
+        self._cleanup_task_tracking(task.id)
+
+        # Apply recovery strategy
+        try:
+            is_decompose = await self._apply_recovery_strategy(
+                task, recovery_decision
+            )
 
+            # For decompose, we handle it specially
+            if is_decompose:
+                # Task was decomposed, add to completed tasks
+                self._completed_tasks.append(task)
+                return False
+
+        except Exception as e:
+            logger.error(
+                f"Recovery strategy failed for task {task.id}: {e}",
+                exc_info=True,
+            )
+            # If max retries reached, halt the workforce
+            if task.failure_count >= MAX_TASK_RETRIES:
+                self._completed_tasks.append(task)
+                return True
+            self._completed_tasks.append(task)
+            return False
+
+        # Task is being retried - don't add to completed tasks
+        # It will be added when it actually completes or permanently fails
         logger.debug(
-            f"Task {task.id}
-            f"
+            f"Task {task.id} is being retried (strategy: "
+            f"{recovery_decision.recovery_strategy}). "
+            f"Not adding to completed tasks until final outcome."
         )
-        # Mark task as completed for dependency tracking
-        self._completed_tasks.append(task)
 
-        #
-
-        # Sync shared memory after task completion to share knowledge
+        # Sync shared memory after task recovery
         if self.share_memory:
-            logger.info(
-                f"Syncing shared memory after task {task.id} completion"
-            )
+            logger.info(f"Syncing shared memory after task {task.id} recovery")
             self._sync_shared_memory()
 
         # Check if any pending tasks are now ready to execute
@@ -1429,33 +3641,34 @@ class Workforce(BaseNode):
         return False
 
     async def _handle_completed_task(self, task: Task) -> None:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            processing_time_seconds
-
-            ]
-
-        # Get token usage from task additional info
-        if (
-            task.additional_info is not None
-            and 'token_usage' in task.additional_info
-        ):
-            token_usage = task.additional_info['token_usage']
+        worker_id = task.assigned_worker_id or "unknown"
+        processing_time_seconds = None
+        token_usage = None
+
+        # Get processing time from task start time or additional info
+        if task.id in self._task_start_times:
+            processing_time_seconds = (
+                time.time() - self._task_start_times[task.id]
+            )
+            self._cleanup_task_tracking(task.id)
+        elif (
+            task.additional_info is not None
+            and 'processing_time_seconds' in task.additional_info
+        ):
+            processing_time_seconds = task.additional_info[
+                'processing_time_seconds'
+            ]
 
-
+        # Get token usage from task additional info (preferred - actual
+        # usage)
+        if (
+            task.additional_info is not None
+            and 'token_usage' in task.additional_info
+        ):
+            token_usage = task.additional_info['token_usage']
+        else:
+            # Fallback: Try to get token usage from SingleAgentWorker
+            # memory
             assignee_node = next(
                 (
                     child
@@ -1465,18 +3678,23 @@ class Workforce(BaseNode):
                 None,
             )
             if isinstance(assignee_node, SingleAgentWorker):
-
-
-
-
-
-
-
-
-
-
-
-
+                try:
+                    _, total_tokens = assignee_node.worker.memory.get_context()
+                    token_usage = {'total_tokens': total_tokens}
+                except Exception:
+                    token_usage = None
+
+        # Log the completed task
+        task_completed_event = TaskCompletedEvent(
+            task_id=task.id,
+            worker_id=worker_id,
+            result_summary=task.result if task.result else "Completed",
+            processing_time_seconds=processing_time_seconds,
+            token_usage=token_usage,
+            metadata={'current_state': task.state.value},
+        )
+        for cb in self._callbacks:
+            cb.log_task_completed(task_completed_event)
 
         # Find and remove the completed task from pending tasks
         tasks_list = list(self._pending_tasks)
@@ -1495,31 +3713,65 @@ class Workforce(BaseNode):
                 break
 
         if not found_and_removed:
-            # Task was already removed from pending queue (
-            # it
-            # draw user attention with a warning; record at debug level.
+            # Task was already removed from pending queue (common case when
+            # it was posted and removed immediately).
             logger.debug(
                 f"Completed task {task.id} was already removed from pending "
-                "queue."
+                "queue (normal for posted tasks)."
             )
 
         # Archive the task and update dependency tracking
         if task.id in self._assignees:
             await self._channel.archive_task(task.id)
 
-        # Ensure it's in completed tasks set
-
+        # Ensure it's in completed tasks set by updating if it exists or
+        # appending if it's new.
+        task_found_in_completed = False
+        for i, t in enumerate(self._completed_tasks):
+            if t.id == task.id:
+                self._completed_tasks[i] = task
+                task_found_in_completed = True
+                break
+        if not task_found_in_completed:
+            self._completed_tasks.append(task)
 
         # Handle parent task completion logic
         parent = task.parent
-        if parent
+        if parent:
+            # Check if all subtasks are completed and successful
             all_subtasks_done = all(
-
+                any(
+                    t.id == sub.id and t.state == TaskState.DONE
+                    for t in self._completed_tasks
+                )
                 for sub in parent.subtasks
             )
             if all_subtasks_done:
-                #
+                # Collect results from successful subtasks only
+                successful_results = []
+                for sub in parent.subtasks:
+                    completed_subtask = next(
+                        (
+                            t
+                            for t in self._completed_tasks
+                            if t.id == sub.id and t.state == TaskState.DONE
+                        ),
+                        None,
+                    )
+                    if completed_subtask and completed_subtask.result:
+                        successful_results.append(
+                            f"--- Subtask {sub.id} Result ---\n"
+                            f"{completed_subtask.result}"
+                        )
+
+                # Set parent task state and result
                 parent.state = TaskState.DONE
+                parent.result = (
+                    "\n\n".join(successful_results)
+                    if successful_results
+                    else "All subtasks completed"
+                )
+
                 logger.debug(
                     f"All subtasks of {parent.id} are done. "
                     f"Marking parent as complete."
@@ -1562,15 +3814,23 @@ class Workforce(BaseNode):
         r"""Returns an ASCII tree representation of the task hierarchy and
         worker status.
         """
-
-
-
+        metrics_cb: List[WorkforceMetrics] = [
+            cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
+        ]
+        if len(metrics_cb) == 0:
+            return "Metrics Callback not initialized."
+        else:
+            return metrics_cb[0].get_ascii_tree_representation()
 
     def get_workforce_kpis(self) -> Dict[str, Any]:
         r"""Returns a dictionary of key performance indicators."""
-
-
-
+        metrics_cb: List[WorkforceMetrics] = [
+            cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
+        ]
+        if len(metrics_cb) == 0:
+            return {"error": "Metrics Callback not initialized."}
+        else:
+            return metrics_cb[0].get_kpis()
 
     def dump_workforce_logs(self, file_path: str) -> None:
         r"""Dumps all collected logs to a JSON file.
@@ -1578,13 +3838,133 @@ class Workforce(BaseNode):
         Args:
            file_path (str): The path to the JSON file.
        """
-
+        metrics_cb: List[WorkforceMetrics] = [
+            cb for cb in self._callbacks if isinstance(cb, WorkforceMetrics)
+        ]
+        if len(metrics_cb) == 0:
            print("Logger not initialized. Cannot dump logs.")
            return
-
+        metrics_cb[0].dump_to_json(file_path)
        # Use logger.info or print, consistent with existing style
        logger.info(f"Workforce logs dumped to {file_path}")
 
+    async def _handle_skip_task(self) -> bool:
+        r"""Handle skip request by marking pending and in-flight tasks
+        as completed.
+
+        Returns:
+            bool: True if workforce should stop (no independent tasks),
+                False to continue.
+        """
+        logger.info("Skip requested, processing skip logic.")
+
+        # Mark all pending tasks as completed instead of just clearing
+        pending_tasks_to_complete = list(self._pending_tasks)
+        if pending_tasks_to_complete:
+            logger.info(
+                f"Marking {len(pending_tasks_to_complete)} pending tasks "
+                f"as completed."
+            )
+            for task in pending_tasks_to_complete:
+                # Don't remove tasks that need decomposition
+                if task.additional_info and task.additional_info.get(
+                    '_needs_decomposition', False
+                ):
+                    continue
+                # Set task state to DONE and add a completion message
+                task.state = TaskState.DONE
+                task.result = "Task marked as completed due to skip request"
+
+                # Use the existing handle completed task function
+                await self._handle_completed_task(task)
+
+        # Handle in-flight tasks if they exist
+        if self._in_flight_tasks > 0:
+            logger.info(
+                f"Found {self._in_flight_tasks} in-flight tasks. "
+                f"Retrieving and completing them."
+            )
+            try:
+                # Get all in-flight tasks for this publisher from the channel
+                in_flight_tasks = await self._channel.get_in_flight_tasks(
+                    self.node_id
+                )
+                logger.info(
+                    f"Retrieved {len(in_flight_tasks)} in-flight "
+                    f"tasks from channel."
+                )
+
+                for task in in_flight_tasks:
+                    # Set task state to DONE and add a completion message
+                    task.state = TaskState.DONE
+                    task.result = (
+                        "Task marked as completed due to skip request"
+                    )
+
+                    # Remove the task from the channel to avoid hanging
+                    await self._channel.remove_task(task.id)
+
+                    # Decrement in-flight counter
+                    self._decrement_in_flight_tasks(
+                        task.id, "skip request - removed from channel"
+                    )
+
+                    # Handle as completed task to update dependencies
+                    await self._handle_completed_task(task)
+
+                    logger.info(
+                        f"Completed in-flight task {task.id} due "
+                        f"to skip request."
+                    )
+
+            except Exception as e:
+                logger.error(
+                    f"Error handling in-flight tasks during skip: {e}",
+                    exc_info=True,
+                )
+                # Reset in-flight counter to prevent hanging
+                self._in_flight_tasks = 0
+
+        # Check if there are any main pending tasks after filtering
+        if self._pending_tasks:
+            # Check if the first pending task needs decomposition
+            next_task = self._pending_tasks[0]
+            if next_task.additional_info and next_task.additional_info.get(
+                '_needs_decomposition'
+            ):
+                logger.info(
+                    f"Decomposing main task {next_task.id} after skip request."
+                )
+                try:
+                    # Remove the decomposition flag to avoid re-decomposition
+                    next_task.additional_info['_needs_decomposition'] = False
+
+                    # Decompose the task and append subtasks to _pending_tasks
+                    await self.handle_decompose_append_task(
+                        next_task, reset=False
+                    )
+
+                    # Mark the main task as completed and remove from pending
+                    await self._handle_completed_task(next_task)
+                    logger.info(
+                        f"Main task {next_task.id} decomposed after "
+                        f"skip request"
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"Error decomposing main task {next_task.id} "
+                        f"after skip: {e}",
+                        exc_info=True,
+                    )
+
+            logger.info("Pending tasks available after skip, continuing.")
+            await self._post_ready_tasks()
+            return False  # Continue processing
+        else:
+            # No pending tasks available, act like stop
+            logger.info("No pending tasks available, acting like stop.")
+            return True  # Stop processing
+
     @check_if_running(False)
     async def _listen_to_channel(self) -> None:
         r"""Continuously listen to the channel, post task to the channel and
@@ -1613,6 +3993,75 @@ class Workforce(BaseNode):
                     logger.info("Stop requested, breaking execution loop.")
                     break
 
+                # Check for skip request after potential pause
+                if self._skip_requested:
+                    should_stop = await self._handle_skip_task()
+                    if should_stop:
+                        self._stop_requested = True
+                        break
+
+                    # Reset skip flag
+                    self._skip_requested = False
+                    continue
+
+                # Check if we should decompose a main task
+                # Only decompose when no tasks are in flight and pending queue
+                # is empty
+                if not self._pending_tasks and self._in_flight_tasks == 0:
+                    # All tasks completed, will exit loop
+                    break
+
+                # Check if the first pending task needs decomposition
+                # This happens when add_task(as_subtask=False) was called
+                if self._pending_tasks and self._in_flight_tasks == 0:
+                    next_task = self._pending_tasks[0]
+                    if (
+                        next_task.additional_info
+                        and next_task.additional_info.get(
+                            '_needs_decomposition'
+                        )
+                    ):
+                        logger.info(f"Decomposing main task: {next_task.id}")
+                        try:
+                            # Remove the decomposition flag to avoid
+                            # re-decomposition
+                            next_task.additional_info[
+                                '_needs_decomposition'
+                            ] = False
+
+                            # Decompose the task and append subtasks to
+                            # _pending_tasks
+                            await self.handle_decompose_append_task(
+                                next_task, reset=False
+                            )
+
+                            # Mark the main task as completed (decomposition
+                            # successful) and Remove it from pending tasks
+                            await self._handle_completed_task(next_task)
+                            logger.info(
+                                f"Main task {next_task.id} decomposed and "
+                                f"ready for processing"
+                            )
+                        except Exception as e:
+                            logger.error(
+                                f"Error decomposing main task {next_task.id}: "
+                                f"{e}",
+                                exc_info=True,
+                            )
+                            # Revert back to the queue for retry later if
+                            # decomposition failed
+                            if not self._pending_tasks:
+                                self._pending_tasks.appendleft(next_task)
+                            else:
+                                logger.warning(
+                                    "Pending tasks exist after decomposition "
+                                    "error."
+                                )
+
+                        # Immediately assign and post the transferred tasks
+                        await self._post_ready_tasks()
+                        continue
+
                 # Save snapshot before processing next task
                 if self._pending_tasks:
                     current_task = self._pending_tasks[0]
@@ -1626,9 +4075,37 @@ class Workforce(BaseNode):
                     )
                     self._last_snapshot_time = time.time()
 
-                # Get returned task
-
-
+                # Get returned task
+                try:
+                    returned_task = await self._get_returned_task()
+                except asyncio.TimeoutError:
+                    # Handle timeout - check if we have tasks stuck in flight
+                    if self._in_flight_tasks > 0:
+                        logger.warning(
+                            f"Timeout waiting for {self._in_flight_tasks} "
+                            f"in-flight tasks. Breaking to prevent hanging."
+                        )
+                        # Break the loop to prevent indefinite hanging
+                        # The finally block will handle cleanup
+                        break
+                    else:
+                        # No tasks in flight, safe to continue
+                        await self._post_ready_tasks()
+                        continue
+
+                # If no task was returned (other errors), continue
+                if returned_task is None:
+                    logger.debug(
+                        f"No task returned in workforce {self.node_id}. "
+                        f"Pending: {len(self._pending_tasks)}, "
+                        f"In-flight: {self._in_flight_tasks}"
+                    )
+                    await self._post_ready_tasks()
+                    continue
+
+                self._decrement_in_flight_tasks(
+                    returned_task.id, "task returned successfully"
+                )
 
                 # Check for stop request after getting task
                 if self._stop_requested:
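The loop above distinguishes a timeout (break if work is stuck in flight, otherwise keep polling) from other errors (treated as "no task returned"). A minimal sketch of that `asyncio.wait_for` pattern in isolation (the queue stands in for the task channel and is illustrative):

```python
import asyncio
from typing import Optional


async def get_returned(
    queue: "asyncio.Queue[str]", timeout: float = 5.0
) -> Optional[str]:
    """Wait for a returned item; re-raise timeouts, swallow other errors."""
    try:
        return await asyncio.wait_for(queue.get(), timeout=timeout)
    except asyncio.TimeoutError:
        raise  # caller decides whether to break or keep polling
    except Exception:
        return None  # treated as "no task returned"


async def main() -> None:
    q: "asyncio.Queue[str]" = asyncio.Queue()
    await q.put("task-1")
    print(await get_returned(q))  # task-1
    try:
        await get_returned(q, timeout=0.1)
    except asyncio.TimeoutError:
        print("timed out, nothing in flight -> keep polling")


asyncio.run(main())
```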
@@ -1637,24 +4114,178 @@ class Workforce(BaseNode):
|
|
|
1637
4114
|
|
|
1638
4115
|
# Process the returned task based on its state
|
|
1639
4116
|
if returned_task.state == TaskState.DONE:
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
4117
|
+
# Check if the "completed" task actually failed to provide
|
|
4118
|
+
# useful results
|
|
4119
|
+
if is_task_result_insufficient(returned_task):
|
|
4120
|
+
result_preview = (
|
|
4121
|
+
returned_task.result
|
|
4122
|
+
if returned_task.result
|
|
4123
|
+
else "No result"
|
|
4124
|
+
)
|
|
4125
|
+
logger.warning(
|
|
4126
|
+
f"Task {returned_task.id} marked as DONE but "
|
|
4127
|
+
f"result is insufficient. "
|
|
4128
|
+
f"Treating as failed. Result: '{result_preview}'"
|
|
4129
|
+
)
|
|
4130
|
+
returned_task.state = TaskState.FAILED
|
|
4131
|
+
try:
|
|
4132
|
+
halt = await self._handle_failed_task(
|
|
4133
|
+
returned_task
|
|
4134
|
+
)
|
|
4135
|
+
if not halt:
|
|
4136
|
+
continue
|
|
4137
|
+
|
|
4138
|
+
# Do not halt if we have main tasks in queue
|
|
4139
|
+
if len(self.get_main_task_queue()) > 0:
|
|
4140
|
+
print(
|
|
4141
|
+
f"{Fore.RED}Task {returned_task.id} has "
|
|
4142
|
+
f"failed for {MAX_TASK_RETRIES} times "
|
|
4143
|
+
f"after insufficient results, skipping "
|
|
4144
|
+
f"that task. Final error: "
|
|
4145
|
+
f"{returned_task.result or 'Unknown err'}"
|
|
4146
|
+
f"{Fore.RESET}"
|
|
4147
|
+
)
|
|
4148
|
+
self._skip_requested = True
|
|
4149
|
+
continue
|
|
4150
|
+
|
|
4151
|
+
print(
|
|
4152
|
+
f"{Fore.RED}Task {returned_task.id} has "
|
|
4153
|
+
f"failed for {MAX_TASK_RETRIES} times after "
|
|
4154
|
+
f"insufficient results, halting the "
|
|
4155
|
+
f"workforce. Final error: "
|
|
4156
|
+
f"{returned_task.result or 'Unknown error'}"
|
|
4157
|
+
f"{Fore.RESET}"
|
|
4158
|
+
)
|
|
4159
|
+
await self._graceful_shutdown(returned_task)
|
|
4160
|
+
break
|
|
4161
|
+
except Exception as e:
|
|
4162
|
+
logger.error(
|
|
4163
|
+
f"Error handling insufficient task result "
|
|
4164
|
+
f"{returned_task.id}: {e}",
|
|
4165
|
+
exc_info=True,
|
|
4166
|
+
)
|
|
4167
|
+
continue
|
|
4168
|
+
else:
|
|
4169
|
+
quality_eval = self._analyze_task(
|
|
4170
|
+
returned_task, for_failure=False
|
|
4171
|
+
)
|
|
4172
|
+
|
|
4173
|
+
if not quality_eval.quality_sufficient:
|
|
4174
|
+
logger.info(
|
|
4175
|
+
f"Task {returned_task.id} quality check: "
|
|
4176
|
+
f"score={quality_eval.quality_score}, "
|
|
4177
|
+
f"issues={quality_eval.issues}, "
|
|
4178
|
+
f"strategy={quality_eval.recovery_strategy}"
|
|
4179
|
+
)
|
|
4180
|
+
|
|
4181
|
+
# Check retry limit before attempting recovery
|
|
4182
|
+
if returned_task.failure_count >= 2:
|
|
4183
|
+
print(
|
|
4184
|
+
f"{Fore.YELLOW}Task {returned_task.id} "
|
|
4185
|
+
f"completed with low quality score: "
|
|
4186
|
+
f"{quality_eval.quality_score} "
|
|
4187
|
+
f"(retry limit reached){Fore.RESET}"
|
|
4188
|
+
)
|
|
4189
|
+
await self._handle_completed_task(
|
|
4190
|
+
returned_task
|
|
4191
|
+
)
|
|
4192
|
+
continue
|
|
4193
|
+
|
|
4194
|
+
# Print visual feedback for quality-failed tasks
|
|
4195
|
+
# with recovery strategy
|
|
4196
|
+
recovery_action = (
|
|
4197
|
+
quality_eval.recovery_strategy.value
|
|
4198
|
+
if quality_eval.recovery_strategy
|
|
4199
|
+
else ""
|
|
4200
|
+
)
|
|
4201
|
+
print(
|
|
4202
|
+
f"{Fore.YELLOW}⚠️ Task {returned_task.id} "
|
|
4203
|
+
f"failed quality check (score: "
|
|
4204
|
+
f"{quality_eval.quality_score}). "
|
|
4205
|
+
f"Issues: {', '.join(quality_eval.issues)}. "
|
|
4206
|
+
f"Recovery: {recovery_action}{Fore.RESET}"
|
|
4207
|
+
)
|
|
4208
|
+
|
|
4209
|
+
# Mark as failed for recovery
|
|
4210
|
+
returned_task.failure_count += 1
|
|
4211
|
+
returned_task.state = TaskState.FAILED
|
|
4212
|
+
returned_task.result = (
|
|
4213
|
+
f"Quality insufficient (score: "
|
|
4214
|
+
f"{quality_eval.quality_score}). "
|
|
4215
|
+
f"Issues: {', '.join(quality_eval.issues)}"
|
|
4216
|
+
)
|
|
4217
|
+
|
|
4218
|
+
# Clean up tracking before attempting recovery
|
|
4219
|
+
if returned_task.id in self._assignees:
|
|
4220
|
+
await self._channel.archive_task(
|
|
4221
|
+
returned_task.id
|
|
4222
|
+
)
|
|
4223
|
+
self._cleanup_task_tracking(returned_task.id)
|
|
4224
|
+
|
|
4225
|
+
# Apply LLM-recommended recovery strategy
|
|
4226
|
+
try:
|
|
4227
|
+
is_decompose = (
|
|
4228
|
+
await self._apply_recovery_strategy(
|
|
4229
|
+
returned_task, quality_eval
|
|
4230
|
+
)
|
|
4231
|
+
)
|
|
4232
|
+
|
|
4233
|
+
# For decompose, cleanup happens in the method
|
|
4234
|
+
if is_decompose:
|
|
4235
|
+
continue
|
|
4236
|
+
|
|
4237
|
+
except Exception as e:
|
|
4238
|
+
logger.error(
|
|
4239
|
+
f"Error handling quality-failed task "
|
|
4240
|
+
f"{returned_task.id}: {e}",
|
|
4241
|
+
exc_info=True,
|
|
4242
|
+
)
|
|
4243
|
+
continue
|
|
4244
|
+
else:
|
|
4245
|
+
print(
|
|
4246
|
+
f"{Fore.CYAN}Task {returned_task.id} "
|
|
4247
|
+
f"completed successfully (quality score: "
|
|
4248
|
+
f"{quality_eval.quality_score}).{Fore.RESET}"
|
|
4249
|
+
)
|
|
4250
|
+
await self._handle_completed_task(returned_task)
|
|
                 elif returned_task.state == TaskState.FAILED:
-
-
+                    try:
+                        halt = await self._handle_failed_task(returned_task)
+                        if not halt:
+                            continue
+
+                        # Do not halt if we have main tasks in queue
+                        if len(self.get_main_task_queue()) > 0:
+                            print(
+                                f"{Fore.RED}Task {returned_task.id} has "
+                                f"failed for {MAX_TASK_RETRIES} times, "
+                                f"skipping that task. Final error: "
+                                f"{returned_task.result or 'Unknown error'}"
+                                f"{Fore.RESET}"
+                            )
+                            self._skip_requested = True
+                            continue
+
+                        print(
+                            f"{Fore.RED}Task {returned_task.id} has failed "
+                            f"for {MAX_TASK_RETRIES} times, halting "
+                            f"the workforce. Final error: "
+                            f"{returned_task.result or 'Unknown error'}"
+                            f"{Fore.RESET}"
+                        )
+                        # Graceful shutdown instead of immediate break
+                        await self._graceful_shutdown(returned_task)
+                        break
+                    except Exception as e:
+                        logger.error(
+                            f"Error handling failed task "
+                            f"{returned_task.id}: {e}",
+                            exc_info=True,
+                        )
+                        # Continue to prevent hanging
                         continue
-                    print(
-                        f"{Fore.RED}Task {returned_task.id} has failed "
-                        f"for 3 times, halting the workforce.{Fore.RESET}"
-                    )
-                    # Graceful shutdown instead of immediate break
-                    await self._graceful_shutdown(returned_task)
-                    break
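The FAILED branch above distinguishes three outcomes: retry while the budget lasts, skip the task when other main tasks are still queued, and halt the workforce otherwise. A standalone sketch of that policy, with a hypothetical queue and the retry cap standing in for workforce state:

# Illustrative only: MAX_TASK_RETRIES and the queue are stand-ins for
# the workforce's real internals.
MAX_TASK_RETRIES = 3

def on_task_failure(failure_count: int, main_task_queue: list) -> str:
    """Mirror the diff's policy: retry, skip, or halt."""
    if failure_count < MAX_TASK_RETRIES:
        return "retry"
    # Retry budget exhausted: skip if other main tasks are waiting,
    # otherwise shut the workforce down gracefully.
    if len(main_task_queue) > 0:
        return "skip"
    return "halt"

assert on_task_failure(1, []) == "retry"
assert on_task_failure(3, ["task-2"]) == "skip"
assert on_task_failure(3, []) == "halt"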
                 elif returned_task.state == TaskState.OPEN:
-                    # TODO:
+                    # TODO: Add logic for OPEN
                     pass
                 else:
                     raise ValueError(
@@ -1662,7 +4293,19 @@ class Workforce(BaseNode):
                     )

             except Exception as e:
-
+                # Decrement in-flight counter to prevent hanging
+                self._decrement_in_flight_tasks(
+                    "unknown", "exception in task processing loop"
+                )
+
+                logger.error(
+                    f"Error processing task in workforce {self.node_id}: {e}"
+                    f"Workforce state - Pending tasks: "
+                    f"{len(self._pending_tasks)}, "
+                    f"In-flight tasks: {self._in_flight_tasks}, "
+                    f"Completed tasks: {len(self._completed_tasks)}"
+                )
+
                 if self._stop_requested:
                     break
                 # Continue with next iteration unless stop is requested
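The except handler above decrements the in-flight counter so the loop's idle check (no pending and no in-flight tasks) cannot wedge after an error. A minimal sketch of that bookkeeping, with hypothetical names:

import logging

logger = logging.getLogger(__name__)

class InFlightTracker:
    """Illustrative counter; the real workforce tracks this per node."""

    def __init__(self) -> None:
        self._in_flight_tasks = 0

    def increment(self) -> None:
        self._in_flight_tasks += 1

    def decrement(self, task_id: str, reason: str) -> None:
        # Clamp at zero so a double-decrement cannot go negative.
        if self._in_flight_tasks > 0:
            self._in_flight_tasks -= 1
        logger.debug("Decremented for %s: %s", task_id, reason)

    @property
    def idle(self) -> bool:
        return self._in_flight_tasks == 0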
@@ -1675,6 +4318,9 @@ class Workforce(BaseNode):
             elif not self._pending_tasks and self._in_flight_tasks == 0:
                 self._state = WorkforceState.IDLE
                 logger.info("All tasks completed.")
+                all_tasks_completed_event = AllTasksCompletedEvent()
+                for cb in self._callbacks:
+                    cb.log_all_tasks_completed(all_tasks_completed_event)

                 # shut down the whole workforce tree
                 self.stop()
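The new idle transition fans an AllTasksCompletedEvent out to registered callbacks, which are assumed to expose a log_all_tasks_completed hook. A hedged sketch of a compatible callback; the actual callback base class and event type are defined elsewhere in this diff:

# Hypothetical minimal callback; the method name follows the diff.
class AllTasksCompletedEvent:
    """Stand-in for the event type referenced in the diff."""

class PrintingCallback:
    def log_all_tasks_completed(self, event) -> None:
        print(f"workforce finished: {event!r}")

callbacks = [PrintingCallback()]
event = AllTasksCompletedEvent()
for cb in callbacks:
    cb.log_all_tasks_completed(event)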
@@ -1716,11 +4362,50 @@ class Workforce(BaseNode):
         r"""Stop all the child nodes under it. The node itself will be stopped
         by its parent node.
         """
+        # Stop all child nodes first
         for child in self._children:
             if child._running:
                 child.stop()
-
-
+
+        # Cancel child listening tasks
+        if self._child_listening_tasks:
+            try:
+                loop = asyncio.get_running_loop()
+                if loop and not loop.is_closed():
+                    # Create graceful cleanup task
+                    async def cleanup():
+                        await asyncio.sleep(0.1)  # Brief grace period
+                        for task in self._child_listening_tasks:
+                            if not task.done():
+                                task.cancel()
+
+                        # Handle both asyncio.Task and concurrent.futures.
+                        # Future
+                        awaitables = []
+                        for task in self._child_listening_tasks:
+                            if isinstance(task, concurrent.futures.Future):
+                                # Convert Future to awaitable
+                                awaitables.append(asyncio.wrap_future(task))
+                            else:
+                                # Already an asyncio.Task
+                                awaitables.append(task)
+
+                        await asyncio.gather(
+                            *awaitables,
+                            return_exceptions=True,
+                        )
+
+                    self._cleanup_task = loop.create_task(cleanup())
+                else:
+                    # No active loop, cancel immediately
+                    for task in self._child_listening_tasks:
+                        task.cancel()
+            except (RuntimeError, Exception) as e:
+                # Fallback: cancel immediately
+                logger.debug(f"Exception during task cleanup: {e}")
+                for task in self._child_listening_tasks:
+                    task.cancel()
+
         self._running = False

     def clone(self, with_memory: bool = False) -> 'Workforce':
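The cleanup coroutine added to stop() awaits concurrent.futures.Future handles by converting them with asyncio.wrap_future, so native tasks and thread-pool futures can be drained with one gather. A self-contained sketch of that cancel-then-drain pattern:

import asyncio
import concurrent.futures

async def cancel_and_drain(handles) -> None:
    """Cancel a mixed list of asyncio.Tasks and concurrent Futures,
    then await them so cancellation errors are collected, not raised."""
    for handle in handles:
        if not handle.done():
            handle.cancel()
    # wrap_future turns a concurrent.futures.Future into an awaitable
    # bound to the running loop; asyncio.Tasks pass through unchanged.
    awaitables = [
        asyncio.wrap_future(h)
        if isinstance(h, concurrent.futures.Future)
        else h
        for h in handles
    ]
    await asyncio.gather(*awaitables, return_exceptions=True)

async def main() -> None:
    task = asyncio.create_task(asyncio.sleep(10))
    await cancel_and_drain([task])
    assert task.cancelled()

asyncio.run(main())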
@@ -1738,43 +4423,36 @@ class Workforce(BaseNode):
         """

         # Create a new instance with the same configuration
-        # Extract the original kwargs from the agents to properly clone them
-        coordinator_kwargs = (
-            getattr(self.coordinator_agent, 'init_kwargs', {}) or {}
-        )
-        task_kwargs = getattr(self.task_agent, 'init_kwargs', {}) or {}
-
         new_instance = Workforce(
             description=self.description,
-
-
-
-            if self.
+            coordinator_agent=self.coordinator_agent.clone(with_memory),
+            task_agent=self.task_agent.clone(with_memory),
+            new_worker_agent=self.new_worker_agent.clone(with_memory)
+            if self.new_worker_agent
             else None,
             graceful_shutdown_timeout=self.graceful_shutdown_timeout,
             share_memory=self.share_memory,
-
-
-        new_instance.task_agent = self.task_agent.clone(with_memory)
-        new_instance.coordinator_agent = self.coordinator_agent.clone(
-            with_memory
+            use_structured_output_handler=self.use_structured_output_handler,
+            task_timeout_seconds=self.task_timeout_seconds,
         )

         for child in self._children:
             if isinstance(child, SingleAgentWorker):
                 cloned_worker = child.worker.clone(with_memory)
                 new_instance.add_single_agent_worker(
-                    child.description,
+                    child.description,
+                    cloned_worker,
+                    pool_max_size=10,
                 )
             elif isinstance(child, RolePlayingWorker):
                 new_instance.add_role_playing_worker(
                     child.description,
                     child.assistant_role_name,
                     child.user_role_name,
-                    child.chat_turn_limit,
                     child.assistant_agent_kwargs,
                     child.user_agent_kwargs,
                     child.summarize_agent_kwargs,
+                    child.chat_turn_limit,
                 )
             elif isinstance(child, Workforce):
                 new_instance.add_workforce(child.clone(with_memory))
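The rewritten clone() passes freshly cloned agents straight into the Workforce constructor rather than patching them on afterwards, then re-adds each child worker. A usage sketch, assuming only the public API shown in this diff; the flow is illustrative, not taken from the package's tests:

from camel.societies.workforce import Workforce

wf = Workforce(description="research team")
# ... add_single_agent_worker / add_role_playing_worker calls here ...

twin = wf.clone()  # same topology, fresh agent state
twin_with_memory = wf.clone(with_memory=True)  # also copies memories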
@@ -1868,7 +4546,7 @@ class Workforce(BaseNode):
         )

         try:
-            result_task = await workforce_instance.
+            result_task = await workforce_instance.process_task_async(task)
             return {
                 "status": "success",
                 "task_id": result_task.id,
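The hunk above completes a previously truncated call as process_task_async, which the new code awaits directly. A hedged usage sketch of driving a workforce with it; Task comes from camel.tasks, and process_task_async is the coroutine referenced in this diff:

import asyncio

from camel.tasks import Task

async def run(workforce) -> None:
    task = Task(content="Summarize the design doc", id="t-1")
    result_task = await workforce.process_task_async(task)
    print(result_task.state, result_task.result)

# asyncio.run(run(workforce)) once a configured Workforce is available.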
@@ -2064,6 +4742,18 @@ class Workforce(BaseNode):
         )

         agent = ChatAgent(sys_msg, **(agent_kwargs or {}))
+
+        # Validate agent compatibility
+        try:
+            workforce_instance._validate_agent_compatibility(
+                agent, "Worker agent"
+            )
+        except ValueError as e:
+            return {
+                "status": "error",
+                "message": str(e),
+            }
+
         workforce_instance.add_single_agent_worker(description, agent)

         return {
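The added guard turns a ValueError from _validate_agent_compatibility into a structured error payload instead of letting it propagate to the caller. A minimal sketch of that convention, with a hypothetical validator standing in for the real check on the Workforce class:

# Hypothetical validator; the real check lives on the Workforce class.
def validate_agent_compatibility(agent, role: str) -> None:
    if getattr(agent, "model", None) is None:
        raise ValueError(f"{role} has no model configured")

def add_worker(agent, description: str) -> dict:
    try:
        validate_agent_compatibility(agent, "Worker agent")
    except ValueError as e:
        # Same shape as the diff's error payload.
        return {"status": "error", "message": str(e)}
    return {"status": "success", "description": description}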