deepagents 0.0.12rc1__py3-none-any.whl → 0.0.12rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepagents/__init__.py +7 -5
- deepagents/graph.py +109 -129
- deepagents/middleware/__init__.py +6 -0
- deepagents/middleware/filesystem.py +1125 -0
- deepagents/middleware/subagents.py +481 -0
- {deepagents-0.0.12rc1.dist-info → deepagents-0.0.12rc2.dist-info}/METADATA +12 -11
- deepagents-0.0.12rc2.dist-info/RECORD +10 -0
- {deepagents-0.0.12rc1.dist-info → deepagents-0.0.12rc2.dist-info}/top_level.txt +0 -1
- deepagents/middleware.py +0 -216
- deepagents/model.py +0 -5
- deepagents/prompts.py +0 -429
- deepagents/state.py +0 -33
- deepagents/tools.py +0 -313
- deepagents/types.py +0 -21
- deepagents-0.0.12rc1.dist-info/RECORD +0 -18
- tests/test_deepagents.py +0 -136
- tests/test_filesystem.py +0 -196
- tests/test_hitl.py +0 -51
- tests/test_middleware.py +0 -57
- tests/utils.py +0 -81
- {deepagents-0.0.12rc1.dist-info → deepagents-0.0.12rc2.dist-info}/WHEEL +0 -0
- {deepagents-0.0.12rc1.dist-info → deepagents-0.0.12rc2.dist-info}/licenses/LICENSE +0 -0
deepagents/middleware.py
DELETED
|
@@ -1,216 +0,0 @@
|
|
|
1
|
-
"""DeepAgents implemented as Middleware"""
|
|
2
|
-
|
|
3
|
-
from langchain.agents import create_agent
|
|
4
|
-
from langchain.agents.middleware import AgentMiddleware, AgentState, ModelRequest, SummarizationMiddleware
|
|
5
|
-
from langchain.agents.middleware.prompt_caching import AnthropicPromptCachingMiddleware
|
|
6
|
-
from langchain_core.tools import BaseTool, tool, InjectedToolCallId
|
|
7
|
-
from langchain_core.messages import ToolMessage
|
|
8
|
-
from langchain.chat_models import init_chat_model
|
|
9
|
-
from langgraph.types import Command
|
|
10
|
-
from langgraph.runtime import Runtime
|
|
11
|
-
from langchain.tools.tool_node import InjectedState
|
|
12
|
-
from typing import Annotated
|
|
13
|
-
from deepagents.state import PlanningState, FilesystemState
|
|
14
|
-
from deepagents.tools import write_todos, get_filesystem_tools
|
|
15
|
-
from deepagents.prompts import WRITE_TODOS_SYSTEM_PROMPT, TASK_SYSTEM_PROMPT, FILESYSTEM_SYSTEM_PROMPT, FILESYSTEM_SYSTEM_PROMPT_LONGTERM_SUPPLEMENT, TASK_TOOL_DESCRIPTION, BASE_AGENT_PROMPT
|
|
16
|
-
from deepagents.types import SubAgent, CustomSubAgent
|
|
17
|
-
|
|
18
|
-
###########################
|
|
19
|
-
# Planning Middleware
|
|
20
|
-
###########################
|
|
21
|
-
|
|
22
|
-
class PlanningMiddleware(AgentMiddleware):
    """Middleware that declares the ``write_todos`` tool and injects its usage prompt."""

    # State schema and tool list this middleware declares on the class.
    state_schema = PlanningState
    tools = [write_todos]

    def modify_model_request(self, request: ModelRequest, agent_state: PlanningState, runtime: Runtime) -> ModelRequest:
        """Add ``WRITE_TODOS_SYSTEM_PROMPT`` to the request's system prompt.

        If the request has no system prompt yet, the todo prompt becomes the
        system prompt; otherwise it is appended after a blank line.

        Args:
            request: The outgoing model request; modified in place.
            agent_state: Current agent state (not read here).
            runtime: Graph runtime (not read here).

        Returns:
            The same ``request`` object with its ``system_prompt`` updated.
        """
        existing_prompt = request.system_prompt
        request.system_prompt = (
            WRITE_TODOS_SYSTEM_PROMPT
            if existing_prompt is None
            else existing_prompt + "\n\n" + WRITE_TODOS_SYSTEM_PROMPT
        )
        return request
|
|
32
|
-
|
|
33
|
-
###########################
|
|
34
|
-
# Filesystem Middleware
|
|
35
|
-
###########################
|
|
36
|
-
|
|
37
|
-
class FilesystemMiddleware(AgentMiddleware):
    """Middleware that provides the filesystem tools and their system prompt."""

    state_schema = FilesystemState

    def __init__(
        self,
        *,
        use_longterm_memory: bool = False,
        system_prompt: str | None = None,
        custom_tool_descriptions: dict[str, str] | None = None,
    ) -> None:
        """Configure the filesystem prompt and tools.

        Args:
            use_longterm_memory: Forwarded to ``get_filesystem_tools``. When no
                explicit ``system_prompt`` is given, it also appends
                ``FILESYSTEM_SYSTEM_PROMPT_LONGTERM_SUPPLEMENT`` to the default
                prompt.
            system_prompt: Full replacement for the default filesystem prompt.
                When provided it is used verbatim, and the long-term supplement
                is NOT appended even if ``use_longterm_memory`` is true.
            custom_tool_descriptions: Optional mapping forwarded to
                ``get_filesystem_tools``. ``None`` means "no overrides".
        """
        # Matches SubAgentMiddleware, which also initialises its base class.
        super().__init__()

        if system_prompt is not None:
            self.system_prompt = system_prompt
        elif use_longterm_memory:
            self.system_prompt = FILESYSTEM_SYSTEM_PROMPT + FILESYSTEM_SYSTEM_PROMPT_LONGTERM_SUPPLEMENT
        else:
            self.system_prompt = FILESYSTEM_SYSTEM_PROMPT

        # A fresh dict per instance: a `{}` default in the signature would be
        # one shared object across every instance of this middleware.
        tool_descriptions = {} if custom_tool_descriptions is None else custom_tool_descriptions
        self.tools = get_filesystem_tools(use_longterm_memory, tool_descriptions)

    def modify_model_request(self, request: ModelRequest, agent_state: FilesystemState, runtime: Runtime) -> ModelRequest:
        """Add this middleware's system prompt to the request.

        If the request has no system prompt yet, ``self.system_prompt`` becomes
        the system prompt; otherwise it is appended after a blank line.

        Args:
            request: The outgoing model request; modified in place.
            agent_state: Current agent state (not read here).
            runtime: Graph runtime (not read here).

        Returns:
            The same ``request`` object with its ``system_prompt`` updated.
        """
        if request.system_prompt is None:
            request.system_prompt = self.system_prompt
        else:
            request.system_prompt = request.system_prompt + "\n\n" + self.system_prompt
        return request
|
|
55
|
-
|
|
56
|
-
###########################
|
|
57
|
-
# SubAgent Middleware
|
|
58
|
-
###########################
|
|
59
|
-
|
|
60
|
-
class SubAgentMiddleware(AgentMiddleware):
    """Middleware that exposes the ``task`` tool for delegating work to subagents."""

    def __init__(
        self,
        default_subagent_tools: list[BaseTool] | None = None,
        subagents: list[SubAgent | CustomSubAgent] | None = None,
        model=None,
        is_async=False,
    ) -> None:
        """Build the ``task`` tool and register it as this middleware's only tool.

        Args:
            default_subagent_tools: Tools forwarded to ``create_task_tool`` as
                the default subagent tool set. ``None`` means an empty list.
            subagents: Subagent specifications forwarded to
                ``create_task_tool``. ``None`` means an empty list.
            model: Model forwarded to ``create_task_tool``.
            is_async: When true, ``create_task_tool`` builds the coroutine
                variant of the tool.
        """
        super().__init__()
        # Fresh lists per instance: `[]` defaults in the signature would be
        # shared objects across every call that omits the argument.
        task_tool = create_task_tool(
            default_subagent_tools=[] if default_subagent_tools is None else default_subagent_tools,
            subagents=[] if subagents is None else subagents,
            model=model,
            is_async=is_async,
        )
        self.tools = [task_tool]

    def modify_model_request(self, request: ModelRequest, agent_state: AgentState, runtime: Runtime) -> ModelRequest:
        """Add ``TASK_SYSTEM_PROMPT`` to the request's system prompt.

        If the request has no system prompt yet, the task prompt becomes the
        system prompt; otherwise it is appended after a blank line.

        Args:
            request: The outgoing model request; modified in place.
            agent_state: Current agent state (not read here).
            runtime: Graph runtime (not read here).

        Returns:
            The same ``request`` object with its ``system_prompt`` updated.
        """
        if request.system_prompt is None:
            request.system_prompt = TASK_SYSTEM_PROMPT
        else:
            request.system_prompt = request.system_prompt + "\n\n" + TASK_SYSTEM_PROMPT
        return request
|
|
83
|
-
|
|
84
|
-
def _get_agents(
    default_subagent_tools: list[BaseTool],
    subagents: list[SubAgent | CustomSubAgent],
    model
):
    """Build the mapping from subagent name to the agent that handles it.

    The mapping always contains a ``"general-purpose"`` agent built from
    ``model``, ``BASE_AGENT_PROMPT`` and ``default_subagent_tools``. Each entry
    of ``subagents`` then adds (or overrides) one name:

    * an entry with a ``"graph"`` key is used as-is;
    * otherwise an agent is created from the entry's ``"prompt"``, with
      optional ``"tools"``, ``"model"`` and ``"middleware"`` overrides.

    Args:
        default_subagent_tools: Tools for the general-purpose agent, and for
            any subagent without its own ``"tools"`` (those get a copy).
        subagents: Subagent specifications, accessed as mappings.
        model: Main model; used for the general-purpose agent, for the shared
            summarization middleware, and for subagents without a ``"model"``.

    Returns:
        Dict of subagent name -> agent.
    """
    # Middleware stack shared by every agent created here.
    # NOTE(review): the original carried "TODO: Add this back when fixed"
    # directly above SummarizationMiddleware, yet the middleware is active;
    # confirm whether that TODO is stale.
    shared_middleware = [
        PlanningMiddleware(),
        FilesystemMiddleware(),
        SummarizationMiddleware(
            model=model,
            max_tokens_before_summary=120000,
            messages_to_keep=20,
        ),
        AnthropicPromptCachingMiddleware(ttl="5m", unsupported_model_behavior="ignore"),
    ]

    registry = {}
    registry["general-purpose"] = create_agent(
        model,
        system_prompt=BASE_AGENT_PROMPT,
        tools=default_subagent_tools,
        checkpointer=False,
        middleware=shared_middleware,
    )

    for spec in subagents:
        # Pre-built graphs are registered directly; nothing else is read.
        if "graph" in spec:
            registry[spec["name"]] = spec["graph"]
            continue

        spec_tools = spec["tools"] if "tools" in spec else default_subagent_tools.copy()

        # Per-subagent model: a dict is treated as init_chat_model kwargs,
        # any other value is used as the model itself, and a missing key
        # falls back to the main model.
        if "model" not in spec:
            spec_model = model
        else:
            configured_model = spec["model"]
            if isinstance(configured_model, dict):
                spec_model = init_chat_model(**configured_model)
            else:
                spec_model = configured_model

        # Subagent middleware is appended after the shared stack.
        if "middleware" in spec:
            spec_middleware = [*shared_middleware, *spec["middleware"]]
        else:
            spec_middleware = shared_middleware

        registry[spec["name"]] = create_agent(
            spec_model,
            system_prompt=spec["prompt"],
            tools=spec_tools,
            middleware=spec_middleware,
            checkpointer=False,
        )
    return registry
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
def _get_subagent_description(subagents: list[SubAgent | CustomSubAgent]):
|
|
144
|
-
return [f"- {_agent['name']}: {_agent['description']}" for _agent in subagents]
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
def create_task_tool(
    default_subagent_tools: list[BaseTool],
    subagents: list[SubAgent | CustomSubAgent],
    model,
    is_async: bool = False,
):
    """Create the ``task`` tool that delegates a task description to a subagent.

    Args:
        default_subagent_tools: Forwarded to ``_get_agents``.
        subagents: Subagent specifications; used both to build the agents and
            to list them in the tool description.
        model: Forwarded to ``_get_agents``.
        is_async: When true, return a coroutine tool that awaits
            ``ainvoke``; otherwise a synchronous tool that calls ``invoke``.

    Returns:
        The decorated ``task`` tool. When called with an unknown
        ``subagent_type`` the tool returns an error string; otherwise it
        returns a ``Command`` whose update carries the subagent's resulting
        state (minus ``todos`` and ``messages``) plus a ``ToolMessage`` holding
        the content of the subagent's last message.
    """
    agents = _get_agents(default_subagent_tools, subagents, model)
    # Join the bullets into text. Formatting the raw list would embed its
    # Python repr (brackets and quotes) in the tool description.
    other_agents_string = "\n".join(_get_subagent_description(subagents))
    task_description = TASK_TOOL_DESCRIPTION.format(other_agents=other_agents_string)

    def _unknown_agent_error(subagent_type: str) -> str:
        # Error text returned to the model when it names a subagent that does not exist.
        return f"Error: invoked agent of type {subagent_type}, the only allowed types are {[f'`{k}`' for k in agents]}"

    def _subagent_input(state: dict, description: str) -> dict:
        # Shallow copy so the injected state is never mutated: assigning
        # state["messages"] in place would let concurrent task calls that
        # share the dict overwrite each other's prompt.
        return {**state, "messages": [{"role": "user", "content": description}]}

    def _to_command(result: dict, tool_call_id: str) -> Command:
        # Propagate everything the subagent changed except its own todos and
        # message history; only its final message is reported back.
        state_update = {k: v for k, v in result.items() if k not in ("todos", "messages")}
        return Command(
            update={
                **state_update,
                "messages": [
                    ToolMessage(
                        result["messages"][-1].content, tool_call_id=tool_call_id
                    )
                ],
            }
        )

    if is_async:
        @tool(description=task_description)
        async def task(
            description: str,
            subagent_type: str,
            state: Annotated[dict, InjectedState],
            tool_call_id: Annotated[str, InjectedToolCallId],
        ):
            if subagent_type not in agents:
                return _unknown_agent_error(subagent_type)
            result = await agents[subagent_type].ainvoke(_subagent_input(state, description))
            return _to_command(result, tool_call_id)
    else:
        @tool(description=task_description)
        def task(
            description: str,
            subagent_type: str,
            state: Annotated[dict, InjectedState],
            tool_call_id: Annotated[str, InjectedToolCallId],
        ):
            if subagent_type not in agents:
                return _unknown_agent_error(subagent_type)
            result = agents[subagent_type].invoke(_subagent_input(state, description))
            return _to_command(result, tool_call_id)
    return task
|
deepagents/model.py
DELETED
deepagents/prompts.py
DELETED
|
@@ -1,429 +0,0 @@
|
|
|
1
|
-
WRITE_TODOS_TOOL_DESCRIPTION = """Use this tool to create and manage a structured task list for your current work session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user.
|
|
2
|
-
It also helps the user understand the progress of the task and overall progress of their requests.
|
|
3
|
-
Only use this tool if you think it will be helpful in staying organized. If the user's request is trivial and takes less than 3 steps, it is better to NOT use this tool and just do the task directly.
|
|
4
|
-
|
|
5
|
-
## When to Use This Tool
|
|
6
|
-
Use this tool in these scenarios:
|
|
7
|
-
|
|
8
|
-
1. Complex multi-step tasks - When a task requires 3 or more distinct steps or actions
|
|
9
|
-
2. Non-trivial and complex tasks - Tasks that require careful planning or multiple operations
|
|
10
|
-
3. User explicitly requests todo list - When the user directly asks you to use the todo list
|
|
11
|
-
4. User provides multiple tasks - When users provide a list of things to be done (numbered or comma-separated)
|
|
12
|
-
5. The plan may need future revisions or updates based on results from the first few steps. Keeping track of this in a list is helpful.
|
|
13
|
-
|
|
14
|
-
## How to Use This Tool
|
|
15
|
-
1. When you start working on a task - Mark it as in_progress BEFORE beginning work.
|
|
16
|
-
2. After completing a task - Mark it as completed and add any new follow-up tasks discovered during implementation.
|
|
17
|
-
3. You can also update future tasks, such as deleting them if they are no longer necessary, or adding new tasks that are necessary. Don't change previously completed tasks.
|
|
18
|
-
4. You can make several updates to the todo list at once. For example, when you complete a task, you can mark the next task you need to start as in_progress.
|
|
19
|
-
|
|
20
|
-
## When NOT to Use This Tool
|
|
21
|
-
It is important to skip using this tool when:
|
|
22
|
-
1. There is only a single, straightforward task
|
|
23
|
-
2. The task is trivial and tracking it provides no benefit
|
|
24
|
-
3. The task can be completed in less than 3 trivial steps
|
|
25
|
-
4. The task is purely conversational or informational
|
|
26
|
-
|
|
27
|
-
## Examples of When to Use the Todo List
|
|
28
|
-
|
|
29
|
-
<example>
|
|
30
|
-
User: I want to add a dark mode toggle to the application settings. Make sure you run the tests and build when you're done!
|
|
31
|
-
Assistant: I'll help add a dark mode toggle to your application settings. Let me create a todo list to track this implementation.
|
|
32
|
-
*Creates todo list with the following items:*
|
|
33
|
-
1. Create dark mode toggle component in Settings page
|
|
34
|
-
2. Add dark mode state management (context/store)
|
|
35
|
-
3. Implement CSS-in-JS styles for dark theme
|
|
36
|
-
4. Update existing components to support theme switching
|
|
37
|
-
5. Run tests and build process, addressing any failures or errors that occur
|
|
38
|
-
*Begins working on the first task*
|
|
39
|
-
|
|
40
|
-
<reasoning>
|
|
41
|
-
The assistant used the todo list because:
|
|
42
|
-
1. Adding dark mode in and of itself is a multi-step feature requiring UI, state management, and styling changes
|
|
43
|
-
2. The assistant inferred that tests and build need to pass by adding "Run tests and build process, addressing any failures or errors that occur" as the final task
|
|
44
|
-
3. Both of the user's requests are complex and require multiple steps to complete.
|
|
45
|
-
</reasoning>
|
|
46
|
-
</example>
|
|
47
|
-
|
|
48
|
-
<example>
|
|
49
|
-
User: Help me plan a comprehensive marketing campaign for our new product launch, including social media, email marketing, and press releases.
|
|
50
|
-
Assistant: I'll help you plan a comprehensive marketing campaign for your product launch. Let me create a todo list to organize all the components.
|
|
51
|
-
*Creates todo list with the following items:*
|
|
52
|
-
1. Define target audience and key messaging
|
|
53
|
-
2. Create social media content calendar and posting schedule
|
|
54
|
-
3. Design email marketing sequence and automation flows
|
|
55
|
-
4. Draft press release and identify media contacts
|
|
56
|
-
5. Set up tracking and analytics for campaign performance
|
|
57
|
-
6. Create visual assets and brand materials
|
|
58
|
-
7. Coordinate launch timeline and deliverables
|
|
59
|
-
*Begins working on the first task*
|
|
60
|
-
|
|
61
|
-
<reasoning>
|
|
62
|
-
The assistant used the todo list because:
|
|
63
|
-
1. Marketing campaign planning involves multiple distinct channels and activities
|
|
64
|
-
2. Each component requires careful coordination and planning
|
|
65
|
-
3. The systematic approach ensures all aspects of the launch are covered
|
|
66
|
-
</reasoning>
|
|
67
|
-
</example>
|
|
68
|
-
|
|
69
|
-
<example>
|
|
70
|
-
User: Help me rename the function getCwd to getCurrentWorkingDirectory across my project
|
|
71
|
-
Assistant: Let me first search through your codebase to find all occurrences of 'getCwd'.
|
|
72
|
-
*Uses search tools to locate all instances of getCwd in the codebase*
|
|
73
|
-
Assistant: I've found 15 instances of 'getCwd' across 8 different files. Let me create a todo list to track these changes.
|
|
74
|
-
*Creates todo list with specific items for each file that needs updating*
|
|
75
|
-
|
|
76
|
-
<reasoning>
|
|
77
|
-
The assistant used the todo list because:
|
|
78
|
-
1. The assistant searched to understand the scope of the task
|
|
79
|
-
2. Upon finding multiple occurrences across different files, it determined this was a complex task with multiple steps (>3)
|
|
80
|
-
3. The todo list helps ensure every instance is tracked and updated systematically
|
|
81
|
-
4. This approach prevents missing any occurrences and maintains consistency.
|
|
82
|
-
</reasoning>
|
|
83
|
-
</example>
|
|
84
|
-
|
|
85
|
-
<example>
|
|
86
|
-
User: I need help organizing my home office: decluttering the desk, setting up a filing system, organizing digital files, and creating a daily workflow.
|
|
87
|
-
Assistant: I'll help you organize your home office systematically. Let me create a todo list to track each area of organization.
|
|
88
|
-
*Creates todo list with items like: 1) Sort and declutter desk items, 2) Set up physical filing system for documents, 3) Organize digital files and folders, 4) Create daily workflow and productivity systems, 5) Establish maintenance routines*
|
|
89
|
-
Let's start with decluttering your desk space to create a clean foundation for the other organizational systems.
|
|
90
|
-
|
|
91
|
-
<reasoning>
|
|
92
|
-
The assistant used the todo list because:
|
|
93
|
-
1. The user provided multiple distinct organizational tasks
|
|
94
|
-
2. Each area requires different approaches and careful planning
|
|
95
|
-
3. Home organization benefits from systematic, step-by-step completion
|
|
96
|
-
4. Progress tracking ensures all areas are addressed thoroughly
|
|
97
|
-
</reasoning>
|
|
98
|
-
</example>
|
|
99
|
-
|
|
100
|
-
## Examples of When NOT to Use the Todo List
|
|
101
|
-
|
|
102
|
-
<example>
|
|
103
|
-
User: How do I print 'Hello World' in Python?
|
|
104
|
-
Assistant: In Python, you can print "Hello World" with this simple code:
|
|
105
|
-
|
|
106
|
-
python
|
|
107
|
-
print("Hello World")
|
|
108
|
-
|
|
109
|
-
This will output the text "Hello World" to the console when executed.
|
|
110
|
-
|
|
111
|
-
<reasoning>
|
|
112
|
-
The assistant did not use the todo list because this is a single, trivial task that can be completed in one step. There's no need to track multiple tasks or steps for such a straightforward request.
|
|
113
|
-
</reasoning>
|
|
114
|
-
</example>
|
|
115
|
-
|
|
116
|
-
<example>
|
|
117
|
-
User: What's the best way to water houseplants?
|
|
118
|
-
Assistant: The best way to water houseplants depends on the specific plant type, but here are some general guidelines:
|
|
119
|
-
|
|
120
|
-
- Check soil moisture by inserting your finger about 1-2 inches into the soil
|
|
121
|
-
- Water thoroughly when the top inch feels dry, allowing water to drain from the bottom
|
|
122
|
-
- Use room temperature water and water in the morning when possible
|
|
123
|
-
- Avoid overwatering, which is more harmful than underwatering for most plants
|
|
124
|
-
|
|
125
|
-
Different plants have different needs, so it's important to research your specific varieties for optimal care.
|
|
126
|
-
|
|
127
|
-
<reasoning>
|
|
128
|
-
The assistant did not use the todo list because this is an informational request with no actual tasks to complete. The user is simply asking for advice and explanation, not for the assistant to perform multiple steps or activities.
|
|
129
|
-
</reasoning>
|
|
130
|
-
</example>
|
|
131
|
-
|
|
132
|
-
<example>
|
|
133
|
-
User: Can you add a comment to the calculateTotal function to explain what it does?
|
|
134
|
-
Assistant: Sure, let me add a comment to the calculateTotal function to explain what it does.
|
|
135
|
-
* Uses the Edit tool to add a comment to the calculateTotal function *
|
|
136
|
-
|
|
137
|
-
<reasoning>
|
|
138
|
-
The assistant did not use the todo list because this is a single, straightforward task confined to one location. Adding a comment doesn't require tracking multiple steps or systematic organization.
|
|
139
|
-
</reasoning>
|
|
140
|
-
</example>
|
|
141
|
-
|
|
142
|
-
<example>
|
|
143
|
-
User: What time is it in Tokyo right now?
|
|
144
|
-
Assistant: I'll check the current time in Tokyo for you.
|
|
145
|
-
|
|
146
|
-
*Searches for current time in Tokyo*
|
|
147
|
-
|
|
148
|
-
The current time in Tokyo, Japan is [current time]. Tokyo is in the Japan Standard Time (JST) zone, which is UTC+9.
|
|
149
|
-
|
|
150
|
-
<reasoning>
|
|
151
|
-
The assistant did not use the todo list because this is a single information lookup with immediate results. There are no multiple steps to track or organize, making the todo list unnecessary for this straightforward request.
|
|
152
|
-
</reasoning>
|
|
153
|
-
</example>
|
|
154
|
-
|
|
155
|
-
<example>
|
|
156
|
-
User: I need to write a function that checks if a number is prime and then test it out.
|
|
157
|
-
Assistant: I'll help you write a function that checks if a number is prime and then test it out.
|
|
158
|
-
*Writes function that checks if a number is prime*
|
|
159
|
-
*Tests the function*
|
|
160
|
-
|
|
161
|
-
<reasoning>
|
|
162
|
-
Even though this is a multi-step task, it is very straightforward and can be completed in two trivial steps (which is less than 3 steps!). Using the todo list here is overkill and wastes time and tokens.
|
|
163
|
-
</reasoning>
|
|
164
|
-
</example>
|
|
165
|
-
|
|
166
|
-
<example>
|
|
167
|
-
User: I want you to order a pizza from Dominos, order a burger from McDonald's, and order a salad from Subway.
|
|
168
|
-
Assistant: I'll help you order a pizza from Dominos, order a burger from McDonald's, and order a salad from Subway.
|
|
169
|
-
*Orders a pizza from Dominos*
|
|
170
|
-
*Orders a burger from McDonald's*
|
|
171
|
-
*Orders a salad from Subway*
|
|
172
|
-
|
|
173
|
-
<reasoning>
|
|
174
|
-
Even though this is a multi-step task, assuming the assistant has the ability to order from these restaurants, it is very straightforward and can be completed in three trivial tool calls.
|
|
175
|
-
Using the todo list here is overkill and wastes time and tokens. These three tool calls should be made in parallel, in fact.
|
|
176
|
-
</reasoning>
|
|
177
|
-
</example>
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
## Task States and Management
|
|
181
|
-
|
|
182
|
-
1. **Task States**: Use these states to track progress:
|
|
183
|
-
- pending: Task not yet started
|
|
184
|
-
- in_progress: Currently working on (you can have multiple tasks in_progress at a time if they are not related to each other and can be run in parallel)
|
|
185
|
-
- completed: Task finished successfully
|
|
186
|
-
|
|
187
|
-
2. **Task Management**:
|
|
188
|
-
- Update task status in real-time as you work
|
|
189
|
-
- Mark tasks complete IMMEDIATELY after finishing (don't batch completions)
|
|
190
|
-
- Complete current tasks before starting new ones
|
|
191
|
-
- Remove tasks that are no longer relevant from the list entirely
|
|
192
|
-
- IMPORTANT: When you write this todo list, you should mark your first task (or tasks) as in_progress immediately!
|
|
193
|
-
- IMPORTANT: Unless all tasks are completed, you should always have at least one task in_progress to show the user that you are working on something.
|
|
194
|
-
|
|
195
|
-
3. **Task Completion Requirements**:
|
|
196
|
-
- ONLY mark a task as completed when you have FULLY accomplished it
|
|
197
|
-
- If you encounter errors, blockers, or cannot finish, keep the task as in_progress
|
|
198
|
-
- When blocked, create a new task describing what needs to be resolved
|
|
199
|
-
- Never mark a task as completed if:
|
|
200
|
-
- There are unresolved issues or errors
|
|
201
|
-
- Work is partial or incomplete
|
|
202
|
-
- You encountered blockers that prevent completion
|
|
203
|
-
- You couldn't find necessary resources or dependencies
|
|
204
|
-
- Quality standards haven't been met
|
|
205
|
-
|
|
206
|
-
4. **Task Breakdown**:
|
|
207
|
-
- Create specific, actionable items
|
|
208
|
-
- Break complex tasks into smaller, manageable steps
|
|
209
|
-
- Use clear, descriptive task names
|
|
210
|
-
|
|
211
|
-
Being proactive with task management demonstrates attentiveness and ensures you complete all requirements successfully.
|
|
212
|
-
Remember: If you only need to make a few tool calls to complete a task, and it is clear what you need to do, it is better to just do the task directly and NOT call this tool at all.
|
|
213
|
-
"""
|
|
214
|
-
|
|
215
|
-
TASK_TOOL_DESCRIPTION = """Launch an ephemeral subagent to handle complex, multi-step independent tasks with isolated context windows.
|
|
216
|
-
|
|
217
|
-
Available agent types and the tools they have access to:
|
|
218
|
-
- general-purpose: General-purpose agent for researching complex questions, searching for files and content, and executing multi-step tasks. When you are searching for a keyword or file and are not confident that you will find the right match in the first few tries use this agent to perform the search for you. This agent has access to all the same tools as the main agent.
|
|
219
|
-
{other_agents}
|
|
220
|
-
|
|
221
|
-
When using the Task tool, you must specify a subagent_type parameter to select which agent type to use.
|
|
222
|
-
|
|
223
|
-
## Usage notes:
|
|
224
|
-
1. Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses
|
|
225
|
-
2. When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.
|
|
226
|
-
3. Each agent invocation is stateless. You will not be able to send additional messages to the agent, nor will the agent be able to communicate with you outside of its final report. Therefore, your prompt should contain a highly detailed task description for the agent to perform autonomously and you should specify exactly what information the agent should return back to you in its final and only message to you.
|
|
227
|
-
4. The agent's outputs should generally be trusted
|
|
228
|
-
5. Clearly tell the agent whether you expect it to create content, perform analysis, or just do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent
|
|
229
|
-
6. If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement.
|
|
230
|
-
7. When only the general-purpose agent is provided, you should use it for all tasks. It is great for isolating context and token usage, and completing specific, complex tasks, as it has all the same capabilities as the main agent.
|
|
231
|
-
|
|
232
|
-
### Example usage of the general-purpose agent:
|
|
233
|
-
|
|
234
|
-
<example_agent_descriptions>
|
|
235
|
-
"general-purpose": use this agent for general purpose tasks, it has access to all tools as the main agent.
|
|
236
|
-
</example_agent_descriptions>
|
|
237
|
-
|
|
238
|
-
<example>
|
|
239
|
-
User: "I want to conduct research on the accomplishments of Lebron James, Michael Jordan, and Kobe Bryant, and then compare them."
|
|
240
|
-
Assistant: *Uses the task tool in parallel to conduct isolated research on each of the three players*
|
|
241
|
-
Assistant: *Synthesizes the results of the three isolated research tasks and responds to the User*
|
|
242
|
-
<commentary>
|
|
243
|
-
Research is a complex, multi-step task in and of itself.
|
|
244
|
-
The research of each individual player is not dependent on the research of the other players.
|
|
245
|
-
The assistant uses the task tool to break down the complex objective into three isolated tasks.
|
|
246
|
-
Each research task only needs to worry about context and tokens about one player, then returns synthesized information about each player as the Tool Result.
|
|
247
|
-
This means each research task can dive deep and spend tokens and context deeply researching each player, but the final result is synthesized information, and saves us tokens in the long run when comparing the players to each other.
|
|
248
|
-
</commentary>
|
|
249
|
-
</example>
|
|
250
|
-
|
|
251
|
-
<example>
|
|
252
|
-
User: "Analyze a single large code repository for security vulnerabilities and generate a report."
|
|
253
|
-
Assistant: *Launches a single `task` subagent for the repository analysis*
|
|
254
|
-
Assistant: *Receives report and integrates results into final summary*
|
|
255
|
-
<commentary>
|
|
256
|
-
Subagent is used to isolate a large, context-heavy task, even though there is only one. This prevents the main thread from being overloaded with details.
|
|
257
|
-
If the user then asks followup questions, we have a concise report to reference instead of the entire history of analysis and tool calls, which is good and saves us time and money.
|
|
258
|
-
</commentary>
|
|
259
|
-
</example>
|
|
260
|
-
|
|
261
|
-
<example>
|
|
262
|
-
User: "Schedule two meetings for me and prepare agendas for each."
|
|
263
|
-
Assistant: *Calls the task tool in parallel to launch two `task` subagents (one per meeting) to prepare agendas*
|
|
264
|
-
Assistant: *Returns final schedules and agendas*
|
|
265
|
-
<commentary>
|
|
266
|
-
Tasks are simple individually, but subagents help silo agenda preparation.
|
|
267
|
-
Each subagent only needs to worry about the agenda for one meeting.
|
|
268
|
-
</commentary>
|
|
269
|
-
</example>
|
|
270
|
-
|
|
271
|
-
<example>
|
|
272
|
-
User: "I want to order a pizza from Dominos, order a burger from McDonald's, and order a salad from Subway."
|
|
273
|
-
Assistant: *Calls tools directly in parallel to order a pizza from Dominos, a burger from McDonald's, and a salad from Subway*
|
|
274
|
-
<commentary>
|
|
275
|
-
The assistant did not use the task tool because the objective is super simple and clear and only requires a few trivial tool calls.
|
|
276
|
-
It is better to just complete the task directly and NOT use the `task` tool.
|
|
277
|
-
</commentary>
|
|
278
|
-
</example>
|
|
279
|
-
|
|
280
|
-
### Example usage with custom agents:
|
|
281
|
-
|
|
282
|
-
<example_agent_descriptions>
|
|
283
|
-
"content-reviewer": use this agent after you are done creating significant content or documents
|
|
284
|
-
"greeting-responder": use this agent when to respond to user greetings with a friendly joke
|
|
285
|
-
"research-analyst": use this agent to conduct thorough research on complex topics
|
|
286
|
-
</example_agent_descriptions>
|
|
287
|
-
|
|
288
|
-
<example>
|
|
289
|
-
user: "Please write a function that checks if a number is prime"
|
|
290
|
-
assistant: Sure let me write a function that checks if a number is prime
|
|
291
|
-
assistant: First let me use the Write tool to write a function that checks if a number is prime
|
|
292
|
-
assistant: I'm going to use the Write tool to write the following code:
|
|
293
|
-
<code>
|
|
294
|
-
function isPrime(n) {{
|
|
295
|
-
if (n <= 1) return false
|
|
296
|
-
for (let i = 2; i * i <= n; i++) {{
|
|
297
|
-
if (n % i === 0) return false
|
|
298
|
-
}}
|
|
299
|
-
return true
|
|
300
|
-
}}
|
|
301
|
-
</code>
|
|
302
|
-
<commentary>
|
|
303
|
-
Since significant content was created and the task was completed, now use the content-reviewer agent to review the work
|
|
304
|
-
</commentary>
|
|
305
|
-
assistant: Now let me use the content-reviewer agent to review the code
|
|
306
|
-
assistant: Uses the Task tool to launch with the content-reviewer agent
|
|
307
|
-
</example>
|
|
308
|
-
|
|
309
|
-
<example>
|
|
310
|
-
user: "Can you help me research the environmental impact of different renewable energy sources and create a comprehensive report?"
|
|
311
|
-
<commentary>
|
|
312
|
-
This is a complex research task that would benefit from using the research-analyst agent to conduct thorough analysis
|
|
313
|
-
</commentary>
|
|
314
|
-
assistant: I'll help you research the environmental impact of renewable energy sources. Let me use the research-analyst agent to conduct comprehensive research on this topic.
|
|
315
|
-
assistant: Uses the Task tool to launch with the research-analyst agent, providing detailed instructions about what research to conduct and what format the report should take
|
|
316
|
-
</example>
|
|
317
|
-
|
|
318
|
-
<example>
|
|
319
|
-
user: "Hello"
|
|
320
|
-
<commentary>
|
|
321
|
-
Since the user is greeting, use the greeting-responder agent to respond with a friendly joke
|
|
322
|
-
</commentary>
|
|
323
|
-
assistant: "I'm going to use the Task tool to launch with the greeting-responder agent"
|
|
324
|
-
</example>"""
|
|
325
|
-
|
|
326
|
-
LIST_FILES_TOOL_DESCRIPTION = """Lists all files in the filesystem.
|
|
327
|
-
|
|
328
|
-
Usage:
|
|
329
|
-
- The list_files tool will return a list of all files in the filesystem.
|
|
330
|
-
- This is very useful for exploring the file system and finding the right file to read or edit.
|
|
331
|
-
- You should almost ALWAYS use this tool before using the Read or Edit tools."""
|
|
332
|
-
LIST_FILES_TOOL_DESCRIPTION_LONGTERM_SUPPLEMENT = "\n- Files from the longterm filesystem will be prefixed with the memories/ path."
|
|
333
|
-
|
|
334
|
-
READ_FILE_TOOL_DESCRIPTION = """Reads a file from the filesystem. You can access any file directly by using this tool.
|
|
335
|
-
Assume this tool is able to read all files on the machine. If the User provides a path to a file assume that path is valid. It is okay to read a file that does not exist; an error will be returned.
|
|
336
|
-
|
|
337
|
-
Usage:
|
|
338
|
-
- The file_path parameter must be an absolute path, not a relative path
|
|
339
|
-
- By default, it reads up to 2000 lines starting from the beginning of the file
|
|
340
|
-
- You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters
|
|
341
|
-
- Any lines longer than 2000 characters will be truncated
|
|
342
|
-
- Results are returned using cat -n format, with line numbers starting at 1
|
|
343
|
-
- You have the capability to call multiple tools in a single response. It is always better to speculatively read multiple files as a batch that are potentially useful.
|
|
344
|
-
- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents.
|
|
345
|
-
- You should ALWAYS make sure a file has been read before editing it."""
|
|
346
|
-
READ_FILE_TOOL_DESCRIPTION_LONGTERM_SUPPLEMENT = "\n- file_paths prefixed with the memories/ path will be read from the longterm filesystem."
|
|
347
|
-
|
|
348
|
-
EDIT_FILE_TOOL_DESCRIPTION = """Performs exact string replacements in files.
|
|
349
|
-
|
|
350
|
-
Usage:
|
|
351
|
-
- You must use your `Read` tool at least once in the conversation before editing. This tool will error if you attempt an edit without reading the file.
|
|
352
|
-
- When editing text from Read tool output, ensure you preserve the exact indentation (tabs/spaces) as it appears AFTER the line number prefix. The line number prefix format is: spaces + line number + tab. Everything after that tab is the actual file content to match. Never include any part of the line number prefix in the old_string or new_string.
|
|
353
|
-
- ALWAYS prefer editing existing files. NEVER write new files unless explicitly required.
|
|
354
|
-
- Only use emojis if the user explicitly requests it. Avoid adding emojis to files unless asked.
|
|
355
|
-
- The edit will FAIL if `old_string` is not unique in the file. Either provide a larger string with more surrounding context to make it unique or use `replace_all` to change every instance of `old_string`.
|
|
356
|
-
- Use `replace_all` for replacing and renaming strings across the file. This parameter is useful if you want to rename a variable for instance."""
|
|
357
|
-
EDIT_FILE_TOOL_DESCRIPTION_LONGTERM_SUPPLEMENT = "\n- You can edit files in the longterm filesystem by prefixing the filename with the memories/ path."
|
|
358
|
-
|
|
359
|
-
WRITE_FILE_TOOL_DESCRIPTION = """Writes to a new file in the filesystem.
|
|
360
|
-
|
|
361
|
-
Usage:
|
|
362
|
-
- The file_path parameter must be an absolute path, not a relative path
|
|
363
|
-
- The content parameter must be a string
|
|
364
|
-
- The write_file tool will create a new file.
|
|
365
|
-
- Prefer to edit existing files over creating new ones when possible.
|
|
366
|
-
- file_paths prefixed with the memories/ path will be written to the longterm filesystem."""
|
|
367
|
-
WRITE_FILE_TOOL_DESCRIPTION_LONGTERM_SUPPLEMENT = "\n- file_paths prefixed with the memories/ path will be written to the longterm filesystem."
|
|
368
|
-
|
|
369
|
-
FILESYSTEM_SYSTEM_PROMPT = """## Filesystem Tools `ls`, `read_file`, `write_file`, `edit_file`
|
|
370
|
-
|
|
371
|
-
You have access to a filesystem which you can interact with using these tools.
|
|
372
|
-
Do not prepend a / to file_paths.
|
|
373
|
-
|
|
374
|
-
- ls: list all files in the filesystem
|
|
375
|
-
- read_file: read a file from the filesystem
|
|
376
|
-
- write_file: write to a file in the filesystem
|
|
377
|
-
- edit_file: edit a file in the filesystem"""
|
|
378
|
-
FILESYSTEM_SYSTEM_PROMPT_LONGTERM_SUPPLEMENT = """
|
|
379
|
-
|
|
380
|
-
You also have access to a longterm filesystem in which you can store files that you want to keep around for longer than the current conversation.
|
|
381
|
-
In order to interact with the longterm filesystem, you can use those same tools, but filenames must be prefixed with the memories/ path.
|
|
382
|
-
Remember, to interact with the longterm filesystem, you must prefix the filename with the memories/ path."""
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
WRITE_TODOS_SYSTEM_PROMPT = """## `write_todos`
|
|
386
|
-
|
|
387
|
-
You have access to the `write_todos` tool to help you manage and plan complex objectives.
|
|
388
|
-
Use this tool for complex objectives to ensure that you are tracking each necessary step and giving the user visibility into your progress.
|
|
389
|
-
This tool is very helpful for planning complex objectives, and for breaking down these larger complex objectives into smaller steps.
|
|
390
|
-
|
|
391
|
-
It is critical that you mark todos as completed as soon as you are done with a step. Do not batch up multiple steps before marking them as completed.
|
|
392
|
-
For simple objectives that only require a few steps, it is better to just complete the objective directly and NOT use this tool.
|
|
393
|
-
Writing todos takes time and tokens, use it when it is helpful for managing complex many-step problems! But not for simple few-step requests.
|
|
394
|
-
|
|
395
|
-
## Important To-Do List Usage Notes to Remember
|
|
396
|
-
- The `write_todos` tool should never be called multiple times in parallel.
|
|
397
|
-
- Don't be afraid to revise the To-Do list as you go. New information may reveal new tasks that need to be done, or old tasks that are irrelevant."""
|
|
398
|
-
|
|
399
|
-
TASK_SYSTEM_PROMPT = """## `task` (subagent spawner)
|
|
400
|
-
|
|
401
|
-
You have access to a `task` tool to launch short-lived subagents that handle isolated tasks. These agents are ephemeral — they live only for the duration of the task and return a single result.
|
|
402
|
-
|
|
403
|
-
When to use the task tool:
|
|
404
|
-
- When a task is complex and multi-step, and can be fully delegated in isolation
|
|
405
|
-
- When a task is independent of other tasks and can run in parallel
|
|
406
|
-
- When a task requires focused reasoning or heavy token/context usage that would bloat the orchestrator thread
|
|
407
|
-
- When sandboxing improves reliability (e.g. code execution, structured searches, data formatting)
|
|
408
|
-
- When you only care about the output of the subagent, and not the intermediate steps (e.g. performing a lot of research and then returning a synthesized report, or performing a series of computations or lookups to achieve a concise, relevant answer)
|
|
409
|
-
|
|
410
|
-
Subagent lifecycle:
|
|
411
|
-
1. **Spawn** → Provide clear role, instructions, and expected output
|
|
412
|
-
2. **Run** → The subagent completes the task autonomously
|
|
413
|
-
3. **Return** → The subagent provides a single structured result
|
|
414
|
-
4. **Reconcile** → Incorporate or synthesize the result into the main thread
|
|
415
|
-
|
|
416
|
-
When NOT to use the task tool:
|
|
417
|
-
- If you need to see the intermediate reasoning or steps after the subagent has completed (the task tool hides them)
|
|
418
|
-
- If the task is trivial (a few tool calls or simple lookup)
|
|
419
|
-
- If delegating does not reduce token usage, complexity, or context switching
|
|
420
|
-
- If splitting would add latency without benefit
|
|
421
|
-
|
|
422
|
-
## Important Task Tool Usage Notes to Remember
|
|
423
|
-
- Whenever possible, parallelize the work that you do. This is true for both tool_calls, and for tasks. Whenever you have independent steps to complete - make tool_calls, or kick off tasks (subagents) in parallel to accomplish them faster. This saves time for the user, which is incredibly important.
|
|
424
|
-
- Remember to use the `task` tool to silo independent tasks within a multi-part objective.
|
|
425
|
-
- You should use the `task` tool whenever you have a complex task that will take multiple steps, and is independent from other tasks that the agent needs to complete. These agents are highly competent and efficient."""
|
|
426
|
-
|
|
427
|
-
BASE_AGENT_PROMPT = """
|
|
428
|
-
In order to complete the objective that the user asks of you, you have access to a number of standard tools.
|
|
429
|
-
"""
|