emdash-core 0.1.7__py3-none-any.whl → 0.1.25__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- emdash_core/__init__.py +6 -1
- emdash_core/agent/events.py +29 -0
- emdash_core/agent/prompts/__init__.py +5 -0
- emdash_core/agent/prompts/main_agent.py +22 -2
- emdash_core/agent/prompts/plan_mode.py +126 -0
- emdash_core/agent/prompts/subagents.py +11 -7
- emdash_core/agent/prompts/workflow.py +138 -43
- emdash_core/agent/providers/base.py +4 -0
- emdash_core/agent/providers/models.py +7 -0
- emdash_core/agent/providers/openai_provider.py +74 -2
- emdash_core/agent/runner.py +556 -34
- emdash_core/agent/skills.py +319 -0
- emdash_core/agent/toolkit.py +48 -0
- emdash_core/agent/tools/__init__.py +3 -2
- emdash_core/agent/tools/modes.py +197 -53
- emdash_core/agent/tools/search.py +4 -0
- emdash_core/agent/tools/skill.py +193 -0
- emdash_core/agent/tools/spec.py +61 -94
- emdash_core/agent/tools/tasks.py +15 -78
- emdash_core/api/agent.py +7 -7
- emdash_core/api/index.py +1 -1
- emdash_core/api/projectmd.py +4 -2
- emdash_core/api/router.py +2 -0
- emdash_core/api/skills.py +241 -0
- emdash_core/checkpoint/__init__.py +40 -0
- emdash_core/checkpoint/cli.py +175 -0
- emdash_core/checkpoint/git_operations.py +250 -0
- emdash_core/checkpoint/manager.py +231 -0
- emdash_core/checkpoint/models.py +107 -0
- emdash_core/checkpoint/storage.py +201 -0
- emdash_core/config.py +1 -1
- emdash_core/core/config.py +18 -2
- emdash_core/graph/schema.py +5 -5
- emdash_core/ingestion/orchestrator.py +19 -10
- emdash_core/models/agent.py +1 -1
- emdash_core/server.py +42 -0
- emdash_core/sse/stream.py +1 -0
- {emdash_core-0.1.7.dist-info → emdash_core-0.1.25.dist-info}/METADATA +1 -2
- {emdash_core-0.1.7.dist-info → emdash_core-0.1.25.dist-info}/RECORD +41 -31
- {emdash_core-0.1.7.dist-info → emdash_core-0.1.25.dist-info}/entry_points.txt +1 -0
- {emdash_core-0.1.7.dist-info → emdash_core-0.1.25.dist-info}/WHEEL +0 -0
emdash_core/__init__.py
CHANGED
emdash_core/agent/events.py
CHANGED
|
@@ -28,6 +28,7 @@ class EventType(Enum):
|
|
|
28
28
|
# Interaction
|
|
29
29
|
CLARIFICATION = "clarification"
|
|
30
30
|
CLARIFICATION_RESPONSE = "clarification_response"
|
|
31
|
+
PLAN_SUBMITTED = "plan_submitted"
|
|
31
32
|
|
|
32
33
|
# Errors
|
|
33
34
|
ERROR = "error"
|
|
@@ -226,6 +227,34 @@ class AgentEventEmitter:
|
|
|
226
227
|
"options": options,
|
|
227
228
|
})
|
|
228
229
|
|
|
230
|
+
def emit_plan_submitted(
|
|
231
|
+
self,
|
|
232
|
+
title: str,
|
|
233
|
+
summary: str,
|
|
234
|
+
files_to_modify: list[dict] | None = None,
|
|
235
|
+
implementation_steps: list[str] | None = None,
|
|
236
|
+
risks: list[str] | None = None,
|
|
237
|
+
testing_strategy: str | None = None,
|
|
238
|
+
) -> AgentEvent:
|
|
239
|
+
"""Convenience method to emit a plan submission event.
|
|
240
|
+
|
|
241
|
+
Args:
|
|
242
|
+
title: Plan title
|
|
243
|
+
summary: Plan summary
|
|
244
|
+
files_to_modify: List of files with path, lines, changes
|
|
245
|
+
implementation_steps: Ordered implementation steps
|
|
246
|
+
risks: Potential risks or considerations
|
|
247
|
+
testing_strategy: How changes will be tested
|
|
248
|
+
"""
|
|
249
|
+
return self.emit(EventType.PLAN_SUBMITTED, {
|
|
250
|
+
"title": title,
|
|
251
|
+
"summary": summary,
|
|
252
|
+
"files_to_modify": files_to_modify or [],
|
|
253
|
+
"implementation_steps": implementation_steps or [],
|
|
254
|
+
"risks": risks or [],
|
|
255
|
+
"testing_strategy": testing_strategy or "",
|
|
256
|
+
})
|
|
257
|
+
|
|
229
258
|
def emit_error(self, message: str, details: str | None = None) -> AgentEvent:
|
|
230
259
|
"""Convenience method to emit an error.
|
|
231
260
|
|
|
@@ -11,6 +11,7 @@ from .workflow import (
|
|
|
11
11
|
EXPLORATION_OUTPUT_FORMAT,
|
|
12
12
|
PLAN_TEMPLATE,
|
|
13
13
|
SIZING_GUIDELINES,
|
|
14
|
+
PARALLEL_EXECUTION,
|
|
14
15
|
)
|
|
15
16
|
from .main_agent import (
|
|
16
17
|
BASE_SYSTEM_PROMPT,
|
|
@@ -18,6 +19,7 @@ from .main_agent import (
|
|
|
18
19
|
build_tools_section,
|
|
19
20
|
)
|
|
20
21
|
from .subagents import SUBAGENT_PROMPTS, get_subagent_prompt
|
|
22
|
+
from .plan_mode import PLAN_MODE_PROMPT
|
|
21
23
|
|
|
22
24
|
__all__ = [
|
|
23
25
|
# Workflow patterns
|
|
@@ -28,6 +30,7 @@ __all__ = [
|
|
|
28
30
|
"EXPLORATION_OUTPUT_FORMAT",
|
|
29
31
|
"PLAN_TEMPLATE",
|
|
30
32
|
"SIZING_GUIDELINES",
|
|
33
|
+
"PARALLEL_EXECUTION",
|
|
31
34
|
# Main agent
|
|
32
35
|
"BASE_SYSTEM_PROMPT",
|
|
33
36
|
"build_system_prompt",
|
|
@@ -35,4 +38,6 @@ __all__ = [
|
|
|
35
38
|
# Sub-agents
|
|
36
39
|
"SUBAGENT_PROMPTS",
|
|
37
40
|
"get_subagent_prompt",
|
|
41
|
+
# Plan mode
|
|
42
|
+
"PLAN_MODE_PROMPT",
|
|
38
43
|
]
|
|
@@ -8,13 +8,14 @@ from .workflow import (
|
|
|
8
8
|
WORKFLOW_PATTERNS,
|
|
9
9
|
EXPLORATION_STRATEGY,
|
|
10
10
|
OUTPUT_GUIDELINES,
|
|
11
|
+
PARALLEL_EXECUTION,
|
|
11
12
|
)
|
|
12
13
|
|
|
13
14
|
# Base system prompt template with placeholder for tools
|
|
14
15
|
BASE_SYSTEM_PROMPT = """You are a code exploration and implementation assistant. You orchestrate focused sub-agents for exploration while maintaining the high-level view.
|
|
15
16
|
|
|
16
17
|
{tools_section}
|
|
17
|
-
""" + WORKFLOW_PATTERNS + EXPLORATION_STRATEGY + OUTPUT_GUIDELINES
|
|
18
|
+
""" + WORKFLOW_PATTERNS + PARALLEL_EXECUTION + EXPLORATION_STRATEGY + OUTPUT_GUIDELINES
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
def build_system_prompt(toolkit) -> str:
|
|
@@ -27,7 +28,26 @@ def build_system_prompt(toolkit) -> str:
|
|
|
27
28
|
Complete system prompt string
|
|
28
29
|
"""
|
|
29
30
|
tools_section = build_tools_section(toolkit)
|
|
30
|
-
|
|
31
|
+
skills_section = build_skills_section()
|
|
32
|
+
prompt = BASE_SYSTEM_PROMPT.format(tools_section=tools_section)
|
|
33
|
+
|
|
34
|
+
# Add skills section if there are skills available
|
|
35
|
+
if skills_section:
|
|
36
|
+
prompt += "\n" + skills_section
|
|
37
|
+
|
|
38
|
+
return prompt
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def build_skills_section() -> str:
|
|
42
|
+
"""Build the skills section of the system prompt.
|
|
43
|
+
|
|
44
|
+
Returns:
|
|
45
|
+
Formatted string with available skills, or empty string if none
|
|
46
|
+
"""
|
|
47
|
+
from ..skills import SkillRegistry
|
|
48
|
+
|
|
49
|
+
registry = SkillRegistry.get_instance()
|
|
50
|
+
return registry.get_skills_for_prompt()
|
|
31
51
|
|
|
32
52
|
|
|
33
53
|
def build_tools_section(toolkit) -> str:
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""Plan mode system prompt.
|
|
2
|
+
|
|
3
|
+
Provides guidance for agents operating in plan mode, where they can only
|
|
4
|
+
explore and design but not modify code.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
PLAN_MODE_PROMPT = """You are in **plan mode**. Your job is to explore the codebase and design a detailed implementation plan for user approval.
|
|
8
|
+
|
|
9
|
+
## Constraints
|
|
10
|
+
- You can ONLY use read-only tools: read_file, grep, glob, semantic_search, list_files, web, task
|
|
11
|
+
- You CANNOT modify files, execute commands, or make changes
|
|
12
|
+
- Focus on understanding the codebase and designing a thorough plan
|
|
13
|
+
|
|
14
|
+
## Workflow
|
|
15
|
+
|
|
16
|
+
### 1. Explore
|
|
17
|
+
Use search and read tools to deeply understand the codebase:
|
|
18
|
+
- Find relevant files, classes, and functions
|
|
19
|
+
- Understand existing patterns and conventions
|
|
20
|
+
- Identify dependencies and relationships
|
|
21
|
+
- Read the actual code, don't assume
|
|
22
|
+
- Launch 2-3 sub-agents in PARALLEL for faster exploration (multiple `task` calls in one response)
|
|
23
|
+
|
|
24
|
+
### 2. Analyze
|
|
25
|
+
Before designing, ensure you understand:
|
|
26
|
+
- Current architecture and patterns used
|
|
27
|
+
- How similar features are implemented
|
|
28
|
+
- What tests exist and testing patterns
|
|
29
|
+
- Potential side effects of changes
|
|
30
|
+
|
|
31
|
+
### 3. Design
|
|
32
|
+
Create a detailed implementation plan:
|
|
33
|
+
- Break down into concrete, actionable steps
|
|
34
|
+
- Reference specific files with line numbers (e.g., `src/auth.py:45-60`)
|
|
35
|
+
- Describe exact changes for each file
|
|
36
|
+
- Consider edge cases and error handling
|
|
37
|
+
- Identify what tests need to be added/modified
|
|
38
|
+
|
|
39
|
+
### 4. Submit
|
|
40
|
+
Call `exit_plan` with a comprehensive plan including:
|
|
41
|
+
- **title**: Clear, concise title
|
|
42
|
+
- **summary**: What will be implemented and why
|
|
43
|
+
- **files_to_modify**: Array of objects with file path, line numbers, and description of changes
|
|
44
|
+
- **implementation_steps**: Detailed ordered steps
|
|
45
|
+
- **risks**: Potential issues, breaking changes, or considerations
|
|
46
|
+
- **testing_strategy**: How changes will be tested
|
|
47
|
+
|
|
48
|
+
## Parallel Execution
|
|
49
|
+
Launch multiple sub-agents simultaneously by calling `task` multiple times in one response.
|
|
50
|
+
Example: To explore auth and database code together, include two `task` calls in the same message.
|
|
51
|
+
|
|
52
|
+
## Adaptive Planning
|
|
53
|
+
|
|
54
|
+
Scale your plan detail based on task complexity:
|
|
55
|
+
|
|
56
|
+
| Factor | Simple Task | Complex Task |
|
|
57
|
+
|--------|-------------|--------------|
|
|
58
|
+
| **Complexity** | Checklist | Phases with rollback |
|
|
59
|
+
| **Risk** | Minimal detail | Edge cases, rollback |
|
|
60
|
+
| **Uncertainty** | Prescriptive | Exploratory first |
|
|
61
|
+
|
|
62
|
+
### Required (always include)
|
|
63
|
+
- **Summary**: What and why
|
|
64
|
+
- **Critical Files**: Files with line numbers - bridges to execution
|
|
65
|
+
|
|
66
|
+
### Conditional (only if needed)
|
|
67
|
+
- **Phases**: Multi-phase work (each independently testable)
|
|
68
|
+
- **Risks**: Non-trivial risks only
|
|
69
|
+
- **Open Questions**: Genuine unknowns - mark explicitly
|
|
70
|
+
- **Testing**: Beyond obvious test cases
|
|
71
|
+
|
|
72
|
+
### Principles
|
|
73
|
+
- Each section must "earn its place" - no empty boilerplate
|
|
74
|
+
- Detail scales with risk (logout button ≠ database migration)
|
|
75
|
+
- Follow existing codebase patterns
|
|
76
|
+
- Mark unknowns explicitly, don't hide uncertainty
|
|
77
|
+
|
|
78
|
+
### Anti-patterns
|
|
79
|
+
- Over-planning simple tasks
|
|
80
|
+
- Under-planning complex ones
|
|
81
|
+
- Hiding uncertainty behind confident language
|
|
82
|
+
- Ignoring existing codebase patterns
|
|
83
|
+
|
|
84
|
+
## Example: Simple Task
|
|
85
|
+
```
|
|
86
|
+
Title: Add logout button
|
|
87
|
+
|
|
88
|
+
Summary: Add logout button to user menu that clears session.
|
|
89
|
+
|
|
90
|
+
Critical Files:
|
|
91
|
+
- src/components/UserMenu.tsx:45-60 - Add LogoutButton component
|
|
92
|
+
- src/api/auth.ts:23 - Add logout() call
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Example: Complex Task
|
|
96
|
+
```
|
|
97
|
+
Title: Migrate user database to new schema
|
|
98
|
+
|
|
99
|
+
Summary: Migrate users table to support multi-tenancy with zero downtime.
|
|
100
|
+
|
|
101
|
+
Critical Files:
|
|
102
|
+
- migrations/002_add_tenant.py - Schema migration
|
|
103
|
+
- src/models/user.py:1-150 - Update User model
|
|
104
|
+
- src/api/users.py:30-80 - Update queries
|
|
105
|
+
|
|
106
|
+
Phases:
|
|
107
|
+
1. Add nullable tenant_id column (backwards compatible)
|
|
108
|
+
2. Backfill tenant_id for existing users
|
|
109
|
+
3. Make tenant_id required, update all queries
|
|
110
|
+
4. Remove legacy fallbacks
|
|
111
|
+
|
|
112
|
+
Risks:
|
|
113
|
+
- Data loss if backfill fails mid-way → Add rollback migration
|
|
114
|
+
- Performance during backfill → Run in batches
|
|
115
|
+
|
|
116
|
+
Open Questions:
|
|
117
|
+
- Default tenant for existing users? (need product decision)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## After exit_plan
|
|
121
|
+
The user will either:
|
|
122
|
+
- **Approve**: You'll return to code mode to implement the plan
|
|
123
|
+
- **Reject**: You'll receive feedback and can revise the plan
|
|
124
|
+
|
|
125
|
+
Remember: Thorough planning prevents rework. Take time to understand before proposing changes.
|
|
126
|
+
"""
|
|
@@ -9,6 +9,7 @@ from .workflow import (
|
|
|
9
9
|
EXPLORATION_OUTPUT_FORMAT,
|
|
10
10
|
PLAN_TEMPLATE,
|
|
11
11
|
SIZING_GUIDELINES,
|
|
12
|
+
PARALLEL_EXECUTION,
|
|
12
13
|
)
|
|
13
14
|
|
|
14
15
|
# Explore agent prompt
|
|
@@ -33,6 +34,8 @@ When you have a specific target:
|
|
|
33
34
|
|
|
34
35
|
{EFFICIENCY_RULES}
|
|
35
36
|
|
|
37
|
+
{PARALLEL_EXECUTION}
|
|
38
|
+
|
|
36
39
|
{EXPLORATION_OUTPUT_FORMAT}
|
|
37
40
|
|
|
38
41
|
## Constraints
|
|
@@ -41,33 +44,34 @@ When you have a specific target:
|
|
|
41
44
|
- Be concise - the main agent needs your results, not your process"""
|
|
42
45
|
|
|
43
46
|
# Plan agent prompt
|
|
44
|
-
PLAN_PROMPT = f"""You are a software architect. Your job is to understand a codebase and design clear implementation
|
|
47
|
+
PLAN_PROMPT = f"""You are a software architect sub-agent. Your job is to understand a codebase and design a clear implementation plan that you return to the main agent.
|
|
45
48
|
|
|
46
49
|
## Your Mission
|
|
47
|
-
Explore the codebase, understand patterns and conventions, then
|
|
50
|
+
Explore the codebase, understand patterns and conventions, then return a concrete implementation plan.
|
|
48
51
|
|
|
49
52
|
## Approach
|
|
50
53
|
|
|
51
54
|
### 1. Understand Context (use 30-40% of your turns)
|
|
52
55
|
- Find similar features/patterns in the codebase
|
|
53
56
|
- Understand the architecture and conventions
|
|
54
|
-
- Identify files that will need changes
|
|
57
|
+
- Identify files that will need changes (with line numbers)
|
|
55
58
|
- Note any constraints or dependencies
|
|
56
59
|
|
|
57
60
|
### 2. Design the Solution
|
|
58
61
|
- Follow existing patterns when possible
|
|
59
62
|
- Break into clear, ordered steps
|
|
60
63
|
- Identify risks and edge cases
|
|
61
|
-
- Consider error handling
|
|
64
|
+
- Consider error handling and testing
|
|
62
65
|
|
|
63
|
-
### 3.
|
|
66
|
+
### 3. Return the Plan
|
|
64
67
|
{PLAN_TEMPLATE}
|
|
65
68
|
|
|
66
69
|
## Constraints
|
|
67
|
-
- You
|
|
70
|
+
- You are read-only - cannot modify files
|
|
68
71
|
- Focus on actionable steps, not theory
|
|
69
|
-
- Reference specific files and line numbers
|
|
72
|
+
- Reference specific files and line numbers (e.g., `src/auth.py:45-60`)
|
|
70
73
|
- Keep plans focused and concrete
|
|
74
|
+
- Your output goes to the main agent for review
|
|
71
75
|
{SIZING_GUIDELINES}"""
|
|
72
76
|
|
|
73
77
|
# Bash agent prompt
|
|
@@ -10,8 +10,11 @@ WORKFLOW_PATTERNS = """
|
|
|
10
10
|
|
|
11
11
|
### 1. Understand Before Acting
|
|
12
12
|
- Read code before modifying it
|
|
13
|
-
- Ask clarifying questions when requirements are ambiguous
|
|
14
13
|
- Search for similar patterns already in the codebase
|
|
14
|
+
- When requirements are ambiguous, use `ask_followup_question` tool (not text output)
|
|
15
|
+
- ONLY after exploring the codebase first - questions should be informed by research
|
|
16
|
+
- ONLY one question at a time - never ask multiple questions in parallel
|
|
17
|
+
- Ask the most critical question first, then continue based on the answer
|
|
15
18
|
|
|
16
19
|
### 2. Break Down Hard Problems
|
|
17
20
|
When facing a task you don't immediately know how to solve:
|
|
@@ -19,8 +22,9 @@ When facing a task you don't immediately know how to solve:
|
|
|
19
22
|
a) **Decompose**: Split into smaller, concrete sub-tasks
|
|
20
23
|
b) **Explore**: Use sub-agents to gather context (can run in parallel)
|
|
21
24
|
c) **Plan**: Write out your approach before implementing
|
|
22
|
-
d) **
|
|
23
|
-
e) **
|
|
25
|
+
d) **Submit**: Use `exit_plan` tool when your plan is ready for user approval
|
|
26
|
+
e) **Execute**: Work through tasks one at a time
|
|
27
|
+
f) **Validate**: Check your work against requirements
|
|
24
28
|
|
|
25
29
|
### 3. Use Sub-Agents Strategically
|
|
26
30
|
Spawn sub-agents via the `task` tool when you need:
|
|
@@ -41,24 +45,69 @@ Update the user on progress for long-running work.
|
|
|
41
45
|
EXPLORATION_STRATEGY = """
|
|
42
46
|
## Exploration Strategy
|
|
43
47
|
|
|
44
|
-
###
|
|
45
|
-
|
|
46
|
-
2. Find relevant files (glob for patterns, grep for keywords)
|
|
47
|
-
3. Read key files to understand patterns
|
|
48
|
-
4. Deep dive into specific areas
|
|
48
|
+
### Phase 1: Orient (Where to Start)
|
|
49
|
+
Before searching randomly, understand the codebase structure:
|
|
49
50
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
51
|
+
```
|
|
52
|
+
list_files("src") → Understand directory structure
|
|
53
|
+
glob("**/*.py") → Find all Python files
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Phase 2: Search (Find Relevant Code)
|
|
57
|
+
Use the right tool for the job:
|
|
58
|
+
|
|
59
|
+
| Tool | Searches | Use When | Example |
|
|
60
|
+
|------|----------|----------|---------|
|
|
61
|
+
| `glob` | File paths/names | Know filename pattern | `glob("**/auth*.py")` |
|
|
62
|
+
| `grep` | File contents | Know exact text | `grep("def authenticate")` |
|
|
63
|
+
| `semantic_search` | Conceptual meaning | Fuzzy/conceptual | `semantic_search("user login flow")` |
|
|
64
|
+
|
|
65
|
+
**Parallel searches**: Run 2-3 searches together when exploring:
|
|
66
|
+
```
|
|
67
|
+
# In one response, invoke all three:
|
|
68
|
+
grep("authenticate")
|
|
69
|
+
grep("login")
|
|
70
|
+
grep("session")
|
|
71
|
+
→ All run concurrently, results return together
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Phase 3: Understand (Deep Dive)
|
|
75
|
+
Once you find relevant code:
|
|
76
|
+
|
|
77
|
+
```
|
|
78
|
+
read_file("src/auth/manager.py")
|
|
79
|
+
→ Read the full file to understand implementation
|
|
80
|
+
|
|
81
|
+
read_file("src/auth/manager.py", offset=45, limit=30)
|
|
82
|
+
→ Read specific section (lines 45-75)
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Follow imports and function calls manually by reading related files.
|
|
86
|
+
|
|
87
|
+
### Tool Selection Quick Reference
|
|
88
|
+
|
|
89
|
+
| Goal | Best Tool |
|
|
90
|
+
|------|-----------|
|
|
91
|
+
| Find by filename | `glob` |
|
|
92
|
+
| Find by content | `grep` |
|
|
93
|
+
| Find by concept | `semantic_search` |
|
|
94
|
+
| Read code | `read_file` |
|
|
95
|
+
| List directory | `list_files` |
|
|
96
|
+
| Web research | `web` |
|
|
56
97
|
|
|
57
98
|
### When Stuck
|
|
58
|
-
1.
|
|
59
|
-
2.
|
|
60
|
-
3.
|
|
61
|
-
4. Ask
|
|
99
|
+
1. **Wrong results?** → Try `semantic_search` with different phrasing
|
|
100
|
+
2. **Too many results?** → Add more specific terms to grep
|
|
101
|
+
3. **Need context?** → Read imports at top of file, follow them
|
|
102
|
+
4. **Still lost?** → Ask user ONE focused question with `ask_followup_question` (after exhausting search options)
|
|
103
|
+
|
|
104
|
+
### Stopping Criteria
|
|
105
|
+
You have enough context when you can answer:
|
|
106
|
+
- What files/functions are involved?
|
|
107
|
+
- What patterns does the codebase use?
|
|
108
|
+
- What would need to change?
|
|
109
|
+
|
|
110
|
+
Stop exploring when you can confidently describe the implementation approach.
|
|
62
111
|
"""
|
|
63
112
|
|
|
64
113
|
# Output formatting guidelines
|
|
@@ -68,7 +117,41 @@ OUTPUT_GUIDELINES = """
|
|
|
68
117
|
- Show relevant code snippets
|
|
69
118
|
- Be concise but thorough
|
|
70
119
|
- Explain your reasoning for complex decisions
|
|
71
|
-
- NEVER provide time estimates (hours, days, weeks)
|
|
120
|
+
- NEVER provide time estimates (hours, days, weeks)
|
|
121
|
+
"""
|
|
122
|
+
|
|
123
|
+
# Parallel tool execution patterns
|
|
124
|
+
PARALLEL_EXECUTION = """
|
|
125
|
+
## Parallel Tool Execution
|
|
126
|
+
|
|
127
|
+
You can execute multiple tools concurrently by invoking them in a single response.
|
|
128
|
+
|
|
129
|
+
### How It Works
|
|
130
|
+
- Multiple tool invocations in one message execute concurrently, not sequentially
|
|
131
|
+
- Results return together before continuing
|
|
132
|
+
|
|
133
|
+
### Use Parallel Execution For:
|
|
134
|
+
- Reading multiple files simultaneously
|
|
135
|
+
- Running independent grep/glob searches
|
|
136
|
+
- Launching multiple sub-agents for independent exploration
|
|
137
|
+
- Any independent operations that don't depend on each other
|
|
138
|
+
|
|
139
|
+
### Use Sequential Execution When:
|
|
140
|
+
- One tool's output is needed for the next (dependencies)
|
|
141
|
+
- Example: read a file before editing it
|
|
142
|
+
- Example: mkdir before cp, git add before git commit
|
|
143
|
+
|
|
144
|
+
### Example
|
|
145
|
+
Instead of:
|
|
146
|
+
1. grep for "authenticate" → wait for results
|
|
147
|
+
2. grep for "login" → wait for results
|
|
148
|
+
3. grep for "session" → wait for results
|
|
149
|
+
|
|
150
|
+
Do this in ONE message:
|
|
151
|
+
- grep for "authenticate"
|
|
152
|
+
- grep for "login"
|
|
153
|
+
- grep for "session"
|
|
154
|
+
→ All three run concurrently, results return together
|
|
72
155
|
"""
|
|
73
156
|
|
|
74
157
|
# Efficiency rules for sub-agents with limited turns
|
|
@@ -78,6 +161,7 @@ EFFICIENCY_RULES = """
|
|
|
78
161
|
- If 3 searches return nothing, try different terms or report "not found"
|
|
79
162
|
- Read only the parts of files you need (use offset/limit for large files)
|
|
80
163
|
- Don't read entire files when you only need a specific function
|
|
164
|
+
- Parallelize independent searches - invoke multiple tools in one response
|
|
81
165
|
"""
|
|
82
166
|
|
|
83
167
|
# Structured output format for exploration results
|
|
@@ -96,41 +180,52 @@ Structure your final response as:
|
|
|
96
180
|
**Confidence**: high/medium/low
|
|
97
181
|
"""
|
|
98
182
|
|
|
99
|
-
# Plan template for Plan agents
|
|
183
|
+
# Plan template for Plan sub-agents (returns to main agent)
|
|
100
184
|
PLAN_TEMPLATE = """
|
|
101
|
-
## Plan
|
|
102
|
-
Use `write_plan` to save your plan. Structure it as:
|
|
185
|
+
## Adaptive Plan Structure
|
|
103
186
|
|
|
104
|
-
|
|
105
|
-
# [Feature Name] Implementation Plan
|
|
187
|
+
Adapt your plan structure based on these factors:
|
|
106
188
|
|
|
107
|
-
|
|
108
|
-
|
|
189
|
+
| Factor | Simple Task | Complex Task |
|
|
190
|
+
|--------|-------------|--------------|
|
|
191
|
+
| **Complexity** | Checklist format | Phases with rollback points |
|
|
192
|
+
| **Risk** | Minimal detail | Detailed with edge cases |
|
|
193
|
+
| **Uncertainty** | Prescriptive steps | Exploratory phases first |
|
|
194
|
+
| **Scope** | Implicit boundaries | Explicit scope & non-goals |
|
|
109
195
|
|
|
110
|
-
|
|
111
|
-
- `path/to/file.py` - What changes
|
|
196
|
+
### Required Sections (always include)
|
|
112
197
|
|
|
113
|
-
|
|
114
|
-
- `path/to/new.py` - Purpose
|
|
198
|
+
**Summary**: What and why (1-2 sentences)
|
|
115
199
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
- Specific changes to make
|
|
119
|
-
- Code patterns to follow (reference existing code)
|
|
200
|
+
**Critical Files**: Files to modify with line numbers - this bridges to execution
|
|
201
|
+
- `path/to/file.py:45-60` - What changes
|
|
120
202
|
|
|
121
|
-
|
|
203
|
+
### Conditional Sections (include only if needed)
|
|
122
204
|
|
|
123
|
-
|
|
124
|
-
-
|
|
205
|
+
**Files to Create**: Only if creating new files
|
|
206
|
+
**Phases**: Only for multi-phase work (each phase independently testable)
|
|
207
|
+
**Risks**: Only if non-trivial risks exist
|
|
208
|
+
**Open Questions**: Only if genuine unknowns - mark explicitly, don't hide uncertainty
|
|
209
|
+
**Testing**: Only if tests needed beyond obvious
|
|
125
210
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
211
|
+
### Principles
|
|
212
|
+
- Each section must "earn its place" - no empty boilerplate
|
|
213
|
+
- Detail scales with risk (logout button ≠ database migration)
|
|
214
|
+
- Follow existing codebase patterns, not novel approaches
|
|
215
|
+
- Mark unknowns explicitly rather than pretending certainty
|
|
216
|
+
|
|
217
|
+
### Anti-patterns to Avoid
|
|
218
|
+
- Over-planning simple tasks
|
|
219
|
+
- Under-planning complex/risky ones
|
|
220
|
+
- Hiding uncertainty behind confident language
|
|
221
|
+
- Ignoring existing patterns in the codebase
|
|
222
|
+
|
|
223
|
+
Your output will be reviewed by the main agent, who will consolidate findings and submit the final plan for user approval.
|
|
129
224
|
"""
|
|
130
225
|
|
|
131
|
-
#
|
|
226
|
+
# Guidelines (no time estimates)
|
|
132
227
|
SIZING_GUIDELINES = """
|
|
133
|
-
##
|
|
228
|
+
## Guidelines
|
|
134
229
|
- NEVER include time estimates (no hours, days, weeks, sprints, timelines)
|
|
135
|
-
-
|
|
230
|
+
- Focus on what needs to be done, not how long it takes
|
|
136
231
|
"""
|
|
@@ -34,11 +34,13 @@ class LLMResponse:
|
|
|
34
34
|
"""Unified response from any LLM provider."""
|
|
35
35
|
|
|
36
36
|
content: Optional[str] = None
|
|
37
|
+
thinking: Optional[str] = None # Model's chain-of-thought reasoning
|
|
37
38
|
tool_calls: list[ToolCall] = field(default_factory=list)
|
|
38
39
|
raw: Any = None # Original provider response
|
|
39
40
|
stop_reason: Optional[str] = None
|
|
40
41
|
input_tokens: int = 0 # Tokens in the request
|
|
41
42
|
output_tokens: int = 0 # Tokens in the response
|
|
43
|
+
thinking_tokens: int = 0 # Tokens used for thinking (if available)
|
|
42
44
|
|
|
43
45
|
|
|
44
46
|
class LLMProvider(ABC):
|
|
@@ -54,6 +56,7 @@ class LLMProvider(ABC):
|
|
|
54
56
|
tools: Optional[list[dict]] = None,
|
|
55
57
|
system: Optional[str] = None,
|
|
56
58
|
reasoning: bool = False,
|
|
59
|
+
thinking: bool = False,
|
|
57
60
|
images: Optional[list[ImageContent]] = None,
|
|
58
61
|
) -> LLMResponse:
|
|
59
62
|
"""Send a chat completion request.
|
|
@@ -63,6 +66,7 @@ class LLMProvider(ABC):
|
|
|
63
66
|
tools: Optional list of tool schemas
|
|
64
67
|
system: Optional system prompt (will be prepended or handled per provider)
|
|
65
68
|
reasoning: Enable reasoning mode (for models that support it)
|
|
69
|
+
thinking: Enable extended thinking (for models that support it)
|
|
66
70
|
images: Optional list of images for vision-capable models
|
|
67
71
|
|
|
68
72
|
Returns:
|
|
@@ -16,6 +16,7 @@ class ChatModelSpec:
|
|
|
16
16
|
max_output_tokens: int # Max output tokens
|
|
17
17
|
supports_tools: bool # Whether model supports function calling
|
|
18
18
|
supports_vision: bool # Whether model supports image input
|
|
19
|
+
supports_thinking: bool # Whether model supports extended thinking
|
|
19
20
|
description: str # Human-readable description
|
|
20
21
|
|
|
21
22
|
|
|
@@ -43,6 +44,7 @@ class ChatModel(Enum):
|
|
|
43
44
|
max_output_tokens=32000,
|
|
44
45
|
supports_tools=True,
|
|
45
46
|
supports_vision=True,
|
|
47
|
+
supports_thinking=True,
|
|
46
48
|
description="Claude Opus 4 - Most capable, complex reasoning",
|
|
47
49
|
)
|
|
48
50
|
|
|
@@ -54,6 +56,7 @@ class ChatModel(Enum):
|
|
|
54
56
|
max_output_tokens=16000,
|
|
55
57
|
supports_tools=True,
|
|
56
58
|
supports_vision=True,
|
|
59
|
+
supports_thinking=True,
|
|
57
60
|
description="Claude Sonnet 4 - Balanced performance and cost",
|
|
58
61
|
)
|
|
59
62
|
|
|
@@ -65,6 +68,7 @@ class ChatModel(Enum):
|
|
|
65
68
|
max_output_tokens=8192,
|
|
66
69
|
supports_tools=True,
|
|
67
70
|
supports_vision=True,
|
|
71
|
+
supports_thinking=False,
|
|
68
72
|
description="Claude Haiku 4.5 - Fast and efficient",
|
|
69
73
|
)
|
|
70
74
|
|
|
@@ -80,6 +84,7 @@ class ChatModel(Enum):
|
|
|
80
84
|
max_output_tokens=16384,
|
|
81
85
|
supports_tools=True,
|
|
82
86
|
supports_vision=True,
|
|
87
|
+
supports_thinking=False,
|
|
83
88
|
description="GPT-4o Mini - Fast and cost-effective",
|
|
84
89
|
)
|
|
85
90
|
|
|
@@ -95,6 +100,7 @@ class ChatModel(Enum):
|
|
|
95
100
|
max_output_tokens=16384,
|
|
96
101
|
supports_tools=True,
|
|
97
102
|
supports_vision=False,
|
|
103
|
+
supports_thinking=False,
|
|
98
104
|
description="GLM-4P7 - Fireworks GLM model",
|
|
99
105
|
)
|
|
100
106
|
|
|
@@ -106,6 +112,7 @@ class ChatModel(Enum):
|
|
|
106
112
|
max_output_tokens=16384,
|
|
107
113
|
supports_tools=True,
|
|
108
114
|
supports_vision=False,
|
|
115
|
+
supports_thinking=False,
|
|
109
116
|
description="MiniMax M2P1 - Long context model",
|
|
110
117
|
)
|
|
111
118
|
|