PyPI - emdash-core - Versions diffs - 0.1.7__py3-none-any.whl → 0.1.33__py3-none-any.whl - Mend

emdash-core 0.1.7__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

emdash_core/__init__.py +6 -1
emdash_core/agent/__init__.py +4 -0
emdash_core/agent/events.py +52 -1
emdash_core/agent/inprocess_subagent.py +123 -10
emdash_core/agent/prompts/__init__.py +6 -0
emdash_core/agent/prompts/main_agent.py +53 -3
emdash_core/agent/prompts/plan_mode.py +255 -0
emdash_core/agent/prompts/subagents.py +84 -16
emdash_core/agent/prompts/workflow.py +270 -56
emdash_core/agent/providers/base.py +4 -0
emdash_core/agent/providers/factory.py +2 -2
emdash_core/agent/providers/models.py +7 -0
emdash_core/agent/providers/openai_provider.py +137 -13
emdash_core/agent/runner/__init__.py +49 -0
emdash_core/agent/runner/agent_runner.py +753 -0
emdash_core/agent/runner/context.py +451 -0
emdash_core/agent/runner/factory.py +108 -0
emdash_core/agent/runner/plan.py +217 -0
emdash_core/agent/runner/sdk_runner.py +324 -0
emdash_core/agent/runner/utils.py +67 -0
emdash_core/agent/skills.py +358 -0
emdash_core/agent/toolkit.py +85 -5
emdash_core/agent/toolkits/plan.py +9 -11
emdash_core/agent/tools/__init__.py +3 -2
emdash_core/agent/tools/coding.py +48 -4
emdash_core/agent/tools/modes.py +207 -55
emdash_core/agent/tools/search.py +4 -0
emdash_core/agent/tools/skill.py +193 -0
emdash_core/agent/tools/spec.py +61 -94
emdash_core/agent/tools/task.py +41 -2
emdash_core/agent/tools/tasks.py +15 -78
emdash_core/api/agent.py +562 -8
emdash_core/api/index.py +1 -1
emdash_core/api/projectmd.py +4 -2
emdash_core/api/router.py +2 -0
emdash_core/api/skills.py +241 -0
emdash_core/checkpoint/__init__.py +40 -0
emdash_core/checkpoint/cli.py +175 -0
emdash_core/checkpoint/git_operations.py +250 -0
emdash_core/checkpoint/manager.py +231 -0
emdash_core/checkpoint/models.py +107 -0
emdash_core/checkpoint/storage.py +201 -0
emdash_core/config.py +1 -1
emdash_core/core/config.py +18 -2
emdash_core/graph/schema.py +5 -5
emdash_core/ingestion/orchestrator.py +19 -10
emdash_core/models/agent.py +1 -1
emdash_core/server.py +42 -0
emdash_core/skills/frontend-design/SKILL.md +56 -0
emdash_core/sse/stream.py +5 -0
{emdash_core-0.1.7.dist-info → emdash_core-0.1.33.dist-info}/METADATA +2 -2
{emdash_core-0.1.7.dist-info → emdash_core-0.1.33.dist-info}/RECORD +54 -37
{emdash_core-0.1.7.dist-info → emdash_core-0.1.33.dist-info}/entry_points.txt +1 -0
emdash_core/agent/runner.py +0 -601
{emdash_core-0.1.7.dist-info → emdash_core-0.1.33.dist-info}/WHEEL +0 -0

emdash_core/agent/prompts/workflow.py CHANGED Viewed

@@ -8,57 +8,183 @@ consistent behavior across agent types.
 WORKFLOW_PATTERNS = """
 ## Workflow for Complex Tasks
+### User Plan Mode Commands
+When the user explicitly asks to "enter plan mode" or says "plan mode":
+- Call `enter_plan_mode(reason="User requested to enter plan mode for task planning")`
+- This REQUIRES user approval before plan mode activates
+- Do NOT ask clarification questions instead - use the tool
+### CRITICAL: Spawn Plan Agent for Non-Trivial Tasks
+For ANY task that involves:
+- Creating new features or applications
+- Multi-file changes
+- Architectural decisions
+- Unclear or ambiguous requirements
+You MUST spawn a **Plan agent** via the `task` tool FIRST before implementing. The Plan agent will:
+1. Explore the codebase to understand patterns and architecture
+2. Design a concrete implementation plan
+3. Return the plan to you
+After receiving the plan:
+1. Write it to the plan file specified in plan mode (usually `.emdash/plan.md`) using `write_to_file`
+2. Call `exit_plan` to present for user approval
+3. After approval, implement the plan
+**Plan agent is for IMPLEMENTATION tasks** (building/changing code):
+- "Create a family expense app" → spawn Plan agent
+- "Add authentication routes" → spawn Plan agent
+- "Refactor the database layer" → spawn Plan agent
+**Plan agent is NOT for RESEARCH tasks** (reading/understanding code):
+- "Read the router and report" → use direct tools, no planning needed
+- "What files handle routing?" → use direct tools or Explore agent
+- "How does authentication work?" → use Explore agent
+- "What does this function do?" → just read and answer
+**Trivial implementation tasks** (no planning needed):
+- "Fix this typo" → just fix it
+- "Add a log statement here" → just add it
 ### 1. Understand Before Acting
 - Read code before modifying it
-- Ask clarifying questions when requirements are ambiguous
 - Search for similar patterns already in the codebase
+- When requirements are ambiguous, use `ask_followup_question` tool (not text output)
+  - ONLY after exploring the codebase first - questions should be informed by research
+  - ONLY one question at a time - never ask multiple questions in parallel
+  - Ask the most critical question first, then continue based on the answer
+  - NEVER ask generic questions like "What platform?" without first understanding the codebase
 ### 2. Break Down Hard Problems
 When facing a task you don't immediately know how to solve:
-a) **Decompose**: Split into smaller, concrete sub-tasks
-b) **Explore**: Use sub-agents to gather context (can run in parallel)
-c) **Plan**: Write out your approach before implementing
-d) **Execute**: Work through tasks one at a time
+a) **Spawn Plan Agent**: Call `task(subagent_type="Plan", prompt="...")` to design the approach
+b) **Save Plan**: Write the returned plan to the plan file (specified in plan mode approval)
+c) **Present for Approval**: Call `exit_plan` to show the plan to the user
+d) **Execute**: After approval, implement the plan step by step
 e) **Validate**: Check your work against requirements
-### 3. Use Sub-Agents Strategically
-Spawn sub-agents via the `task` tool when you need:
-- **Explore**: Find files, patterns, or understand code structure
-- **Plan**: Design implementation approach for complex features
+### 3. Targeted vs Open-Ended Queries
+**Targeted queries** (you know what to look for) → Use direct tools:
+- "Read the router" → `glob("**/router*")` then `read_file`
+- "What's in config.ts?" → `read_file("config.ts")`
+- "Find the UserService class" → `grep("class UserService")`
+**Open-ended queries** (need to explore possibilities) → Spawn Explore agent:
+- "Where are errors handled?" → could be many places
+- "How does authentication work?" → requires understanding multiple files
+- "What is the codebase structure?" → broad exploration
+### 4. Parallel Tool Execution
+Run independent searches in parallel (single response with multiple tool calls):
+```
+# Good: parallel independent searches
+glob("**/router*")
+glob("**/pages/**/*.astro")
+→ Both run concurrently, results return together
+```
+### 5. Sub-Agent Decision Matrix
+| Task Type | Example | Sub-Agent |
+|-----------|---------|-----------|
+| **Research (open-ended)** | "How does auth work?" | Explore |
+| **Research (targeted)** | "Read the router" | None (direct tools) |
+| **Implementation (complex)** | "Add user profiles" | Plan |
+| **Implementation (trivial)** | "Fix this typo" | None (just do it) |
-Guidelines:
-- Launch multiple Explore agents in parallel for independent searches
-- Use sub-agents for focused work that would clutter your context
-- Prefer sub-agents over doing 5+ search operations yourself
+**Explore agent**: Open-ended research across multiple files
+- "Where are errors handled?"
+- "What is the codebase structure?"
-### 4. Track Progress
-For multi-step tasks, mentally track what's done and what's next.
-Update the user on progress for long-running work.
+**Plan agent**: Implementation tasks that modify code
+- New features, refactoring, architectural changes
+- NOT for research/reading tasks
 """
 # Exploration strategy for code navigation
 EXPLORATION_STRATEGY = """
 ## Exploration Strategy
-### Start Broad, Then Focus
-1. Understand project structure (list_files on key directories)
-2. Find relevant files (glob for patterns, grep for keywords)
-3. Read key files to understand patterns
-4. Deep dive into specific areas
+### Phase 1: Orient (Where to Start)
+Before searching randomly, understand the codebase structure:
+```
+list_files("src")   → Understand directory structure
+glob("**/*.py")     → Find all Python files
+```
+### Phase 2: Search (Find Relevant Code)
+Use the right tool for the job:
-### Tool Selection
-- **glob** searches file NAMES/PATHS → glob("**/*.py")
-- **grep** searches file CONTENTS → grep("authenticate")
-- **semantic_search** finds conceptually related code
-- Use local tools for the LOCAL codebase
-- Use GitHub/MCP tools for REMOTE repositories, PRs, issues
+| Tool | Searches | Use When | Example |
+|------|----------|----------|---------|
+| `glob` | File paths/names | Know filename pattern | `glob("**/auth*.py")` |
+| `grep` | File contents | Know exact text | `grep("def authenticate")` |
+| `semantic_search` | Conceptual meaning | Fuzzy/conceptual | `semantic_search("user login flow")` |
+**Parallel searches based on multiple hypotheses**:
+When you have context clues, run parallel searches for each possibility:
+```
+# Example: "read the router" in an Astro project
+glob("**/router*")         # Files with "router" in name
+glob("**/pages/**/*.astro") # Astro's file-based routing
+→ Both run in parallel, then read the relevant results
+```
+**Following imports after reading**:
+When you read a file and see an import, read that imported file to complete the picture:
+```
+# After reading src/pages/[...slug].astro which imports AppRouter
+read_file("src/components/Router.tsx")  # Follow the import
+```
+### Phase 3: Understand (Deep Dive)
+Once you find relevant code:
+```
+read_file("src/auth/manager.py")
+→ Read the full file to understand implementation
+read_file("src/auth/manager.py", offset=45, limit=30)
+→ Read specific section (lines 45-75)
+```
+Follow imports and function calls manually by reading related files.
+### Tool Selection Quick Reference
+| Goal | Best Tool |
+|------|-----------|
+| Find by filename | `glob` |
+| Find by content | `grep` |
+| Find by concept | `semantic_search` |
+| Read code | `read_file` |
+| List directory | `list_files` |
+| Web research | `web` |
 ### When Stuck
-1. Step back - what are you actually trying to find?
-2. Try alternative search terms
-3. Look at imports/dependencies for clues
-4. Ask the user for clarification
+1. **Wrong results?** → Try `semantic_search` with different phrasing
+2. **Too many results?** → Add more specific terms to grep
+3. **Need context?** → Read imports at top of file, follow them
+4. **Still lost?** → Ask user ONE focused question with `ask_followup_question` (after exhausting search options)
+### Stopping Criteria
+You have enough context when you can answer:
+- What files/functions are involved?
+- What patterns does the codebase use?
+- What would need to change?
+Stop exploring when you can confidently describe the implementation approach.
+### CRITICAL: After Clarification → Act
+**When you receive an answer to a clarification question, your NEXT action must be implementation/planning - NOT more exploration.**
+The user answered your question. You now have what you need. Act on it.
 """
 # Output formatting guidelines
@@ -68,7 +194,41 @@ OUTPUT_GUIDELINES = """
 - Show relevant code snippets
 - Be concise but thorough
 - Explain your reasoning for complex decisions
-- NEVER provide time estimates (hours, days, weeks). Use complexity sizing: S/M/L/XL
+- NEVER provide time estimates (hours, days, weeks)
+"""
+# Parallel tool execution patterns
+PARALLEL_EXECUTION = """
+## Parallel Tool Execution
+You can execute multiple tools concurrently by invoking them in a single response.
+### How It Works
+- Multiple tool invocations in one message execute concurrently, not sequentially
+- Results return together before continuing
+### Use Parallel Execution For:
+- Reading multiple files simultaneously
+- Running independent grep/glob searches
+- Launching multiple sub-agents for independent exploration
+- Any independent operations that don't depend on each other
+### Use Sequential Execution When:
+- One tool's output is needed for the next (dependencies)
+- Example: read a file before editing it
+- Example: mkdir before cp, git add before git commit
+### Example
+Instead of:
+1. grep for "authenticate" → wait for results
+2. grep for "login" → wait for results
+3. grep for "session" → wait for results
+Do this in ONE message:
+- grep for "authenticate"
+- grep for "login"
+- grep for "session"
+→ All three run concurrently, results return together
 """
 # Efficiency rules for sub-agents with limited turns
@@ -78,6 +238,7 @@ EFFICIENCY_RULES = """
 - If 3 searches return nothing, try different terms or report "not found"
 - Read only the parts of files you need (use offset/limit for large files)
 - Don't read entire files when you only need a specific function
+- Parallelize independent searches - invoke multiple tools in one response
 """
 # Structured output format for exploration results
@@ -96,41 +257,94 @@ Structure your final response as:
 **Confidence**: high/medium/low
 """
-# Plan template for Plan agents
+# Plan template for Plan sub-agents (returns to main agent)
 PLAN_TEMPLATE = """
-## Plan Template
-Use `write_plan` to save your plan. Structure it as:
+## Adaptive Plan Structure
-```markdown
-# [Feature Name] Implementation Plan
+Adapt your plan structure based on these factors:
-## Summary
-Brief description of what this plan accomplishes.
+| Factor | Simple Task | Complex Task |
+|--------|-------------|--------------|
+| **Complexity** | Checklist format | Phases with rollback points |
+| **Risk** | Minimal detail | Detailed with edge cases |
+| **Uncertainty** | Prescriptive steps | Exploratory phases first |
+| **Scope** | Implicit boundaries | Explicit scope & non-goals |
-## Files to Modify
-- `path/to/file.py` - What changes
+### Required Sections (always include)
-## Files to Create (if any)
-- `path/to/new.py` - Purpose
+**Summary**: What and why (1-2 sentences)
-## Implementation Steps
-1. **Step name**: Detailed description
-   - Specific changes to make
-   - Code patterns to follow (reference existing code)
+**Critical Files**: Files to modify with line numbers - this bridges to execution
+- `path/to/file.py:45-60` - What changes
-2. **Step name**: ...
+### Conditional Sections (include only if needed)
-## Edge Cases & Error Handling
-- Case: How to handle
+**Files to Create**: Only if creating new files
+**Phases**: Only for multi-phase work (each phase independently testable)
+**Risks**: Only if non-trivial risks exist
+**Open Questions**: Only if genuine unknowns - mark explicitly, don't hide uncertainty
+**Testing**: Only if tests needed beyond obvious
-## Open Questions
-Things that need clarification.
-```
+### Principles
+- Each section must "earn its place" - no empty boilerplate
+- Detail scales with risk (logout button ≠ database migration)
+- Follow existing codebase patterns, not novel approaches
+- Mark unknowns explicitly rather than pretending certainty
+- **NEVER include time estimates** (no "Day 1-2", "Week 1", hours, days, sprints, timelines)
+### Anti-patterns to Avoid
+- Over-planning simple tasks
+- Under-planning complex/risky ones
+- Hiding uncertainty behind confident language
+- Ignoring existing patterns in the codebase
+- Including time estimates (Days, Weeks, Sprints, etc.) - focus on WHAT, not WHEN
+Your output will be reviewed by the main agent, who will consolidate findings and submit the final plan for user approval.
 """
-# Sizing guidelines (no time estimates)
+# Guidelines (no time estimates)
 SIZING_GUIDELINES = """
-## Sizing (No Time Estimates)
+## Guidelines
 - NEVER include time estimates (no hours, days, weeks, sprints, timelines)
-- Use complexity sizing instead: S (small), M (medium), L (large), XL (extra large)
+- Focus on what needs to be done, not how long it takes
+"""
+# Todo list usage guidance
+TODO_LIST_GUIDANCE = """
+## Todo List Usage
+You have access to `write_todo` and `update_todo_list` tools. Use them strategically - not for every task.
+### When to USE the todo list:
+- **3+ distinct steps** needed to complete the task
+- **Multiple files** need to be changed
+- **User gives a list** of tasks (numbered or comma-separated)
+- **Complex feature** implementation with multiple pieces
+- **Need to track progress** across iterations or when task spans multiple tool calls
+### When to SKIP the todo list:
+- **Single focused change** (one edit, one file)
+- **Trivial fixes** (typo, add a log statement)
+- **Research/informational questions** (just answer them)
+- **Task completes in 1-2 steps** (just do it)
+### Examples:
+**Use todo list:**
+- "Implement user authentication with login, logout, and session management" → 3+ steps, multiple files
+- "Fix these 5 type errors" → list of tasks
+- "Add dark mode support across the app" → complex, multiple files
+**Skip todo list:**
+- "Fix the typo in README" → single focused change
+- "Add tool_choice parameter to this function" → one edit
+- "What files handle routing?" → informational question
+- "Update the error message here" → trivial fix
+### Usage pattern:
+1. Use `write_todo(title="...", reset=true)` to start fresh with first task
+2. Use `write_todo(title="...")` to add more tasks
+3. Use `update_todo_list(task_id="1", status="in_progress")` when starting a task
+4. Use `update_todo_list(task_id="1", status="completed")` when done
+5. Mark tasks complete IMMEDIATELY after finishing - don't batch completions
 """

emdash_core/agent/providers/base.py CHANGED Viewed

@@ -34,11 +34,13 @@ class LLMResponse:
     """Unified response from any LLM provider."""
     content: Optional[str] = None
+    thinking: Optional[str] = None  # Model's chain-of-thought reasoning
     tool_calls: list[ToolCall] = field(default_factory=list)
     raw: Any = None  # Original provider response
     stop_reason: Optional[str] = None
     input_tokens: int = 0  # Tokens in the request
     output_tokens: int = 0  # Tokens in the response
+    thinking_tokens: int = 0  # Tokens used for thinking (if available)
 class LLMProvider(ABC):
@@ -54,6 +56,7 @@ class LLMProvider(ABC):
         tools: Optional[list[dict]] = None,
         system: Optional[str] = None,
         reasoning: bool = False,
+        thinking: bool = False,
         images: Optional[list[ImageContent]] = None,
     ) -> LLMResponse:
         """Send a chat completion request.
@@ -63,6 +66,7 @@ class LLMProvider(ABC):
             tools: Optional list of tool schemas
             system: Optional system prompt (will be prepended or handled per provider)
             reasoning: Enable reasoning mode (for models that support it)
+            thinking: Enable extended thinking (for models that support it)
             images: Optional list of images for vision-capable models
         Returns:

emdash_core/agent/providers/factory.py CHANGED Viewed

@@ -13,8 +13,8 @@ from .transformers_provider import TransformersProvider
 # Configuration - Single source of truth
 # ═══════════════════════════════════════════════════════════════════════════════
-# Default model alias
-DEFAULT_MODEL = "fireworks:accounts/fireworks/models/minimax-m2p1"
+# Default model alias - uses OPENAI_BASE_URL if set
+DEFAULT_MODEL = os.environ.get("EMDASH_DEFAULT_MODEL", "fireworks:accounts/fireworks/models/minimax-m2p1")
 # Default API key environment variable (used by default model)
 DEFAULT_API_KEY_ENV = "FIREWORKS_API_KEY"

emdash_core/agent/providers/models.py CHANGED Viewed

@@ -16,6 +16,7 @@ class ChatModelSpec:
     max_output_tokens: int  # Max output tokens
     supports_tools: bool  # Whether model supports function calling
     supports_vision: bool  # Whether model supports image input
+    supports_thinking: bool  # Whether model supports extended thinking
     description: str  # Human-readable description
@@ -43,6 +44,7 @@ class ChatModel(Enum):
         max_output_tokens=32000,
         supports_tools=True,
         supports_vision=True,
+        supports_thinking=True,
         description="Claude Opus 4 - Most capable, complex reasoning",
     )
@@ -54,6 +56,7 @@ class ChatModel(Enum):
         max_output_tokens=16000,
         supports_tools=True,
         supports_vision=True,
+        supports_thinking=True,
         description="Claude Sonnet 4 - Balanced performance and cost",
     )
@@ -65,6 +68,7 @@ class ChatModel(Enum):
         max_output_tokens=8192,
         supports_tools=True,
         supports_vision=True,
+        supports_thinking=False,
         description="Claude Haiku 4.5 - Fast and efficient",
     )
@@ -80,6 +84,7 @@ class ChatModel(Enum):
         max_output_tokens=16384,
         supports_tools=True,
         supports_vision=True,
+        supports_thinking=False,
         description="GPT-4o Mini - Fast and cost-effective",
     )
@@ -95,6 +100,7 @@ class ChatModel(Enum):
         max_output_tokens=16384,
         supports_tools=True,
         supports_vision=False,
+        supports_thinking=False,
         description="GLM-4P7 - Fireworks GLM model",
     )
@@ -106,6 +112,7 @@ class ChatModel(Enum):
         max_output_tokens=16384,
         supports_tools=True,
         supports_vision=False,
+        supports_thinking=False,
         description="MiniMax M2P1 - Long context model",
     )

emdash-core 0.1.7__py3-none-any.whl → 0.1.33__py3-none-any.whl

emdash-core 0.1.7__py3-none-any.whl → 0.1.33__py3-none-any.whl