emdash-core 0.1.25__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. emdash_core/agent/__init__.py +4 -0
  2. emdash_core/agent/events.py +42 -20
  3. emdash_core/agent/inprocess_subagent.py +123 -10
  4. emdash_core/agent/prompts/__init__.py +4 -3
  5. emdash_core/agent/prompts/main_agent.py +32 -2
  6. emdash_core/agent/prompts/plan_mode.py +236 -107
  7. emdash_core/agent/prompts/subagents.py +79 -15
  8. emdash_core/agent/prompts/workflow.py +145 -26
  9. emdash_core/agent/providers/factory.py +2 -2
  10. emdash_core/agent/providers/openai_provider.py +67 -15
  11. emdash_core/agent/runner/__init__.py +49 -0
  12. emdash_core/agent/runner/agent_runner.py +753 -0
  13. emdash_core/agent/runner/context.py +451 -0
  14. emdash_core/agent/runner/factory.py +108 -0
  15. emdash_core/agent/runner/plan.py +217 -0
  16. emdash_core/agent/runner/sdk_runner.py +324 -0
  17. emdash_core/agent/runner/utils.py +67 -0
  18. emdash_core/agent/skills.py +47 -8
  19. emdash_core/agent/toolkit.py +46 -14
  20. emdash_core/agent/toolkits/plan.py +9 -11
  21. emdash_core/agent/tools/__init__.py +2 -2
  22. emdash_core/agent/tools/coding.py +48 -4
  23. emdash_core/agent/tools/modes.py +151 -143
  24. emdash_core/agent/tools/task.py +41 -2
  25. emdash_core/api/agent.py +555 -1
  26. emdash_core/skills/frontend-design/SKILL.md +56 -0
  27. emdash_core/sse/stream.py +4 -0
  28. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/METADATA +2 -1
  29. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/RECORD +31 -24
  30. emdash_core/agent/runner.py +0 -1123
  31. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/WHEEL +0 -0
  32. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/entry_points.txt +0 -0
@@ -8,6 +8,46 @@ consistent behavior across agent types.
8
8
  WORKFLOW_PATTERNS = """
9
9
  ## Workflow for Complex Tasks
10
10
 
11
+ ### User Plan Mode Commands
12
+
13
+ When the user explicitly asks to "enter plan mode" or says "plan mode":
14
+ - Call `enter_plan_mode(reason="User requested to enter plan mode for task planning")`
15
+ - This REQUIRES user approval before plan mode activates
16
+ - Do NOT ask clarification questions instead - use the tool
17
+
18
+ ### CRITICAL: Spawn Plan Agent for Non-Trivial Tasks
19
+
20
+ For ANY task that involves:
21
+ - Creating new features or applications
22
+ - Multi-file changes
23
+ - Architectural decisions
24
+ - Unclear or ambiguous requirements
25
+
26
+ You MUST spawn a **Plan agent** via the `task` tool FIRST before implementing. The Plan agent will:
27
+ 1. Explore the codebase to understand patterns and architecture
28
+ 2. Design a concrete implementation plan
29
+ 3. Return the plan to you
30
+
31
+ After receiving the plan:
32
+ 1. Write it to the plan file specified in plan mode (usually `.emdash/plan.md`) using `write_to_file`
33
+ 2. Call `exit_plan` to present for user approval
34
+ 3. After approval, implement the plan
35
+
36
+ **Plan agent is for IMPLEMENTATION tasks** (building/changing code):
37
+ - "Create a family expense app" → spawn Plan agent
38
+ - "Add authentication routes" → spawn Plan agent
39
+ - "Refactor the database layer" → spawn Plan agent
40
+
41
+ **Plan agent is NOT for RESEARCH tasks** (reading/understanding code):
42
+ - "Read the router and report" → use direct tools, no planning needed
43
+ - "What files handle routing?" → use direct tools or Explore agent
44
+ - "How does authentication work?" → use Explore agent
45
+ - "What does this function do?" → just read and answer
46
+
47
+ **Trivial implementation tasks** (no planning needed):
48
+ - "Fix this typo" → just fix it
49
+ - "Add a log statement here" → just add it
50
+
11
51
  ### 1. Understand Before Acting
12
52
  - Read code before modifying it
13
53
  - Search for similar patterns already in the codebase
@@ -15,30 +55,55 @@ WORKFLOW_PATTERNS = """
15
55
  - ONLY after exploring the codebase first - questions should be informed by research
16
56
  - ONLY one question at a time - never ask multiple questions in parallel
17
57
  - Ask the most critical question first, then continue based on the answer
58
+ - NEVER ask generic questions like "What platform?" without first understanding the codebase
18
59
 
19
60
  ### 2. Break Down Hard Problems
20
61
  When facing a task you don't immediately know how to solve:
21
62
 
22
- a) **Decompose**: Split into smaller, concrete sub-tasks
23
- b) **Explore**: Use sub-agents to gather context (can run in parallel)
24
- c) **Plan**: Write out your approach before implementing
25
- d) **Submit**: Use `exit_plan` tool when your plan is ready for user approval
26
- e) **Execute**: Work through tasks one at a time
27
- f) **Validate**: Check your work against requirements
28
-
29
- ### 3. Use Sub-Agents Strategically
30
- Spawn sub-agents via the `task` tool when you need:
31
- - **Explore**: Find files, patterns, or understand code structure
32
- - **Plan**: Design implementation approach for complex features
33
-
34
- Guidelines:
35
- - Launch multiple Explore agents in parallel for independent searches
36
- - Use sub-agents for focused work that would clutter your context
37
- - Prefer sub-agents over doing 5+ search operations yourself
38
-
39
- ### 4. Track Progress
40
- For multi-step tasks, mentally track what's done and what's next.
41
- Update the user on progress for long-running work.
63
+ a) **Spawn Plan Agent**: Call `task(subagent_type="Plan", prompt="...")` to design the approach
64
+ b) **Save Plan**: Write the returned plan to the plan file (specified in plan mode approval)
65
+ c) **Present for Approval**: Call `exit_plan` to show the plan to the user
66
+ d) **Execute**: After approval, implement the plan step by step
67
+ e) **Validate**: Check your work against requirements
68
+
69
+ ### 3. Targeted vs Open-Ended Queries
70
+
71
+ **Targeted queries** (you know what to look for) → Use direct tools:
72
+ - "Read the router" → `glob("**/router*")` then `read_file`
73
+ - "What's in config.ts?" → `read_file("config.ts")`
74
+ - "Find the UserService class" → `grep("class UserService")`
75
+
76
+ **Open-ended queries** (need to explore possibilities) → Spawn Explore agent:
77
+ - "Where are errors handled?" → could be many places
78
+ - "How does authentication work?" → requires understanding multiple files
79
+ - "What is the codebase structure?" → broad exploration
80
+
81
+ ### 4. Parallel Tool Execution
82
+
83
+ Run independent searches in parallel (single response with multiple tool calls):
84
+ ```
85
+ # Good: parallel independent searches
86
+ glob("**/router*")
87
+ glob("**/pages/**/*.astro")
88
+ → Both run concurrently, results return together
89
+ ```
90
+
91
+ ### 5. Sub-Agent Decision Matrix
92
+
93
+ | Task Type | Example | Sub-Agent |
94
+ |-----------|---------|-----------|
95
+ | **Research (open-ended)** | "How does auth work?" | Explore |
96
+ | **Research (targeted)** | "Read the router" | None (direct tools) |
97
+ | **Implementation (complex)** | "Add user profiles" | Plan |
98
+ | **Implementation (trivial)** | "Fix this typo" | None (just do it) |
99
+
100
+ **Explore agent**: Open-ended research across multiple files
101
+ - "Where are errors handled?"
102
+ - "What is the codebase structure?"
103
+
104
+ **Plan agent**: Implementation tasks that modify code
105
+ - New features, refactoring, architectural changes
106
+ - NOT for research/reading tasks
42
107
  """
43
108
 
44
109
  # Exploration strategy for code navigation
@@ -62,13 +127,20 @@ Use the right tool for the job:
62
127
  | `grep` | File contents | Know exact text | `grep("def authenticate")` |
63
128
  | `semantic_search` | Conceptual meaning | Fuzzy/conceptual | `semantic_search("user login flow")` |
64
129
 
65
- **Parallel searches**: Run 2-3 searches together when exploring:
130
+ **Parallel searches based on multiple hypotheses**:
131
+ When you have context clues, run parallel searches for each possibility:
132
+ ```
133
+ # Example: "read the router" in an Astro project
134
+ glob("**/router*") # Files with "router" in name
135
+ glob("**/pages/**/*.astro") # Astro's file-based routing
136
+ → Both run in parallel, then read the relevant results
137
+ ```
138
+
139
+ **Following imports after reading**:
140
+ When you read a file and see an import, read that imported file to complete the picture:
66
141
  ```
67
- # In one response, invoke all three:
68
- grep("authenticate")
69
- grep("login")
70
- grep("session")
71
- → All run concurrently, results return together
142
+ # After reading src/pages/[...slug].astro which imports AppRouter
143
+ read_file("src/components/Router.tsx") # Follow the import
72
144
  ```
73
145
 
74
146
  ### Phase 3: Understand (Deep Dive)
@@ -108,6 +180,11 @@ You have enough context when you can answer:
108
180
  - What would need to change?
109
181
 
110
182
  Stop exploring when you can confidently describe the implementation approach.
183
+
184
+ ### CRITICAL: After Clarification → Act
185
+ **When you receive an answer to a clarification question, your NEXT action must be implementation/planning - NOT more exploration.**
186
+
187
+ The user answered your question. You now have what you need. Act on it.
111
188
  """
112
189
 
113
190
  # Output formatting guidelines
@@ -213,12 +290,14 @@ Adapt your plan structure based on these factors:
213
290
  - Detail scales with risk (logout button ≠ database migration)
214
291
  - Follow existing codebase patterns, not novel approaches
215
292
  - Mark unknowns explicitly rather than pretending certainty
293
+ - **NEVER include time estimates** (no "Day 1-2", "Week 1", hours, days, sprints, timelines)
216
294
 
217
295
  ### Anti-patterns to Avoid
218
296
  - Over-planning simple tasks
219
297
  - Under-planning complex/risky ones
220
298
  - Hiding uncertainty behind confident language
221
299
  - Ignoring existing patterns in the codebase
300
+ - Including time estimates (Days, Weeks, Sprints, etc.) - focus on WHAT, not WHEN
222
301
 
223
302
  Your output will be reviewed by the main agent, who will consolidate findings and submit the final plan for user approval.
224
303
  """
@@ -229,3 +308,43 @@ SIZING_GUIDELINES = """
229
308
  - NEVER include time estimates (no hours, days, weeks, sprints, timelines)
230
309
  - Focus on what needs to be done, not how long it takes
231
310
  """
311
+
312
+ # Todo list usage guidance
313
+ TODO_LIST_GUIDANCE = """
314
+ ## Todo List Usage
315
+
316
+ You have access to `write_todo` and `update_todo_list` tools. Use them strategically - not for every task.
317
+
318
+ ### When to USE the todo list:
319
+ - **3+ distinct steps** needed to complete the task
320
+ - **Multiple files** need to be changed
321
+ - **User gives a list** of tasks (numbered or comma-separated)
322
+ - **Complex feature** implementation with multiple pieces
323
+ - **Need to track progress** across iterations or when task spans multiple tool calls
324
+
325
+ ### When to SKIP the todo list:
326
+ - **Single focused change** (one edit, one file)
327
+ - **Trivial fixes** (typo, add a log statement)
328
+ - **Research/informational questions** (just answer them)
329
+ - **Task completes in 1-2 steps** (just do it)
330
+
331
+ ### Examples:
332
+
333
+ **Use todo list:**
334
+ - "Implement user authentication with login, logout, and session management" → 3+ steps, multiple files
335
+ - "Fix these 5 type errors" → list of tasks
336
+ - "Add dark mode support across the app" → complex, multiple files
337
+
338
+ **Skip todo list:**
339
+ - "Fix the typo in README" → single focused change
340
+ - "Add tool_choice parameter to this function" → one edit
341
+ - "What files handle routing?" → informational question
342
+ - "Update the error message here" → trivial fix
343
+
344
+ ### Usage pattern:
345
+ 1. Use `write_todo(title="...", reset=true)` to start fresh with first task
346
+ 2. Use `write_todo(title="...")` to add more tasks
347
+ 3. Use `update_todo_list(task_id="1", status="in_progress")` when starting a task
348
+ 4. Use `update_todo_list(task_id="1", status="completed")` when done
349
+ 5. Mark tasks complete IMMEDIATELY after finishing - don't batch completions
350
+ """
@@ -13,8 +13,8 @@ from .transformers_provider import TransformersProvider
13
13
  # Configuration - Single source of truth
14
14
  # ═══════════════════════════════════════════════════════════════════════════════
15
15
 
16
- # Default model alias
17
- DEFAULT_MODEL = "fireworks:accounts/fireworks/models/minimax-m2p1"
16
+ # Default model alias - overridable via EMDASH_DEFAULT_MODEL env var
17
+ DEFAULT_MODEL = os.environ.get("EMDASH_DEFAULT_MODEL", "fireworks:accounts/fireworks/models/minimax-m2p1")
18
18
 
19
19
  # Default API key environment variable (used by default model)
20
20
  DEFAULT_API_KEY_ENV = "FIREWORKS_API_KEY"
@@ -2,6 +2,7 @@
2
2
 
3
3
  import os
4
4
  import base64
5
+ import time
5
6
  from typing import Optional, Union
6
7
 
7
8
  from openai import OpenAI
@@ -69,9 +70,9 @@ class OpenAIProvider(LLMProvider):
69
70
  self._context_limit = 128000
70
71
  self._provider = self._infer_provider(model)
71
72
 
72
- # Override provider if OPENAI_BASE_URL is set (custom OpenAI-compatible API)
73
- if os.environ.get("OPENAI_BASE_URL"):
74
- self._provider = "openai"
73
+ # Note: We no longer override provider based on OPENAI_BASE_URL
74
+ # Each provider (fireworks, anthropic) uses its own base_url
75
+ # OPENAI_BASE_URL only applies to "openai" provider
75
76
 
76
77
  # Create OpenAI client with provider-specific configuration
77
78
  config = PROVIDER_CONFIG.get(self._provider, PROVIDER_CONFIG["openai"])
@@ -136,10 +137,24 @@ class OpenAIProvider(LLMProvider):
136
137
  self._reasoning_override = self._parse_bool_env("EMDASH_LLM_REASONING")
137
138
  self._thinking_override = self._parse_bool_env("EMDASH_LLM_THINKING")
138
139
  self._thinking_budget = int(os.environ.get("EMDASH_THINKING_BUDGET", "10000"))
140
+ # Reasoning effort for Fireworks thinking models: none, low, medium, high
141
+ self._reasoning_effort = os.environ.get("EMDASH_REASONING_EFFORT", "medium")
142
+
143
+ # Use OPENAI_BASE_URL env var only for OpenAI provider, otherwise use provider config
144
+ if self._provider == "openai":
145
+ base_url = os.environ.get("OPENAI_BASE_URL") or config["base_url"]
146
+ else:
147
+ base_url = config["base_url"]
148
+
149
+ # Configure timeout from environment (default 300 seconds / 5 minutes)
150
+ # LLM calls can take a while with large contexts, so we use a generous default
151
+ timeout_seconds = int(os.environ.get("EMDASH_LLM_TIMEOUT", "300"))
152
+ self._timeout = timeout_seconds
139
153
 
140
154
  self.client = OpenAI(
141
155
  api_key=api_key,
142
- base_url=config["base_url"],
156
+ base_url=base_url,
157
+ timeout=timeout_seconds,
143
158
  )
144
159
 
145
160
  @staticmethod
@@ -175,13 +190,10 @@ class OpenAIProvider(LLMProvider):
175
190
  def _infer_provider(self, model: str) -> str:
176
191
  """Infer provider from model string.
177
192
 
178
- If OPENAI_BASE_URL is set, always returns 'openai' to use the custom
179
- OpenAI-compatible API endpoint with OPENAI_API_KEY.
193
+ Returns the appropriate provider based on model name.
194
+ OPENAI_BASE_URL only affects the openai provider's base URL,
195
+ not provider selection.
180
196
  """
181
- # If custom base URL is set, use openai provider (uses OPENAI_API_KEY)
182
- if os.environ.get("OPENAI_BASE_URL"):
183
- return "openai"
184
-
185
197
  model_lower = model.lower()
186
198
  if "claude" in model_lower or "anthropic" in model_lower:
187
199
  return "anthropic"
@@ -231,6 +243,7 @@ class OpenAIProvider(LLMProvider):
231
243
  # Add tools if provided
232
244
  if tools:
233
245
  kwargs["tools"] = tools
246
+ kwargs["tool_choice"] = "auto"
234
247
 
235
248
  # Add reasoning support via extra_body for providers that support it
236
249
  # Skip reasoning for custom base URLs (they may not support it)
@@ -254,6 +267,17 @@ class OpenAIProvider(LLMProvider):
254
267
  self._thinking_budget,
255
268
  )
256
269
 
270
+ # Add reasoning_effort for Fireworks thinking models
271
+ # This controls the depth of reasoning: none, low, medium, high
272
+ if thinking and self._provider == "fireworks" and self._reasoning_effort != "none":
273
+ kwargs["reasoning_effort"] = self._reasoning_effort
274
+ log.info(
275
+ "Reasoning effort enabled provider={} model={} effort={}",
276
+ self._provider,
277
+ self.model,
278
+ self._reasoning_effort,
279
+ )
280
+
257
281
  # Add images if provided (vision support)
258
282
  if images:
259
283
  log.info(
@@ -312,21 +336,32 @@ class OpenAIProvider(LLMProvider):
312
336
  )
313
337
 
314
338
  # Call OpenAI SDK
339
+ start_time = time.time()
315
340
  try:
316
341
  response = self.client.chat.completions.create(**kwargs)
317
342
  except Exception as exc: # pragma: no cover - defensive logging
343
+ elapsed = time.time() - start_time
318
344
  status = getattr(exc, "status_code", None)
319
345
  code = getattr(exc, "code", None)
320
346
  log.exception(
321
- "LLM request failed provider={} model={} status={} code={} error={}",
347
+ "LLM request failed provider={} model={} status={} code={} elapsed={:.1f}s error={}",
322
348
  self._provider,
323
349
  self.model,
324
350
  status,
325
351
  code,
352
+ elapsed,
326
353
  exc,
327
354
  )
328
355
  raise
329
356
 
357
+ elapsed = time.time() - start_time
358
+ log.info(
359
+ "LLM request completed provider={} model={} elapsed={:.1f}s",
360
+ self._provider,
361
+ self.model,
362
+ elapsed,
363
+ )
364
+
330
365
  return self._to_llm_response(response)
331
366
 
332
367
  def _to_llm_response(self, response) -> LLMResponse:
@@ -374,6 +409,16 @@ class OpenAIProvider(LLMProvider):
374
409
  # Simple string content
375
410
  content = raw_content
376
411
 
412
+ # Check for reasoning_content field (Fireworks/OpenAI thinking models)
413
+ # This is separate from Anthropic's content blocks format
414
+ if not thinking and hasattr(message, "reasoning_content") and message.reasoning_content:
415
+ thinking = message.reasoning_content
416
+ log.debug(
417
+ "Reasoning content extracted from message.reasoning_content provider={} len={}",
418
+ self._provider,
419
+ len(thinking),
420
+ )
421
+
377
422
  # Extract tool calls
378
423
  tool_calls = []
379
424
  if message.tool_calls:
@@ -391,17 +436,24 @@ class OpenAIProvider(LLMProvider):
391
436
  if hasattr(response, "usage") and response.usage:
392
437
  input_tokens = getattr(response.usage, "prompt_tokens", 0) or 0
393
438
  output_tokens = getattr(response.usage, "completion_tokens", 0) or 0
394
- # Anthropic returns thinking tokens in cache_creation_input_tokens or similar
395
- # For now, estimate from the thinking content length
396
- if thinking:
439
+ # Try to get reasoning/thinking tokens from the API response
440
+ # Different providers use different field names
441
+ thinking_tokens = (
442
+ getattr(response.usage, "reasoning_tokens", 0)
443
+ or getattr(response.usage, "thinking_tokens", 0)
444
+ or 0
445
+ )
446
+ # If no explicit thinking tokens but we have thinking content, estimate
447
+ if not thinking_tokens and thinking:
397
448
  thinking_tokens = len(thinking) // 4 # Rough estimate
398
449
 
399
450
  if thinking:
400
451
  log.info(
401
- "Extended thinking captured provider={} model={} thinking_len={}",
452
+ "Extended thinking captured provider={} model={} thinking_len={} thinking_tokens={}",
402
453
  self._provider,
403
454
  self.model,
404
455
  len(thinking),
456
+ thinking_tokens,
405
457
  )
406
458
 
407
459
  return LLMResponse(
@@ -0,0 +1,49 @@
1
+ """Agent runner module for LLM-powered exploration.
2
+
3
+ This module provides the AgentRunner class and related utilities for running
4
+ LLM agents with tool access for code exploration.
5
+
6
+ The module is organized as follows:
7
+ - agent_runner.py: Main AgentRunner class
8
+ - context.py: Context estimation, compaction, and management
9
+ - plan.py: Plan approval/rejection functionality
10
+ - utils.py: JSON encoding and utility functions
11
+ """
12
+
13
+ from .agent_runner import AgentRunner
14
+ from .sdk_runner import SDKAgentRunner, is_claude_model
15
+ from .factory import get_runner, create_hybrid_runner
16
+ from .utils import SafeJSONEncoder, summarize_tool_result
17
+ from .context import (
18
+ estimate_context_tokens,
19
+ get_context_breakdown,
20
+ maybe_compact_context,
21
+ compact_messages_with_llm,
22
+ format_messages_for_summary,
23
+ get_reranked_context,
24
+ emit_context_frame,
25
+ )
26
+ from .plan import PlanMixin
27
+
28
+ __all__ = [
29
+ # Main classes
30
+ "AgentRunner",
31
+ "SDKAgentRunner",
32
+ # Factory functions
33
+ "get_runner",
34
+ "create_hybrid_runner",
35
+ "is_claude_model",
36
+ # Utils
37
+ "SafeJSONEncoder",
38
+ "summarize_tool_result",
39
+ # Context functions
40
+ "estimate_context_tokens",
41
+ "get_context_breakdown",
42
+ "maybe_compact_context",
43
+ "compact_messages_with_llm",
44
+ "format_messages_for_summary",
45
+ "get_reranked_context",
46
+ "emit_context_frame",
47
+ # Plan management
48
+ "PlanMixin",
49
+ ]