emdash-core 0.1.37__py3-none-any.whl → 0.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emdash_core/agent/agents.py +9 -0
- emdash_core/agent/background.py +481 -0
- emdash_core/agent/inprocess_subagent.py +70 -1
- emdash_core/agent/mcp/config.py +78 -2
- emdash_core/agent/prompts/main_agent.py +53 -1
- emdash_core/agent/prompts/plan_mode.py +65 -44
- emdash_core/agent/prompts/subagents.py +73 -1
- emdash_core/agent/prompts/workflow.py +179 -28
- emdash_core/agent/providers/models.py +1 -1
- emdash_core/agent/providers/openai_provider.py +10 -0
- emdash_core/agent/research/researcher.py +154 -45
- emdash_core/agent/runner/agent_runner.py +145 -19
- emdash_core/agent/runner/sdk_runner.py +29 -2
- emdash_core/agent/skills.py +81 -1
- emdash_core/agent/toolkit.py +87 -11
- emdash_core/agent/tools/__init__.py +2 -0
- emdash_core/agent/tools/coding.py +344 -52
- emdash_core/agent/tools/lsp.py +361 -0
- emdash_core/agent/tools/skill.py +21 -1
- emdash_core/agent/tools/task.py +16 -19
- emdash_core/agent/tools/task_output.py +262 -32
- emdash_core/agent/verifier/__init__.py +11 -0
- emdash_core/agent/verifier/manager.py +295 -0
- emdash_core/agent/verifier/models.py +97 -0
- emdash_core/{swarm/worktree_manager.py → agent/worktree.py} +19 -1
- emdash_core/api/agent.py +297 -2
- emdash_core/api/research.py +3 -3
- emdash_core/api/router.py +0 -4
- emdash_core/context/longevity.py +197 -0
- emdash_core/context/providers/explored_areas.py +83 -39
- emdash_core/context/reranker.py +35 -144
- emdash_core/context/simple_reranker.py +500 -0
- emdash_core/context/tool_relevance.py +84 -0
- emdash_core/core/config.py +8 -0
- emdash_core/graph/__init__.py +8 -1
- emdash_core/graph/connection.py +24 -3
- emdash_core/graph/writer.py +7 -1
- emdash_core/models/agent.py +10 -0
- emdash_core/server.py +1 -6
- emdash_core/sse/stream.py +16 -1
- emdash_core/utils/__init__.py +0 -2
- emdash_core/utils/git.py +103 -0
- emdash_core/utils/image.py +147 -160
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/METADATA +6 -6
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/RECORD +47 -52
- emdash_core/api/swarm.py +0 -223
- emdash_core/db/__init__.py +0 -67
- emdash_core/db/auth.py +0 -134
- emdash_core/db/models.py +0 -91
- emdash_core/db/provider.py +0 -222
- emdash_core/db/providers/__init__.py +0 -5
- emdash_core/db/providers/supabase.py +0 -452
- emdash_core/swarm/__init__.py +0 -17
- emdash_core/swarm/merge_agent.py +0 -383
- emdash_core/swarm/session_manager.py +0 -274
- emdash_core/swarm/swarm_runner.py +0 -226
- emdash_core/swarm/task_definition.py +0 -137
- emdash_core/swarm/worker_spawner.py +0 -319
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/WHEEL +0 -0
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/entry_points.txt +0 -0
emdash_core/agent/prompts/workflow.py
@@ -48,35 +48,38 @@ After receiving the plan:
 - "Fix this typo" → just fix it
 - "Add a log statement here" → just add it

-### 1.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+### 1. Know What To Do → Plan-First, Execute
+
+When you understand the task and know how to approach it:
+1. State a brief plan (3-5 steps)
+2. Execute directly - don't explore "just in case"
+3. Read only the files your plan requires
+
+Examples:
+- "Add logout button to settings" → You know where settings is, just do it
+- "Fix the typo in README" → Just fix it
+- "Update the API endpoint" → Read it, update it, done
+
+### 2. Don't Know What To Do → Explore First
+
+When you're genuinely uncertain about the codebase or approach:
+- **Spawn Explore agent** for open-ended research across multiple files
+- **Ask ONE clarifying question** if you need user input (not multiple)
+
+Examples:
+- "Where are errors handled?" → Explore agent (could be many places)
+- "How does authentication work?" → Explore agent (multiple files)
+- "What framework should I use?" → Ask user (decision needed)
+
+### 3. Direct Tools vs Explore Agent
+
+**Use direct tools** when you know what to look for:
 - "Read the router" → `glob("**/router*")` then `read_file`
-- "
-- "Find the UserService class" → `grep("class UserService")`
+- "Find UserService class" → `grep("class UserService")`

-**
-- "
-- "How does
-- "What is the codebase structure?" → broad exploration
+**Spawn Explore agent** when you need broad exploration:
+- "What is the codebase structure?"
+- "How does X integrate with Y?"

 ### 4. Parallel Tool Execution

@@ -110,6 +113,34 @@ glob("**/pages/**/*.astro")
 - Spawned via `task(subagent_type="<agent-name>", prompt="...")`
 - Use the same tools as Explore agent (read-only by default)
 - Examples: security-audit, api-review, test-generator
+
+### 6. Iterating with Spawned Agents
+
+Users may want to **continue iterating** with a spawned agent's findings:
+
+**Follow-up patterns to recognize:**
+- "Tell me more about X" (where X was in agent's findings)
+- "Go deeper on the auth module"
+- "What about error handling there?"
+- "Can you explore that further?"
+
+**When user wants to iterate:**
+1. **Spawn the same agent again** with a refined prompt that builds on previous findings
+2. Include relevant context from the previous response in the new prompt
+3. Be specific about what to explore further
+
+**Example iteration:**
+```
+User: "spawn explore agent to find auth code"
+→ Agent finds auth in src/auth/ with 5 files
+
+User: "go deeper on the session handling"
+→ Spawn Explore again: "In src/auth/, analyze session handling in detail.
+   Previous exploration found auth.py, session.py, middleware.py.
+   Focus on how sessions are created, validated, and expired."
+```
+
+**Key principle:** The user sees the spawned agent's thinking and findings in real-time. They may want to drill down, pivot, or expand the exploration. Always be ready to spawn another agent with a more focused or expanded prompt based on what was found.
 """

 # Exploration strategy for code navigation
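As a concrete illustration of the iteration pattern this hunk adds: a minimal sketch of a follow-up spawn, assuming a `task` callable with the signature the prompt text describes. The `"explore"` agent name and the helper function are illustrative, not part of the package's documented API.

```python
# Minimal sketch of the follow-up pattern described above. The `task` callable's
# signature comes from the prompt text; everything else here is illustrative.
def follow_up_explore(task, previous_findings: str, focus: str):
    """Spawn the Explore agent again with a prompt that builds on prior findings."""
    prompt = (
        f"{focus}\n"
        f"Previous exploration found: {previous_findings}\n"
        "Focus on this area only; do not re-explore the rest of the codebase."
    )
    # Hypothetical agent name; the diff only shows the placeholder "<agent-name>".
    return task(subagent_type="explore", prompt=prompt)
```

Mirroring the prompt's own example, a second pass would call this with `focus="In src/auth/, analyze session handling in detail."` and the file list returned by the first exploration.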
@@ -203,6 +234,79 @@ OUTPUT_GUIDELINES = """
 - NEVER provide time estimates (hours, days, weeks)
 """

+# Verification and self-critique after changes
+VERIFICATION_AND_CRITIQUE = """
+## Verification & Self-Critique
+
+After making changes, you MUST verify they work correctly. Don't assume success - prove it.
+
+### Verification Steps
+
+**1. Syntax & Build Check**
+After code changes, run the appropriate check:
+- Python: `python -m py_compile <file>` or run tests
+- TypeScript/JS: `tsc --noEmit` or `npm run build`
+- Rust: `cargo check`
+- Go: `go build`
+
+**2. Behavioral Verification**
+Depending on what changed:
+| Change Type | Verification |
+|-------------|--------------|
+| Moving/renaming files | Check imports still resolve, run build |
+| Refactoring functions | Run related tests, verify callers work |
+| API changes | Check all consumers updated |
+| Config changes | Restart/reload to verify config loads |
+| Database changes | Verify migrations, check queries |
+
+**3. Self-Critique Checklist**
+Before declaring "done", ask yourself:
+- [ ] Did I break any existing functionality?
+- [ ] Are all imports/references updated?
+- [ ] Did I introduce any regressions?
+- [ ] Would a code reviewer approve this?
+- [ ] Did I test the happy path AND edge cases?
+
+### Critical Scenarios Requiring Extra Verification
+
+**Moving/Renaming Files:**
+```
+1. Update all imports in dependent files
+2. Run build to catch broken references
+3. Grep for old path to ensure nothing was missed
+4. Run tests to verify functionality preserved
+```
+
+**Deleting Code:**
+```
+1. Search for usages before deleting
+2. Verify nothing depends on deleted code
+3. Run tests to catch regressions
+```
+
+**Changing Function Signatures:**
+```
+1. Update all callers
+2. Run type checker (if available)
+3. Run tests covering the changed function
+```
+
+### When Verification Fails
+
+If verification reveals issues:
+1. **Don't ignore it** - fix the problem
+2. **Update your todo list** - add fix tasks
+3. **Re-verify after fixing** - ensure the fix works
+4. **Learn from it** - what did you miss initially?
+
+### Anti-Patterns to Avoid
+- Saying "done" without running build/tests
+- Assuming refactors don't break anything
+- Skipping verification because "it's a small change"
+- Moving on when tests fail
+- Ignoring type errors or warnings
+"""
+
 # Parallel tool execution patterns
 PARALLEL_EXECUTION = """
 ## Parallel Tool Execution
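The "Syntax & Build Check" step maps directly onto subprocess calls. This release also ships a new `emdash_core/agent/verifier/` package (see the file list above), but its internals are not in this diff, so the sketch below is only one plausible shape for the per-language checks, not the actual implementation.

```python
# Illustrative only: run the per-language syntax/build checks the new
# VERIFICATION_AND_CRITIQUE prompt lists. Not taken from emdash_core/agent/verifier/.
import subprocess
from pathlib import Path

CHECKS = {
    ".py": lambda f: ["python", "-m", "py_compile", str(f)],
    ".ts": lambda f: ["tsc", "--noEmit"],  # checks the whole project, not one file
    ".rs": lambda f: ["cargo", "check"],   # likewise project-wide
    ".go": lambda f: ["go", "build"],
}

def syntax_check(path: Path) -> bool:
    """Return True if the file passes its language's check (or has none registered)."""
    make_cmd = CHECKS.get(path.suffix)
    if make_cmd is None:
        return True  # no check registered for this extension
    result = subprocess.run(make_cmd(path), capture_output=True, text=True)
    return result.returncode == 0
```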
@@ -237,6 +341,17 @@ Do this in ONE message:
 → All three run concurrently, results return together
 """

+# Plan-First rule to prevent over-exploration
+PLAN_FIRST_RULE = """
+## Plan-First Reminder
+
+**Know what to do?** → State a 3-5 step plan, then execute. Don't explore beyond your plan.
+
+**Don't know?** → Spawn Explore agent or ask ONE clarifying question.
+
+Trust user context - if they say "the file" or "this", they know which one.
+"""
+
 # Efficiency rules for sub-agents with limited turns
 EFFICIENCY_RULES = """
 ## Efficiency Rules
@@ -333,7 +448,7 @@ You have access to `write_todo` and `update_todo_list` tools. **Use them frequen
 - User can see your progress in real-time
 - Helps you stay organized on complex tasks
 - Creates a clear record of what was done
-- Prevents forgetting steps
+- **Prevents forgetting steps** - you won't accidentally stop before completing all tasks

 ### Only SKIP the todo list for:
 - **Truly trivial fixes** (single typo, one-line change)
@@ -359,6 +474,42 @@ You have access to `write_todo` and `update_todo_list` tools. **Use them frequen
 4. **Mark completed**: Use `update_todo_list(task_id="1", status="completed")` IMMEDIATELY after finishing
 5. **Never batch**: Mark each task complete right away, don't wait

+### CRITICAL: Don't stop until ALL todos are complete
+Before finishing, check your todo list. If ANY task is not marked completed, you're not done.
+If you have remaining tasks like "commit and push", DO THEM - don't just announce them.
+
 ### When in doubt, USE THE TODO LIST
 It's better to over-track than under-track. The user appreciates seeing progress.
 """
+
+# Critical rule about actions vs announcements
+ACTION_NOT_ANNOUNCEMENT = """
+## CRITICAL: Act, Don't Announce
+
+**NEVER say "Now let me do X" or "Let me X" without actually calling the tool in the same response.**
+
+When you output text without tool calls, your turn ENDS. The task stops.
+
+### Bad (causes task to stop incomplete):
+```
+I've completed the merge. Now let me commit and push:
+[NO TOOL CALL - TASK STOPS HERE]
+```
+
+### Good (actually executes the action):
+```
+I've completed the merge. Committing and pushing now.
+[execute_command: git add . && git commit -m "..." && git push]
+```
+
+### Rules:
+1. **If you say you'll do something, DO IT in the same response**
+2. **If you have pending todos, execute them before responding with text only**
+3. **Text-only responses signal "I'm done" - only use when truly finished**
+4. **Check your todo list before each text response - are there incomplete items?**
+
+### The Pattern:
+- Want to do multiple things? → Make multiple tool calls in one response
+- Have more steps? → Keep calling tools until ALL are done
+- Ready to finish? → Then and only then, respond with just text
+"""
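The six workflow.py hunks above add `VERIFICATION_AND_CRITIQUE`, `PLAN_FIRST_RULE`, and `ACTION_NOT_ANNOUNCEMENT` as module-level constants. How they are stitched into the agent's system prompt is outside this diff; here is a minimal sketch of one plausible composition, with the builder function invented for illustration.

```python
# The imports reflect real additions in this diff; build_system_prompt itself
# is a hypothetical composition, not code from the package.
from emdash_core.agent.prompts.workflow import (
    ACTION_NOT_ANNOUNCEMENT,
    PLAN_FIRST_RULE,
    VERIFICATION_AND_CRITIQUE,
)

def build_system_prompt(base_prompt: str) -> str:
    """Append the new behavioral rules to a base agent prompt."""
    sections = [base_prompt, PLAN_FIRST_RULE, VERIFICATION_AND_CRITIQUE, ACTION_NOT_ANNOUNCEMENT]
    return "\n\n".join(sections)
```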
emdash_core/agent/providers/models.py
@@ -111,7 +111,7 @@ class ChatModel(Enum):
     context_window=1000000,
     max_output_tokens=16384,
     supports_tools=True,
-    supports_vision=False,
+    supports_vision=False,  # Fireworks deployment doesn't expose vision
     supports_thinking=False,
     description="MiniMax M2P1 - Long context model",
 )
emdash_core/agent/providers/openai_provider.py
@@ -139,6 +139,8 @@ class OpenAIProvider(LLMProvider):
     self._thinking_budget = int(os.environ.get("EMDASH_THINKING_BUDGET", "10000"))
     # Reasoning effort for Fireworks thinking models: none, low, medium, high
     self._reasoning_effort = os.environ.get("EMDASH_REASONING_EFFORT", "medium")
+    # Parallel tool calls for OpenAI-compatible APIs (Fireworks supports this)
+    self._parallel_tool_calls = self._parse_bool_env("EMDASH_PARALLEL_TOOL_CALLS")

     # Use OPENAI_BASE_URL env var only for OpenAI provider, otherwise use provider config
     if self._provider == "openai":
@@ -244,6 +246,14 @@ class OpenAIProvider(LLMProvider):
 if tools:
     kwargs["tools"] = tools
     kwargs["tool_choice"] = "auto"
+    # Add parallel_tool_calls if enabled (Fireworks and OpenAI support this)
+    if self._parallel_tool_calls is True:
+        kwargs["parallel_tool_calls"] = True
+        log.debug(
+            "Parallel tool calls enabled provider={} model={}",
+            self._provider,
+            self.model,
+        )

 # Add reasoning support via extra_body for providers that support it
 # Skip reasoning for custom base URLs (they may not support it)
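These two openai_provider.py hunks gate the standard chat-completions `parallel_tool_calls` flag behind an `EMDASH_PARALLEL_TOOL_CALLS` environment variable. `_parse_bool_env` is referenced but defined outside this diff, so its tri-state behavior in the sketch below (unset returns None, leaving the API default untouched) is an assumption.

```python
# Sketch of the env-flag plumbing the hunks imply. _parse_bool_env's body is not
# shown in the diff; this tri-state reading is an assumed implementation.
import os
from typing import Optional

def _parse_bool_env(name: str) -> Optional[bool]:
    raw = os.environ.get(name)
    if raw is None:
        return None  # unset: keep the API's default behavior
    return raw.strip().lower() in ("1", "true", "yes", "on")

# With EMDASH_PARALLEL_TOOL_CALLS=1, the request gains the standard
# OpenAI chat-completions flag alongside tools/tool_choice:
kwargs: dict = {"tools": [], "tool_choice": "auto"}
if _parse_bool_env("EMDASH_PARALLEL_TOOL_CALLS") is True:
    kwargs["parallel_tool_calls"] = True  # passed to client.chat.completions.create(**kwargs)
```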
emdash_core/agent/research/researcher.py
@@ -13,6 +13,7 @@ Team values enforced:
 """

 import json
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Optional

 from rich.console import Console
@@ -129,6 +130,8 @@ class ResearcherAgent:
     questions: list[ResearchQuestion],
     context: dict,
     budget: dict,
+    parallel: bool = True,
+    max_workers: int = 3,
 ) -> tuple[list[EvidenceItem], dict]:
     """Execute appropriate macros for questions.

@@ -136,6 +139,8 @@
     questions: Research questions to investigate
     context: Prior context from previous iterations
     budget: Remaining budget {tool_calls, tokens, time_s}
+    parallel: Whether to run questions in parallel (default True)
+    max_workers: Max parallel workers (default 3)

 Returns:
     Tuple of (evidence_items, updated_context)
@@ -144,67 +149,171 @@
     updated_context = dict(context)
     budget_remaining = budget.get("tool_calls", 50)

-
-
-
-
-
+    # Step 1: Bootstrap search (sequential - establishes shared context)
+    if not updated_context.get("last_search_results") and questions:
+        # Use first question's topic for bootstrap
+        first_topic = self._extract_topic(questions[0].question)
+        evidence, ctx_updates, budget_remaining = self._bootstrap_search(
+            topic=first_topic,
+            budget_remaining=budget_remaining,
+        )
+        all_evidence.extend(evidence)
+        updated_context.update(ctx_updates)

+    if budget_remaining <= 0:
         if self.verbose:
-            self.console.print(
-
-
-
+            self.console.print("[yellow]Budget exhausted after bootstrap[/yellow]")
+        return all_evidence, updated_context
+
+    # Step 2: Run macro execution for each question
+    if parallel and len(questions) > 1:
+        # Parallel execution
+        evidence, ctx = self._run_questions_parallel(
+            questions=questions,
+            context=updated_context,
+            budget_remaining=budget_remaining,
+            max_workers=max_workers,
+        )
+        all_evidence.extend(evidence)
+        updated_context.update(ctx)
+    else:
+        # Sequential execution (for single question or when parallel=False)
+        for question in questions:
+            if budget_remaining <= 0:
+                if self.verbose:
+                    self.console.print("[yellow]Budget exhausted[/yellow]")
+                break

-
-
-
-            topic=topic,
+            evidence, ctx, budget_remaining = self._run_single_question(
+                question=question,
+                context=updated_context,
                 budget_remaining=budget_remaining,
             )
             all_evidence.extend(evidence)
-            updated_context.update(
+            updated_context.update(ctx)

-
-    if budget_remaining > 0 and updated_context.get("last_search_results"):
-        macros_to_run = question.suggested_tools or suggest_macros(
-            question.question,
-            include_github=self._mcp_available
-        )
+    return all_evidence, updated_context

-
-
-
+def _run_single_question(
+    self,
+    question: ResearchQuestion,
+    context: dict,
+    budget_remaining: int,
+) -> tuple[list[EvidenceItem], dict, int]:
+    """Run macros for a single question.

-
-
+    Args:
+        question: Research question to investigate
+        context: Current context
+        budget_remaining: Remaining budget

-
+    Returns:
+        Tuple of (evidence, context_updates, remaining_budget)
+    """
+    evidence: list[EvidenceItem] = []
+    ctx_updates: dict = {}

-
-
-    params["identifier"] = top_result.get("qualified_name", topic)
+    if self.verbose:
+        self.console.print(f"[dim]Investigating: {question.question}[/dim]")

-
-
-
-
-
-
-
+    topic = self._extract_topic(question.question)
+
+    if budget_remaining > 0 and context.get("last_search_results"):
+        macros_to_run = question.suggested_tools or suggest_macros(
+            question.question,
+            include_github=self._mcp_available
+        )
+
+        for macro_name in macros_to_run:
+            if budget_remaining <= 0:
+                break

-
-
-    budget_remaining -= len(evidence)
+            if macro_name not in TOOL_MACROS:
+                continue

-
-    self.console.print(f" [green]{macro_name}: {len(evidence)} evidence[/green]")
+            params = {"topic": topic, "symbol": topic}

-
-
-
+            if "last_search_top" in context:
+                top_result = context["last_search_top"]
+                params["identifier"] = top_result.get("qualified_name", topic)

-
+            try:
+                ev, ctx = self.macro_executor.execute_macro(
+                    macro_name=macro_name,
+                    params=params,
+                    budget_remaining=budget_remaining,
+                    prior_context=context,
+                )
+
+                evidence.extend(ev)
+                ctx_updates.update(ctx)
+                budget_remaining -= len(ev)
+
+                if self.verbose:
+                    self.console.print(f" [green]{macro_name}: {len(ev)} evidence[/green]")
+
+            except Exception as e:
+                if self.verbose:
+                    self.console.print(f" [red]{macro_name}: {e}[/red]")
+
+    return evidence, ctx_updates, budget_remaining
+
+def _run_questions_parallel(
+    self,
+    questions: list[ResearchQuestion],
+    context: dict,
+    budget_remaining: int,
+    max_workers: int = 3,
+) -> tuple[list[EvidenceItem], dict]:
+    """Run multiple questions in parallel using thread pool.
+
+    Args:
+        questions: Research questions to investigate
+        context: Shared context from bootstrap
+        budget_remaining: Total remaining budget
+        max_workers: Max parallel workers
+
+    Returns:
+        Tuple of (all_evidence, merged_context)
+    """
+    all_evidence: list[EvidenceItem] = []
+    merged_context: dict = {}
+
+    # Distribute budget among questions (with some buffer)
+    budget_per_question = max(5, budget_remaining // len(questions))
+
+    if self.verbose:
+        self.console.print(
+            f"[dim]Running {len(questions)} questions in parallel "
+            f"({max_workers} workers, {budget_per_question} budget each)[/dim]"
+        )
+
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Submit all questions
+        future_to_question = {
+            executor.submit(
+                self._run_single_question,
+                question=q,
+                context=context,
+                budget_remaining=budget_per_question,
+            ): q
+            for q in questions
+        }
+
+        # Collect results as they complete
+        for future in as_completed(future_to_question):
+            question = future_to_question[future]
+            try:
+                evidence, ctx_updates, _ = future.result()
+                all_evidence.extend(evidence)
+                merged_context.update(ctx_updates)
+            except Exception as e:
+                if self.verbose:
+                    self.console.print(
+                        f"[red]Question failed: {question.question[:50]}... - {e}[/red]"
+                    )
+
+    return all_evidence, merged_context

 def _bootstrap_search(
     self,
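Taken together, the researcher.py hunks replace a single sequential loop with a bootstrap search plus a thread-pool fan-out. One consequence worth noting: `budget_per_question = max(5, budget_remaining // len(questions))` floors each worker at 5 tool calls, so with many questions the combined spend can exceed the parent budget. Below is a self-contained sketch of the same fan-out pattern; the names are illustrative stand-ins, not the package's API.

```python
# Self-contained sketch of the fan-out pattern _run_questions_parallel uses:
# split the budget, submit one worker per question, merge results as they finish.
from concurrent.futures import ThreadPoolExecutor, as_completed

def investigate(question: str, budget: int) -> list[str]:
    """Stand-in for _run_single_question; spends up to `budget` tool calls."""
    return [f"evidence for {question!r} (budget {budget})"]

def fan_out(questions: list[str], budget: int, max_workers: int = 3) -> list[str]:
    per_question = max(5, budget // len(questions))  # floor of 5, as in the diff
    findings: list[str] = []
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {pool.submit(investigate, q, per_question): q for q in questions}
        for future in as_completed(futures):
            try:
                findings.extend(future.result())
            except Exception as exc:  # one failed question shouldn't sink the batch
                print(f"question {futures[future]!r} failed: {exc}")
    return findings

print(fan_out(["where is auth?", "how are errors handled?"], budget=20))
```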