emdash-core 0.1.25__py3-none-any.whl → 0.1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. emdash_core/agent/__init__.py +4 -0
  2. emdash_core/agent/events.py +42 -20
  3. emdash_core/agent/inprocess_subagent.py +123 -10
  4. emdash_core/agent/prompts/__init__.py +4 -3
  5. emdash_core/agent/prompts/main_agent.py +32 -2
  6. emdash_core/agent/prompts/plan_mode.py +236 -107
  7. emdash_core/agent/prompts/subagents.py +79 -15
  8. emdash_core/agent/prompts/workflow.py +145 -26
  9. emdash_core/agent/providers/factory.py +2 -2
  10. emdash_core/agent/providers/openai_provider.py +67 -15
  11. emdash_core/agent/runner/__init__.py +49 -0
  12. emdash_core/agent/runner/agent_runner.py +753 -0
  13. emdash_core/agent/runner/context.py +451 -0
  14. emdash_core/agent/runner/factory.py +108 -0
  15. emdash_core/agent/runner/plan.py +217 -0
  16. emdash_core/agent/runner/sdk_runner.py +324 -0
  17. emdash_core/agent/runner/utils.py +67 -0
  18. emdash_core/agent/skills.py +47 -8
  19. emdash_core/agent/toolkit.py +46 -14
  20. emdash_core/agent/toolkits/plan.py +9 -11
  21. emdash_core/agent/tools/__init__.py +2 -2
  22. emdash_core/agent/tools/coding.py +48 -4
  23. emdash_core/agent/tools/modes.py +151 -143
  24. emdash_core/agent/tools/task.py +41 -2
  25. emdash_core/api/agent.py +555 -1
  26. emdash_core/skills/frontend-design/SKILL.md +56 -0
  27. emdash_core/sse/stream.py +4 -0
  28. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/METADATA +2 -1
  29. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/RECORD +31 -24
  30. emdash_core/agent/runner.py +0 -1123
  31. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/WHEEL +0 -0
  32. {emdash_core-0.1.25.dist-info → emdash_core-0.1.33.dist-info}/entry_points.txt +0 -0
@@ -8,6 +8,46 @@ consistent behavior across agent types.
8
8
  WORKFLOW_PATTERNS = """
9
9
  ## Workflow for Complex Tasks
10
10
 
11
+ ### User Plan Mode Commands
12
+
13
+ When the user explicitly asks to "enter plan mode" or says "plan mode":
14
+ - Call `enter_plan_mode(reason="User requested to enter plan mode for task planning")`
15
+ - This REQUIRES user approval before plan mode activates
16
+ - Do NOT ask clarification questions instead - use the tool
17
+
18
+ ### CRITICAL: Spawn Plan Agent for Non-Trivial Tasks
19
+
20
+ For ANY task that involves:
21
+ - Creating new features or applications
22
+ - Multi-file changes
23
+ - Architectural decisions
24
+ - Unclear or ambiguous requirements
25
+
26
+ You MUST spawn a **Plan agent** via the `task` tool FIRST before implementing. The Plan agent will:
27
+ 1. Explore the codebase to understand patterns and architecture
28
+ 2. Design a concrete implementation plan
29
+ 3. Return the plan to you
30
+
31
+ After receiving the plan:
32
+ 1. Write it to the plan file specified in plan mode (usually `.emdash/plan.md`) using `write_to_file`
33
+ 2. Call `exit_plan` to present for user approval
34
+ 3. After approval, implement the plan
35
+
36
+ **Plan agent is for IMPLEMENTATION tasks** (building/changing code):
37
+ - "Create a family expense app" → spawn Plan agent
38
+ - "Add authentication routes" → spawn Plan agent
39
+ - "Refactor the database layer" → spawn Plan agent
40
+
41
+ **Plan agent is NOT for RESEARCH tasks** (reading/understanding code):
42
+ - "Read the router and report" → use direct tools, no planning needed
43
+ - "What files handle routing?" → use direct tools or Explore agent
44
+ - "How does authentication work?" → use Explore agent
45
+ - "What does this function do?" → just read and answer
46
+
47
+ **Trivial implementation tasks** (no planning needed):
48
+ - "Fix this typo" → just fix it
49
+ - "Add a log statement here" → just add it
50
+
11
51
  ### 1. Understand Before Acting
12
52
  - Read code before modifying it
13
53
  - Search for similar patterns already in the codebase
@@ -15,30 +55,55 @@ WORKFLOW_PATTERNS = """
15
55
  - ONLY after exploring the codebase first - questions should be informed by research
16
56
  - ONLY one question at a time - never ask multiple questions in parallel
17
57
  - Ask the most critical question first, then continue based on the answer
58
+ - NEVER ask generic questions like "What platform?" without first understanding the codebase
18
59
 
19
60
  ### 2. Break Down Hard Problems
20
61
  When facing a task you don't immediately know how to solve:
21
62
 
22
- a) **Decompose**: Split into smaller, concrete sub-tasks
23
- b) **Explore**: Use sub-agents to gather context (can run in parallel)
24
- c) **Plan**: Write out your approach before implementing
25
- d) **Submit**: Use `exit_plan` tool when your plan is ready for user approval
26
- e) **Execute**: Work through tasks one at a time
27
- f) **Validate**: Check your work against requirements
28
-
29
- ### 3. Use Sub-Agents Strategically
30
- Spawn sub-agents via the `task` tool when you need:
31
- - **Explore**: Find files, patterns, or understand code structure
32
- - **Plan**: Design implementation approach for complex features
33
-
34
- Guidelines:
35
- - Launch multiple Explore agents in parallel for independent searches
36
- - Use sub-agents for focused work that would clutter your context
37
- - Prefer sub-agents over doing 5+ search operations yourself
38
-
39
- ### 4. Track Progress
40
- For multi-step tasks, mentally track what's done and what's next.
41
- Update the user on progress for long-running work.
63
+ a) **Spawn Plan Agent**: Call `task(subagent_type="Plan", prompt="...")` to design the approach
64
+ b) **Save Plan**: Write the returned plan to the plan file (specified in plan mode approval)
65
+ c) **Present for Approval**: Call `exit_plan` to show the plan to the user
66
+ d) **Execute**: After approval, implement the plan step by step
67
+ e) **Validate**: Check your work against requirements
68
+
69
+ ### 3. Targeted vs Open-Ended Queries
70
+
71
+ **Targeted queries** (you know what to look for) → Use direct tools:
72
+ - "Read the router" → `glob("**/router*")` then `read_file`
73
+ - "What's in config.ts?" → `read_file("config.ts")`
74
+ - "Find the UserService class" → `grep("class UserService")`
75
+
76
+ **Open-ended queries** (need to explore possibilities) → Spawn Explore agent:
77
+ - "Where are errors handled?" → could be many places
78
+ - "How does authentication work?" → requires understanding multiple files
79
+ - "What is the codebase structure?" → broad exploration
80
+
81
+ ### 4. Parallel Tool Execution
82
+
83
+ Run independent searches in parallel (single response with multiple tool calls):
84
+ ```
85
+ # Good: parallel independent searches
86
+ glob("**/router*")
87
+ glob("**/pages/**/*.astro")
88
+ → Both run concurrently, results return together
89
+ ```
90
+
91
+ ### 5. Sub-Agent Decision Matrix
92
+
93
+ | Task Type | Example | Sub-Agent |
94
+ |-----------|---------|-----------|
95
+ | **Research (open-ended)** | "How does auth work?" | Explore |
96
+ | **Research (targeted)** | "Read the router" | None (direct tools) |
97
+ | **Implementation (complex)** | "Add user profiles" | Plan |
98
+ | **Implementation (trivial)** | "Fix this typo" | None (just do it) |
99
+
100
+ **Explore agent**: Open-ended research across multiple files
101
+ - "Where are errors handled?"
102
+ - "What is the codebase structure?"
103
+
104
+ **Plan agent**: Implementation tasks that modify code
105
+ - New features, refactoring, architectural changes
106
+ - NOT for research/reading tasks
42
107
  """
43
108
 
44
109
  # Exploration strategy for code navigation
@@ -62,13 +127,20 @@ Use the right tool for the job:
62
127
  | `grep` | File contents | Know exact text | `grep("def authenticate")` |
63
128
  | `semantic_search` | Conceptual meaning | Fuzzy/conceptual | `semantic_search("user login flow")` |
64
129
 
65
- **Parallel searches**: Run 2-3 searches together when exploring:
130
+ **Parallel searches based on multiple hypotheses**:
131
+ When you have context clues, run parallel searches for each possibility:
132
+ ```
133
+ # Example: "read the router" in an Astro project
134
+ glob("**/router*") # Files with "router" in name
135
+ glob("**/pages/**/*.astro") # Astro's file-based routing
136
+ → Both run in parallel, then read the relevant results
137
+ ```
138
+
139
+ **Following imports after reading**:
140
+ When you read a file and see an import, read that imported file to complete the picture:
66
141
  ```
67
- # In one response, invoke all three:
68
- grep("authenticate")
69
- grep("login")
70
- grep("session")
71
- → All run concurrently, results return together
142
+ # After reading src/pages/[...slug].astro which imports AppRouter
143
+ read_file("src/components/Router.tsx") # Follow the import
72
144
  ```
73
145
 
74
146
  ### Phase 3: Understand (Deep Dive)
@@ -108,6 +180,11 @@ You have enough context when you can answer:
108
180
  - What would need to change?
109
181
 
110
182
  Stop exploring when you can confidently describe the implementation approach.
183
+
184
+ ### CRITICAL: After Clarification → Act
185
+ **When you receive an answer to a clarification question, your NEXT action must be implementation/planning - NOT more exploration.**
186
+
187
+ The user answered your question. You now have what you need. Act on it.
111
188
  """
112
189
 
113
190
  # Output formatting guidelines
@@ -213,12 +290,14 @@ Adapt your plan structure based on these factors:
213
290
  - Detail scales with risk (logout button ≠ database migration)
214
291
  - Follow existing codebase patterns, not novel approaches
215
292
  - Mark unknowns explicitly rather than pretending certainty
293
+ - **NEVER include time estimates** (no "Day 1-2", "Week 1", hours, days, sprints, timelines)
216
294
 
217
295
  ### Anti-patterns to Avoid
218
296
  - Over-planning simple tasks
219
297
  - Under-planning complex/risky ones
220
298
  - Hiding uncertainty behind confident language
221
299
  - Ignoring existing patterns in the codebase
300
+ - Including time estimates (Days, Weeks, Sprints, etc.) - focus on WHAT, not WHEN
222
301
 
223
302
  Your output will be reviewed by the main agent, who will consolidate findings and submit the final plan for user approval.
224
303
  """
@@ -229,3 +308,43 @@ SIZING_GUIDELINES = """
229
308
  - NEVER include time estimates (no hours, days, weeks, sprints, timelines)
230
309
  - Focus on what needs to be done, not how long it takes
231
310
  """
311
+
312
+ # Todo list usage guidance
313
+ TODO_LIST_GUIDANCE = """
314
+ ## Todo List Usage
315
+
316
+ You have access to `write_todo` and `update_todo_list` tools. Use them strategically - not for every task.
317
+
318
+ ### When to USE the todo list:
319
+ - **3+ distinct steps** needed to complete the task
320
+ - **Multiple files** need to be changed
321
+ - **User gives a list** of tasks (numbered or comma-separated)
322
+ - **Complex feature** implementation with multiple pieces
323
+ - **Need to track progress** across iterations or when task spans multiple tool calls
324
+
325
+ ### When to SKIP the todo list:
326
+ - **Single focused change** (one edit, one file)
327
+ - **Trivial fixes** (typo, add a log statement)
328
+ - **Research/informational questions** (just answer them)
329
+ - **Task completes in 1-2 steps** (just do it)
330
+
331
+ ### Examples:
332
+
333
+ **Use todo list:**
334
+ - "Implement user authentication with login, logout, and session management" → 3+ steps, multiple files
335
+ - "Fix these 5 type errors" → list of tasks
336
+ - "Add dark mode support across the app" → complex, multiple files
337
+
338
+ **Skip todo list:**
339
+ - "Fix the typo in README" → single focused change
340
+ - "Add tool_choice parameter to this function" → one edit
341
+ - "What files handle routing?" → informational question
342
+ - "Update the error message here" → trivial fix
343
+
344
+ ### Usage pattern:
345
+ 1. Use `write_todo(title="...", reset=true)` to start fresh with first task
346
+ 2. Use `write_todo(title="...")` to add more tasks
347
+ 3. Use `update_todo_list(task_id="1", status="in_progress")` when starting a task
348
+ 4. Use `update_todo_list(task_id="1", status="completed")` when done
349
+ 5. Mark tasks complete IMMEDIATELY after finishing - don't batch completions
350
+ """
@@ -13,8 +13,8 @@ from .transformers_provider import TransformersProvider
13
13
  # Configuration - Single source of truth
14
14
  # ═══════════════════════════════════════════════════════════════════════════════
15
15
 
16
- # Default model alias
17
- DEFAULT_MODEL = "fireworks:accounts/fireworks/models/minimax-m2p1"
16
+ # Default model alias - overridable via EMDASH_DEFAULT_MODEL env var
17
+ DEFAULT_MODEL = os.environ.get("EMDASH_DEFAULT_MODEL", "fireworks:accounts/fireworks/models/minimax-m2p1")
18
18
 
19
19
  # Default API key environment variable (used by default model)
20
20
  DEFAULT_API_KEY_ENV = "FIREWORKS_API_KEY"
@@ -2,6 +2,7 @@
2
2
 
3
3
  import os
4
4
  import base64
5
+ import time
5
6
  from typing import Optional, Union
6
7
 
7
8
  from openai import OpenAI
@@ -69,9 +70,9 @@ class OpenAIProvider(LLMProvider):
69
70
  self._context_limit = 128000
70
71
  self._provider = self._infer_provider(model)
71
72
 
72
- # Override provider if OPENAI_BASE_URL is set (custom OpenAI-compatible API)
73
- if os.environ.get("OPENAI_BASE_URL"):
74
- self._provider = "openai"
73
+ # Note: We no longer override provider based on OPENAI_BASE_URL
74
+ # Each provider (fireworks, anthropic) uses its own base_url
75
+ # OPENAI_BASE_URL only applies to "openai" provider
75
76
 
76
77
  # Create OpenAI client with provider-specific configuration
77
78
  config = PROVIDER_CONFIG.get(self._provider, PROVIDER_CONFIG["openai"])
@@ -136,10 +137,24 @@ class OpenAIProvider(LLMProvider):
136
137
  self._reasoning_override = self._parse_bool_env("EMDASH_LLM_REASONING")
137
138
  self._thinking_override = self._parse_bool_env("EMDASH_LLM_THINKING")
138
139
  self._thinking_budget = int(os.environ.get("EMDASH_THINKING_BUDGET", "10000"))
140
+ # Reasoning effort for Fireworks thinking models: none, low, medium, high
141
+ self._reasoning_effort = os.environ.get("EMDASH_REASONING_EFFORT", "medium")
142
+
143
+ # Use OPENAI_BASE_URL env var only for OpenAI provider, otherwise use provider config
144
+ if self._provider == "openai":
145
+ base_url = os.environ.get("OPENAI_BASE_URL") or config["base_url"]
146
+ else:
147
+ base_url = config["base_url"]
148
+
149
+ # Configure timeout from environment (default 300 seconds / 5 minutes)
150
+ # LLM calls can take a while with large contexts, so we use a generous default
151
+ timeout_seconds = int(os.environ.get("EMDASH_LLM_TIMEOUT", "300"))
152
+ self._timeout = timeout_seconds
139
153
 
140
154
  self.client = OpenAI(
141
155
  api_key=api_key,
142
- base_url=config["base_url"],
156
+ base_url=base_url,
157
+ timeout=timeout_seconds,
143
158
  )
144
159
 
145
160
  @staticmethod
@@ -175,13 +190,10 @@ class OpenAIProvider(LLMProvider):
175
190
  def _infer_provider(self, model: str) -> str:
176
191
  """Infer provider from model string.
177
192
 
178
- If OPENAI_BASE_URL is set, always returns 'openai' to use the custom
179
- OpenAI-compatible API endpoint with OPENAI_API_KEY.
193
+ Returns the appropriate provider based on model name.
194
+ OPENAI_BASE_URL only affects the openai provider's base URL,
195
+ not provider selection.
180
196
  """
181
- # If custom base URL is set, use openai provider (uses OPENAI_API_KEY)
182
- if os.environ.get("OPENAI_BASE_URL"):
183
- return "openai"
184
-
185
197
  model_lower = model.lower()
186
198
  if "claude" in model_lower or "anthropic" in model_lower:
187
199
  return "anthropic"
@@ -231,6 +243,7 @@ class OpenAIProvider(LLMProvider):
231
243
  # Add tools if provided
232
244
  if tools:
233
245
  kwargs["tools"] = tools
246
+ kwargs["tool_choice"] = "auto"
234
247
 
235
248
  # Add reasoning support via extra_body for providers that support it
236
249
  # Skip reasoning for custom base URLs (they may not support it)
@@ -254,6 +267,17 @@ class OpenAIProvider(LLMProvider):
254
267
  self._thinking_budget,
255
268
  )
256
269
 
270
+ # Add reasoning_effort for Fireworks thinking models
271
+ # This controls the depth of reasoning: none, low, medium, high
272
+ if thinking and self._provider == "fireworks" and self._reasoning_effort != "none":
273
+ kwargs["reasoning_effort"] = self._reasoning_effort
274
+ log.info(
275
+ "Reasoning effort enabled provider={} model={} effort={}",
276
+ self._provider,
277
+ self.model,
278
+ self._reasoning_effort,
279
+ )
280
+
257
281
  # Add images if provided (vision support)
258
282
  if images:
259
283
  log.info(
@@ -312,21 +336,32 @@ class OpenAIProvider(LLMProvider):
312
336
  )
313
337
 
314
338
  # Call OpenAI SDK
339
+ start_time = time.time()
315
340
  try:
316
341
  response = self.client.chat.completions.create(**kwargs)
317
342
  except Exception as exc: # pragma: no cover - defensive logging
343
+ elapsed = time.time() - start_time
318
344
  status = getattr(exc, "status_code", None)
319
345
  code = getattr(exc, "code", None)
320
346
  log.exception(
321
- "LLM request failed provider={} model={} status={} code={} error={}",
347
+ "LLM request failed provider={} model={} status={} code={} elapsed={:.1f}s error={}",
322
348
  self._provider,
323
349
  self.model,
324
350
  status,
325
351
  code,
352
+ elapsed,
326
353
  exc,
327
354
  )
328
355
  raise
329
356
 
357
+ elapsed = time.time() - start_time
358
+ log.info(
359
+ "LLM request completed provider={} model={} elapsed={:.1f}s",
360
+ self._provider,
361
+ self.model,
362
+ elapsed,
363
+ )
364
+
330
365
  return self._to_llm_response(response)
331
366
 
332
367
  def _to_llm_response(self, response) -> LLMResponse:
@@ -374,6 +409,16 @@ class OpenAIProvider(LLMProvider):
374
409
  # Simple string content
375
410
  content = raw_content
376
411
 
412
+ # Check for reasoning_content field (Fireworks/OpenAI thinking models)
413
+ # This is separate from Anthropic's content blocks format
414
+ if not thinking and hasattr(message, "reasoning_content") and message.reasoning_content:
415
+ thinking = message.reasoning_content
416
+ log.debug(
417
+ "Reasoning content extracted from message.reasoning_content provider={} len={}",
418
+ self._provider,
419
+ len(thinking),
420
+ )
421
+
377
422
  # Extract tool calls
378
423
  tool_calls = []
379
424
  if message.tool_calls:
@@ -391,17 +436,24 @@ class OpenAIProvider(LLMProvider):
391
436
  if hasattr(response, "usage") and response.usage:
392
437
  input_tokens = getattr(response.usage, "prompt_tokens", 0) or 0
393
438
  output_tokens = getattr(response.usage, "completion_tokens", 0) or 0
394
- # Anthropic returns thinking tokens in cache_creation_input_tokens or similar
395
- # For now, estimate from the thinking content length
396
- if thinking:
439
+ # Try to get reasoning/thinking tokens from the API response
440
+ # Different providers use different field names
441
+ thinking_tokens = (
442
+ getattr(response.usage, "reasoning_tokens", 0)
443
+ or getattr(response.usage, "thinking_tokens", 0)
444
+ or 0
445
+ )
446
+ # If no explicit thinking tokens but we have thinking content, estimate
447
+ if not thinking_tokens and thinking:
397
448
  thinking_tokens = len(thinking) // 4 # Rough estimate
398
449
 
399
450
  if thinking:
400
451
  log.info(
401
- "Extended thinking captured provider={} model={} thinking_len={}",
452
+ "Extended thinking captured provider={} model={} thinking_len={} thinking_tokens={}",
402
453
  self._provider,
403
454
  self.model,
404
455
  len(thinking),
456
+ thinking_tokens,
405
457
  )
406
458
 
407
459
  return LLMResponse(
@@ -0,0 +1,49 @@
1
+ """Agent runner module for LLM-powered exploration.
2
+
3
+ This module provides the AgentRunner class and related utilities for running
4
+ LLM agents with tool access for code exploration.
5
+
6
+ The module is organized as follows:
7
+ - agent_runner.py: Main AgentRunner class
8
+ - context.py: Context estimation, compaction, and management
9
+ - plan.py: Plan approval/rejection functionality
10
+ - utils.py: JSON encoding and utility functions
11
+ """
12
+
13
+ from .agent_runner import AgentRunner
14
+ from .sdk_runner import SDKAgentRunner, is_claude_model
15
+ from .factory import get_runner, create_hybrid_runner
16
+ from .utils import SafeJSONEncoder, summarize_tool_result
17
+ from .context import (
18
+ estimate_context_tokens,
19
+ get_context_breakdown,
20
+ maybe_compact_context,
21
+ compact_messages_with_llm,
22
+ format_messages_for_summary,
23
+ get_reranked_context,
24
+ emit_context_frame,
25
+ )
26
+ from .plan import PlanMixin
27
+
28
+ __all__ = [
29
+ # Main classes
30
+ "AgentRunner",
31
+ "SDKAgentRunner",
32
+ # Factory functions
33
+ "get_runner",
34
+ "create_hybrid_runner",
35
+ "is_claude_model",
36
+ # Utils
37
+ "SafeJSONEncoder",
38
+ "summarize_tool_result",
39
+ # Context functions
40
+ "estimate_context_tokens",
41
+ "get_context_breakdown",
42
+ "maybe_compact_context",
43
+ "compact_messages_with_llm",
44
+ "format_messages_for_summary",
45
+ "get_reranked_context",
46
+ "emit_context_frame",
47
+ # Plan management
48
+ "PlanMixin",
49
+ ]