emdash-core 0.1.33__py3-none-any.whl → 0.1.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. emdash_core/agent/agents.py +93 -23
  2. emdash_core/agent/background.py +481 -0
  3. emdash_core/agent/hooks.py +419 -0
  4. emdash_core/agent/inprocess_subagent.py +114 -10
  5. emdash_core/agent/mcp/config.py +78 -2
  6. emdash_core/agent/prompts/main_agent.py +88 -1
  7. emdash_core/agent/prompts/plan_mode.py +65 -44
  8. emdash_core/agent/prompts/subagents.py +96 -8
  9. emdash_core/agent/prompts/workflow.py +215 -50
  10. emdash_core/agent/providers/models.py +1 -1
  11. emdash_core/agent/providers/openai_provider.py +10 -0
  12. emdash_core/agent/research/researcher.py +154 -45
  13. emdash_core/agent/runner/agent_runner.py +157 -19
  14. emdash_core/agent/runner/context.py +28 -9
  15. emdash_core/agent/runner/sdk_runner.py +29 -2
  16. emdash_core/agent/skills.py +81 -1
  17. emdash_core/agent/toolkit.py +87 -11
  18. emdash_core/agent/toolkits/__init__.py +117 -18
  19. emdash_core/agent/toolkits/base.py +87 -2
  20. emdash_core/agent/toolkits/explore.py +18 -0
  21. emdash_core/agent/toolkits/plan.py +18 -0
  22. emdash_core/agent/tools/__init__.py +2 -0
  23. emdash_core/agent/tools/coding.py +344 -52
  24. emdash_core/agent/tools/lsp.py +361 -0
  25. emdash_core/agent/tools/skill.py +21 -1
  26. emdash_core/agent/tools/task.py +27 -23
  27. emdash_core/agent/tools/task_output.py +262 -32
  28. emdash_core/agent/verifier/__init__.py +11 -0
  29. emdash_core/agent/verifier/manager.py +295 -0
  30. emdash_core/agent/verifier/models.py +97 -0
  31. emdash_core/{swarm/worktree_manager.py → agent/worktree.py} +19 -1
  32. emdash_core/api/agent.py +451 -5
  33. emdash_core/api/research.py +3 -3
  34. emdash_core/api/router.py +0 -4
  35. emdash_core/context/longevity.py +197 -0
  36. emdash_core/context/providers/explored_areas.py +83 -39
  37. emdash_core/context/reranker.py +35 -144
  38. emdash_core/context/simple_reranker.py +500 -0
  39. emdash_core/context/tool_relevance.py +84 -0
  40. emdash_core/core/config.py +8 -0
  41. emdash_core/graph/__init__.py +8 -1
  42. emdash_core/graph/connection.py +24 -3
  43. emdash_core/graph/writer.py +7 -1
  44. emdash_core/ingestion/repository.py +17 -198
  45. emdash_core/models/agent.py +14 -0
  46. emdash_core/server.py +1 -6
  47. emdash_core/sse/stream.py +16 -1
  48. emdash_core/utils/__init__.py +0 -2
  49. emdash_core/utils/git.py +103 -0
  50. emdash_core/utils/image.py +147 -160
  51. {emdash_core-0.1.33.dist-info → emdash_core-0.1.60.dist-info}/METADATA +7 -5
  52. {emdash_core-0.1.33.dist-info → emdash_core-0.1.60.dist-info}/RECORD +54 -58
  53. emdash_core/api/swarm.py +0 -223
  54. emdash_core/db/__init__.py +0 -67
  55. emdash_core/db/auth.py +0 -134
  56. emdash_core/db/models.py +0 -91
  57. emdash_core/db/provider.py +0 -222
  58. emdash_core/db/providers/__init__.py +0 -5
  59. emdash_core/db/providers/supabase.py +0 -452
  60. emdash_core/swarm/__init__.py +0 -17
  61. emdash_core/swarm/merge_agent.py +0 -383
  62. emdash_core/swarm/session_manager.py +0 -274
  63. emdash_core/swarm/swarm_runner.py +0 -226
  64. emdash_core/swarm/task_definition.py +0 -137
  65. emdash_core/swarm/worker_spawner.py +0 -319
  66. {emdash_core-0.1.33.dist-info → emdash_core-0.1.60.dist-info}/WHEEL +0 -0
  67. {emdash_core-0.1.33.dist-info → emdash_core-0.1.60.dist-info}/entry_points.txt +0 -0
@@ -48,35 +48,38 @@ After receiving the plan:
  - "Fix this typo" → just fix it
  - "Add a log statement here" → just add it
 
- ### 1. Understand Before Acting
- - Read code before modifying it
- - Search for similar patterns already in the codebase
- - When requirements are ambiguous, use `ask_followup_question` tool (not text output)
- - ONLY after exploring the codebase first - questions should be informed by research
- - ONLY one question at a time - never ask multiple questions in parallel
- - Ask the most critical question first, then continue based on the answer
- - NEVER ask generic questions like "What platform?" without first understanding the codebase
-
- ### 2. Break Down Hard Problems
- When facing a task you don't immediately know how to solve:
-
- a) **Spawn Plan Agent**: Call `task(subagent_type="Plan", prompt="...")` to design the approach
- b) **Save Plan**: Write the returned plan to the plan file (specified in plan mode approval)
- c) **Present for Approval**: Call `exit_plan` to show the plan to the user
- d) **Execute**: After approval, implement the plan step by step
- e) **Validate**: Check your work against requirements
-
- ### 3. Targeted vs Open-Ended Queries
-
- **Targeted queries** (you know what to look for) → Use direct tools:
+ ### 1. Know What To Do → Plan-First, Execute
+
+ When you understand the task and know how to approach it:
+ 1. State a brief plan (3-5 steps)
+ 2. Execute directly - don't explore "just in case"
+ 3. Read only the files your plan requires
+
+ Examples:
+ - "Add logout button to settings" → You know where settings is, just do it
+ - "Fix the typo in README" → Just fix it
+ - "Update the API endpoint" → Read it, update it, done
+
+ ### 2. Don't Know What To Do → Explore First
+
+ When you're genuinely uncertain about the codebase or approach:
+ - **Spawn Explore agent** for open-ended research across multiple files
+ - **Ask ONE clarifying question** if you need user input (not multiple)
+
+ Examples:
+ - "Where are errors handled?" → Explore agent (could be many places)
+ - "How does authentication work?" → Explore agent (multiple files)
+ - "What framework should I use?" → Ask user (decision needed)
+
+ ### 3. Direct Tools vs Explore Agent
+
+ **Use direct tools** when you know what to look for:
  - "Read the router" → `glob("**/router*")` then `read_file`
- - "What's in config.ts?" → `read_file("config.ts")`
- - "Find the UserService class" → `grep("class UserService")`
+ - "Find UserService class" → `grep("class UserService")`
 
- **Open-ended queries** (need to explore possibilities) → Spawn Explore agent:
- - "Where are errors handled?" → could be many places
- - "How does authentication work?" → requires understanding multiple files
- - "What is the codebase structure?" → broad exploration
+ **Spawn Explore agent** when you need broad exploration:
+ - "What is the codebase structure?"
+ - "How does X integrate with Y?"
 
  ### 4. Parallel Tool Execution
 
@@ -104,6 +107,40 @@ glob("**/pages/**/*.astro")
  **Plan agent**: Implementation tasks that modify code
  - New features, refactoring, architectural changes
  - NOT for research/reading tasks
+
+ **Custom agents** (from `.emdash/agents/*.md`):
+ - User-defined specialized agents with custom system prompts
+ - Spawned via `task(subagent_type="<agent-name>", prompt="...")`
+ - Use the same tools as Explore agent (read-only by default)
+ - Examples: security-audit, api-review, test-generator
+
+ ### 6. Iterating with Spawned Agents
+
+ Users may want to **continue iterating** with a spawned agent's findings:
+
+ **Follow-up patterns to recognize:**
+ - "Tell me more about X" (where X was in agent's findings)
+ - "Go deeper on the auth module"
+ - "What about error handling there?"
+ - "Can you explore that further?"
+
+ **When user wants to iterate:**
+ 1. **Spawn the same agent again** with a refined prompt that builds on previous findings
+ 2. Include relevant context from the previous response in the new prompt
+ 3. Be specific about what to explore further
+
+ **Example iteration:**
+ ```
+ User: "spawn explore agent to find auth code"
+ → Agent finds auth in src/auth/ with 5 files
+
+ User: "go deeper on the session handling"
+ → Spawn Explore again: "In src/auth/, analyze session handling in detail.
+   Previous exploration found auth.py, session.py, middleware.py.
+   Focus on how sessions are created, validated, and expired."
+ ```
+
+ **Key principle:** The user sees the spawned agent's thinking and findings in real-time. They may want to drill down, pivot, or expand the exploration. Always be ready to spawn another agent with a more focused or expanded prompt based on what was found.
  """
 
  # Exploration strategy for code navigation
@@ -197,6 +234,79 @@ OUTPUT_GUIDELINES = """
  - NEVER provide time estimates (hours, days, weeks)
  """
 
+ # Verification and self-critique after changes
+ VERIFICATION_AND_CRITIQUE = """
+ ## Verification & Self-Critique
+
+ After making changes, you MUST verify they work correctly. Don't assume success - prove it.
+
+ ### Verification Steps
+
+ **1. Syntax & Build Check**
+ After code changes, run the appropriate check:
+ - Python: `python -m py_compile <file>` or run tests
+ - TypeScript/JS: `tsc --noEmit` or `npm run build`
+ - Rust: `cargo check`
+ - Go: `go build`
+
+ **2. Behavioral Verification**
+ Depending on what changed:
+ | Change Type | Verification |
+ |-------------|--------------|
+ | Moving/renaming files | Check imports still resolve, run build |
+ | Refactoring functions | Run related tests, verify callers work |
+ | API changes | Check all consumers updated |
+ | Config changes | Restart/reload to verify config loads |
+ | Database changes | Verify migrations, check queries |
+
+ **3. Self-Critique Checklist**
+ Before declaring "done", ask yourself:
+ - [ ] Did I break any existing functionality?
+ - [ ] Are all imports/references updated?
+ - [ ] Did I introduce any regressions?
+ - [ ] Would a code reviewer approve this?
+ - [ ] Did I test the happy path AND edge cases?
+
+ ### Critical Scenarios Requiring Extra Verification
+
+ **Moving/Renaming Files:**
+ ```
+ 1. Update all imports in dependent files
+ 2. Run build to catch broken references
+ 3. Grep for old path to ensure nothing was missed
+ 4. Run tests to verify functionality preserved
+ ```
+
+ **Deleting Code:**
+ ```
+ 1. Search for usages before deleting
+ 2. Verify nothing depends on deleted code
+ 3. Run tests to catch regressions
+ ```
+
+ **Changing Function Signatures:**
+ ```
+ 1. Update all callers
+ 2. Run type checker (if available)
+ 3. Run tests covering the changed function
+ ```
+
+ ### When Verification Fails
+
+ If verification reveals issues:
+ 1. **Don't ignore it** - fix the problem
+ 2. **Update your todo list** - add fix tasks
+ 3. **Re-verify after fixing** - ensure the fix works
+ 4. **Learn from it** - what did you miss initially?
+
+ ### Anti-Patterns to Avoid
+ - Saying "done" without running build/tests
+ - Assuming refactors don't break anything
+ - Skipping verification because "it's a small change"
+ - Moving on when tests fail
+ - Ignoring type errors or warnings
+ """
+
  # Parallel tool execution patterns
  PARALLEL_EXECUTION = """
  ## Parallel Tool Execution
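
The scenario checklists above are mechanical enough to script. A minimal sketch of the "Moving/Renaming Files" checks in Python - illustrative only, not part of emdash_core; `verify_after_move` and the choice of `py_compile` plus `grep` are assumptions, so substitute your project's build and test commands:

```
import subprocess
import sys


def verify_after_move(moved_file: str, old_import_path: str) -> bool:
    """Steps 2-3 of the moving/renaming checklist: build check, then grep."""
    # Syntax-check the relocated module
    compiled = subprocess.run(
        [sys.executable, "-m", "py_compile", moved_file], capture_output=True
    )
    if compiled.returncode != 0:
        print(compiled.stderr.decode())
        return False

    # Grep for the old path to ensure nothing was missed
    stale = subprocess.run(
        ["grep", "-rn", "--include=*.py", old_import_path, "."],
        capture_output=True, text=True,
    )
    if stale.stdout:
        print(f"Stale references to {old_import_path}:\n{stale.stdout}")
        return False
    return True


# Example: this release moves swarm/worktree_manager.py to agent/worktree.py
if __name__ == "__main__":
    ok = verify_after_move("emdash_core/agent/worktree.py", "swarm.worktree_manager")
    sys.exit(0 if ok else 1)
```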
@@ -231,6 +341,17 @@ Do this in ONE message:
  → All three run concurrently, results return together
  """
 
+ # Plan-First rule to prevent over-exploration
+ PLAN_FIRST_RULE = """
+ ## Plan-First Reminder
+
+ **Know what to do?** → State a 3-5 step plan, then execute. Don't explore beyond your plan.
+
+ **Don't know?** → Spawn Explore agent or ask ONE clarifying question.
+
+ Trust user context - if they say "the file" or "this", they know which one.
+ """
+
  # Efficiency rules for sub-agents with limited turns
  EFFICIENCY_RULES = """
  ## Efficiency Rules
@@ -311,40 +432,84 @@ SIZING_GUIDELINES = """
 
  # Todo list usage guidance
  TODO_LIST_GUIDANCE = """
- ## Todo List Usage
+ ## Todo List Usage - USE PROACTIVELY
 
- You have access to `write_todo` and `update_todo_list` tools. Use them strategically - not for every task.
+ You have access to `write_todo` and `update_todo_list` tools. **Use them frequently** to track progress and give the user visibility into what you're doing.
 
- ### When to USE the todo list:
- - **3+ distinct steps** needed to complete the task
+ ### ALWAYS use the todo list when:
+ - **2+ distinct steps** needed to complete the task
  - **Multiple files** need to be changed
  - **User gives a list** of tasks (numbered or comma-separated)
- - **Complex feature** implementation with multiple pieces
- - **Need to track progress** across iterations or when task spans multiple tool calls
+ - **Any implementation task** that isn't trivial
+ - **Multi-step debugging** or investigation
+ - **Before starting work** - plan out what you'll do
+
+ ### Benefits of using todos:
+ - User can see your progress in real-time
+ - Helps you stay organized on complex tasks
+ - Creates a clear record of what was done
+ - **Prevents forgetting steps** - you won't accidentally stop before completing all tasks
 
- ### When to SKIP the todo list:
- - **Single focused change** (one edit, one file)
- - **Trivial fixes** (typo, add a log statement)
- - **Research/informational questions** (just answer them)
- - **Task completes in 1-2 steps** (just do it)
+ ### Only SKIP the todo list for:
+ - **Truly trivial fixes** (single typo, one-line change)
+ - **Simple questions** that need only a text answer
+ - **Reading a single file** when asked
 
  ### Examples:
 
  **Use todo list:**
- - "Implement user authentication with login, logout, and session management" → 3+ steps, multiple files
- - "Fix these 5 type errors" → list of tasks
- - "Add dark mode support across the app" → complex, multiple files
+ - "Fix the login bug" → investigate, identify cause, fix, verify
+ - "Add a new API endpoint" → create route, handler, types, tests
+ - "Update the config" → read current, plan changes, update, verify
+ - "Implement dark mode" → multiple files, multiple steps
 
  **Skip todo list:**
- - "Fix the typo in README" → single focused change
- - "Add tool_choice parameter to this function" → one edit
- - "What files handle routing?" → informational question
- - "Update the error message here" → trivial fix
+ - "What does this function do?" → just read and answer
+ - "Fix typo in line 5" → single trivial edit
 
  ### Usage pattern:
- 1. Use `write_todo(title="...", reset=true)` to start fresh with first task
- 2. Use `write_todo(title="...")` to add more tasks
- 3. Use `update_todo_list(task_id="1", status="in_progress")` when starting a task
- 4. Use `update_todo_list(task_id="1", status="completed")` when done
- 5. Mark tasks complete IMMEDIATELY after finishing - don't batch completions
+ 1. **Start immediately**: Use `write_todo(title="...", reset=true)` as soon as you understand the task
+ 2. **Add all steps**: Use `write_todo(title="...")` to add each step you'll take
+ 3. **Mark in_progress**: Use `update_todo_list(task_id="1", status="in_progress")` BEFORE starting each task
+ 4. **Mark completed**: Use `update_todo_list(task_id="1", status="completed")` IMMEDIATELY after finishing
+ 5. **Never batch**: Mark each task complete right away, don't wait
+
+ ### CRITICAL: Don't stop until ALL todos are complete
+ Before finishing, check your todo list. If ANY task is not marked completed, you're not done.
+ If you have remaining tasks like "commit and push", DO THEM - don't just announce them.
+
+ ### When in doubt, USE THE TODO LIST
+ It's better to over-track than under-track. The user appreciates seeing progress.
+ """
+
+ # Critical rule about actions vs announcements
+ ACTION_NOT_ANNOUNCEMENT = """
+ ## CRITICAL: Act, Don't Announce
+
+ **NEVER say "Now let me do X" or "Let me X" without actually calling the tool in the same response.**
+
+ When you output text without tool calls, your turn ENDS. The task stops.
+
+ ### Bad (causes task to stop incomplete):
+ ```
+ I've completed the merge. Now let me commit and push:
+ [NO TOOL CALL - TASK STOPS HERE]
+ ```
+
+ ### Good (actually executes the action):
+ ```
+ I've completed the merge. Committing and pushing now.
+ [execute_command: git add . && git commit -m "..." && git push]
+ ```
+
+ ### Rules:
+ 1. **If you say you'll do something, DO IT in the same response**
+ 2. **If you have pending todos, execute them before responding with text only**
+ 3. **Text-only responses signal "I'm done" - only use when truly finished**
+ 4. **Check your todo list before each text response - are there incomplete items?**
+
+ ### The Pattern:
+ - Want to do multiple things? → Make multiple tool calls in one response
+ - Have more steps? → Keep calling tools until ALL are done
+ - Ready to finish? → Then and only then, respond with just text
  """
@@ -111,7 +111,7 @@ class ChatModel(Enum):
          context_window=1000000,
          max_output_tokens=16384,
          supports_tools=True,
-         supports_vision=False,
+         supports_vision=False,  # Fireworks deployment doesn't expose vision
          supports_thinking=False,
          description="MiniMax M2P1 - Long context model",
      )
@@ -139,6 +139,8 @@ class OpenAIProvider(LLMProvider):
          self._thinking_budget = int(os.environ.get("EMDASH_THINKING_BUDGET", "10000"))
          # Reasoning effort for Fireworks thinking models: none, low, medium, high
          self._reasoning_effort = os.environ.get("EMDASH_REASONING_EFFORT", "medium")
+         # Parallel tool calls for OpenAI-compatible APIs (Fireworks supports this)
+         self._parallel_tool_calls = self._parse_bool_env("EMDASH_PARALLEL_TOOL_CALLS")
 
          # Use OPENAI_BASE_URL env var only for OpenAI provider, otherwise use provider config
          if self._provider == "openai":
@@ -244,6 +246,14 @@ class OpenAIProvider(LLMProvider):
          if tools:
              kwargs["tools"] = tools
              kwargs["tool_choice"] = "auto"
+             # Add parallel_tool_calls if enabled (Fireworks and OpenAI support this)
+             if self._parallel_tool_calls is True:
+                 kwargs["parallel_tool_calls"] = True
+                 log.debug(
+                     "Parallel tool calls enabled provider={} model={}",
+                     self._provider,
+                     self.model,
+                 )
 
          # Add reasoning support via extra_body for providers that support it
          # Skip reasoning for custom base URLs (they may not support it)
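
The new flag is read via `self._parse_bool_env("EMDASH_PARALLEL_TOOL_CALLS")`, whose body isn't shown in this diff. The strict `is True` check above suggests a tri-state helper; a minimal sketch of what such a parser might look like (an assumption, not the package's actual implementation):

```
import os
from typing import Optional


def _parse_bool_env(name: str) -> Optional[bool]:
    """Hypothetical tri-state env parser: True, False, or None when unset.

    Returning None for an unset variable would explain why the caller
    checks `is True` - the feature stays off unless explicitly enabled.
    """
    raw = os.environ.get(name)
    if raw is None:
        return None
    return raw.strip().lower() in ("1", "true", "yes", "on")
```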
@@ -13,6 +13,7 @@ Team values enforced:
  """
 
  import json
+ from concurrent.futures import ThreadPoolExecutor, as_completed
  from typing import Optional
 
  from rich.console import Console
@@ -129,6 +130,8 @@ class ResearcherAgent:
          questions: list[ResearchQuestion],
          context: dict,
          budget: dict,
+         parallel: bool = True,
+         max_workers: int = 3,
      ) -> tuple[list[EvidenceItem], dict]:
          """Execute appropriate macros for questions.
@@ -136,6 +139,8 @@
              questions: Research questions to investigate
              context: Prior context from previous iterations
              budget: Remaining budget {tool_calls, tokens, time_s}
+             parallel: Whether to run questions in parallel (default True)
+             max_workers: Max parallel workers (default 3)
 
          Returns:
              Tuple of (evidence_items, updated_context)
@@ -144,67 +149,171 @@
          updated_context = dict(context)
          budget_remaining = budget.get("tool_calls", 50)
 
-         for question in questions:
-             if budget_remaining <= 0:
-                 if self.verbose:
-                     self.console.print("[yellow]Budget exhausted[/yellow]")
-                 break
+         # Step 1: Bootstrap search (sequential - establishes shared context)
+         if not updated_context.get("last_search_results") and questions:
+             # Use first question's topic for bootstrap
+             first_topic = self._extract_topic(questions[0].question)
+             evidence, ctx_updates, budget_remaining = self._bootstrap_search(
+                 topic=first_topic,
+                 budget_remaining=budget_remaining,
+             )
+             all_evidence.extend(evidence)
+             updated_context.update(ctx_updates)
 
+         if budget_remaining <= 0:
              if self.verbose:
-                 self.console.print(f"[dim]Investigating: {question.question}[/dim]")
-
-             # Extract topic from question
-             topic = self._extract_topic(question.question)
+                 self.console.print("[yellow]Budget exhausted after bootstrap[/yellow]")
+             return all_evidence, updated_context
+
+         # Step 2: Run macro execution for each question
+         if parallel and len(questions) > 1:
+             # Parallel execution
+             evidence, ctx = self._run_questions_parallel(
+                 questions=questions,
+                 context=updated_context,
+                 budget_remaining=budget_remaining,
+                 max_workers=max_workers,
+             )
+             all_evidence.extend(evidence)
+             updated_context.update(ctx)
+         else:
+             # Sequential execution (for single question or when parallel=False)
+             for question in questions:
+                 if budget_remaining <= 0:
+                     if self.verbose:
+                         self.console.print("[yellow]Budget exhausted[/yellow]")
+                     break
 
-             # First, try direct search to bootstrap context
-             if not updated_context.get("last_search_results"):
-                 evidence, ctx_updates, budget_remaining = self._bootstrap_search(
-                     topic=topic,
+                 evidence, ctx, budget_remaining = self._run_single_question(
+                     question=question,
+                     context=updated_context,
                      budget_remaining=budget_remaining,
                  )
                  all_evidence.extend(evidence)
-                 updated_context.update(ctx_updates)
+                 updated_context.update(ctx)
 
-             # Then try macros if we have context
-             if budget_remaining > 0 and updated_context.get("last_search_results"):
-                 macros_to_run = question.suggested_tools or suggest_macros(
-                     question.question,
-                     include_github=self._mcp_available
-                 )
+         return all_evidence, updated_context
 
-                 for macro_name in macros_to_run:
-                     if budget_remaining <= 0:
-                         break
+     def _run_single_question(
+         self,
+         question: ResearchQuestion,
+         context: dict,
+         budget_remaining: int,
+     ) -> tuple[list[EvidenceItem], dict, int]:
+         """Run macros for a single question.
 
-                     if macro_name not in TOOL_MACROS:
-                         continue
+         Args:
+             question: Research question to investigate
+             context: Current context
+             budget_remaining: Remaining budget
 
-                     params = {"topic": topic, "symbol": topic}
+         Returns:
+             Tuple of (evidence, context_updates, remaining_budget)
+         """
+         evidence: list[EvidenceItem] = []
+         ctx_updates: dict = {}
 
-                     if "last_search_top" in updated_context:
-                         top_result = updated_context["last_search_top"]
-                         params["identifier"] = top_result.get("qualified_name", topic)
+         if self.verbose:
+             self.console.print(f"[dim]Investigating: {question.question}[/dim]")
 
-                     try:
-                         evidence, ctx_updates = self.macro_executor.execute_macro(
-                             macro_name=macro_name,
-                             params=params,
-                             budget_remaining=budget_remaining,
-                             prior_context=updated_context,
-                         )
+         topic = self._extract_topic(question.question)
+
+         if budget_remaining > 0 and context.get("last_search_results"):
+             macros_to_run = question.suggested_tools or suggest_macros(
+                 question.question,
+                 include_github=self._mcp_available
+             )
+
+             for macro_name in macros_to_run:
+                 if budget_remaining <= 0:
+                     break
 
-                         all_evidence.extend(evidence)
-                         updated_context.update(ctx_updates)
-                         budget_remaining -= len(evidence)
+                 if macro_name not in TOOL_MACROS:
+                     continue
 
-                         if self.verbose:
-                             self.console.print(f"  [green]{macro_name}: {len(evidence)} evidence[/green]")
+                 params = {"topic": topic, "symbol": topic}
 
-                     except Exception as e:
-                         if self.verbose:
-                             self.console.print(f"  [red]{macro_name}: {e}[/red]")
+                 if "last_search_top" in context:
+                     top_result = context["last_search_top"]
+                     params["identifier"] = top_result.get("qualified_name", topic)
 
-         return all_evidence, updated_context
+                 try:
+                     ev, ctx = self.macro_executor.execute_macro(
+                         macro_name=macro_name,
+                         params=params,
+                         budget_remaining=budget_remaining,
+                         prior_context=context,
+                     )
+
+                     evidence.extend(ev)
+                     ctx_updates.update(ctx)
+                     budget_remaining -= len(ev)
+
+                     if self.verbose:
+                         self.console.print(f"  [green]{macro_name}: {len(ev)} evidence[/green]")
+
+                 except Exception as e:
+                     if self.verbose:
+                         self.console.print(f"  [red]{macro_name}: {e}[/red]")
+
+         return evidence, ctx_updates, budget_remaining
+
+     def _run_questions_parallel(
+         self,
+         questions: list[ResearchQuestion],
+         context: dict,
+         budget_remaining: int,
+         max_workers: int = 3,
+     ) -> tuple[list[EvidenceItem], dict]:
+         """Run multiple questions in parallel using thread pool.
+
+         Args:
+             questions: Research questions to investigate
+             context: Shared context from bootstrap
+             budget_remaining: Total remaining budget
+             max_workers: Max parallel workers
+
+         Returns:
+             Tuple of (all_evidence, merged_context)
+         """
+         all_evidence: list[EvidenceItem] = []
+         merged_context: dict = {}
+
+         # Distribute budget among questions (with some buffer)
+         budget_per_question = max(5, budget_remaining // len(questions))
+
+         if self.verbose:
+             self.console.print(
+                 f"[dim]Running {len(questions)} questions in parallel "
+                 f"({max_workers} workers, {budget_per_question} budget each)[/dim]"
+             )
+
+         with ThreadPoolExecutor(max_workers=max_workers) as executor:
+             # Submit all questions
+             future_to_question = {
+                 executor.submit(
+                     self._run_single_question,
+                     question=q,
+                     context=context,
+                     budget_remaining=budget_per_question,
+                 ): q
+                 for q in questions
+             }
+
+             # Collect results as they complete
+             for future in as_completed(future_to_question):
+                 question = future_to_question[future]
+                 try:
+                     evidence, ctx_updates, _ = future.result()
+                     all_evidence.extend(evidence)
+                     merged_context.update(ctx_updates)
+                 except Exception as e:
+                     if self.verbose:
+                         self.console.print(
+                             f"[red]Question failed: {question.question[:50]}... - {e}[/red]"
+                         )
+
+         return all_evidence, merged_context
 
      def _bootstrap_search(
          self,
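
Stripped of the evidence bookkeeping, `_run_questions_parallel` is a fan-out/fan-in with an even per-task budget split. A distilled, self-contained sketch of that pattern (illustrative names, not emdash_core code):

```
from concurrent.futures import ThreadPoolExecutor, as_completed


def fan_out(tasks, worker, total_budget: int, max_workers: int = 3):
    """Even budget split (floor of 5), submit all, collect as they finish."""
    budget_each = max(5, total_budget // len(tasks))
    results, failures = [], []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(worker, t, budget_each): t for t in tasks}
        for future in as_completed(futures):
            try:
                results.append(future.result())
            except Exception as exc:  # one failure doesn't sink the batch
                failures.append((futures[future], exc))
    return results, failures
```

Two behaviors visible in the original are worth noting: with many questions the floor of 5 per worker can exceed the overall remaining budget, and the merged context is last-writer-wins, since `dict.update` is applied in completion order.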