emdash-core 0.1.37__py3-none-any.whl → 0.1.60__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emdash_core/agent/agents.py +9 -0
- emdash_core/agent/background.py +481 -0
- emdash_core/agent/inprocess_subagent.py +70 -1
- emdash_core/agent/mcp/config.py +78 -2
- emdash_core/agent/prompts/main_agent.py +53 -1
- emdash_core/agent/prompts/plan_mode.py +65 -44
- emdash_core/agent/prompts/subagents.py +73 -1
- emdash_core/agent/prompts/workflow.py +179 -28
- emdash_core/agent/providers/models.py +1 -1
- emdash_core/agent/providers/openai_provider.py +10 -0
- emdash_core/agent/research/researcher.py +154 -45
- emdash_core/agent/runner/agent_runner.py +145 -19
- emdash_core/agent/runner/sdk_runner.py +29 -2
- emdash_core/agent/skills.py +81 -1
- emdash_core/agent/toolkit.py +87 -11
- emdash_core/agent/tools/__init__.py +2 -0
- emdash_core/agent/tools/coding.py +344 -52
- emdash_core/agent/tools/lsp.py +361 -0
- emdash_core/agent/tools/skill.py +21 -1
- emdash_core/agent/tools/task.py +16 -19
- emdash_core/agent/tools/task_output.py +262 -32
- emdash_core/agent/verifier/__init__.py +11 -0
- emdash_core/agent/verifier/manager.py +295 -0
- emdash_core/agent/verifier/models.py +97 -0
- emdash_core/{swarm/worktree_manager.py → agent/worktree.py} +19 -1
- emdash_core/api/agent.py +297 -2
- emdash_core/api/research.py +3 -3
- emdash_core/api/router.py +0 -4
- emdash_core/context/longevity.py +197 -0
- emdash_core/context/providers/explored_areas.py +83 -39
- emdash_core/context/reranker.py +35 -144
- emdash_core/context/simple_reranker.py +500 -0
- emdash_core/context/tool_relevance.py +84 -0
- emdash_core/core/config.py +8 -0
- emdash_core/graph/__init__.py +8 -1
- emdash_core/graph/connection.py +24 -3
- emdash_core/graph/writer.py +7 -1
- emdash_core/models/agent.py +10 -0
- emdash_core/server.py +1 -6
- emdash_core/sse/stream.py +16 -1
- emdash_core/utils/__init__.py +0 -2
- emdash_core/utils/git.py +103 -0
- emdash_core/utils/image.py +147 -160
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/METADATA +6 -6
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/RECORD +47 -52
- emdash_core/api/swarm.py +0 -223
- emdash_core/db/__init__.py +0 -67
- emdash_core/db/auth.py +0 -134
- emdash_core/db/models.py +0 -91
- emdash_core/db/provider.py +0 -222
- emdash_core/db/providers/__init__.py +0 -5
- emdash_core/db/providers/supabase.py +0 -452
- emdash_core/swarm/__init__.py +0 -17
- emdash_core/swarm/merge_agent.py +0 -383
- emdash_core/swarm/session_manager.py +0 -274
- emdash_core/swarm/swarm_runner.py +0 -226
- emdash_core/swarm/task_definition.py +0 -137
- emdash_core/swarm/worker_spawner.py +0 -319
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/WHEEL +0 -0
- {emdash_core-0.1.37.dist-info → emdash_core-0.1.60.dist-info}/entry_points.txt +0 -0
emdash_core/agent/prompts/workflow.py
@@ -48,35 +48,38 @@ After receiving the plan:
 - "Fix this typo" → just fix it
 - "Add a log statement here" → just add it

-### 1.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+### 1. Know What To Do → Plan-First, Execute
+
+When you understand the task and know how to approach it:
+1. State a brief plan (3-5 steps)
+2. Execute directly - don't explore "just in case"
+3. Read only the files your plan requires
+
+Examples:
+- "Add logout button to settings" → You know where settings is, just do it
+- "Fix the typo in README" → Just fix it
+- "Update the API endpoint" → Read it, update it, done
+
+### 2. Don't Know What To Do → Explore First
+
+When you're genuinely uncertain about the codebase or approach:
+- **Spawn Explore agent** for open-ended research across multiple files
+- **Ask ONE clarifying question** if you need user input (not multiple)
+
+Examples:
+- "Where are errors handled?" → Explore agent (could be many places)
+- "How does authentication work?" → Explore agent (multiple files)
+- "What framework should I use?" → Ask user (decision needed)
+
+### 3. Direct Tools vs Explore Agent
+
+**Use direct tools** when you know what to look for:
 - "Read the router" → `glob("**/router*")` then `read_file`
-- "
-- "Find the UserService class" → `grep("class UserService")`
+- "Find UserService class" → `grep("class UserService")`

-**
-- "
-- "How does
-- "What is the codebase structure?" → broad exploration
+**Spawn Explore agent** when you need broad exploration:
+- "What is the codebase structure?"
+- "How does X integrate with Y?"

 ### 4. Parallel Tool Execution

@@ -110,6 +113,34 @@ glob("**/pages/**/*.astro")
 - Spawned via `task(subagent_type="<agent-name>", prompt="...")`
 - Use the same tools as Explore agent (read-only by default)
 - Examples: security-audit, api-review, test-generator
+
+### 6. Iterating with Spawned Agents
+
+Users may want to **continue iterating** with a spawned agent's findings:
+
+**Follow-up patterns to recognize:**
+- "Tell me more about X" (where X was in agent's findings)
+- "Go deeper on the auth module"
+- "What about error handling there?"
+- "Can you explore that further?"
+
+**When user wants to iterate:**
+1. **Spawn the same agent again** with a refined prompt that builds on previous findings
+2. Include relevant context from the previous response in the new prompt
+3. Be specific about what to explore further
+
+**Example iteration:**
+```
+User: "spawn explore agent to find auth code"
+→ Agent finds auth in src/auth/ with 5 files
+
+User: "go deeper on the session handling"
+→ Spawn Explore again: "In src/auth/, analyze session handling in detail.
+   Previous exploration found auth.py, session.py, middleware.py.
+   Focus on how sessions are created, validated, and expired."
+```
+
+**Key principle:** The user sees the spawned agent's thinking and findings in real-time. They may want to drill down, pivot, or expand the exploration. Always be ready to spawn another agent with a more focused or expanded prompt based on what was found.
 """

 # Exploration strategy for code navigation
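As a concrete illustration of the iteration pattern this hunk adds: a minimal sketch of a follow-up spawn, assuming a `task` callable with the signature the prompt text describes. The `"explore"` agent name and the helper function are illustrative, not part of the package's documented API.

```python
# Minimal sketch of the follow-up pattern described above. The `task` callable's
# signature comes from the prompt text; everything else here is illustrative.
def follow_up_explore(task, previous_findings: str, focus: str):
    """Spawn the Explore agent again with a prompt that builds on prior findings."""
    prompt = (
        f"{focus}\n"
        f"Previous exploration found: {previous_findings}\n"
        "Focus on this area only; do not re-explore the rest of the codebase."
    )
    # Hypothetical agent name; the diff only shows the placeholder "<agent-name>".
    return task(subagent_type="explore", prompt=prompt)
```

Mirroring the prompt's own example, a second pass would call this with `focus="In src/auth/, analyze session handling in detail."` and the file list returned by the first exploration.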
@@ -203,6 +234,79 @@ OUTPUT_GUIDELINES = """
 - NEVER provide time estimates (hours, days, weeks)
 """

+# Verification and self-critique after changes
+VERIFICATION_AND_CRITIQUE = """
+## Verification & Self-Critique
+
+After making changes, you MUST verify they work correctly. Don't assume success - prove it.
+
+### Verification Steps
+
+**1. Syntax & Build Check**
+After code changes, run the appropriate check:
+- Python: `python -m py_compile <file>` or run tests
+- TypeScript/JS: `tsc --noEmit` or `npm run build`
+- Rust: `cargo check`
+- Go: `go build`
+
+**2. Behavioral Verification**
+Depending on what changed:
+| Change Type | Verification |
+|-------------|--------------|
+| Moving/renaming files | Check imports still resolve, run build |
+| Refactoring functions | Run related tests, verify callers work |
+| API changes | Check all consumers updated |
+| Config changes | Restart/reload to verify config loads |
+| Database changes | Verify migrations, check queries |
+
+**3. Self-Critique Checklist**
+Before declaring "done", ask yourself:
+- [ ] Did I break any existing functionality?
+- [ ] Are all imports/references updated?
+- [ ] Did I introduce any regressions?
+- [ ] Would a code reviewer approve this?
+- [ ] Did I test the happy path AND edge cases?
+
+### Critical Scenarios Requiring Extra Verification
+
+**Moving/Renaming Files:**
+```
+1. Update all imports in dependent files
+2. Run build to catch broken references
+3. Grep for old path to ensure nothing was missed
+4. Run tests to verify functionality preserved
+```
+
+**Deleting Code:**
+```
+1. Search for usages before deleting
+2. Verify nothing depends on deleted code
+3. Run tests to catch regressions
+```
+
+**Changing Function Signatures:**
+```
+1. Update all callers
+2. Run type checker (if available)
+3. Run tests covering the changed function
+```
+
+### When Verification Fails
+
+If verification reveals issues:
+1. **Don't ignore it** - fix the problem
+2. **Update your todo list** - add fix tasks
+3. **Re-verify after fixing** - ensure the fix works
+4. **Learn from it** - what did you miss initially?
+
+### Anti-Patterns to Avoid
+- Saying "done" without running build/tests
+- Assuming refactors don't break anything
+- Skipping verification because "it's a small change"
+- Moving on when tests fail
+- Ignoring type errors or warnings
+"""
+
 # Parallel tool execution patterns
 PARALLEL_EXECUTION = """
 ## Parallel Tool Execution
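The "Syntax & Build Check" step maps directly onto subprocess calls. This release also ships a new `emdash_core/agent/verifier/` package (see the file list above), but its internals are not in this diff, so the sketch below is only one plausible shape for the per-language checks, not the actual implementation.

```python
# Illustrative only: run the per-language syntax/build checks the new
# VERIFICATION_AND_CRITIQUE prompt lists. Not taken from emdash_core/agent/verifier/.
import subprocess
from pathlib import Path

CHECKS = {
    ".py": lambda f: ["python", "-m", "py_compile", str(f)],
    ".ts": lambda f: ["tsc", "--noEmit"],  # checks the whole project, not one file
    ".rs": lambda f: ["cargo", "check"],   # likewise project-wide
    ".go": lambda f: ["go", "build"],
}

def syntax_check(path: Path) -> bool:
    """Return True if the file passes its language's check (or has none registered)."""
    make_cmd = CHECKS.get(path.suffix)
    if make_cmd is None:
        return True  # no check registered for this extension
    result = subprocess.run(make_cmd(path), capture_output=True, text=True)
    return result.returncode == 0
```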
@@ -237,6 +341,17 @@ Do this in ONE message:
 → All three run concurrently, results return together
 """

+# Plan-First rule to prevent over-exploration
+PLAN_FIRST_RULE = """
+## Plan-First Reminder
+
+**Know what to do?** → State a 3-5 step plan, then execute. Don't explore beyond your plan.
+
+**Don't know?** → Spawn Explore agent or ask ONE clarifying question.
+
+Trust user context - if they say "the file" or "this", they know which one.
+"""
+
 # Efficiency rules for sub-agents with limited turns
 EFFICIENCY_RULES = """
 ## Efficiency Rules
@@ -333,7 +448,7 @@ You have access to `write_todo` and `update_todo_list` tools. **Use them frequen
 - User can see your progress in real-time
 - Helps you stay organized on complex tasks
 - Creates a clear record of what was done
-- Prevents forgetting steps
+- **Prevents forgetting steps** - you won't accidentally stop before completing all tasks

 ### Only SKIP the todo list for:
 - **Truly trivial fixes** (single typo, one-line change)
@@ -359,6 +474,42 @@ You have access to `write_todo` and `update_todo_list` tools. **Use them frequen
 4. **Mark completed**: Use `update_todo_list(task_id="1", status="completed")` IMMEDIATELY after finishing
 5. **Never batch**: Mark each task complete right away, don't wait

+### CRITICAL: Don't stop until ALL todos are complete
+Before finishing, check your todo list. If ANY task is not marked completed, you're not done.
+If you have remaining tasks like "commit and push", DO THEM - don't just announce them.
+
 ### When in doubt, USE THE TODO LIST
 It's better to over-track than under-track. The user appreciates seeing progress.
 """
+
+# Critical rule about actions vs announcements
+ACTION_NOT_ANNOUNCEMENT = """
+## CRITICAL: Act, Don't Announce
+
+**NEVER say "Now let me do X" or "Let me X" without actually calling the tool in the same response.**
+
+When you output text without tool calls, your turn ENDS. The task stops.
+
+### Bad (causes task to stop incomplete):
+```
+I've completed the merge. Now let me commit and push:
+[NO TOOL CALL - TASK STOPS HERE]
+```
+
+### Good (actually executes the action):
+```
+I've completed the merge. Committing and pushing now.
+[execute_command: git add . && git commit -m "..." && git push]
+```
+
+### Rules:
+1. **If you say you'll do something, DO IT in the same response**
+2. **If you have pending todos, execute them before responding with text only**
+3. **Text-only responses signal "I'm done" - only use when truly finished**
+4. **Check your todo list before each text response - are there incomplete items?**
+
+### The Pattern:
+- Want to do multiple things? → Make multiple tool calls in one response
+- Have more steps? → Keep calling tools until ALL are done
+- Ready to finish? → Then and only then, respond with just text
+"""
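The six workflow.py hunks above add `VERIFICATION_AND_CRITIQUE`, `PLAN_FIRST_RULE`, and `ACTION_NOT_ANNOUNCEMENT` as module-level constants. How they are stitched into the agent's system prompt is outside this diff; here is a minimal sketch of one plausible composition, with the builder function invented for illustration.

```python
# The imports reflect real additions in this diff; build_system_prompt itself
# is a hypothetical composition, not code from the package.
from emdash_core.agent.prompts.workflow import (
    ACTION_NOT_ANNOUNCEMENT,
    PLAN_FIRST_RULE,
    VERIFICATION_AND_CRITIQUE,
)

def build_system_prompt(base_prompt: str) -> str:
    """Append the new behavioral rules to a base agent prompt."""
    sections = [base_prompt, PLAN_FIRST_RULE, VERIFICATION_AND_CRITIQUE, ACTION_NOT_ANNOUNCEMENT]
    return "\n\n".join(sections)
```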
emdash_core/agent/providers/models.py
@@ -111,7 +111,7 @@ class ChatModel(Enum):
     context_window=1000000,
     max_output_tokens=16384,
     supports_tools=True,
-    supports_vision=False,
+    supports_vision=False,  # Fireworks deployment doesn't expose vision
     supports_thinking=False,
     description="MiniMax M2P1 - Long context model",
 )
emdash_core/agent/providers/openai_provider.py
@@ -139,6 +139,8 @@ class OpenAIProvider(LLMProvider):
     self._thinking_budget = int(os.environ.get("EMDASH_THINKING_BUDGET", "10000"))
     # Reasoning effort for Fireworks thinking models: none, low, medium, high
     self._reasoning_effort = os.environ.get("EMDASH_REASONING_EFFORT", "medium")
+    # Parallel tool calls for OpenAI-compatible APIs (Fireworks supports this)
+    self._parallel_tool_calls = self._parse_bool_env("EMDASH_PARALLEL_TOOL_CALLS")

     # Use OPENAI_BASE_URL env var only for OpenAI provider, otherwise use provider config
     if self._provider == "openai":
@@ -244,6 +246,14 @@ class OpenAIProvider(LLMProvider):
 if tools:
     kwargs["tools"] = tools
     kwargs["tool_choice"] = "auto"
+    # Add parallel_tool_calls if enabled (Fireworks and OpenAI support this)
+    if self._parallel_tool_calls is True:
+        kwargs["parallel_tool_calls"] = True
+        log.debug(
+            "Parallel tool calls enabled provider={} model={}",
+            self._provider,
+            self.model,
+        )

 # Add reasoning support via extra_body for providers that support it
 # Skip reasoning for custom base URLs (they may not support it)
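These two openai_provider.py hunks gate the standard chat-completions `parallel_tool_calls` flag behind an `EMDASH_PARALLEL_TOOL_CALLS` environment variable. `_parse_bool_env` is referenced but defined outside this diff, so its tri-state behavior in the sketch below (unset returns None, leaving the API default untouched) is an assumption.

```python
# Sketch of the env-flag plumbing the hunks imply. _parse_bool_env's body is not
# shown in the diff; this tri-state reading is an assumed implementation.
import os
from typing import Optional

def _parse_bool_env(name: str) -> Optional[bool]:
    raw = os.environ.get(name)
    if raw is None:
        return None  # unset: keep the API's default behavior
    return raw.strip().lower() in ("1", "true", "yes", "on")

# With EMDASH_PARALLEL_TOOL_CALLS=1, the request gains the standard
# OpenAI chat-completions flag alongside tools/tool_choice:
kwargs: dict = {"tools": [], "tool_choice": "auto"}
if _parse_bool_env("EMDASH_PARALLEL_TOOL_CALLS") is True:
    kwargs["parallel_tool_calls"] = True  # passed to client.chat.completions.create(**kwargs)
```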
emdash_core/agent/research/researcher.py
@@ -13,6 +13,7 @@ Team values enforced:
 """

 import json
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Optional

 from rich.console import Console
@@ -129,6 +130,8 @@ class ResearcherAgent:
     questions: list[ResearchQuestion],
     context: dict,
     budget: dict,
+    parallel: bool = True,
+    max_workers: int = 3,
 ) -> tuple[list[EvidenceItem], dict]:
     """Execute appropriate macros for questions.

@@ -136,6 +139,8 @@
     questions: Research questions to investigate
     context: Prior context from previous iterations
     budget: Remaining budget {tool_calls, tokens, time_s}
+    parallel: Whether to run questions in parallel (default True)
+    max_workers: Max parallel workers (default 3)

 Returns:
     Tuple of (evidence_items, updated_context)
@@ -144,67 +149,171 @@
     updated_context = dict(context)
     budget_remaining = budget.get("tool_calls", 50)

-
-
-
-
-
+    # Step 1: Bootstrap search (sequential - establishes shared context)
+    if not updated_context.get("last_search_results") and questions:
+        # Use first question's topic for bootstrap
+        first_topic = self._extract_topic(questions[0].question)
+        evidence, ctx_updates, budget_remaining = self._bootstrap_search(
+            topic=first_topic,
+            budget_remaining=budget_remaining,
+        )
+        all_evidence.extend(evidence)
+        updated_context.update(ctx_updates)

+    if budget_remaining <= 0:
         if self.verbose:
-            self.console.print(
-
-
-
+            self.console.print("[yellow]Budget exhausted after bootstrap[/yellow]")
+        return all_evidence, updated_context
+
+    # Step 2: Run macro execution for each question
+    if parallel and len(questions) > 1:
+        # Parallel execution
+        evidence, ctx = self._run_questions_parallel(
+            questions=questions,
+            context=updated_context,
+            budget_remaining=budget_remaining,
+            max_workers=max_workers,
+        )
+        all_evidence.extend(evidence)
+        updated_context.update(ctx)
+    else:
+        # Sequential execution (for single question or when parallel=False)
+        for question in questions:
+            if budget_remaining <= 0:
+                if self.verbose:
+                    self.console.print("[yellow]Budget exhausted[/yellow]")
+                break

-
-
-
-            topic=topic,
+            evidence, ctx, budget_remaining = self._run_single_question(
+                question=question,
+                context=updated_context,
                 budget_remaining=budget_remaining,
             )
             all_evidence.extend(evidence)
-            updated_context.update(
+            updated_context.update(ctx)

-
-    if budget_remaining > 0 and updated_context.get("last_search_results"):
-        macros_to_run = question.suggested_tools or suggest_macros(
-            question.question,
-            include_github=self._mcp_available
-        )
+    return all_evidence, updated_context

-
-
-
+def _run_single_question(
+    self,
+    question: ResearchQuestion,
+    context: dict,
+    budget_remaining: int,
+) -> tuple[list[EvidenceItem], dict, int]:
+    """Run macros for a single question.

-
-
+    Args:
+        question: Research question to investigate
+        context: Current context
+        budget_remaining: Remaining budget

-
+    Returns:
+        Tuple of (evidence, context_updates, remaining_budget)
+    """
+    evidence: list[EvidenceItem] = []
+    ctx_updates: dict = {}

-
-
-    params["identifier"] = top_result.get("qualified_name", topic)
+    if self.verbose:
+        self.console.print(f"[dim]Investigating: {question.question}[/dim]")

-
-
-
-
-
-
-
+    topic = self._extract_topic(question.question)
+
+    if budget_remaining > 0 and context.get("last_search_results"):
+        macros_to_run = question.suggested_tools or suggest_macros(
+            question.question,
+            include_github=self._mcp_available
+        )
+
+        for macro_name in macros_to_run:
+            if budget_remaining <= 0:
+                break

-
-
-    budget_remaining -= len(evidence)
+            if macro_name not in TOOL_MACROS:
+                continue

-
-    self.console.print(f" [green]{macro_name}: {len(evidence)} evidence[/green]")
+            params = {"topic": topic, "symbol": topic}

-
-
-
+            if "last_search_top" in context:
+                top_result = context["last_search_top"]
+                params["identifier"] = top_result.get("qualified_name", topic)

-
+            try:
+                ev, ctx = self.macro_executor.execute_macro(
+                    macro_name=macro_name,
+                    params=params,
+                    budget_remaining=budget_remaining,
+                    prior_context=context,
+                )
+
+                evidence.extend(ev)
+                ctx_updates.update(ctx)
+                budget_remaining -= len(ev)
+
+                if self.verbose:
+                    self.console.print(f" [green]{macro_name}: {len(ev)} evidence[/green]")
+
+            except Exception as e:
+                if self.verbose:
+                    self.console.print(f" [red]{macro_name}: {e}[/red]")
+
+    return evidence, ctx_updates, budget_remaining
+
+def _run_questions_parallel(
+    self,
+    questions: list[ResearchQuestion],
+    context: dict,
+    budget_remaining: int,
+    max_workers: int = 3,
+) -> tuple[list[EvidenceItem], dict]:
+    """Run multiple questions in parallel using thread pool.
+
+    Args:
+        questions: Research questions to investigate
+        context: Shared context from bootstrap
+        budget_remaining: Total remaining budget
+        max_workers: Max parallel workers
+
+    Returns:
+        Tuple of (all_evidence, merged_context)
+    """
+    all_evidence: list[EvidenceItem] = []
+    merged_context: dict = {}
+
+    # Distribute budget among questions (with some buffer)
+    budget_per_question = max(5, budget_remaining // len(questions))
+
+    if self.verbose:
+        self.console.print(
+            f"[dim]Running {len(questions)} questions in parallel "
+            f"({max_workers} workers, {budget_per_question} budget each)[/dim]"
+        )
+
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Submit all questions
+        future_to_question = {
+            executor.submit(
+                self._run_single_question,
+                question=q,
+                context=context,
+                budget_remaining=budget_per_question,
+            ): q
+            for q in questions
+        }
+
+        # Collect results as they complete
+        for future in as_completed(future_to_question):
+            question = future_to_question[future]
+            try:
+                evidence, ctx_updates, _ = future.result()
+                all_evidence.extend(evidence)
+                merged_context.update(ctx_updates)
+            except Exception as e:
+                if self.verbose:
+                    self.console.print(
+                        f"[red]Question failed: {question.question[:50]}... - {e}[/red]"
+                    )
+
+    return all_evidence, merged_context

 def _bootstrap_search(
     self,
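Taken together, the researcher.py hunks replace a single sequential loop with a bootstrap search plus a thread-pool fan-out. One consequence worth noting: `budget_per_question = max(5, budget_remaining // len(questions))` floors each worker at 5 tool calls, so with many questions the combined spend can exceed the parent budget. Below is a self-contained sketch of the same fan-out pattern; the names are illustrative stand-ins, not the package's API.

```python
# Self-contained sketch of the fan-out pattern _run_questions_parallel uses:
# split the budget, submit one worker per question, merge results as they finish.
from concurrent.futures import ThreadPoolExecutor, as_completed

def investigate(question: str, budget: int) -> list[str]:
    """Stand-in for _run_single_question; spends up to `budget` tool calls."""
    return [f"evidence for {question!r} (budget {budget})"]

def fan_out(questions: list[str], budget: int, max_workers: int = 3) -> list[str]:
    per_question = max(5, budget // len(questions))  # floor of 5, as in the diff
    findings: list[str] = []
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {pool.submit(investigate, q, per_question): q for q in questions}
        for future in as_completed(futures):
            try:
                findings.extend(future.result())
            except Exception as exc:  # one failed question shouldn't sink the batch
                print(f"question {futures[future]!r} failed: {exc}")
    return findings

print(fan_out(["where is auth?", "how are errors handled?"], budget=20))
```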