emdash-core 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emdash_core/__init__.py +3 -0
- emdash_core/agent/__init__.py +37 -0
- emdash_core/agent/agents.py +225 -0
- emdash_core/agent/code_reviewer.py +476 -0
- emdash_core/agent/compaction.py +143 -0
- emdash_core/agent/context_manager.py +140 -0
- emdash_core/agent/events.py +338 -0
- emdash_core/agent/handlers.py +224 -0
- emdash_core/agent/inprocess_subagent.py +377 -0
- emdash_core/agent/mcp/__init__.py +50 -0
- emdash_core/agent/mcp/client.py +346 -0
- emdash_core/agent/mcp/config.py +302 -0
- emdash_core/agent/mcp/manager.py +496 -0
- emdash_core/agent/mcp/tool_factory.py +213 -0
- emdash_core/agent/prompts/__init__.py +38 -0
- emdash_core/agent/prompts/main_agent.py +104 -0
- emdash_core/agent/prompts/subagents.py +131 -0
- emdash_core/agent/prompts/workflow.py +136 -0
- emdash_core/agent/providers/__init__.py +34 -0
- emdash_core/agent/providers/base.py +143 -0
- emdash_core/agent/providers/factory.py +80 -0
- emdash_core/agent/providers/models.py +220 -0
- emdash_core/agent/providers/openai_provider.py +463 -0
- emdash_core/agent/providers/transformers_provider.py +217 -0
- emdash_core/agent/research/__init__.py +81 -0
- emdash_core/agent/research/agent.py +143 -0
- emdash_core/agent/research/controller.py +254 -0
- emdash_core/agent/research/critic.py +428 -0
- emdash_core/agent/research/macros.py +469 -0
- emdash_core/agent/research/planner.py +449 -0
- emdash_core/agent/research/researcher.py +436 -0
- emdash_core/agent/research/state.py +523 -0
- emdash_core/agent/research/synthesizer.py +594 -0
- emdash_core/agent/reviewer_profile.py +475 -0
- emdash_core/agent/rules.py +123 -0
- emdash_core/agent/runner.py +601 -0
- emdash_core/agent/session.py +262 -0
- emdash_core/agent/spec_schema.py +66 -0
- emdash_core/agent/specification.py +479 -0
- emdash_core/agent/subagent.py +397 -0
- emdash_core/agent/subagent_prompts.py +13 -0
- emdash_core/agent/toolkit.py +482 -0
- emdash_core/agent/toolkits/__init__.py +64 -0
- emdash_core/agent/toolkits/base.py +96 -0
- emdash_core/agent/toolkits/explore.py +47 -0
- emdash_core/agent/toolkits/plan.py +55 -0
- emdash_core/agent/tools/__init__.py +141 -0
- emdash_core/agent/tools/analytics.py +436 -0
- emdash_core/agent/tools/base.py +131 -0
- emdash_core/agent/tools/coding.py +484 -0
- emdash_core/agent/tools/github_mcp.py +592 -0
- emdash_core/agent/tools/history.py +13 -0
- emdash_core/agent/tools/modes.py +153 -0
- emdash_core/agent/tools/plan.py +206 -0
- emdash_core/agent/tools/plan_write.py +135 -0
- emdash_core/agent/tools/search.py +412 -0
- emdash_core/agent/tools/spec.py +341 -0
- emdash_core/agent/tools/task.py +262 -0
- emdash_core/agent/tools/task_output.py +204 -0
- emdash_core/agent/tools/tasks.py +454 -0
- emdash_core/agent/tools/traversal.py +588 -0
- emdash_core/agent/tools/web.py +179 -0
- emdash_core/analytics/__init__.py +5 -0
- emdash_core/analytics/engine.py +1286 -0
- emdash_core/api/__init__.py +5 -0
- emdash_core/api/agent.py +308 -0
- emdash_core/api/agents.py +154 -0
- emdash_core/api/analyze.py +264 -0
- emdash_core/api/auth.py +173 -0
- emdash_core/api/context.py +77 -0
- emdash_core/api/db.py +121 -0
- emdash_core/api/embed.py +131 -0
- emdash_core/api/feature.py +143 -0
- emdash_core/api/health.py +93 -0
- emdash_core/api/index.py +162 -0
- emdash_core/api/plan.py +110 -0
- emdash_core/api/projectmd.py +210 -0
- emdash_core/api/query.py +320 -0
- emdash_core/api/research.py +122 -0
- emdash_core/api/review.py +161 -0
- emdash_core/api/router.py +76 -0
- emdash_core/api/rules.py +116 -0
- emdash_core/api/search.py +119 -0
- emdash_core/api/spec.py +99 -0
- emdash_core/api/swarm.py +223 -0
- emdash_core/api/tasks.py +109 -0
- emdash_core/api/team.py +120 -0
- emdash_core/auth/__init__.py +17 -0
- emdash_core/auth/github.py +389 -0
- emdash_core/config.py +74 -0
- emdash_core/context/__init__.py +52 -0
- emdash_core/context/models.py +50 -0
- emdash_core/context/providers/__init__.py +11 -0
- emdash_core/context/providers/base.py +74 -0
- emdash_core/context/providers/explored_areas.py +183 -0
- emdash_core/context/providers/touched_areas.py +360 -0
- emdash_core/context/registry.py +73 -0
- emdash_core/context/reranker.py +199 -0
- emdash_core/context/service.py +260 -0
- emdash_core/context/session.py +352 -0
- emdash_core/core/__init__.py +104 -0
- emdash_core/core/config.py +454 -0
- emdash_core/core/exceptions.py +55 -0
- emdash_core/core/models.py +265 -0
- emdash_core/core/review_config.py +57 -0
- emdash_core/db/__init__.py +67 -0
- emdash_core/db/auth.py +134 -0
- emdash_core/db/models.py +91 -0
- emdash_core/db/provider.py +222 -0
- emdash_core/db/providers/__init__.py +5 -0
- emdash_core/db/providers/supabase.py +452 -0
- emdash_core/embeddings/__init__.py +24 -0
- emdash_core/embeddings/indexer.py +534 -0
- emdash_core/embeddings/models.py +192 -0
- emdash_core/embeddings/providers/__init__.py +7 -0
- emdash_core/embeddings/providers/base.py +112 -0
- emdash_core/embeddings/providers/fireworks.py +141 -0
- emdash_core/embeddings/providers/openai.py +104 -0
- emdash_core/embeddings/registry.py +146 -0
- emdash_core/embeddings/service.py +215 -0
- emdash_core/graph/__init__.py +26 -0
- emdash_core/graph/builder.py +134 -0
- emdash_core/graph/connection.py +692 -0
- emdash_core/graph/schema.py +416 -0
- emdash_core/graph/writer.py +667 -0
- emdash_core/ingestion/__init__.py +7 -0
- emdash_core/ingestion/change_detector.py +150 -0
- emdash_core/ingestion/git/__init__.py +5 -0
- emdash_core/ingestion/git/commit_analyzer.py +196 -0
- emdash_core/ingestion/github/__init__.py +6 -0
- emdash_core/ingestion/github/pr_fetcher.py +296 -0
- emdash_core/ingestion/github/task_extractor.py +100 -0
- emdash_core/ingestion/orchestrator.py +540 -0
- emdash_core/ingestion/parsers/__init__.py +10 -0
- emdash_core/ingestion/parsers/base_parser.py +66 -0
- emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
- emdash_core/ingestion/parsers/class_extractor.py +154 -0
- emdash_core/ingestion/parsers/function_extractor.py +202 -0
- emdash_core/ingestion/parsers/import_analyzer.py +119 -0
- emdash_core/ingestion/parsers/python_parser.py +123 -0
- emdash_core/ingestion/parsers/registry.py +72 -0
- emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
- emdash_core/ingestion/parsers/typescript_parser.py +278 -0
- emdash_core/ingestion/repository.py +346 -0
- emdash_core/models/__init__.py +38 -0
- emdash_core/models/agent.py +68 -0
- emdash_core/models/index.py +77 -0
- emdash_core/models/query.py +113 -0
- emdash_core/planning/__init__.py +7 -0
- emdash_core/planning/agent_api.py +413 -0
- emdash_core/planning/context_builder.py +265 -0
- emdash_core/planning/feature_context.py +232 -0
- emdash_core/planning/feature_expander.py +646 -0
- emdash_core/planning/llm_explainer.py +198 -0
- emdash_core/planning/similarity.py +509 -0
- emdash_core/planning/team_focus.py +821 -0
- emdash_core/server.py +153 -0
- emdash_core/sse/__init__.py +5 -0
- emdash_core/sse/stream.py +196 -0
- emdash_core/swarm/__init__.py +17 -0
- emdash_core/swarm/merge_agent.py +383 -0
- emdash_core/swarm/session_manager.py +274 -0
- emdash_core/swarm/swarm_runner.py +226 -0
- emdash_core/swarm/task_definition.py +137 -0
- emdash_core/swarm/worker_spawner.py +319 -0
- emdash_core/swarm/worktree_manager.py +278 -0
- emdash_core/templates/__init__.py +10 -0
- emdash_core/templates/defaults/agent-builder.md.template +82 -0
- emdash_core/templates/defaults/focus.md.template +115 -0
- emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
- emdash_core/templates/defaults/pr-review.md.template +80 -0
- emdash_core/templates/defaults/project.md.template +85 -0
- emdash_core/templates/defaults/research_critic.md.template +112 -0
- emdash_core/templates/defaults/research_planner.md.template +85 -0
- emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
- emdash_core/templates/defaults/reviewer.md.template +81 -0
- emdash_core/templates/defaults/spec.md.template +41 -0
- emdash_core/templates/defaults/tasks.md.template +78 -0
- emdash_core/templates/loader.py +296 -0
- emdash_core/utils/__init__.py +45 -0
- emdash_core/utils/git.py +84 -0
- emdash_core/utils/image.py +502 -0
- emdash_core/utils/logger.py +51 -0
- emdash_core-0.1.7.dist-info/METADATA +35 -0
- emdash_core-0.1.7.dist-info/RECORD +187 -0
- emdash_core-0.1.7.dist-info/WHEEL +4 -0
- emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
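
The inventory above covers every file added in the 0.1.7 wheel. Since a wheel is a standard zip archive, the same file inventory (though not the added-line counts) can be reproduced locally with only the standard library. The snippet below is a minimal sketch; the wheel filename follows the PEP 427 convention implied by the title and is assumed, as is having downloaded the wheel first (e.g. with `pip download emdash-core==0.1.7 --no-deps`).

    # Minimal sketch: list the contents of the emdash-core 0.1.7 wheel.
    import zipfile

    WHEEL = "emdash_core-0.1.7-py3-none-any.whl"  # filename assumed from the title above

    with zipfile.ZipFile(WHEEL) as wheel:
        for info in sorted(wheel.infolist(), key=lambda i: i.filename):
            print(f"{info.filename}  ({info.file_size} bytes)")
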
emdash_core/agent/research/researcher.py
@@ -0,0 +1,436 @@
"""Researcher agent for executing tool macros and collecting evidence.

The Researcher executes tool macros to gather evidence for research questions.
It produces:
- EvidenceItem list (machine-verifiable)
- Claim list (grounded statements)
- Gap list (explicit unknowns)

Team values enforced:
- V1: Prefer "unknown" over guesses
- V2: All evidence is reproducible (tool calls documented)
- V4: Cost awareness (uses budget limits)
"""

import json
from typing import Optional

from rich.console import Console

from ..toolkit import AgentToolkit
from ..providers import get_provider
from ..providers.factory import DEFAULT_MODEL
from ...core.config import get_config
from ..events import AgentEventEmitter, NullEmitter
from .state import (
    EvidenceItem,
    Claim,
    Gap,
    ResearchQuestion,
    ResearchPlan,
)
from .macros import (
    MacroExecutor,
    suggest_macros,
    TOOL_MACROS,
)
from ..compaction import LLMCompactor


RESEARCHER_SYSTEM_PROMPT = """You are a research analyst that extracts claims from collected evidence.

CRITICAL RULES:
1. EVERY piece of evidence contains findings - extract them!
2. NEVER say "X results were found but not examined" - READ the evidence details
3. Each claim MUST cite evidence IDs (e.g., "based on E1, E3")
4. Report WHAT was found, not just that something was found

You will receive:
1. A research goal
2. Questions to answer
3. DETAILED evidence with actual entities, names, and content

YOUR JOB - Extract claims from evidence:
1. READ each evidence item carefully - it contains actual names and details
2. MAKE CLAIMS about what the evidence shows (entities, patterns, relationships)
3. If the exact topic wasn't found, claim what WAS found (it's still valuable)
4. Identify GAPS only for questions with truly no relevant evidence

CLAIM FORMAT:
{
  "id": "C1",
  "statement": "The codebase contains a WebScrapeService class that handles web scraping operations",
  "evidence_ids": ["E1", "E3"],
  "confidence": 2,
  "assumptions": []
}

Confidence levels:
- 1: Single source, may have assumptions
- 2: Multiple sources corroborate (requires 2+ evidence_ids)
- 3: Strong evidence, no assumptions

GAP FORMAT (only when evidence truly missing):
{
  "question": "What tests cover the toolkit?",
  "reason": "No test files found in any search results",
  "suggested_tools": ["text_search"]
}

OUTPUT FORMAT (JSON only, no markdown):
{
  "claims": [...],
  "gaps": [...],
  "summary": "Brief summary of key findings"
}

IMPORTANT: Evidence contains ACTUAL entity names and details. Extract them into claims."""


class ResearcherAgent:
    """Executes tool macros, collects evidence, proposes claims.

    The Researcher is the "hands" of the research process. It:
    1. Runs appropriate tool macros for each question
    2. Collects evidence with unique IDs
    3. Proposes claims grounded in evidence
    4. Identifies gaps where evidence is missing
    """

    def __init__(
        self,
        model: str = DEFAULT_MODEL,
        verbose: bool = True,
        emitter: Optional[AgentEventEmitter] = None,
    ):
        """Initialize the researcher agent.

        Args:
            model: LLM model to use for claim generation
            verbose: Whether to print progress
            emitter: Event emitter for unified message stream
        """
        self.provider = get_provider(model)
        self.model = model
        self.verbose = verbose
        self.console = Console()
        self.toolkit = AgentToolkit(enable_session=True)
        self.macro_executor = MacroExecutor(self.toolkit)
        self.claim_counter = 0
        self.emitter = emitter or NullEmitter(agent_name="ResearcherAgent")
        self.compactor = LLMCompactor(self.provider)

        # Check if GitHub MCP is available
        config = get_config()
        self._mcp_available = config.mcp.is_available

    def run_macros(
        self,
        questions: list[ResearchQuestion],
        context: dict,
        budget: dict,
    ) -> tuple[list[EvidenceItem], dict]:
        """Execute appropriate macros for questions.

        Args:
            questions: Research questions to investigate
            context: Prior context from previous iterations
            budget: Remaining budget {tool_calls, tokens, time_s}

        Returns:
            Tuple of (evidence_items, updated_context)
        """
        all_evidence: list[EvidenceItem] = []
        updated_context = dict(context)
        budget_remaining = budget.get("tool_calls", 50)

        for question in questions:
            if budget_remaining <= 0:
                if self.verbose:
                    self.console.print("[yellow]Budget exhausted[/yellow]")
                break

            if self.verbose:
                self.console.print(f"[dim]Investigating: {question.question}[/dim]")

            # Extract topic from question
            topic = self._extract_topic(question.question)

            # First, try direct search to bootstrap context
            if not updated_context.get("last_search_results"):
                evidence, ctx_updates, budget_remaining = self._bootstrap_search(
                    topic=topic,
                    budget_remaining=budget_remaining,
                )
                all_evidence.extend(evidence)
                updated_context.update(ctx_updates)

            # Then try macros if we have context
            if budget_remaining > 0 and updated_context.get("last_search_results"):
                macros_to_run = question.suggested_tools or suggest_macros(
                    question.question,
                    include_github=self._mcp_available
                )

                for macro_name in macros_to_run:
                    if budget_remaining <= 0:
                        break

                    if macro_name not in TOOL_MACROS:
                        continue

                    params = {"topic": topic, "symbol": topic}

                    if "last_search_top" in updated_context:
                        top_result = updated_context["last_search_top"]
                        params["identifier"] = top_result.get("qualified_name", topic)

                    try:
                        evidence, ctx_updates = self.macro_executor.execute_macro(
                            macro_name=macro_name,
                            params=params,
                            budget_remaining=budget_remaining,
                            prior_context=updated_context,
                        )

                        all_evidence.extend(evidence)
                        updated_context.update(ctx_updates)
                        budget_remaining -= len(evidence)

                        if self.verbose:
                            self.console.print(f"  [green]{macro_name}: {len(evidence)} evidence[/green]")

                    except Exception as e:
                        if self.verbose:
                            self.console.print(f"  [red]{macro_name}: {e}[/red]")

        return all_evidence, updated_context

    def _bootstrap_search(
        self,
        topic: str,
        budget_remaining: int,
    ) -> tuple[list[EvidenceItem], dict, int]:
        """Bootstrap context with direct search calls.

        Args:
            topic: The topic to search for
            budget_remaining: Remaining tool call budget

        Returns:
            Tuple of (evidence, context_updates, remaining_budget)
        """
        evidence: list[EvidenceItem] = []
        context: dict = {}

        # Try semantic search
        try:
            result = self.toolkit.execute(
                "semantic_search",
                query=topic,
                limit=10,
                min_score=0.3,
            )
            budget_remaining -= 1

            if result.success and result.data.get("results"):
                self.macro_executor.evidence_counter += 1
                ev = EvidenceItem(
                    id=f"E{self.macro_executor.evidence_counter}",
                    tool="semantic_search",
                    input={"query": topic, "limit": 10},
                    output_ref=f"result_{self.macro_executor.evidence_counter}",
                    summary=f"Search for '{topic}': {len(result.data['results'])} results",
                    entities=[r.get("qualified_name", "") for r in result.data["results"][:5]],
                )
                evidence.append(ev)

                context["last_search_results"] = result.data["results"]
                context["last_search_top"] = result.data["results"][0]

                if self.verbose:
                    self.console.print(f"  [green]search: {len(result.data['results'])} results[/green]")

        except Exception as e:
            if self.verbose:
                self.console.print(f"  [dim]search failed: {e}[/dim]")

        return evidence, context, budget_remaining

    def propose_claims(
        self,
        goal: str,
        questions: list[ResearchQuestion],
        evidence: list[EvidenceItem],
        prior_claims: list[Claim] = None,
    ) -> list[Claim]:
        """Generate claims grounded in evidence.

        Args:
            goal: Research goal
            questions: Questions being answered
            evidence: Evidence collected
            prior_claims: Claims from previous iterations

        Returns:
            List of new Claims
        """
        if not evidence:
            return []

        # Build evidence summary for LLM
        evidence_text = "\n".join([
            f"[{e.id}] {e.tool}: {e.summary}"
            for e in evidence
        ])

        questions_text = "\n".join([
            f"- [{q.priority}] {q.question}"
            for q in questions
        ])

        prior_text = ""
        if prior_claims:
            prior_text = "\nPRIOR CLAIMS:\n" + "\n".join([
                f"- {c.statement}" for c in prior_claims
            ])

        user_message = f"""Extract claims from evidence.

GOAL: {goal}

QUESTIONS:
{questions_text}

EVIDENCE:
{evidence_text}
{prior_text}

Return JSON with claims and gaps."""

        messages = [{"role": "user", "content": user_message}]
        response = self.provider.chat(messages, system=RESEARCHER_SYSTEM_PROMPT)

        return self._parse_claims(response.content or "", evidence)

    def _parse_claims(
        self,
        content: str,
        evidence: list[EvidenceItem],
    ) -> list[Claim]:
        """Parse claims from LLM response."""
        claims = []
        evidence_ids = {e.id for e in evidence}

        try:
            json_str = content
            if "```" in content:
                start = content.find("```")
                end = content.find("```", start + 3)
                if end > start:
                    json_str = content[start + 3:end]
                    if json_str.startswith("json"):
                        json_str = json_str[4:]

            data = json.loads(json_str.strip())

            for c in data.get("claims", []):
                claim_evidence = c.get("evidence_ids", [])
                valid_evidence = [eid for eid in claim_evidence if eid in evidence_ids]

                if not valid_evidence:
                    continue

                self.claim_counter += 1

                confidence = c.get("confidence", 1)
                assumptions = c.get("assumptions", [])

                if confidence >= 2 and len(valid_evidence) < 2:
                    confidence = 1
                if assumptions and confidence > 1:
                    confidence = 1

                try:
                    claim = Claim(
                        id=c.get("id", f"C{self.claim_counter}"),
                        statement=c["statement"],
                        evidence_ids=valid_evidence,
                        confidence=confidence,
                        assumptions=assumptions,
                    )
                    claims.append(claim)
                except ValueError:
                    pass

        except (json.JSONDecodeError, KeyError, TypeError):
            pass

        return claims

    def identify_gaps(
        self,
        plan: ResearchPlan,
        claims: list[Claim],
        evidence: list[EvidenceItem],
    ) -> list[Gap]:
        """Identify questions that couldn't be answered."""
        gaps = []

        for question in plan.questions:
            if question.priority == "P2":
                continue

            # Check if any claim addresses this question
            question_addressed = False
            question_lower = question.question.lower()

            for claim in claims:
                claim_lower = claim.statement.lower()
                keywords = self._extract_keywords(question_lower)

                if any(kw in claim_lower for kw in keywords):
                    question_addressed = True
                    break

            if not question_addressed:
                gaps.append(Gap(
                    question=question.question,
                    reason="No relevant claims found",
                    suggested_tools=question.suggested_tools or ["semantic_search"],
                ))

        return gaps

    def _extract_topic(self, question: str) -> str:
        """Extract the main topic from a question."""
        topic = question.lower()
        patterns = [
            "what is the feature/behavior of ",
            "where is ",
            "what depends on ",
            "who owns or touches ",
            "what's risky about ",
            "what tests/ci validate ",
            "what should a reviewer check for ",
            "how does ",
            "what is ",
        ]

        for pattern in patterns:
            if topic.startswith(pattern):
                topic = topic[len(pattern):]
                break

        return topic.rstrip("?!.").strip() or question

    def _extract_keywords(self, text: str) -> list[str]:
        """Extract keywords from text for matching."""
        stopwords = {
            "the", "a", "an", "is", "are", "was", "were", "be",
            "to", "of", "in", "for", "on", "with", "at", "by",
            "what", "where", "who", "how", "why", "when",
        }

        words = text.lower().split()
        return [w.strip("?!.,") for w in words if w not in stopwords and len(w) > 2]