emdash-core 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. emdash_core/__init__.py +3 -0
  2. emdash_core/agent/__init__.py +37 -0
  3. emdash_core/agent/agents.py +225 -0
  4. emdash_core/agent/code_reviewer.py +476 -0
  5. emdash_core/agent/compaction.py +143 -0
  6. emdash_core/agent/context_manager.py +140 -0
  7. emdash_core/agent/events.py +338 -0
  8. emdash_core/agent/handlers.py +224 -0
  9. emdash_core/agent/inprocess_subagent.py +377 -0
  10. emdash_core/agent/mcp/__init__.py +50 -0
  11. emdash_core/agent/mcp/client.py +346 -0
  12. emdash_core/agent/mcp/config.py +302 -0
  13. emdash_core/agent/mcp/manager.py +496 -0
  14. emdash_core/agent/mcp/tool_factory.py +213 -0
  15. emdash_core/agent/prompts/__init__.py +38 -0
  16. emdash_core/agent/prompts/main_agent.py +104 -0
  17. emdash_core/agent/prompts/subagents.py +131 -0
  18. emdash_core/agent/prompts/workflow.py +136 -0
  19. emdash_core/agent/providers/__init__.py +34 -0
  20. emdash_core/agent/providers/base.py +143 -0
  21. emdash_core/agent/providers/factory.py +80 -0
  22. emdash_core/agent/providers/models.py +220 -0
  23. emdash_core/agent/providers/openai_provider.py +463 -0
  24. emdash_core/agent/providers/transformers_provider.py +217 -0
  25. emdash_core/agent/research/__init__.py +81 -0
  26. emdash_core/agent/research/agent.py +143 -0
  27. emdash_core/agent/research/controller.py +254 -0
  28. emdash_core/agent/research/critic.py +428 -0
  29. emdash_core/agent/research/macros.py +469 -0
  30. emdash_core/agent/research/planner.py +449 -0
  31. emdash_core/agent/research/researcher.py +436 -0
  32. emdash_core/agent/research/state.py +523 -0
  33. emdash_core/agent/research/synthesizer.py +594 -0
  34. emdash_core/agent/reviewer_profile.py +475 -0
  35. emdash_core/agent/rules.py +123 -0
  36. emdash_core/agent/runner.py +601 -0
  37. emdash_core/agent/session.py +262 -0
  38. emdash_core/agent/spec_schema.py +66 -0
  39. emdash_core/agent/specification.py +479 -0
  40. emdash_core/agent/subagent.py +397 -0
  41. emdash_core/agent/subagent_prompts.py +13 -0
  42. emdash_core/agent/toolkit.py +482 -0
  43. emdash_core/agent/toolkits/__init__.py +64 -0
  44. emdash_core/agent/toolkits/base.py +96 -0
  45. emdash_core/agent/toolkits/explore.py +47 -0
  46. emdash_core/agent/toolkits/plan.py +55 -0
  47. emdash_core/agent/tools/__init__.py +141 -0
  48. emdash_core/agent/tools/analytics.py +436 -0
  49. emdash_core/agent/tools/base.py +131 -0
  50. emdash_core/agent/tools/coding.py +484 -0
  51. emdash_core/agent/tools/github_mcp.py +592 -0
  52. emdash_core/agent/tools/history.py +13 -0
  53. emdash_core/agent/tools/modes.py +153 -0
  54. emdash_core/agent/tools/plan.py +206 -0
  55. emdash_core/agent/tools/plan_write.py +135 -0
  56. emdash_core/agent/tools/search.py +412 -0
  57. emdash_core/agent/tools/spec.py +341 -0
  58. emdash_core/agent/tools/task.py +262 -0
  59. emdash_core/agent/tools/task_output.py +204 -0
  60. emdash_core/agent/tools/tasks.py +454 -0
  61. emdash_core/agent/tools/traversal.py +588 -0
  62. emdash_core/agent/tools/web.py +179 -0
  63. emdash_core/analytics/__init__.py +5 -0
  64. emdash_core/analytics/engine.py +1286 -0
  65. emdash_core/api/__init__.py +5 -0
  66. emdash_core/api/agent.py +308 -0
  67. emdash_core/api/agents.py +154 -0
  68. emdash_core/api/analyze.py +264 -0
  69. emdash_core/api/auth.py +173 -0
  70. emdash_core/api/context.py +77 -0
  71. emdash_core/api/db.py +121 -0
  72. emdash_core/api/embed.py +131 -0
  73. emdash_core/api/feature.py +143 -0
  74. emdash_core/api/health.py +93 -0
  75. emdash_core/api/index.py +162 -0
  76. emdash_core/api/plan.py +110 -0
  77. emdash_core/api/projectmd.py +210 -0
  78. emdash_core/api/query.py +320 -0
  79. emdash_core/api/research.py +122 -0
  80. emdash_core/api/review.py +161 -0
  81. emdash_core/api/router.py +76 -0
  82. emdash_core/api/rules.py +116 -0
  83. emdash_core/api/search.py +119 -0
  84. emdash_core/api/spec.py +99 -0
  85. emdash_core/api/swarm.py +223 -0
  86. emdash_core/api/tasks.py +109 -0
  87. emdash_core/api/team.py +120 -0
  88. emdash_core/auth/__init__.py +17 -0
  89. emdash_core/auth/github.py +389 -0
  90. emdash_core/config.py +74 -0
  91. emdash_core/context/__init__.py +52 -0
  92. emdash_core/context/models.py +50 -0
  93. emdash_core/context/providers/__init__.py +11 -0
  94. emdash_core/context/providers/base.py +74 -0
  95. emdash_core/context/providers/explored_areas.py +183 -0
  96. emdash_core/context/providers/touched_areas.py +360 -0
  97. emdash_core/context/registry.py +73 -0
  98. emdash_core/context/reranker.py +199 -0
  99. emdash_core/context/service.py +260 -0
  100. emdash_core/context/session.py +352 -0
  101. emdash_core/core/__init__.py +104 -0
  102. emdash_core/core/config.py +454 -0
  103. emdash_core/core/exceptions.py +55 -0
  104. emdash_core/core/models.py +265 -0
  105. emdash_core/core/review_config.py +57 -0
  106. emdash_core/db/__init__.py +67 -0
  107. emdash_core/db/auth.py +134 -0
  108. emdash_core/db/models.py +91 -0
  109. emdash_core/db/provider.py +222 -0
  110. emdash_core/db/providers/__init__.py +5 -0
  111. emdash_core/db/providers/supabase.py +452 -0
  112. emdash_core/embeddings/__init__.py +24 -0
  113. emdash_core/embeddings/indexer.py +534 -0
  114. emdash_core/embeddings/models.py +192 -0
  115. emdash_core/embeddings/providers/__init__.py +7 -0
  116. emdash_core/embeddings/providers/base.py +112 -0
  117. emdash_core/embeddings/providers/fireworks.py +141 -0
  118. emdash_core/embeddings/providers/openai.py +104 -0
  119. emdash_core/embeddings/registry.py +146 -0
  120. emdash_core/embeddings/service.py +215 -0
  121. emdash_core/graph/__init__.py +26 -0
  122. emdash_core/graph/builder.py +134 -0
  123. emdash_core/graph/connection.py +692 -0
  124. emdash_core/graph/schema.py +416 -0
  125. emdash_core/graph/writer.py +667 -0
  126. emdash_core/ingestion/__init__.py +7 -0
  127. emdash_core/ingestion/change_detector.py +150 -0
  128. emdash_core/ingestion/git/__init__.py +5 -0
  129. emdash_core/ingestion/git/commit_analyzer.py +196 -0
  130. emdash_core/ingestion/github/__init__.py +6 -0
  131. emdash_core/ingestion/github/pr_fetcher.py +296 -0
  132. emdash_core/ingestion/github/task_extractor.py +100 -0
  133. emdash_core/ingestion/orchestrator.py +540 -0
  134. emdash_core/ingestion/parsers/__init__.py +10 -0
  135. emdash_core/ingestion/parsers/base_parser.py +66 -0
  136. emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
  137. emdash_core/ingestion/parsers/class_extractor.py +154 -0
  138. emdash_core/ingestion/parsers/function_extractor.py +202 -0
  139. emdash_core/ingestion/parsers/import_analyzer.py +119 -0
  140. emdash_core/ingestion/parsers/python_parser.py +123 -0
  141. emdash_core/ingestion/parsers/registry.py +72 -0
  142. emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
  143. emdash_core/ingestion/parsers/typescript_parser.py +278 -0
  144. emdash_core/ingestion/repository.py +346 -0
  145. emdash_core/models/__init__.py +38 -0
  146. emdash_core/models/agent.py +68 -0
  147. emdash_core/models/index.py +77 -0
  148. emdash_core/models/query.py +113 -0
  149. emdash_core/planning/__init__.py +7 -0
  150. emdash_core/planning/agent_api.py +413 -0
  151. emdash_core/planning/context_builder.py +265 -0
  152. emdash_core/planning/feature_context.py +232 -0
  153. emdash_core/planning/feature_expander.py +646 -0
  154. emdash_core/planning/llm_explainer.py +198 -0
  155. emdash_core/planning/similarity.py +509 -0
  156. emdash_core/planning/team_focus.py +821 -0
  157. emdash_core/server.py +153 -0
  158. emdash_core/sse/__init__.py +5 -0
  159. emdash_core/sse/stream.py +196 -0
  160. emdash_core/swarm/__init__.py +17 -0
  161. emdash_core/swarm/merge_agent.py +383 -0
  162. emdash_core/swarm/session_manager.py +274 -0
  163. emdash_core/swarm/swarm_runner.py +226 -0
  164. emdash_core/swarm/task_definition.py +137 -0
  165. emdash_core/swarm/worker_spawner.py +319 -0
  166. emdash_core/swarm/worktree_manager.py +278 -0
  167. emdash_core/templates/__init__.py +10 -0
  168. emdash_core/templates/defaults/agent-builder.md.template +82 -0
  169. emdash_core/templates/defaults/focus.md.template +115 -0
  170. emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
  171. emdash_core/templates/defaults/pr-review.md.template +80 -0
  172. emdash_core/templates/defaults/project.md.template +85 -0
  173. emdash_core/templates/defaults/research_critic.md.template +112 -0
  174. emdash_core/templates/defaults/research_planner.md.template +85 -0
  175. emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
  176. emdash_core/templates/defaults/reviewer.md.template +81 -0
  177. emdash_core/templates/defaults/spec.md.template +41 -0
  178. emdash_core/templates/defaults/tasks.md.template +78 -0
  179. emdash_core/templates/loader.py +296 -0
  180. emdash_core/utils/__init__.py +45 -0
  181. emdash_core/utils/git.py +84 -0
  182. emdash_core/utils/image.py +502 -0
  183. emdash_core/utils/logger.py +51 -0
  184. emdash_core-0.1.7.dist-info/METADATA +35 -0
  185. emdash_core-0.1.7.dist-info/RECORD +187 -0
  186. emdash_core-0.1.7.dist-info/WHEEL +4 -0
  187. emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,436 @@
1
+ """Researcher agent for executing tool macros and collecting evidence.
2
+
3
+ The Researcher executes tool macros to gather evidence for research questions.
4
+ It produces:
5
+ - EvidenceItem list (machine-verifiable)
6
+ - Claim list (grounded statements)
7
+ - Gap list (explicit unknowns)
8
+
9
+ Team values enforced:
10
+ - V1: Prefer "unknown" over guesses
11
+ - V2: All evidence is reproducible (tool calls documented)
12
+ - V4: Cost awareness (uses budget limits)
13
+ """
14
+
15
+ import json
16
+ from typing import Optional
17
+
18
+ from rich.console import Console
19
+
20
+ from ..toolkit import AgentToolkit
21
+ from ..providers import get_provider
22
+ from ..providers.factory import DEFAULT_MODEL
23
+ from ...core.config import get_config
24
+ from ..events import AgentEventEmitter, NullEmitter
25
+ from .state import (
26
+ EvidenceItem,
27
+ Claim,
28
+ Gap,
29
+ ResearchQuestion,
30
+ ResearchPlan,
31
+ )
32
+ from .macros import (
33
+ MacroExecutor,
34
+ suggest_macros,
35
+ TOOL_MACROS,
36
+ )
37
+ from ..compaction import LLMCompactor
38
+
39
+
40
# System prompt for the claim-extraction LLM call: propose_claims() passes it
# as `system=` to provider.chat(). It asks the model to answer with a single
# JSON object holding "claims", "gaps" and "summary".
# NOTE(review): _parse_claims() only reads the "claims" key of the response;
# the "gaps" and "summary" entries requested here are not consumed there.
+ RESEARCHER_SYSTEM_PROMPT = """You are a research analyst that extracts claims from collected evidence.
41
+
42
+ CRITICAL RULES:
43
+ 1. EVERY piece of evidence contains findings - extract them!
44
+ 2. NEVER say "X results were found but not examined" - READ the evidence details
45
+ 3. Each claim MUST cite evidence IDs (e.g., "based on E1, E3")
46
+ 4. Report WHAT was found, not just that something was found
47
+
48
+ You will receive:
49
+ 1. A research goal
50
+ 2. Questions to answer
51
+ 3. DETAILED evidence with actual entities, names, and content
52
+
53
+ YOUR JOB - Extract claims from evidence:
54
+ 1. READ each evidence item carefully - it contains actual names and details
55
+ 2. MAKE CLAIMS about what the evidence shows (entities, patterns, relationships)
56
+ 3. If the exact topic wasn't found, claim what WAS found (it's still valuable)
57
+ 4. Identify GAPS only for questions with truly no relevant evidence
58
+
59
+ CLAIM FORMAT:
60
+ {
61
+ "id": "C1",
62
+ "statement": "The codebase contains a WebScrapeService class that handles web scraping operations",
63
+ "evidence_ids": ["E1", "E3"],
64
+ "confidence": 2,
65
+ "assumptions": []
66
+ }
67
+
68
+ Confidence levels:
69
+ - 1: Single source, may have assumptions
70
+ - 2: Multiple sources corroborate (requires 2+ evidence_ids)
71
+ - 3: Strong evidence, no assumptions
72
+
73
+ GAP FORMAT (only when evidence truly missing):
74
+ {
75
+ "question": "What tests cover the toolkit?",
76
+ "reason": "No test files found in any search results",
77
+ "suggested_tools": ["text_search"]
78
+ }
79
+
80
+ OUTPUT FORMAT (JSON only, no markdown):
81
+ {
82
+ "claims": [...],
83
+ "gaps": [...],
84
+ "summary": "Brief summary of key findings"
85
+ }
86
+
87
+ IMPORTANT: Evidence contains ACTUAL entity names and details. Extract them into claims."""
88
+
89
+
90
+ class ResearcherAgent:
91
+ """Executes tool macros, collects evidence, proposes claims.
92
+
93
+ The Researcher is the "hands" of the research process. It:
94
+ 1. Runs appropriate tool macros for each question
95
+ 2. Collects evidence with unique IDs
96
+ 3. Proposes claims grounded in evidence
97
+ 4. Identifies gaps where evidence is missing
98
+ """
99
+
100
def __init__(
    self,
    model: str = DEFAULT_MODEL,
    verbose: bool = True,
    emitter: Optional[AgentEventEmitter] = None,
):
    """Set up the provider, toolkit and bookkeeping for a research run.

    Args:
        model: Name of the LLM model used when generating claims.
        verbose: When true, progress is printed to the console.
        emitter: Event emitter for the unified message stream; a
            ``NullEmitter`` is substituted when none is supplied.
    """
    # Plain configuration values and counters.
    self.model = model
    self.verbose = verbose
    self.claim_counter = 0

    # LLM provider and console output.
    self.provider = get_provider(model)
    self.console = Console()

    # The macro executor shares the session-enabled toolkit.
    toolkit = AgentToolkit(enable_session=True)
    self.toolkit = toolkit
    self.macro_executor = MacroExecutor(toolkit)

    if emitter:
        self.emitter = emitter
    else:
        self.emitter = NullEmitter(agent_name="ResearcherAgent")
    self.compactor = LLMCompactor(self.provider)

    # GitHub MCP availability is read from the global configuration.
    self._mcp_available = get_config().mcp.is_available
126
+
127
def run_macros(
    self,
    questions: list[ResearchQuestion],
    context: dict,
    budget: dict,
) -> tuple[list[EvidenceItem], dict]:
    """Gather evidence for each question via a bootstrap search plus macros.

    Questions are handled in order until the tool-call budget is used up.
    A direct search is issued only while the working context holds no
    ``last_search_results``; macros run once such results exist. After each
    successful macro the budget is reduced by the number of evidence items
    it returned.

    Args:
        questions: Research questions to investigate.
        context: Prior context from earlier iterations (shallow-copied).
        budget: Remaining budget; only ``tool_calls`` is read (default 50).

    Returns:
        Tuple of (collected evidence, updated copy of the context).
    """
    collected: list[EvidenceItem] = []
    working_context = dict(context)
    calls_left = budget.get("tool_calls", 50)

    for question in questions:
        if calls_left <= 0:
            if self.verbose:
                self.console.print("[yellow]Budget exhausted[/yellow]")
            break

        if self.verbose:
            self.console.print(f"[dim]Investigating: {question.question}[/dim]")

        topic = self._extract_topic(question.question)

        # Seed the context with a direct search when nothing is cached yet.
        if not working_context.get("last_search_results"):
            found, seeded, calls_left = self._bootstrap_search(
                topic=topic,
                budget_remaining=calls_left,
            )
            collected.extend(found)
            working_context.update(seeded)

        # Macros need both remaining budget and search results to build on.
        if calls_left <= 0 or not working_context.get("last_search_results"):
            continue

        macro_names = question.suggested_tools
        if not macro_names:
            macro_names = suggest_macros(
                question.question,
                include_github=self._mcp_available
            )

        for macro_name in macro_names:
            if calls_left <= 0:
                break
            if macro_name not in TOOL_MACROS:
                continue

            params = {"topic": topic, "symbol": topic}
            if "last_search_top" in working_context:
                best_hit = working_context["last_search_top"]
                params["identifier"] = best_hit.get("qualified_name", topic)

            try:
                found, macro_context = self.macro_executor.execute_macro(
                    macro_name=macro_name,
                    params=params,
                    budget_remaining=calls_left,
                    prior_context=working_context,
                )
                collected.extend(found)
                working_context.update(macro_context)
                calls_left -= len(found)

                if self.verbose:
                    self.console.print(f" [green]{macro_name}: {len(found)} evidence[/green]")
            except Exception as e:
                # Best effort: a failing macro must not stop the others.
                if self.verbose:
                    self.console.print(f" [red]{macro_name}: {e}[/red]")

    return collected, working_context
208
+
209
def _bootstrap_search(
    self,
    topic: str,
    budget_remaining: int,
) -> tuple[list[EvidenceItem], dict, int]:
    """Bootstrap context with a direct semantic search.

    The exact keyword arguments sent to the tool are recorded as the
    evidence ``input`` so the call can be replayed (team value V2). The
    original recorded only ``query`` and ``limit`` and dropped ``min_score``.

    Args:
        topic: The topic to search for.
        budget_remaining: Remaining tool call budget.

    Returns:
        Tuple of (evidence, context_updates, remaining_budget). Evidence
        and context are empty when the search fails or yields no results.
    """
    evidence: list[EvidenceItem] = []
    context: dict = {}

    # Single source of truth for the call arguments: what is executed is
    # exactly what gets documented on the evidence item.
    search_args = {"query": topic, "limit": 10, "min_score": 0.3}

    try:
        result = self.toolkit.execute("semantic_search", **search_args)
        budget_remaining -= 1

        hits = result.data.get("results") if result.success else None
        if hits:
            self.macro_executor.evidence_counter += 1
            evidence_no = self.macro_executor.evidence_counter
            evidence.append(EvidenceItem(
                id=f"E{evidence_no}",
                tool="semantic_search",
                input=dict(search_args),
                output_ref=f"result_{evidence_no}",
                summary=f"Search for '{topic}': {len(hits)} results",
                entities=[r.get("qualified_name", "") for r in hits[:5]],
            ))

            # Later macros build on the full result list and the top hit.
            context["last_search_results"] = hits
            context["last_search_top"] = hits[0]

            if self.verbose:
                self.console.print(f" [green]search: {len(hits)} results[/green]")

    except Exception as e:
        # Best effort: a failed bootstrap search leaves evidence/context
        # empty instead of aborting the research run.
        if self.verbose:
            self.console.print(f" [dim]search failed: {e}[/dim]")

    return evidence, context, budget_remaining
259
+
260
def propose_claims(
    self,
    goal: str,
    questions: list[ResearchQuestion],
    evidence: list[EvidenceItem],
    prior_claims: Optional[list[Claim]] = None,
) -> list[Claim]:
    """Generate claims grounded in evidence.

    Builds a user message from the goal, the questions and the evidence,
    sends it to the provider with ``RESEARCHER_SYSTEM_PROMPT`` and parses
    the reply with ``_parse_claims``.

    Args:
        goal: Research goal.
        questions: Questions being answered.
        evidence: Evidence collected.
        prior_claims: Claims from previous iterations, if any.

    Returns:
        List of new Claims; empty when there is no evidence.
    """
    if not evidence:
        return []

    # The system prompt tells the model that evidence carries actual entity
    # names, so include them: a summary alone (e.g. "Search for 'x': 10
    # results") gives the model nothing concrete to cite.
    evidence_lines = []
    for item in evidence:
        line = f"[{item.id}] {item.tool}: {item.summary}"
        names = [str(name) for name in (getattr(item, "entities", None) or []) if name]
        if names:
            line += f" | entities: {', '.join(names)}"
        evidence_lines.append(line)
    evidence_text = "\n".join(evidence_lines)

    questions_text = "\n".join(
        f"- [{q.priority}] {q.question}" for q in questions
    )

    prior_text = ""
    if prior_claims:
        prior_text = "\nPRIOR CLAIMS:\n" + "\n".join(
            f"- {c.statement}" for c in prior_claims
        )

    user_message = f"""Extract claims from evidence.

GOAL: {goal}

QUESTIONS:
{questions_text}

EVIDENCE:
{evidence_text}
{prior_text}

Return JSON with claims and gaps."""

    messages = [{"role": "user", "content": user_message}]
    response = self.provider.chat(messages, system=RESEARCHER_SYSTEM_PROMPT)

    return self._parse_claims(response.content or "", evidence)
315
+
316
def _parse_claims(
    self,
    content: str,
    evidence: list[EvidenceItem],
) -> list[Claim]:
    """Parse claims from an LLM response, keeping only grounded ones.

    The response is expected to be a JSON object with a ``claims`` list,
    optionally wrapped in a fenced code block or surrounded by prose.
    Malformed input never raises: an unparseable response yields an empty
    list, and a malformed claim is skipped without discarding the others.

    Args:
        content: Raw text returned by the LLM.
        evidence: Evidence items whose IDs claims are allowed to cite.

    Returns:
        Claims that cite at least one known evidence ID.
    """
    claims: list[Claim] = []
    if not isinstance(content, str):
        return claims

    known_ids = {e.id for e in evidence}

    # Candidate 1: body of the first fenced block, else the raw text.
    payload = content
    fence_open = content.find("```")
    if fence_open != -1:
        fence_close = content.find("```", fence_open + 3)
        if fence_close > fence_open:
            payload = content[fence_open + 3:fence_close].lstrip()
            if payload[:4].lower() == "json":
                payload = payload[4:]
    candidates = [payload.strip()]

    # Candidate 2: outermost {...} span, for replies that wrap the JSON
    # object in prose without a code fence.
    obj_start, obj_end = content.find("{"), content.rfind("}")
    if 0 <= obj_start < obj_end:
        candidates.append(content[obj_start:obj_end + 1])

    data = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # A JSON array or scalar has no "claims" key to read.
        if isinstance(parsed, dict):
            data = parsed
            break
    if data is None:
        return claims

    raw_claims = data.get("claims", [])
    if not isinstance(raw_claims, list):
        return claims

    for raw in raw_claims:
        if not isinstance(raw, dict):
            continue

        statement = raw.get("statement")
        if not isinstance(statement, str) or not statement.strip():
            continue

        cited = raw.get("evidence_ids", [])
        if not isinstance(cited, list):
            continue
        # Keep known IDs only, de-duplicated in order, so a repeated ID
        # cannot pass as corroboration from multiple sources.
        valid_evidence = list(dict.fromkeys(
            eid for eid in cited if isinstance(eid, str) and eid in known_ids
        ))
        if not valid_evidence:
            continue

        self.claim_counter += 1

        try:
            confidence = int(raw.get("confidence", 1))
        except (TypeError, ValueError, OverflowError):
            confidence = 1

        assumptions = raw.get("assumptions", [])
        if not isinstance(assumptions, list):
            assumptions = [assumptions] if assumptions else []

        # Enforce the prompt's confidence rules: level 2+ needs at least
        # two evidence items, and any assumption caps confidence at 1.
        if confidence >= 2 and len(valid_evidence) < 2:
            confidence = 1
        if assumptions and confidence > 1:
            confidence = 1

        try:
            claim = Claim(
                id=raw.get("id") or f"C{self.claim_counter}",
                statement=statement,
                evidence_ids=valid_evidence,
                confidence=confidence,
                assumptions=assumptions,
            )
        except (ValueError, TypeError):
            # Claim rejected the values; drop this claim only.
            continue
        claims.append(claim)

    return claims
370
+
371
def identify_gaps(
    self,
    plan: ResearchPlan,
    claims: list[Claim],
    evidence: list[EvidenceItem],
) -> list[Gap]:
    """Identify questions that couldn't be answered.

    A question counts as addressed when any of its keywords occurs as a
    substring of any claim statement (case-insensitive). Questions with
    priority ``"P2"`` are never reported as gaps.

    Args:
        plan: Research plan whose questions are checked.
        claims: Claims produced so far.
        evidence: Not used by this method; kept so the signature is unchanged.

    Returns:
        One Gap per unaddressed, non-P2 question.
    """
    gaps = []
    # Lower-case every statement once instead of once per question.
    claim_texts = [claim.statement.lower() for claim in claims]

    for question in plan.questions:
        if question.priority == "P2":
            continue

        # Keywords depend only on the question, so compute them once per
        # question. Empty keywords are dropped because "" is a substring
        # of every statement and would mark any question as addressed.
        keywords = [
            kw for kw in self._extract_keywords(question.question.lower()) if kw
        ]

        addressed = any(
            kw in text for text in claim_texts for kw in keywords
        )
        if not addressed:
            gaps.append(Gap(
                question=question.question,
                reason="No relevant claims found",
                suggested_tools=question.suggested_tools or ["semantic_search"],
            ))

    return gaps
404
+
405
+ def _extract_topic(self, question: str) -> str:
406
+ """Extract the main topic from a question."""
407
+ topic = question.lower()
408
+ patterns = [
409
+ "what is the feature/behavior of ",
410
+ "where is ",
411
+ "what depends on ",
412
+ "who owns or touches ",
413
+ "what's risky about ",
414
+ "what tests/ci validate ",
415
+ "what should a reviewer check for ",
416
+ "how does ",
417
+ "what is ",
418
+ ]
419
+
420
+ for pattern in patterns:
421
+ if topic.startswith(pattern):
422
+ topic = topic[len(pattern):]
423
+ break
424
+
425
+ return topic.rstrip("?!.").strip() or question
426
+
427
+ def _extract_keywords(self, text: str) -> list[str]:
428
+ """Extract keywords from text for matching."""
429
+ stopwords = {
430
+ "the", "a", "an", "is", "are", "was", "were", "be",
431
+ "to", "of", "in", "for", "on", "with", "at", "by",
432
+ "what", "where", "who", "how", "why", "when",
433
+ }
434
+
435
+ words = text.lower().split()
436
+ return [w.strip("?!.,") for w in words if w not in stopwords and len(w) > 2]