code-finder 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_context/__init__.py +33 -0
- claude_context/agentic_integration.py +309 -0
- claude_context/ast_chunker.py +646 -0
- claude_context/config.py +239 -0
- claude_context/context_manager.py +627 -0
- claude_context/embeddings.py +307 -0
- claude_context/embeddings_interface.py +226 -0
- claude_context/enhanced_ast_chunker.py +1129 -0
- claude_context/explorer.py +951 -0
- claude_context/explorer_with_context.py +1008 -0
- claude_context/indexer.py +893 -0
- claude_context/markdown_chunker.py +421 -0
- claude_context/mode_handler.py +1774 -0
- claude_context/query_metrics.py +164 -0
- claude_context/question_generator.py +800 -0
- claude_context/readme_extractor.py +485 -0
- claude_context/repository_adapter.py +399 -0
- claude_context/search.py +493 -0
- claude_context/skills/__init__.py +11 -0
- claude_context/skills/_cli_common.py +74 -0
- claude_context/skills/_index_manager.py +98 -0
- claude_context/skills/api_surface.py +219 -0
- claude_context/skills/evidence_retrieval.py +151 -0
- claude_context/skills/grounded_review.py +212 -0
- claude_context/synthesis/__init__.py +8 -0
- claude_context/synthesis/editor_agent.py +391 -0
- claude_context/synthesis/llm_synthesizer.py +153 -0
- claude_context/synthesis/logic_explainer.py +235 -0
- claude_context/synthesis/multi_review_pipeline.py +717 -0
- claude_context/synthesis/prompt_builder.py +439 -0
- claude_context/synthesis/providers.py +115 -0
- claude_context/synthesis/validators.py +458 -0
- code_finder-0.1.0.dist-info/METADATA +823 -0
- code_finder-0.1.0.dist-info/RECORD +37 -0
- code_finder-0.1.0.dist-info/WHEEL +5 -0
- code_finder-0.1.0.dist-info/entry_points.txt +4 -0
- code_finder-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,951 @@
|
|
|
1
|
+
|
|
2
|
+
"""
|
|
3
|
+
Interactive Explorer for Claude Context
|
|
4
|
+
|
|
5
|
+
Provides an interactive Q&A interface for exploring codebases with a strict
|
|
6
|
+
evidence-first approach. Questions are classified and routed to appropriate
|
|
7
|
+
handlers based on intent, ensuring grounded, accurate responses.
|
|
8
|
+
|
|
9
|
+
Philosophy: Facts → Analysis → Reasoning (only with evidence)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
import re
|
|
14
|
+
from enum import Enum
|
|
15
|
+
from typing import List, Dict, Any, Optional, Tuple, Set
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
import json
|
|
19
|
+
|
|
20
|
+
from .search import HybridSearcher, SearchResult, create_hybrid_searcher
|
|
21
|
+
from .config import ClaudeContextConfig, MilvusManager
|
|
22
|
+
from .embeddings import LocalEmbeddings
|
|
23
|
+
from .indexer import RepositoryIndexer
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class QuestionType(Enum):
    """Question categories, grouped by how much processing an answer needs."""

    # --- Layer 1: facts only (search) ---
    DISCOVERY = "discovery"          # what / where / list questions
    DEFINITION = "definition"        # "show me X", "find Y"

    # --- Layer 2: analysis required (search + static analysis) ---
    RELATIONSHIP = "relationship"    # how X connects to Y
    FLOW = "flow"                    # tracing data or control flow
    STRUCTURE = "structure"          # module / architecture questions

    # --- Layer 3: reasoning required (search + analysis + inference) ---
    REASONING = "reasoning"          # "why" questions
    PATTERN = "pattern"              # design pattern identification
    EVALUATION = "evaluation"        # quality / improvement questions

    # --- Special types ---
    UNKNOWN = "unknown"              # could not be classified
    CLARIFICATION = "clarification"  # more information is needed
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class QuestionIntent:
    """Structured outcome of classifying one question."""

    # Category the question was assigned to.
    primary_type: QuestionType
    # Confidence in that classification, on a 0-1 scale.
    confidence: float
    # Key terms extracted from the question.
    keywords: List[str]
    # Code entities mentioned in the question (classes, functions, etc.).
    entities: List[str]
    # Either "specific" or "broad".
    scope: str
    # Whether an LLM would be beneficial for answering.
    requires_llm: bool
    # Short description of how to handle this question.
    suggested_approach: str
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class QuestionClassifier:
    """
    Classifies questions so they can be routed to an appropriate handler.

    No LLM is involved: each question type has a set of regular expressions
    and a keyword list, and a question is scored against all of them.
    """

    # Words that open or structure a question; never reported as entities.
    _QUESTION_WORDS = frozenset({
        'what', 'where', 'when', 'why', 'how', 'which', 'who',
        'is', 'are', 'was', 'were', 'do', 'does', 'did',
        'can', 'could', 'should', 'would', 'will',
        'show', 'tell', 'find', 'list', 'get', 'trace', 'explain',
    })

    # Capitalized / CamelCase words (likely class names).
    _CAMEL_CASE_RE = re.compile(r'\b[A-Z][a-z]+(?:[A-Z][a-z]+)*\b')
    # snake_case words (likely function or variable names).
    _SNAKE_CASE_RE = re.compile(r'\b[a-z]+(?:_[a-z]+)+\b')
    # Text wrapped in a *matching* pair of backticks, double quotes or single
    # quotes. A single quote only counts as a delimiter when it is not glued
    # to a word character, so apostrophes ("what's", "user's") are ignored.
    _QUOTED_RE = re.compile(r'`([^`]+)`|"([^"]+)"|(?<!\w)\'([^\']+)\'(?!\w)')
    # Words ending in a common code suffix.
    _CODE_SUFFIX_RE = re.compile(
        r'\b\w+(?:Service|Controller|Model|Handler|Manager|Factory|Repository|Component|Module|Class|Function)\b',
        re.IGNORECASE,
    )
    # Well-known technical terms.
    _TECHNICAL_TERM_RE = re.compile(
        r'\b(?:API|URL|HTTP|REST|JSON|XML|SQL|CSS|HTML|JWT|OAuth|Redis|MongoDB|PostgreSQL)\b',
        re.IGNORECASE,
    )

    def __init__(self):
        """Build the per-type pattern tables and pre-compile their regexes."""
        self.patterns = {
            QuestionType.DISCOVERY: {
                "patterns": [
                    r"\b(what|which|list|show|find|where|locate)\b.*\b(is|are|all|every|main|primary)\b",
                    r"\b(what|where)\s+(is|are)\s+(\w+)",
                    r"\b(show|list|find)\s+(me\s+)?(all\s+)?(\w+)",
                    r"\b(main|primary|key|core)\s+(components?|modules?|classes?|functions?)\b"
                ],
                "keywords": ["what", "where", "which", "list", "show", "find", "locate", "main", "all"],
                "examples": [
                    "What are the main components?",
                    "Where is authentication handled?",
                    "List all API endpoints",
                    "Show me the database models"
                ]
            },

            QuestionType.DEFINITION: {
                "patterns": [
                    r"\b(show|display|get|find)\s+(me\s+)?(the\s+)?(\w+)\s+(class|function|method|code)\b",
                    r"\b(definition|implementation)\s+of\s+(\w+)",
                    r"\bwhat\s+does\s+(\w+)\s+look\s+like\b"
                ],
                "keywords": ["show", "display", "definition", "implementation", "code"],
                "examples": [
                    "Show me the UserService class",
                    "Find the login function",
                    "What does PaymentProcessor look like?"
                ]
            },

            QuestionType.RELATIONSHIP: {
                "patterns": [
                    r"\b(how|what|which)\b.*\b(connect|interact|relate|call|use|depend|communicate)\b",
                    r"\b(relationship|connection|interaction)\s+(between|with|of)\b",
                    r"\b(what|which)\s+(calls?|uses?|imports?|depends?\s+on)\s+(\w+)",
                    r"\b(\w+)\s+and\s+(\w+)\s+(interact|connect|relate|work\s+together)"
                ],
                "keywords": ["connect", "interact", "relate", "call", "use", "depend", "relationship", "between"],
                "examples": [
                    "How do OrderService and PaymentService interact?",
                    "What calls the authenticate function?",
                    "Which components depend on Redis?",
                    "How are User and Profile connected?"
                ]
            },

            QuestionType.FLOW: {
                "patterns": [
                    r"\b(trace|follow|track)\b.*\b(flow|path|journey|process)\b",
                    r"\b(how|what)\b.*\b(flows?|moves?|travels?|processes?|happens?)\b",
                    r"\b(data|control|execution)\s+(flow|path)\b",
                    r"\bstep[- ]?by[- ]?step\b"
                ],
                "keywords": ["trace", "follow", "flow", "path", "journey", "process", "step"],
                "examples": [
                    "Trace the login flow",
                    "How does data flow from API to database?",
                    "Follow the order processing path",
                    "What happens step by step when a user signs up?"
                ]
            },

            QuestionType.STRUCTURE: {
                "patterns": [
                    r"\b(architecture|structure|organization|layout)\b",
                    r"\b(module|package|component)\s+(boundaries?|structure|organization)\b",
                    r"\b(how\s+is|what\'s\s+the).*\b(organized|structured|architected|laid\s+out)\b"
                ],
                "keywords": ["architecture", "structure", "module", "package", "boundaries", "organization"],
                "examples": [
                    "What's the architecture of this system?",
                    "How is the codebase organized?",
                    "What are the module boundaries?",
                    "Explain the package structure"
                ]
            },

            QuestionType.REASONING: {
                "patterns": [
                    r"\bwhy\b",
                    r"\b(reason|rationale|purpose|motivation)\b",
                    r"\b(explain|understand)\s+(the\s+)?(reasoning|thinking|decision)\b",
                    r"\bwhat\'s\s+the\s+(point|purpose|reason)\b"
                ],
                "keywords": ["why", "reason", "rationale", "purpose", "explain", "because"],
                "examples": [
                    "Why does this use Redis instead of memory cache?",
                    "What's the reason for this abstraction?",
                    "Why is this a separate service?",
                    "Explain the rationale behind this pattern"
                ]
            },

            QuestionType.PATTERN: {
                "patterns": [
                    r"\b(pattern|design|approach|strategy|technique)\b",
                    r"\b(what|which)\s+(pattern|design|approach)\b",
                    r"\b(singleton|factory|observer|mvc|repository|strategy)\b"
                ],
                "keywords": ["pattern", "design", "approach", "strategy", "technique"],
                "examples": [
                    "What design patterns are used?",
                    "Is this using the repository pattern?",
                    "What's the caching strategy?",
                    "Identify the architectural patterns"
                ]
            },

            QuestionType.EVALUATION: {
                "patterns": [
                    r"\b(good|bad|better|worse|improve|optimize|refactor)\b",
                    r"\b(should|could|would)\b.*\b(be|have)\b",
                    r"\b(quality|performance|security|maintainability)\b",
                    r"\b(issue|problem|concern|smell|antipattern)\b"
                ],
                "keywords": ["good", "bad", "improve", "should", "could", "quality", "issue", "problem"],
                "examples": [
                    "How could this be improved?",
                    "Is this good design?",
                    "What are potential issues?",
                    "Should this be refactored?"
                ]
            }
        }

        # Compile once so classify() does no regex compilation per question.
        for config in self.patterns.values():
            config["compiled"] = [
                re.compile(pattern, re.IGNORECASE)
                for pattern in config["patterns"]
            ]
            # Keywords are anchored at a word start: "depend" still matches
            # "depends", but "all" no longer matches inside "call" and "use"
            # no longer matches inside "because".
            config["compiled_keywords"] = [
                (keyword, re.compile(r"\b" + re.escape(keyword), re.IGNORECASE))
                for keyword in config["keywords"]
            ]

    def classify(self, question: str) -> QuestionIntent:
        """
        Classify a question into a type and extract intent details.

        Each question type scores 1.0 per matching regex and 0.5 per matching
        keyword; the score is divided by the maximum that type could reach.
        The highest-scoring type wins. If nothing matches at all, the result
        is ``QuestionType.UNKNOWN`` with confidence 0.0.

        Args:
            question: The user's question

        Returns:
            QuestionIntent with classification details
        """
        normalized = question.lower().strip()

        # Entities are extracted from the original text because the
        # CamelCase detection depends on letter case.
        entities = self._extract_entities(question)

        scores: Dict[QuestionType, float] = {}
        # A dict is used as an insertion-ordered set so the keyword list
        # comes out in the same order on every run.
        matched_keywords: Dict[str, None] = {}

        for qtype, config in self.patterns.items():
            score = 0.0

            for pattern in config["compiled"]:
                if pattern.search(normalized):
                    score += 1.0

            for keyword, keyword_re in config["compiled_keywords"]:
                if keyword_re.search(normalized):
                    score += 0.5
                    matched_keywords[keyword] = None

            max_possible = len(config["patterns"]) + (len(config["keywords"]) * 0.5)
            scores[qtype] = score / max_possible if max_possible > 0 else 0

        best_type = QuestionType.UNKNOWN
        confidence = 0.0
        if scores:
            candidate = max(scores, key=scores.get)
            # A zero top score means no pattern or keyword matched; taking
            # the first dictionary key in that case would mislabel the
            # question instead of reporting it as unclassifiable.
            if scores[candidate] > 0:
                best_type = candidate
                confidence = scores[candidate]

        requires_llm = best_type in [
            QuestionType.REASONING,
            QuestionType.PATTERN,
            QuestionType.EVALUATION
        ]

        scope = "specific" if entities else "broad"
        keywords = list(matched_keywords)
        approach = self._suggest_approach(best_type, entities, keywords)

        return QuestionIntent(
            primary_type=best_type,
            confidence=confidence,
            keywords=keywords,
            entities=entities,
            scope=scope,
            requires_llm=requires_llm,
            suggested_approach=approach
        )

    def _extract_entities(self, question: str) -> List[str]:
        """
        Extract potential code entities from a question.

        Candidates are collected in this order: capitalized/CamelCase words,
        snake_case words, quoted or backticked text, words with a common code
        suffix, and well-known technical terms. Question words are dropped
        and duplicates removed, keeping the first occurrence.

        Args:
            question: The question text in its original letter case

        Returns:
            Unique entity strings in order of first appearance
        """
        candidates: List[str] = []
        candidates.extend(self._CAMEL_CASE_RE.findall(question))
        candidates.extend(self._SNAKE_CASE_RE.findall(question))
        # Exactly one of the three alternation groups is non-empty per match.
        candidates.extend(
            "".join(groups) for groups in self._QUOTED_RE.findall(question)
        )
        candidates.extend(self._CODE_SUFFIX_RE.findall(question))
        candidates.extend(self._TECHNICAL_TERM_RE.findall(question))

        seen = set()
        unique_entities = []
        for entity in candidates:
            if entity in seen or entity.lower() in self._QUESTION_WORDS:
                continue
            seen.add(entity)
            unique_entities.append(entity)

        return unique_entities

    def _suggest_approach(self, qtype: QuestionType, entities: List[str], keywords: List[str]) -> str:
        """
        Return a one-line description of how to answer a question of ``qtype``.

        Args:
            qtype: The classified question type
            entities: Entities extracted from the question
            keywords: Matched keywords (accepted but not used here)

        Returns:
            The suggestion text, or a generic fallback for an unlisted type
        """
        joined = ', '.join(entities)

        suggestions = {
            QuestionType.DISCOVERY: f"Search for {joined if entities else 'relevant components'} and list findings",
            QuestionType.DEFINITION: f"Locate and display the complete code for {joined if entities else 'the requested item'}",
            QuestionType.RELATIONSHIP: f"Find connections between {' and '.join(entities) if len(entities) >= 2 else 'components'} using dependency analysis",
            QuestionType.FLOW: f"Trace the execution path starting from {entities[0] if entities else 'entry point'}",
            QuestionType.STRUCTURE: "Analyze directory structure and module boundaries",
            QuestionType.REASONING: f"Search for evidence about {joined if entities else 'the topic'}, then infer purpose from context",
            QuestionType.PATTERN: "Identify code patterns and match against known design patterns",
            QuestionType.EVALUATION: f"Analyze {joined if entities else 'code quality'} against best practices",
            QuestionType.UNKNOWN: "Clarify the question or try a broader search",
            QuestionType.CLARIFICATION: "Request more specific information"
        }

        return suggestions.get(qtype, "Perform general search and analysis")

    def needs_clarification(self, intent: QuestionIntent) -> bool:
        """
        Determine if the question needs clarification.

        True when the type is UNKNOWN, the confidence is below 0.3, or the
        scope is "broad" and no keywords matched.
        """
        return (
            intent.primary_type == QuestionType.UNKNOWN or
            intent.confidence < 0.3 or
            (intent.scope == "broad" and not intent.keywords)
        )

    def suggest_clarifications(self, question: str, intent: QuestionIntent) -> List[str]:
        """
        Suggest clarifying questions to show the user.

        Args:
            question: The original question text
            intent: The classification result for that question

        Returns:
            A non-empty list of clarification prompts
        """
        suggestions = []

        if intent.primary_type == QuestionType.UNKNOWN:
            suggestions.append("Could you rephrase your question? I'm not sure what you're looking for.")

        if intent.scope == "broad" and not intent.entities:
            suggestions.append("Which specific component or function are you interested in?")

        if intent.confidence < 0.5:
            lowered = question.lower()
            # Whole-word tests: a plain substring check would treat "show"
            # as containing "how".
            if re.search(r"\bhow\b", lowered):
                suggestions.append("Are you asking about how components interact, or how something is implemented?")
            elif re.search(r"\bwhat\b", lowered):
                suggestions.append("Are you looking for a list of items, or the definition of something specific?")

        if not suggestions:
            suggestions.append("Could you provide more context or be more specific?")

        return suggestions
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
@dataclass
class ExplorationResult:
    """Outcome of answering one question about the codebase."""

    question: str
    question_type: QuestionType
    confidence: float

    # Core results
    answer: str  # natural-language answer text
    code_results: List[SearchResult] = field(default_factory=list)

    # Metadata
    entities_found: List[str] = field(default_factory=list)
    search_terms_used: List[str] = field(default_factory=list)

    # Context for future questions
    follow_up_suggestions: List[str] = field(default_factory=list)
    related_files: List[str] = field(default_factory=list)

    # External context (for future enhancement)
    external_context: Dict[str, Any] = field(default_factory=dict)

    # Tracking
    processing_time: float = 0.0
    used_llm: bool = False

    def format_answer(self, include_code: bool = True, max_results: int = 3) -> str:
        """
        Render this result as display text.

        Args:
            include_code: When true, list the code results with a preview
            max_results: Maximum number of code results to list

        Returns:
            Newline-joined text: question, classification, answer, optional
            code previews, up to three follow-up suggestions, and timing
        """
        sections = [
            f"📝 Question: {self.question}",
            f" Type: {self.question_type.value} (confidence: {self.confidence:.2f})",
            f"\n💡 Answer:\n{self.answer}",
        ]

        if include_code and self.code_results:
            sections.append(f"\n📂 Relevant Code ({len(self.code_results)} results found):")
            for rank, hit in enumerate(self.code_results[:max_results], 1):
                sections.append(f"\n{rank}. {hit.file_name} ({hit.chunk_type})")
                sections.append(f" Lines {hit.start_line}-{hit.end_line}")
                if hit.chunk_name:
                    sections.append(f" Name: {hit.chunk_name}")
                # Preview only the first three lines; mark truncation.
                code_lines = hit.content.split('\n')
                sections.extend(f" {text}" for text in code_lines[:3])
                if len(code_lines) > 3:
                    sections.append(" ...")

        if self.follow_up_suggestions:
            sections.append("\n🔍 You might also want to ask:")
            sections.extend(
                f" - {suggestion}" for suggestion in self.follow_up_suggestions[:3]
            )

        sections.append(f"\n⏱️ Processed in {self.processing_time:.3f}s")

        return '\n'.join(sections)
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
@dataclass
class ConversationContext:
    """Accumulated state for one multi-question exploration session."""

    session_id: str
    started_at: datetime = field(default_factory=datetime.now)

    # History, in the order results were added
    questions: List[str] = field(default_factory=list)
    results: List[ExplorationResult] = field(default_factory=list)

    # Current focus: every entity / file seen so far in this session
    current_entities: Set[str] = field(default_factory=set)
    current_files: Set[str] = field(default_factory=set)

    # External context (for future enhancement)
    external_docs: List[str] = field(default_factory=list)

    def add_result(self, result: ExplorationResult):
        """Append ``result`` to the history and merge its entities and files."""
        self.results.append(result)
        self.questions.append(result.question)

        self.current_files.update(result.related_files)
        self.current_entities.update(result.entities_found)

    def get_context_summary(self) -> str:
        """Return a short multi-line text summary of the session so far."""
        if self.current_entities:
            entity_text = ', '.join(self.current_entities)
        else:
            entity_text = 'None'

        return (
            f"\nSession: {self.session_id}\n"
            f"Questions asked: {len(self.questions)}\n"
            f"Entities explored: {entity_text}\n"
            f"Files examined: {len(self.current_files)}\n"
        )
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
class InteractiveExplorer:
|
|
474
|
+
"""
|
|
475
|
+
Interactive Q&A explorer for codebases.
|
|
476
|
+
|
|
477
|
+
This is the main interface for exploring code through questions.
|
|
478
|
+
It uses the question classifier to route questions to appropriate
|
|
479
|
+
handlers and maintains conversation context.
|
|
480
|
+
"""
|
|
481
|
+
|
|
482
|
+
def __init__(
    self,
    config: ClaudeContextConfig,
    embeddings: LocalEmbeddings,
    milvus_manager: MilvusManager,
    llm_client: Optional[Any] = None,  # For future LLM integration
    external_context: Optional[Dict] = None  # For future context integration
):
    """
    Set up the explorer's collaborators and an empty session table.

    Args:
        config: Configuration, kept on the instance and passed to the
            searcher factory
        embeddings: Embeddings model passed to the searcher factory
        milvus_manager: Milvus connection manager passed to the searcher
            factory
        llm_client: Optional LLM for reasoning questions
        external_context: Optional external documentation; stored as an
            empty dict when omitted or empty
    """
    # Optional collaborators.
    self.llm_client = llm_client
    self.external_context = external_context if external_context else {}

    # Core components.
    self.config = config
    self.classifier = QuestionClassifier()
    self.searcher = create_hybrid_searcher(config, embeddings, milvus_manager)

    # Sessions keyed by session id.
    self.sessions: Dict[str, ConversationContext] = {}

    logger.info("InteractiveExplorer initialized")
|
|
510
|
+
|
|
511
|
+
def start_session(self, session_id: Optional[str] = None) -> str:
    """
    Start a new exploration session and return its id.

    Args:
        session_id: Id to register the session under. When omitted, an id
            of the form ``session_YYYYMMDD_HHMMSS`` is generated. If that
            id is already registered, a numeric suffix is appended so an
            existing session is not overwritten.

    Returns:
        The id the new session was registered under
    """
    if not session_id:
        # The timestamp has one-second resolution, so two sessions started
        # in the same second would otherwise share an id.
        base_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        session_id = base_id
        suffix = 1
        while session_id in self.sessions:
            session_id = f"{base_id}_{suffix}"
            suffix += 1

    # An explicitly supplied id always gets a fresh context, replacing any
    # session already stored under that id.
    self.sessions[session_id] = ConversationContext(session_id=session_id)
    logger.info(f"Started exploration session: {session_id}")
    return session_id
|
|
519
|
+
|
|
520
|
+
def ask(
    self,
    question: str,
    session_id: Optional[str] = None,
    include_code: bool = True,
    max_results: int = 5
) -> ExplorationResult:
    """
    Ask a question about the codebase.

    The question is classified, routed to the handler for its type, timed,
    and, when a session id is given, recorded in that session.

    Args:
        question: The question to answer
        session_id: Optional session ID for context. An id that is not yet
            registered gets a new session, which is stored so later calls
            with the same id see this result.
        include_code: Accepted for interface compatibility; not consulted
            by this method
        max_results: Maximum number of code results

    Returns:
        ExplorationResult with answer and evidence
    """
    start_time = datetime.now()

    # Get or create the session. Without a session id nothing is recorded.
    context = None
    if session_id:
        context = self.sessions.get(session_id)
        if context is None:
            # Register the new context; an unregistered one would be
            # dropped when this call returns, losing the history.
            context = ConversationContext(session_id=session_id)
            self.sessions[session_id] = context

    intent = self.classifier.classify(question)

    # Route to the handler for the classified layer.
    if intent.primary_type in [QuestionType.DISCOVERY, QuestionType.DEFINITION]:
        result = self._handle_discovery_question(question, intent, max_results)
    elif intent.primary_type in [QuestionType.RELATIONSHIP, QuestionType.FLOW, QuestionType.STRUCTURE]:
        result = self._handle_analysis_question(question, intent, max_results)
    elif intent.primary_type in [QuestionType.REASONING, QuestionType.PATTERN, QuestionType.EVALUATION]:
        result = self._handle_reasoning_question(question, intent, max_results)
    else:
        result = self._handle_unknown_question(question, intent)

    result.processing_time = (datetime.now() - start_time).total_seconds()

    if context is not None:
        context.add_result(result)

    return result
|
|
568
|
+
|
|
569
|
+
def _handle_discovery_question(
    self,
    question: str,
    intent: QuestionIntent,
    max_results: int
) -> ExplorationResult:
    """
    Handle discovery/definition questions (Layer 1: Facts only).

    These are "what" and "where" questions that need only search.

    Args:
        question: The original question text
        intent: Classification result for the question
        max_results: Maximum number of search results to request

    Returns:
        ExplorationResult carrying the answer text, the search results,
        and the entities and files found, in search-result order
    """
    # Query with the extracted entities and keywords; fall back to the raw
    # question when neither was found.
    search_terms = intent.entities + intent.keywords
    search_query = ' '.join(search_terms) if search_terms else question

    code_results = self.searcher.search(
        search_query,
        limit=max_results
    )

    if code_results:
        if intent.primary_type == QuestionType.DISCOVERY:
            answer = self._build_discovery_answer(code_results, intent)
        else:  # DEFINITION
            answer = self._build_definition_answer(code_results, intent)

        # dict.fromkeys removes duplicates while keeping the order of the
        # search results; a set would give an arbitrary, run-dependent order.
        entities_found = list(dict.fromkeys(
            r.chunk_name for r in code_results if r.chunk_name
        ))
        related_files = list(dict.fromkeys(r.file_path for r in code_results))

        follow_ups = self._suggest_discovery_followups(code_results, intent)
    else:
        answer = f"I couldn't find any code related to: {', '.join(intent.entities) if intent.entities else question}"
        entities_found = []
        related_files = []
        follow_ups = ["Try searching with different terms", "Be more specific about what you're looking for"]

    return ExplorationResult(
        question=question,
        question_type=intent.primary_type,
        confidence=intent.confidence,
        answer=answer,
        code_results=code_results,
        entities_found=entities_found,
        search_terms_used=search_terms,
        follow_up_suggestions=follow_ups,
        related_files=related_files
    )
|
|
621
|
+
|
|
622
|
+
def _handle_analysis_question(
    self,
    question: str,
    intent: QuestionIntent,
    max_results: int
) -> ExplorationResult:
    """
    Handle relationship/flow/structure questions (Layer 2: Analysis).

    For now this is a search biased with relationship-focused terms; no
    actual dependency analysis is performed here.

    Args:
        question: The original question text
        intent: Classification result for the question
        max_results: Maximum number of results to return; twice as many
            are requested from the searcher

    Returns:
        ExplorationResult with at most ``max_results`` code results
    """
    relationship_terms = ["import", "call", "use", "depend", "inherit", "extend"]
    search_terms = intent.entities + relationship_terms

    # The relationship terms make search_terms non-empty on every call, so
    # the question is used as the subject whenever no entities were
    # extracted. Otherwise the query would be the bare relationship terms
    # and ignore what was asked.
    query_subject = intent.entities if intent.entities else [question]
    search_query = ' '.join(query_subject + relationship_terms)

    code_results = self.searcher.search(
        search_query,
        limit=max_results * 2  # Get more results for relationship analysis
    )

    if code_results:
        answer = self._build_relationship_answer(code_results, intent)
        entities_found = self._extract_all_entities(code_results)
        # Deduplicate in search-result order rather than arbitrary set order.
        related_files = list(dict.fromkeys(r.file_path for r in code_results))
        follow_ups = self._suggest_relationship_followups(code_results, intent)
    else:
        answer = f"I couldn't find relationships for: {', '.join(intent.entities) if intent.entities else 'the specified components'}"
        entities_found = []
        related_files = []
        follow_ups = ["Try specifying the exact component names", "Check if the components exist"]

    return ExplorationResult(
        question=question,
        question_type=intent.primary_type,
        confidence=intent.confidence,
        answer=answer,
        code_results=code_results[:max_results],  # Limit displayed results
        entities_found=entities_found,
        search_terms_used=search_terms,
        follow_up_suggestions=follow_ups,
        related_files=related_files
    )
|
|
669
|
+
|
|
670
|
+
def _handle_reasoning_question(
    self,
    question: str,
    intent: QuestionIntent,
    max_results: int
) -> ExplorationResult:
    """
    Handle reasoning/pattern/evaluation questions (Layer 3: Reasoning).

    Searches for relevant code, then builds the answer with the LLM when
    an ``llm_client`` is configured and without it otherwise.

    Args:
        question: The original question text
        intent: Classification result for the question
        max_results: Maximum number of search results to request

    Returns:
        ExplorationResult whose ``used_llm`` flag records which answer
        path was taken
    """
    search_terms = intent.entities + intent.keywords
    search_query = ' '.join(search_terms) if search_terms else question

    code_results = self.searcher.search(
        search_query,
        limit=max_results
    )

    used_llm = bool(self.llm_client)
    if used_llm:
        # Use LLM for reasoning (future enhancement)
        answer = self._build_reasoning_answer_with_llm(code_results, intent, question)
    else:
        # Fallback: provide code evidence without reasoning
        answer = self._build_reasoning_answer_without_llm(code_results, intent)

    if code_results:
        entities_found = self._extract_all_entities(code_results)
        # Deduplicate in search-result order rather than arbitrary set order.
        related_files = list(dict.fromkeys(r.file_path for r in code_results))
    else:
        entities_found = []
        related_files = []
    follow_ups = self._suggest_reasoning_followups(code_results, intent)

    return ExplorationResult(
        question=question,
        question_type=intent.primary_type,
        confidence=intent.confidence,
        answer=answer,
        code_results=code_results,
        entities_found=entities_found,
        search_terms_used=search_terms,
        follow_up_suggestions=follow_ups,
        related_files=related_files,
        used_llm=used_llm
    )
|
|
714
|
+
|
|
715
|
+
def _handle_unknown_question(
    self,
    question: str,
    intent: QuestionIntent
) -> ExplorationResult:
    """
    Produce a best-effort result for a question that could not be classified.

    Runs a general search on the raw question (at most 5 hits), picks an
    answer sentence depending on whether anything was found, and asks the
    classifier for clarification suggestions to offer as follow-ups. The
    result is always typed ``QuestionType.UNKNOWN`` with confidence ``0.0``.
    """
    # No intent to steer the query, so search on the question text itself.
    hits = self.searcher.search(question, limit=5)

    answer = (
        "I'm not sure what you're asking, but here's what I found:"
        if hits
        else "I couldn't understand your question. Could you please rephrase it?"
    )

    # Clarification prompts double as the follow-up suggestions.
    clarifications = self.classifier.suggest_clarifications(question, intent)

    return ExplorationResult(
        question=question,
        question_type=QuestionType.UNKNOWN,
        confidence=0.0,
        answer=answer,
        code_results=hits,
        follow_up_suggestions=clarifications
    )
|
|
741
|
+
|
|
742
|
+
# Answer building methods
|
|
743
|
+
|
|
744
|
+
def _build_discovery_answer(self, results: List[SearchResult], intent: QuestionIntent) -> str:
|
|
745
|
+
"""Build answer for discovery questions"""
|
|
746
|
+
answer_parts = []
|
|
747
|
+
|
|
748
|
+
if intent.entities:
|
|
749
|
+
answer_parts.append(f"Here's what I found about {', '.join(intent.entities)}:")
|
|
750
|
+
else:
|
|
751
|
+
answer_parts.append("Here's what I found:")
|
|
752
|
+
|
|
753
|
+
# Group by type
|
|
754
|
+
by_type = {}
|
|
755
|
+
for r in results:
|
|
756
|
+
if r.chunk_type not in by_type:
|
|
757
|
+
by_type[r.chunk_type] = []
|
|
758
|
+
by_type[r.chunk_type].append(r)
|
|
759
|
+
|
|
760
|
+
for chunk_type, items in by_type.items():
|
|
761
|
+
answer_parts.append(f"\n{chunk_type.title()}s:")
|
|
762
|
+
for item in items[:3]: # Limit to 3 per type
|
|
763
|
+
if item.chunk_name:
|
|
764
|
+
answer_parts.append(f" • {item.chunk_name} in {item.file_name}")
|
|
765
|
+
else:
|
|
766
|
+
answer_parts.append(f" • {item.file_name} (lines {item.start_line}-{item.end_line})")
|
|
767
|
+
|
|
768
|
+
return '\n'.join(answer_parts)
|
|
769
|
+
|
|
770
|
+
def _build_definition_answer(self, results: List[SearchResult], intent: QuestionIntent) -> str:
|
|
771
|
+
"""Build answer for definition questions"""
|
|
772
|
+
if not results:
|
|
773
|
+
return "Definition not found."
|
|
774
|
+
|
|
775
|
+
best_match = results[0]
|
|
776
|
+
answer = f"Found {best_match.chunk_type}: {best_match.chunk_name or 'unnamed'}\n"
|
|
777
|
+
answer += f"Location: {best_match.file_name} (lines {best_match.start_line}-{best_match.end_line})\n"
|
|
778
|
+
|
|
779
|
+
# Add language hint
|
|
780
|
+
if best_match.language:
|
|
781
|
+
answer += f"Language: {best_match.language}\n"
|
|
782
|
+
|
|
783
|
+
return answer
|
|
784
|
+
|
|
785
|
+
def _build_relationship_answer(self, results: List[SearchResult], intent: QuestionIntent) -> str:
|
|
786
|
+
"""Build answer for relationship questions"""
|
|
787
|
+
if len(intent.entities) >= 2:
|
|
788
|
+
answer = f"Analyzing relationship between {' and '.join(intent.entities)}:\n"
|
|
789
|
+
else:
|
|
790
|
+
answer = "Found the following relationships:\n"
|
|
791
|
+
|
|
792
|
+
# Look for imports and calls
|
|
793
|
+
imports = [r for r in results if r.chunk_type == 'import_block']
|
|
794
|
+
functions = [r for r in results if r.chunk_type in ['function', 'method']]
|
|
795
|
+
classes = [r for r in results if r.chunk_type == 'class']
|
|
796
|
+
|
|
797
|
+
if imports:
|
|
798
|
+
answer += f"\nImports ({len(imports)} found):"
|
|
799
|
+
for imp in imports[:3]:
|
|
800
|
+
first_line = imp.content.split('\n')[0][:80]
|
|
801
|
+
answer += f"\n • {imp.file_name}: {first_line}"
|
|
802
|
+
|
|
803
|
+
if classes:
|
|
804
|
+
answer += f"\n\nClasses involved ({len(classes)} found):"
|
|
805
|
+
for cls in classes[:3]:
|
|
806
|
+
answer += f"\n • {cls.chunk_name} in {cls.file_name}"
|
|
807
|
+
|
|
808
|
+
if functions:
|
|
809
|
+
answer += f"\n\nFunctions/Methods ({len(functions)} found):"
|
|
810
|
+
for func in functions[:3]:
|
|
811
|
+
answer += f"\n • {func.chunk_name} in {func.file_name}"
|
|
812
|
+
|
|
813
|
+
return answer
|
|
814
|
+
|
|
815
|
+
def _build_reasoning_answer_without_llm(self, results: List[SearchResult], intent: QuestionIntent) -> str:
|
|
816
|
+
"""Build reasoning answer without LLM"""
|
|
817
|
+
answer = "To answer 'why' questions, I need LLM integration. However, here's the relevant code:\n"
|
|
818
|
+
|
|
819
|
+
if results:
|
|
820
|
+
for r in results[:3]:
|
|
821
|
+
answer += f"\n• {r.chunk_type} '{r.chunk_name or 'unnamed'}' in {r.file_name}"
|
|
822
|
+
# Look for comments that might explain why
|
|
823
|
+
lines = r.content.split('\n')
|
|
824
|
+
for line in lines:
|
|
825
|
+
if '#' in line or '//' in line or '/*' in line:
|
|
826
|
+
comment = line.strip()
|
|
827
|
+
if len(comment) > 10: # Non-trivial comment
|
|
828
|
+
answer += f"\n Comment: {comment[:100]}"
|
|
829
|
+
break
|
|
830
|
+
else:
|
|
831
|
+
answer += "\nNo relevant code found to analyze."
|
|
832
|
+
|
|
833
|
+
answer += "\n\nNote: For deeper reasoning about design decisions, LLM integration would provide better insights."
|
|
834
|
+
|
|
835
|
+
return answer
|
|
836
|
+
|
|
837
|
+
def _build_reasoning_answer_with_llm(self, results: List[SearchResult], intent: QuestionIntent, question: str) -> str:
    """
    Placeholder for LLM-grounded reasoning (future enhancement).

    No LLM call is made yet: the method delegates to
    ``_build_reasoning_answer_without_llm`` and returns its text unchanged.
    ``question`` is accepted but not used.
    """
    # Grounded LLM reasoning would plug in here; until then reuse the
    # evidence-only answer.
    evidence_only_answer = self._build_reasoning_answer_without_llm(results, intent)
    return evidence_only_answer
|
|
842
|
+
|
|
843
|
+
# Helper methods
|
|
844
|
+
|
|
845
|
+
def _extract_all_entities(self, results: List[SearchResult]) -> List[str]:
|
|
846
|
+
"""Extract all entities from search results"""
|
|
847
|
+
entities = set()
|
|
848
|
+
for r in results:
|
|
849
|
+
if r.chunk_name:
|
|
850
|
+
entities.add(r.chunk_name)
|
|
851
|
+
# Could also extract entities from content
|
|
852
|
+
return list(entities)
|
|
853
|
+
|
|
854
|
+
def _suggest_discovery_followups(self, results: List[SearchResult], intent: QuestionIntent) -> List[str]:
|
|
855
|
+
"""Suggest follow-up questions for discovery"""
|
|
856
|
+
suggestions = []
|
|
857
|
+
|
|
858
|
+
if results:
|
|
859
|
+
# Based on what was found
|
|
860
|
+
chunk_types = set(r.chunk_type for r in results)
|
|
861
|
+
if 'class' in chunk_types:
|
|
862
|
+
suggestions.append("What methods does this class have?")
|
|
863
|
+
if 'function' in chunk_types:
|
|
864
|
+
suggestions.append("What calls this function?")
|
|
865
|
+
if len(set(r.file_name for r in results)) > 1:
|
|
866
|
+
suggestions.append("How are these files related?")
|
|
867
|
+
|
|
868
|
+
return suggestions
|
|
869
|
+
|
|
870
|
+
def _suggest_relationship_followups(self, results: List[SearchResult], intent: QuestionIntent) -> List[str]:
|
|
871
|
+
"""Suggest follow-up questions for relationships"""
|
|
872
|
+
suggestions = []
|
|
873
|
+
|
|
874
|
+
if len(intent.entities) >= 2:
|
|
875
|
+
suggestions.append(f"What's the data flow between {' and '.join(intent.entities[:2])}?")
|
|
876
|
+
|
|
877
|
+
if results:
|
|
878
|
+
suggestions.append("What are the dependencies of this component?")
|
|
879
|
+
suggestions.append("Are there any circular dependencies?")
|
|
880
|
+
|
|
881
|
+
return suggestions
|
|
882
|
+
|
|
883
|
+
def _suggest_reasoning_followups(self, results: List[SearchResult], intent: QuestionIntent) -> List[str]:
|
|
884
|
+
"""Suggest follow-up questions for reasoning"""
|
|
885
|
+
return [
|
|
886
|
+
"What are the trade-offs of this approach?",
|
|
887
|
+
"How could this be improved?",
|
|
888
|
+
"What patterns are being used here?"
|
|
889
|
+
]
|
|
890
|
+
|
|
891
|
+
|
|
892
|
+
# Example usage and testing
|
|
893
|
+
if __name__ == "__main__":
    # Manual smoke test: classify a fixed set of sample questions and print
    # what the classifier extracted for each one.
    classifier = QuestionClassifier()

    # Samples grouped by the category they are meant to exercise; the
    # groups are flattened in this order below.
    discovery_samples = [
        "What are the main components?",
        "Where is authentication handled?",
        "List all API endpoints",
    ]
    definition_samples = [
        "Show me the UserService class",
        "Find the login function implementation",
    ]
    relationship_samples = [
        "How do OrderService and PaymentService interact?",
        "What calls the authenticate function?",
    ]
    flow_samples = [
        "Trace the login flow",
        "How does data flow from API to database?",
    ]
    structure_samples = [
        "What's the architecture of this system?",
        "How is the codebase organized?",
    ]
    reasoning_samples = [
        "Why does this use Redis instead of memory cache?",
        "What's the reason for separating auth into its own service?",
    ]
    pattern_samples = [
        "What design patterns are used?",
        "Is this using the repository pattern?",
    ]
    evaluation_samples = [
        "How could the error handling be improved?",
        "Are there any security concerns?",
    ]
    ambiguous_samples = [
        "How does it work?",
        "Tell me about the system",
    ]

    test_questions = (
        discovery_samples
        + definition_samples
        + relationship_samples
        + flow_samples
        + structure_samples
        + reasoning_samples
        + pattern_samples
        + evaluation_samples
        + ambiguous_samples
    )

    print("Question Classification Test")
    print("=" * 60)

    for sample in test_questions:
        classified = classifier.classify(sample)
        print(f"\nQ: {sample}")
        print(f" Type: {classified.primary_type.value} (confidence: {classified.confidence:.2f})")
        print(f" Entities: {classified.entities}")
        print(f" Keywords: {classified.keywords}")
        print(f" Needs LLM: {classified.requires_llm}")
        print(f" Approach: {classified.suggested_approach}")

        # Only flagged intents get the clarification prompts printed.
        if not classifier.needs_clarification(classified):
            continue

        print(" ⚠️ Needs clarification:")
        for hint in classifier.suggest_clarifications(sample, classified):
            print(f" - {hint}")
|