code-finder 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. claude_context/__init__.py +33 -0
  2. claude_context/agentic_integration.py +309 -0
  3. claude_context/ast_chunker.py +646 -0
  4. claude_context/config.py +239 -0
  5. claude_context/context_manager.py +627 -0
  6. claude_context/embeddings.py +307 -0
  7. claude_context/embeddings_interface.py +226 -0
  8. claude_context/enhanced_ast_chunker.py +1129 -0
  9. claude_context/explorer.py +951 -0
  10. claude_context/explorer_with_context.py +1008 -0
  11. claude_context/indexer.py +893 -0
  12. claude_context/markdown_chunker.py +421 -0
  13. claude_context/mode_handler.py +1774 -0
  14. claude_context/query_metrics.py +164 -0
  15. claude_context/question_generator.py +800 -0
  16. claude_context/readme_extractor.py +485 -0
  17. claude_context/repository_adapter.py +399 -0
  18. claude_context/search.py +493 -0
  19. claude_context/skills/__init__.py +11 -0
  20. claude_context/skills/_cli_common.py +74 -0
  21. claude_context/skills/_index_manager.py +98 -0
  22. claude_context/skills/api_surface.py +219 -0
  23. claude_context/skills/evidence_retrieval.py +151 -0
  24. claude_context/skills/grounded_review.py +212 -0
  25. claude_context/synthesis/__init__.py +8 -0
  26. claude_context/synthesis/editor_agent.py +391 -0
  27. claude_context/synthesis/llm_synthesizer.py +153 -0
  28. claude_context/synthesis/logic_explainer.py +235 -0
  29. claude_context/synthesis/multi_review_pipeline.py +717 -0
  30. claude_context/synthesis/prompt_builder.py +439 -0
  31. claude_context/synthesis/providers.py +115 -0
  32. claude_context/synthesis/validators.py +458 -0
  33. code_finder-0.1.0.dist-info/METADATA +823 -0
  34. code_finder-0.1.0.dist-info/RECORD +37 -0
  35. code_finder-0.1.0.dist-info/WHEEL +5 -0
  36. code_finder-0.1.0.dist-info/entry_points.txt +4 -0
  37. code_finder-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,951 @@
1
+
2
+ """
3
+ Interactive Explorer for Claude Context
4
+
5
+ Provides an interactive Q&A interface for exploring codebases with a strict
6
+ evidence-first approach. Questions are classified and routed to appropriate
7
+ handlers based on intent, ensuring grounded, accurate responses.
8
+
9
+ Philosophy: Facts → Analysis → Reasoning (only with evidence)
10
+ """
11
+
12
+ import logging
13
+ import re
14
+ from enum import Enum
15
+ from typing import List, Dict, Any, Optional, Tuple, Set
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime
18
+ import json
19
+
20
+ from .search import HybridSearcher, SearchResult, create_hybrid_searcher
21
+ from .config import ClaudeContextConfig, MilvusManager
22
+ from .embeddings import LocalEmbeddings
23
+ from .indexer import RepositoryIndexer
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class QuestionType(Enum):
    """Question categories, grouped by how much processing an answer needs."""

    # --- Layer 1: facts only (plain search) ---
    DISCOVERY = "discovery"            # what / where / list questions
    DEFINITION = "definition"          # "show me X", "find Y"

    # --- Layer 2: analysis required (search + static analysis) ---
    RELATIONSHIP = "relationship"      # how X connects to Y
    FLOW = "flow"                      # tracing data / control flow
    STRUCTURE = "structure"            # module / architecture questions

    # --- Layer 3: reasoning required (search + analysis + inference) ---
    REASONING = "reasoning"            # "why" questions
    PATTERN = "pattern"                # design-pattern identification
    EVALUATION = "evaluation"          # quality / improvement questions

    # --- Special cases ---
    UNKNOWN = "unknown"                # could not be classified
    CLARIFICATION = "clarification"    # more information is needed
48
+
49
+
50
@dataclass
class QuestionIntent:
    """Outcome of classifying one question."""

    # Best-matching question category.
    primary_type: QuestionType
    # Classification confidence, 0-1.
    confidence: float
    # Key terms extracted from the question.
    keywords: List[str]
    # Code entities mentioned (classes, functions, etc.).
    entities: List[str]
    # Either "specific" or "broad".
    scope: str
    # Whether an LLM would be beneficial for answering.
    requires_llm: bool
    # Short description of how to handle this question.
    suggested_approach: str
60
+
61
+
62
class QuestionClassifier:
    """
    Classifies questions to route them to appropriate handlers.

    No LLM is involved: each question type has a list of regular expressions
    and a list of keywords, and the type with the highest normalised score
    wins. A question that matches nothing at all is reported as
    ``QuestionType.UNKNOWN``.
    """

    def __init__(self):
        # Matching rules per question type. "examples" is documentation only;
        # it is not read anywhere in this class.
        self.patterns = {
            QuestionType.DISCOVERY: {
                "patterns": [
                    r"\b(what|which|list|show|find|where|locate)\b.*\b(is|are|all|every|main|primary)\b",
                    r"\b(what|where)\s+(is|are)\s+(\w+)",
                    r"\b(show|list|find)\s+(me\s+)?(all\s+)?(\w+)",
                    r"\b(main|primary|key|core)\s+(components?|modules?|classes?|functions?)\b"
                ],
                "keywords": ["what", "where", "which", "list", "show", "find", "locate", "main", "all"],
                "examples": [
                    "What are the main components?",
                    "Where is authentication handled?",
                    "List all API endpoints",
                    "Show me the database models"
                ]
            },

            QuestionType.DEFINITION: {
                "patterns": [
                    r"\b(show|display|get|find)\s+(me\s+)?(the\s+)?(\w+)\s+(class|function|method|code)\b",
                    r"\b(definition|implementation)\s+of\s+(\w+)",
                    r"\bwhat\s+does\s+(\w+)\s+look\s+like\b"
                ],
                "keywords": ["show", "display", "definition", "implementation", "code"],
                "examples": [
                    "Show me the UserService class",
                    "Find the login function",
                    "What does PaymentProcessor look like?"
                ]
            },

            QuestionType.RELATIONSHIP: {
                "patterns": [
                    r"\b(how|what|which)\b.*\b(connect|interact|relate|call|use|depend|communicate)\b",
                    r"\b(relationship|connection|interaction)\s+(between|with|of)\b",
                    r"\b(what|which)\s+(calls?|uses?|imports?|depends?\s+on)\s+(\w+)",
                    r"\b(\w+)\s+and\s+(\w+)\s+(interact|connect|relate|work\s+together)"
                ],
                "keywords": ["connect", "interact", "relate", "call", "use", "depend", "relationship", "between"],
                "examples": [
                    "How do OrderService and PaymentService interact?",
                    "What calls the authenticate function?",
                    "Which components depend on Redis?",
                    "How are User and Profile connected?"
                ]
            },

            QuestionType.FLOW: {
                "patterns": [
                    r"\b(trace|follow|track)\b.*\b(flow|path|journey|process)\b",
                    r"\b(how|what)\b.*\b(flows?|moves?|travels?|processes?|happens?)\b",
                    r"\b(data|control|execution)\s+(flow|path)\b",
                    r"\bstep[- ]?by[- ]?step\b"
                ],
                "keywords": ["trace", "follow", "flow", "path", "journey", "process", "step"],
                "examples": [
                    "Trace the login flow",
                    "How does data flow from API to database?",
                    "Follow the order processing path",
                    "What happens step by step when a user signs up?"
                ]
            },

            QuestionType.STRUCTURE: {
                "patterns": [
                    r"\b(architecture|structure|organization|layout)\b",
                    r"\b(module|package|component)\s+(boundaries?|structure|organization)\b",
                    r"\b(how\s+is|what\'s\s+the).*\b(organized|structured|architected|laid\s+out)\b"
                ],
                "keywords": ["architecture", "structure", "module", "package", "boundaries", "organization"],
                "examples": [
                    "What's the architecture of this system?",
                    "How is the codebase organized?",
                    "What are the module boundaries?",
                    "Explain the package structure"
                ]
            },

            QuestionType.REASONING: {
                "patterns": [
                    r"\bwhy\b",
                    r"\b(reason|rationale|purpose|motivation)\b",
                    r"\b(explain|understand)\s+(the\s+)?(reasoning|thinking|decision)\b",
                    r"\bwhat\'s\s+the\s+(point|purpose|reason)\b"
                ],
                "keywords": ["why", "reason", "rationale", "purpose", "explain", "because"],
                "examples": [
                    "Why does this use Redis instead of memory cache?",
                    "What's the reason for this abstraction?",
                    "Why is this a separate service?",
                    "Explain the rationale behind this pattern"
                ]
            },

            QuestionType.PATTERN: {
                "patterns": [
                    r"\b(pattern|design|approach|strategy|technique)\b",
                    r"\b(what|which)\s+(pattern|design|approach)\b",
                    r"\b(singleton|factory|observer|mvc|repository|strategy)\b"
                ],
                "keywords": ["pattern", "design", "approach", "strategy", "technique"],
                "examples": [
                    "What design patterns are used?",
                    "Is this using the repository pattern?",
                    "What's the caching strategy?",
                    "Identify the architectural patterns"
                ]
            },

            QuestionType.EVALUATION: {
                "patterns": [
                    r"\b(good|bad|better|worse|improve|optimize|refactor)\b",
                    r"\b(should|could|would)\b.*\b(be|have)\b",
                    r"\b(quality|performance|security|maintainability)\b",
                    r"\b(issue|problem|concern|smell|antipattern)\b"
                ],
                "keywords": ["good", "bad", "improve", "should", "could", "quality", "issue", "problem"],
                "examples": [
                    "How could this be improved?",
                    "Is this good design?",
                    "What are potential issues?",
                    "Should this be refactored?"
                ]
            }
        }

        # Compile every regex once so classify() only has to run searches.
        for rules in self.patterns.values():
            rules["compiled"] = [
                re.compile(pattern, re.IGNORECASE)
                for pattern in rules["patterns"]
            ]

    def classify(self, question: str) -> QuestionIntent:
        """
        Classify a question into a type and extract intent details.

        Each regex hit is worth 1.0 and each keyword found as a substring of
        the lower-cased question is worth 0.5; the sum is divided by the
        maximum achievable score for that type. The best-scoring type wins
        (first one in definition order on ties). If no type scores above
        zero the result is ``QuestionType.UNKNOWN`` with confidence 0.0.

        Args:
            question: The user's question

        Returns:
            QuestionIntent with classification details
        """
        normalized = question.lower().strip()

        # Entities are extracted from the original text because the
        # CamelCase detection depends on letter case.
        entities = self._extract_entities(question)

        scores = {}
        # Ordered, de-duplicated keyword list: keeps the output (and any
        # search query built from it) stable between runs.
        matched_keywords: List[str] = []

        for qtype, rules in self.patterns.items():
            pattern_hits = sum(
                1 for compiled in rules["compiled"] if compiled.search(normalized)
            )
            keyword_hits = [kw for kw in rules["keywords"] if kw in normalized]
            for kw in keyword_hits:
                if kw not in matched_keywords:
                    matched_keywords.append(kw)

            score = pattern_hits * 1.0 + len(keyword_hits) * 0.5
            max_possible = len(rules["patterns"]) + (len(rules["keywords"]) * 0.5)
            scores[qtype] = score / max_possible if max_possible > 0 else 0

        # Pick the best match. max() on an all-zero table would return the
        # first type, so a question that matched nothing is mapped to UNKNOWN.
        best_type = max(scores, key=scores.get) if scores else QuestionType.UNKNOWN
        confidence = scores.get(best_type, 0.0)
        if confidence <= 0:
            best_type = QuestionType.UNKNOWN
            confidence = 0.0

        # Layer 3 types are the ones that benefit from LLM inference.
        requires_llm = best_type in [
            QuestionType.REASONING,
            QuestionType.PATTERN,
            QuestionType.EVALUATION
        ]

        scope = "specific" if entities else "broad"
        approach = self._suggest_approach(best_type, entities, list(matched_keywords))

        return QuestionIntent(
            primary_type=best_type,
            confidence=confidence,
            keywords=list(matched_keywords),
            entities=entities,
            scope=scope,
            requires_llm=requires_llm,
            suggested_approach=approach
        )

    def _extract_entities(self, question: str) -> List[str]:
        """Return likely code entities named in the question, first-seen order, no duplicates."""
        # Question / command words that must never be reported as entities.
        question_words = {
            'what', 'where', 'when', 'why', 'how', 'which', 'who',
            'is', 'are', 'was', 'were', 'do', 'does', 'did',
            'can', 'could', 'should', 'would', 'will',
            'show', 'tell', 'find', 'list', 'get', 'trace', 'explain'
        }

        candidates = []

        # Capitalised / CamelCase words (likely class names), minus
        # capitalised question words such as a leading "What".
        candidates.extend(
            word
            for word in re.findall(r'\b[A-Z][a-z]+(?:[A-Z][a-z]+)*\b', question)
            if word.lower() not in question_words
        )

        # snake_case words (likely function / variable names).
        candidates.extend(re.findall(r'\b[a-z]+(?:_[a-z]+)+\b', question))

        # Anything wrapped in backticks or quotes.
        candidates.extend(re.findall(r'[`"\']([^`"\']+)[`"\']', question))

        # Words ending in a common code suffix (case-insensitive).
        suffixed = re.findall(
            r'\b\w+(?:Service|Controller|Model|Handler|Manager|Factory|Repository|Component|Module|Class|Function)\b',
            question,
            re.IGNORECASE,
        )
        for term in suffixed:
            if term not in candidates and term.lower() not in question_words:
                candidates.append(term)

        # Well-known technical terms (case-insensitive).
        candidates.extend(
            re.findall(
                r'\b(?:API|URL|HTTP|REST|JSON|XML|SQL|CSS|HTML|JWT|OAuth|Redis|MongoDB|PostgreSQL)\b',
                question,
                re.IGNORECASE,
            )
        )

        # Drop duplicates (exact-string match) while preserving order.
        seen = set()
        unique_entities = []
        for candidate in candidates:
            if candidate in seen or candidate.lower() in question_words:
                continue
            seen.add(candidate)
            unique_entities.append(candidate)

        return unique_entities

    def _suggest_approach(self, qtype: QuestionType, entities: List[str], keywords: List[str]) -> str:
        """Return a one-line description of how to answer a question of the given type.

        ``keywords`` is accepted for interface compatibility but not used.
        """
        suggestions = {
            QuestionType.DISCOVERY: f"Search for {', '.join(entities) if entities else 'relevant components'} and list findings",
            QuestionType.DEFINITION: f"Locate and display the complete code for {', '.join(entities) if entities else 'the requested item'}",
            QuestionType.RELATIONSHIP: f"Find connections between {' and '.join(entities) if len(entities) >= 2 else 'components'} using dependency analysis",
            QuestionType.FLOW: f"Trace the execution path starting from {entities[0] if entities else 'entry point'}",
            QuestionType.STRUCTURE: "Analyze directory structure and module boundaries",
            QuestionType.REASONING: f"Search for evidence about {', '.join(entities) if entities else 'the topic'}, then infer purpose from context",
            QuestionType.PATTERN: "Identify code patterns and match against known design patterns",
            QuestionType.EVALUATION: f"Analyze {', '.join(entities) if entities else 'code quality'} against best practices",
            QuestionType.UNKNOWN: "Clarify the question or try a broader search",
            QuestionType.CLARIFICATION: "Request more specific information"
        }

        return suggestions.get(qtype, "Perform general search and analysis")

    def needs_clarification(self, intent: QuestionIntent) -> bool:
        """Return True when the question is unknown, low-confidence (< 0.3), or broad with no keywords."""
        return (
            intent.primary_type == QuestionType.UNKNOWN or
            intent.confidence < 0.3 or
            (intent.scope == "broad" and not intent.keywords)
        )

    def suggest_clarifications(self, question: str, intent: QuestionIntent) -> List[str]:
        """Return clarifying prompts for the user; always at least one."""
        suggestions = []

        if intent.primary_type == QuestionType.UNKNOWN:
            suggestions.append("Could you rephrase your question? I'm not sure what you're looking for.")

        if intent.scope == "broad" and not intent.entities:
            suggestions.append("Which specific component or function are you interested in?")

        if intent.confidence < 0.5:
            # Tailor the prompt to the interrogative used (substring check).
            lowered = question.lower()
            if "how" in lowered:
                suggestions.append("Are you asking about how components interact, or how something is implemented?")
            elif "what" in lowered:
                suggestions.append("Are you looking for a list of items, or the definition of something specific?")

        if not suggestions:
            suggestions.append("Could you provide more context or be more specific?")

        return suggestions
369
+
370
+
371
@dataclass
class ExplorationResult:
    """Everything produced while answering one question about the codebase."""

    # The question as asked, and how it was classified.
    question: str
    question_type: QuestionType
    confidence: float

    # Core results: natural-language answer plus the supporting search hits.
    answer: str
    code_results: List[SearchResult] = field(default_factory=list)

    # Metadata about the search.
    entities_found: List[str] = field(default_factory=list)
    search_terms_used: List[str] = field(default_factory=list)

    # Context for future questions.
    follow_up_suggestions: List[str] = field(default_factory=list)
    related_files: List[str] = field(default_factory=list)

    # External context (for future enhancement).
    external_context: Dict[str, Any] = field(default_factory=dict)

    # Tracking.
    processing_time: float = 0.0
    used_llm: bool = False

    def format_answer(self, include_code: bool = True, max_results: int = 3) -> str:
        """Render the result as display text.

        Args:
            include_code: When False, the code-results section is omitted.
            max_results: Maximum number of code results to list.

        Returns:
            The sections joined with newlines.
        """
        sections = [
            f"📝 Question: {self.question}",
            f" Type: {self.question_type.value} (confidence: {self.confidence:.2f})",
            f"\n💡 Answer:\n{self.answer}",
        ]

        # Code section: header reports the total hit count, the list is capped.
        if include_code and self.code_results:
            sections.append(f"\n📂 Relevant Code ({len(self.code_results)} results found):")
            shown = self.code_results[:max_results]
            for position, hit in enumerate(shown, 1):
                sections.append(f"\n{position}. {hit.file_name} ({hit.chunk_type})")
                sections.append(f" Lines {hit.start_line}-{hit.end_line}")
                if hit.chunk_name:
                    sections.append(f" Name: {hit.chunk_name}")
                # Preview: first three lines, with an ellipsis if truncated.
                content_lines = hit.content.split('\n')
                sections.extend(f" {text}" for text in content_lines[:3])
                if len(content_lines) > 3:
                    sections.append(" ...")

        # At most three follow-up suggestions.
        if self.follow_up_suggestions:
            sections.append("\n🔍 You might also want to ask:")
            sections.extend(f" - {hint}" for hint in self.follow_up_suggestions[:3])

        sections.append(f"\n⏱️ Processed in {self.processing_time:.3f}s")

        return '\n'.join(sections)
434
+
435
+
436
@dataclass
class ConversationContext:
    """Accumulates questions, results and focus entities/files for one session."""

    session_id: str
    started_at: datetime = field(default_factory=datetime.now)

    # History, in the order results were added.
    questions: List[str] = field(default_factory=list)
    results: List[ExplorationResult] = field(default_factory=list)

    # Current focus: union of entities / files over all results so far.
    current_entities: Set[str] = field(default_factory=set)
    current_files: Set[str] = field(default_factory=set)

    # External context (for future enhancement).
    external_docs: List[str] = field(default_factory=list)

    def add_result(self, result: ExplorationResult):
        """Record a result and fold its entities and files into the current focus."""
        self.questions.append(result.question)
        self.results.append(result)

        self.current_entities.update(result.entities_found)
        self.current_files.update(result.related_files)

    def get_context_summary(self) -> str:
        """Return a short multi-line summary of the session.

        Entities are listed in sorted order: ``current_entities`` is a set,
        so joining it directly would give a different order from run to run.
        The text is assembled line by line so it carries no source-code
        indentation.
        """
        if self.current_entities:
            entities_text = ', '.join(sorted(self.current_entities))
        else:
            entities_text = 'None'

        summary_lines = [
            "",  # the summary starts with a newline
            f"Session: {self.session_id}",
            f"Questions asked: {len(self.questions)}",
            f"Entities explored: {entities_text}",
            f"Files examined: {len(self.current_files)}",
            "",  # ...and ends with one
        ]
        return '\n'.join(summary_lines)
471
+
472
+
473
+ class InteractiveExplorer:
474
+ """
475
+ Interactive Q&A explorer for codebases.
476
+
477
+ This is the main interface for exploring code through questions.
478
+ It uses the question classifier to route questions to appropriate
479
+ handlers and maintains conversation context.
480
+ """
481
+
482
def __init__(
    self,
    config: ClaudeContextConfig,
    embeddings: LocalEmbeddings,
    milvus_manager: MilvusManager,
    llm_client: Optional[Any] = None,  # For future LLM integration
    external_context: Optional[Dict] = None  # For future context integration
):
    """
    Set up the classifier, the hybrid searcher and an empty session table.

    Args:
        config: Configuration, stored and passed to the searcher factory
        embeddings: Embeddings model passed to the searcher factory
        milvus_manager: Milvus connection manager passed to the searcher factory
        llm_client: Optional LLM for reasoning questions
        external_context: Optional external documentation; a falsy value
            is replaced by an empty dict
    """
    self.config = config
    self.llm_client = llm_client
    self.external_context = external_context if external_context else {}

    # Question routing and code search.
    self.classifier = QuestionClassifier()
    self.searcher = create_hybrid_searcher(config, embeddings, milvus_manager)

    # Session management: session id -> conversation context.
    self.sessions: Dict[str, ConversationContext] = {}

    logger.info("InteractiveExplorer initialized")
510
+
511
def start_session(self, session_id: Optional[str] = None) -> str:
    """Start a new exploration session and return its id.

    Args:
        session_id: Id to register. When omitted (or empty) an id of the
            form ``session_YYYYMMDD_HHMMSS`` is generated; if that id is
            already registered a numeric suffix (``_2``, ``_3``, ...) is
            appended so an existing session is not silently replaced.
            An explicitly supplied id always (re)starts that session.

    Returns:
        The id under which the new ConversationContext was stored.
    """
    if not session_id:
        base_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
        session_id = base_id
        # The timestamp only has one-second resolution, so two sessions
        # started within the same second would otherwise share an id.
        attempt = 1
        while session_id in self.sessions:
            attempt += 1
            session_id = f"{base_id}_{attempt}"

    self.sessions[session_id] = ConversationContext(session_id=session_id)
    logger.info("Started exploration session: %s", session_id)
    return session_id
519
+
520
def ask(
    self,
    question: str,
    session_id: Optional[str] = None,
    include_code: bool = True,
    max_results: int = 5
) -> ExplorationResult:
    """
    Ask a question about the codebase.

    The question is classified and routed to the discovery, analysis,
    reasoning or unknown handler. When a session id is supplied the result
    is recorded in that session; a session id that is not registered yet
    is registered on the fly so its history is kept for later calls.

    Args:
        question: The question to answer
        session_id: Optional session ID for context
        include_code: Whether to include code snippets (currently not
            read by this method)
        max_results: Maximum number of code results

    Returns:
        ExplorationResult with answer and evidence
    """
    start_time = datetime.now()

    # Get or create the session. Previously a context created for an
    # unregistered id was never stored, so the recorded result was lost.
    context = None
    if session_id:
        context = self.sessions.get(session_id)
        if context is None:
            context = ConversationContext(session_id=session_id)
            self.sessions[session_id] = context

    intent = self.classifier.classify(question)

    # Route by processing layer.
    if intent.primary_type in [QuestionType.DISCOVERY, QuestionType.DEFINITION]:
        result = self._handle_discovery_question(question, intent, max_results)
    elif intent.primary_type in [QuestionType.RELATIONSHIP, QuestionType.FLOW, QuestionType.STRUCTURE]:
        result = self._handle_analysis_question(question, intent, max_results)
    elif intent.primary_type in [QuestionType.REASONING, QuestionType.PATTERN, QuestionType.EVALUATION]:
        result = self._handle_reasoning_question(question, intent, max_results)
    else:
        result = self._handle_unknown_question(question, intent)

    result.processing_time = (datetime.now() - start_time).total_seconds()

    # Only session-bound questions are recorded.
    if context is not None:
        context.add_result(result)

    return result
568
+
569
def _handle_discovery_question(
    self,
    question: str,
    intent: QuestionIntent,
    max_results: int
) -> ExplorationResult:
    """
    Answer a discovery or definition question (Layer 1: facts only).

    The search query is the extracted entities followed by the matched
    keywords; if both are empty the raw question is searched instead.
    """
    search_terms = intent.entities + intent.keywords
    search_query = question if not search_terms else ' '.join(search_terms)

    code_results = self.searcher.search(
        search_query,
        limit=max_results
    )

    if not code_results:
        # Nothing found: report what was looked for and offer generic advice.
        subject = ', '.join(intent.entities) if intent.entities else question
        answer = f"I couldn't find any code related to: {subject}"
        entities_found = []
        related_files = []
        follow_ups = ["Try searching with different terms", "Be more specific about what you're looking for"]
    else:
        if intent.primary_type == QuestionType.DISCOVERY:
            answer = self._build_discovery_answer(code_results, intent)
        else:  # DEFINITION
            answer = self._build_definition_answer(code_results, intent)

        # Distinct chunk names and file paths among the hits.
        entities_found = list({hit.chunk_name for hit in code_results if hit.chunk_name})
        related_files = list({hit.file_path for hit in code_results})

        follow_ups = self._suggest_discovery_followups(code_results, intent)

    return ExplorationResult(
        question=question,
        question_type=intent.primary_type,
        confidence=intent.confidence,
        answer=answer,
        code_results=code_results,
        entities_found=entities_found,
        search_terms_used=search_terms,
        follow_up_suggestions=follow_ups,
        related_files=related_files
    )
621
+
622
def _handle_analysis_question(
    self,
    question: str,
    intent: QuestionIntent,
    max_results: int
) -> ExplorationResult:
    """
    Handle relationship/flow/structure questions (Layer 2: Analysis).

    For now this is a search with relationship-focused terms; no actual
    dependency analysis is performed yet.

    When entities were extracted, the query is the entities followed by a
    fixed list of relationship terms. When none were extracted, the
    question itself is searched: the fixed terms alone carry nothing
    specific to the question, and the original fallback to the question
    was unreachable because the term list is never empty.
    """
    relationship_terms = ["import", "call", "use", "depend", "inherit", "extend"]
    if intent.entities:
        search_terms = intent.entities + relationship_terms
        search_query = ' '.join(search_terms)
    else:
        # No terms were used; mirrors how the other handlers report an
        # empty term list when they fall back to the raw question.
        search_terms = []
        search_query = question

    # Fetch twice as many hits as will be displayed so the relationship
    # summary has more material to work with.
    code_results = self.searcher.search(
        search_query,
        limit=max_results * 2
    )

    if code_results:
        answer = self._build_relationship_answer(code_results, intent)
        entities_found = self._extract_all_entities(code_results)
        related_files = list(set(r.file_path for r in code_results))
        follow_ups = self._suggest_relationship_followups(code_results, intent)
    else:
        answer = f"I couldn't find relationships for: {', '.join(intent.entities) if intent.entities else 'the specified components'}"
        entities_found = []
        related_files = []
        follow_ups = ["Try specifying the exact component names", "Check if the components exist"]

    return ExplorationResult(
        question=question,
        question_type=intent.primary_type,
        confidence=intent.confidence,
        answer=answer,
        code_results=code_results[:max_results],  # Limit displayed results
        entities_found=entities_found,
        search_terms_used=search_terms,
        follow_up_suggestions=follow_ups,
        related_files=related_files
    )
669
+
670
def _handle_reasoning_question(
    self,
    question: str,
    intent: QuestionIntent,
    max_results: int
) -> ExplorationResult:
    """
    Answer a reasoning, pattern or evaluation question (Layer 3: reasoning).

    Searches with entities + keywords (or the raw question when both are
    empty), then builds the answer through the LLM path when an LLM client
    is configured and through the evidence-only path otherwise.
    """
    search_terms = intent.entities + intent.keywords
    search_query = question if not search_terms else ' '.join(search_terms)

    code_results = self.searcher.search(
        search_query,
        limit=max_results
    )

    # The flag records which builder was selected.
    used_llm = bool(self.llm_client)
    if used_llm:
        answer = self._build_reasoning_answer_with_llm(code_results, intent, question)
    else:
        answer = self._build_reasoning_answer_without_llm(code_results, intent)

    if code_results:
        entities_found = self._extract_all_entities(code_results)
        related_files = list({hit.file_path for hit in code_results})
    else:
        entities_found = []
        related_files = []
    follow_ups = self._suggest_reasoning_followups(code_results, intent)

    return ExplorationResult(
        question=question,
        question_type=intent.primary_type,
        confidence=intent.confidence,
        answer=answer,
        code_results=code_results,
        entities_found=entities_found,
        search_terms_used=search_terms,
        follow_up_suggestions=follow_ups,
        related_files=related_files,
        used_llm=used_llm
    )
714
+
715
def _handle_unknown_question(
    self,
    question: str,
    intent: QuestionIntent
) -> ExplorationResult:
    """Fallback for unclassified questions: search the raw text and ask for clarification."""
    # General search on the question itself, capped at five hits.
    code_results = self.searcher.search(question, limit=5)

    answer = (
        "I'm not sure what you're asking, but here's what I found:"
        if code_results
        else "I couldn't understand your question. Could you please rephrase it?"
    )

    clarifications = self.classifier.suggest_clarifications(question, intent)

    # The result is always reported as UNKNOWN with zero confidence,
    # regardless of what the intent carried.
    return ExplorationResult(
        question=question,
        question_type=QuestionType.UNKNOWN,
        confidence=0.0,
        answer=answer,
        code_results=code_results,
        follow_up_suggestions=clarifications
    )
741
+
742
+ # Answer building methods
743
+
744
+ def _build_discovery_answer(self, results: List[SearchResult], intent: QuestionIntent) -> str:
745
+ """Build answer for discovery questions"""
746
+ answer_parts = []
747
+
748
+ if intent.entities:
749
+ answer_parts.append(f"Here's what I found about {', '.join(intent.entities)}:")
750
+ else:
751
+ answer_parts.append("Here's what I found:")
752
+
753
+ # Group by type
754
+ by_type = {}
755
+ for r in results:
756
+ if r.chunk_type not in by_type:
757
+ by_type[r.chunk_type] = []
758
+ by_type[r.chunk_type].append(r)
759
+
760
+ for chunk_type, items in by_type.items():
761
+ answer_parts.append(f"\n{chunk_type.title()}s:")
762
+ for item in items[:3]: # Limit to 3 per type
763
+ if item.chunk_name:
764
+ answer_parts.append(f" • {item.chunk_name} in {item.file_name}")
765
+ else:
766
+ answer_parts.append(f" • {item.file_name} (lines {item.start_line}-{item.end_line})")
767
+
768
+ return '\n'.join(answer_parts)
769
+
770
+ def _build_definition_answer(self, results: List[SearchResult], intent: QuestionIntent) -> str:
771
+ """Build answer for definition questions"""
772
+ if not results:
773
+ return "Definition not found."
774
+
775
+ best_match = results[0]
776
+ answer = f"Found {best_match.chunk_type}: {best_match.chunk_name or 'unnamed'}\n"
777
+ answer += f"Location: {best_match.file_name} (lines {best_match.start_line}-{best_match.end_line})\n"
778
+
779
+ # Add language hint
780
+ if best_match.language:
781
+ answer += f"Language: {best_match.language}\n"
782
+
783
+ return answer
784
+
785
+ def _build_relationship_answer(self, results: List[SearchResult], intent: QuestionIntent) -> str:
786
+ """Build answer for relationship questions"""
787
+ if len(intent.entities) >= 2:
788
+ answer = f"Analyzing relationship between {' and '.join(intent.entities)}:\n"
789
+ else:
790
+ answer = "Found the following relationships:\n"
791
+
792
+ # Look for imports and calls
793
+ imports = [r for r in results if r.chunk_type == 'import_block']
794
+ functions = [r for r in results if r.chunk_type in ['function', 'method']]
795
+ classes = [r for r in results if r.chunk_type == 'class']
796
+
797
+ if imports:
798
+ answer += f"\nImports ({len(imports)} found):"
799
+ for imp in imports[:3]:
800
+ first_line = imp.content.split('\n')[0][:80]
801
+ answer += f"\n • {imp.file_name}: {first_line}"
802
+
803
+ if classes:
804
+ answer += f"\n\nClasses involved ({len(classes)} found):"
805
+ for cls in classes[:3]:
806
+ answer += f"\n • {cls.chunk_name} in {cls.file_name}"
807
+
808
+ if functions:
809
+ answer += f"\n\nFunctions/Methods ({len(functions)} found):"
810
+ for func in functions[:3]:
811
+ answer += f"\n • {func.chunk_name} in {func.file_name}"
812
+
813
+ return answer
814
+
815
+ def _build_reasoning_answer_without_llm(self, results: List[SearchResult], intent: QuestionIntent) -> str:
816
+ """Build reasoning answer without LLM"""
817
+ answer = "To answer 'why' questions, I need LLM integration. However, here's the relevant code:\n"
818
+
819
+ if results:
820
+ for r in results[:3]:
821
+ answer += f"\n• {r.chunk_type} '{r.chunk_name or 'unnamed'}' in {r.file_name}"
822
+ # Look for comments that might explain why
823
+ lines = r.content.split('\n')
824
+ for line in lines:
825
+ if '#' in line or '//' in line or '/*' in line:
826
+ comment = line.strip()
827
+ if len(comment) > 10: # Non-trivial comment
828
+ answer += f"\n Comment: {comment[:100]}"
829
+ break
830
+ else:
831
+ answer += "\nNo relevant code found to analyze."
832
+
833
+ answer += "\n\nNote: For deeper reasoning about design decisions, LLM integration would provide better insights."
834
+
835
+ return answer
836
+
837
+ def _build_reasoning_answer_with_llm(self, results: List[SearchResult], intent: QuestionIntent, question: str) -> str:
838
+ """Build reasoning answer with LLM (future enhancement)"""
839
+ # This is where we'd use the LLM with grounding
840
+ # For now, return the non-LLM answer
841
+ return self._build_reasoning_answer_without_llm(results, intent)
842
+
843
+ # Helper methods
844
+
845
+ def _extract_all_entities(self, results: List[SearchResult]) -> List[str]:
846
+ """Extract all entities from search results"""
847
+ entities = set()
848
+ for r in results:
849
+ if r.chunk_name:
850
+ entities.add(r.chunk_name)
851
+ # Could also extract entities from content
852
+ return list(entities)
853
+
854
+ def _suggest_discovery_followups(self, results: List[SearchResult], intent: QuestionIntent) -> List[str]:
855
+ """Suggest follow-up questions for discovery"""
856
+ suggestions = []
857
+
858
+ if results:
859
+ # Based on what was found
860
+ chunk_types = set(r.chunk_type for r in results)
861
+ if 'class' in chunk_types:
862
+ suggestions.append("What methods does this class have?")
863
+ if 'function' in chunk_types:
864
+ suggestions.append("What calls this function?")
865
+ if len(set(r.file_name for r in results)) > 1:
866
+ suggestions.append("How are these files related?")
867
+
868
+ return suggestions
869
+
870
+ def _suggest_relationship_followups(self, results: List[SearchResult], intent: QuestionIntent) -> List[str]:
871
+ """Suggest follow-up questions for relationships"""
872
+ suggestions = []
873
+
874
+ if len(intent.entities) >= 2:
875
+ suggestions.append(f"What's the data flow between {' and '.join(intent.entities[:2])}?")
876
+
877
+ if results:
878
+ suggestions.append("What are the dependencies of this component?")
879
+ suggestions.append("Are there any circular dependencies?")
880
+
881
+ return suggestions
882
+
883
+ def _suggest_reasoning_followups(self, results: List[SearchResult], intent: QuestionIntent) -> List[str]:
884
+ """Suggest follow-up questions for reasoning"""
885
+ return [
886
+ "What are the trade-offs of this approach?",
887
+ "How could this be improved?",
888
+ "What patterns are being used here?"
889
+ ]
890
+
891
+
892
+ # Example usage and testing
893
if __name__ == "__main__":
    # Manual smoke test: classify a fixed set of sample questions and print
    # what the classifier reports for each one.
    classifier = QuestionClassifier()

    # Samples grouped by the question category they are meant to exercise.
    # Only the question strings are used below; the labels are for the reader.
    sample_groups = [
        ("Discovery questions", [
            "What are the main components?",
            "Where is authentication handled?",
            "List all API endpoints",
        ]),
        ("Definition questions", [
            "Show me the UserService class",
            "Find the login function implementation",
        ]),
        ("Relationship questions", [
            "How do OrderService and PaymentService interact?",
            "What calls the authenticate function?",
        ]),
        ("Flow questions", [
            "Trace the login flow",
            "How does data flow from API to database?",
        ]),
        ("Structure questions", [
            "What's the architecture of this system?",
            "How is the codebase organized?",
        ]),
        ("Reasoning questions", [
            "Why does this use Redis instead of memory cache?",
            "What's the reason for separating auth into its own service?",
        ]),
        ("Pattern questions", [
            "What design patterns are used?",
            "Is this using the repository pattern?",
        ]),
        ("Evaluation questions", [
            "How could the error handling be improved?",
            "Are there any security concerns?",
        ]),
        ("Ambiguous questions", [
            "How does it work?",
            "Tell me about the system",
        ]),
    ]
    test_questions = [text for _label, texts in sample_groups for text in texts]

    print("Question Classification Test")
    print("=" * 60)

    for question in test_questions:
        intent = classifier.classify(question)
        print(f"\nQ: {question}")
        print(f" Type: {intent.primary_type.value} (confidence: {intent.confidence:.2f})")
        print(f" Entities: {intent.entities}")
        print(f" Keywords: {intent.keywords}")
        print(f" Needs LLM: {intent.requires_llm}")
        print(f" Approach: {intent.suggested_approach}")

        # Clarification hints are printed only for intents the classifier flags.
        if not classifier.needs_clarification(intent):
            continue
        print(" ⚠️ Needs clarification:")
        for suggestion in classifier.suggest_clarifications(question, intent):
            print(f" - {suggestion}")