code-finder 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. claude_context/__init__.py +33 -0
  2. claude_context/agentic_integration.py +309 -0
  3. claude_context/ast_chunker.py +646 -0
  4. claude_context/config.py +239 -0
  5. claude_context/context_manager.py +627 -0
  6. claude_context/embeddings.py +307 -0
  7. claude_context/embeddings_interface.py +226 -0
  8. claude_context/enhanced_ast_chunker.py +1129 -0
  9. claude_context/explorer.py +951 -0
  10. claude_context/explorer_with_context.py +1008 -0
  11. claude_context/indexer.py +893 -0
  12. claude_context/markdown_chunker.py +421 -0
  13. claude_context/mode_handler.py +1774 -0
  14. claude_context/query_metrics.py +164 -0
  15. claude_context/question_generator.py +800 -0
  16. claude_context/readme_extractor.py +485 -0
  17. claude_context/repository_adapter.py +399 -0
  18. claude_context/search.py +493 -0
  19. claude_context/skills/__init__.py +11 -0
  20. claude_context/skills/_cli_common.py +74 -0
  21. claude_context/skills/_index_manager.py +98 -0
  22. claude_context/skills/api_surface.py +219 -0
  23. claude_context/skills/evidence_retrieval.py +151 -0
  24. claude_context/skills/grounded_review.py +212 -0
  25. claude_context/synthesis/__init__.py +8 -0
  26. claude_context/synthesis/editor_agent.py +391 -0
  27. claude_context/synthesis/llm_synthesizer.py +153 -0
  28. claude_context/synthesis/logic_explainer.py +235 -0
  29. claude_context/synthesis/multi_review_pipeline.py +717 -0
  30. claude_context/synthesis/prompt_builder.py +439 -0
  31. claude_context/synthesis/providers.py +115 -0
  32. claude_context/synthesis/validators.py +458 -0
  33. code_finder-0.1.0.dist-info/METADATA +823 -0
  34. code_finder-0.1.0.dist-info/RECORD +37 -0
  35. code_finder-0.1.0.dist-info/WHEEL +5 -0
  36. code_finder-0.1.0.dist-info/entry_points.txt +4 -0
  37. code_finder-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1008 @@
1
+ """
2
+ Interactive Explorer with External Context Support
3
+
4
+ Extends the base InteractiveExplorer to integrate external context
5
+ (requirements, tickets, docs) with code exploration.
6
+ """
7
+
8
+ import logging
9
+ from typing import List, Dict, Any, Optional, Tuple
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime
12
+
13
+ from typing import Any
14
+ from .explorer import (
15
+ ExplorationResult,
16
+ ConversationContext,
17
+ QuestionType,
18
+ QuestionClassifier
19
+ )
20
+ from .context_manager import ExternalContextManager, ContextItem
21
+ from .search import SearchResult, HybridSearcher
22
+ from .config import ClaudeContextConfig
23
+ from .embeddings import LocalEmbeddings
24
+ from .synthesis.providers import create_generator, TextGenerator
25
+ from .query_metrics import QueryMetrics
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
@dataclass
class EnhancedExplorationResult(ExplorationResult):
    """Exploration result that also carries external-context and rationale data."""

    # Context items attached to this result.
    context_items: List[ContextItem] = field(default_factory=list)
    # Citation key -> citation text; rendered by format_with_context().
    context_citations: Dict[str, str] = field(default_factory=dict)
    # Rationale entries (dicts) collected while building the answer.
    rationale_points: List[Dict[str, Any]] = field(default_factory=list)
    # Search results classified as rationale rather than code.
    rationale_results: List[SearchResult] = field(default_factory=list)
    # Short text snippets taken from the rationale results.
    rationale_snippets: List[str] = field(default_factory=list)

    def format_with_context(self) -> str:
        """Return the answer followed by one bullet per context citation.

        The answer is returned untouched when there are no citations.
        """
        if not self.context_citations:
            return self.answer

        pieces = [self.answer, "\n\nšŸ“š Context References:"]
        pieces.extend(
            f"\n • {source}: {citation}"
            for source, citation in self.context_citations.items()
        )
        return "".join(pieces)
50
+
51
+
52
+ class InteractiveExplorerWithContext:
53
+ """
54
+ Enhanced explorer that combines code exploration with external context.
55
+
56
+ This class composes the base explorer functionality with context management to:
57
+ 1. Search external context (requirements, tickets) alongside code
58
+ 2. Provide richer answers that explain both WHAT and WHY
59
+ 3. Allow dynamic context addition during Q&A
60
+ """
61
+
62
def __init__(
    self,
    searcher: HybridSearcher,
    context_manager: Optional[ExternalContextManager] = None,
    enable_llm_fallback: bool = True
):
    """
    Initialize the context-aware explorer.

    Args:
        searcher: HybridSearcher instance for code search
        context_manager: ExternalContextManager for context handling; a new
            one is created only when this is None
        enable_llm_fallback: Enable LLM-based query interpretation for conceptual questions
    """
    self.searcher = searcher
    # Test against None explicitly: `context_manager or ...` would silently
    # replace a caller-supplied manager that merely evaluates as falsy
    # (e.g. an empty one exposing __len__ or __bool__).
    self.context_manager = (
        context_manager if context_manager is not None else ExternalContextManager()
    )
    self.context_enabled = True
    self.classifier = QuestionClassifier()
    # Every result returned by ask() is appended here, oldest first.
    self.conversation_history: List[Any] = []
    self.enable_llm_fallback = enable_llm_fallback

    # LLM generator for query interpretation; created lazily on first use.
    self._llm_generator: Optional[TextGenerator] = None
    # Concept type reported by the latest LLM interpretation, used for ranking.
    self._last_concept_type: str = "unknown"

    # Metrics tracking
    self.metrics = QueryMetrics()

    logger.info(
        "InteractiveExplorerWithContext initialized (LLM fallback: %s)",
        enable_llm_fallback,
    )
91
+
92
def ask(self, question: str) -> EnhancedExplorationResult:
    """
    Answer a question using code search results plus external context.

    Uses a two-stage approach:
    1. Keyword search over the classifier's entities and keywords
    2. If that finds nothing and LLM fallback is enabled, search again with
       queries produced by LLM-based interpretation of the question

    The result is appended to ``conversation_history`` and reported to
    ``metrics`` before being returned.

    Args:
        question: User's question

    Returns:
        EnhancedExplorationResult with code and context
    """
    classification = self.classifier.classify(question)

    candidate_results: List[SearchResult] = []
    search_method = "keyword"

    # Stage 1: Keyword-based search (fast path)
    if classification.entities or classification.keywords:
        search_terms = list(classification.entities) + list(classification.keywords)
        keyword_hits: List[SearchResult] = []
        for term in search_terms[:3]:  # Limit searches
            keyword_hits.extend(self.searcher.search(term, limit=3))
        candidate_results = self._prioritize_results(keyword_hits)

    # Stage 2: LLM-based query interpretation (fallback for conceptual questions)
    if not candidate_results and self.enable_llm_fallback:
        logger.info(f"Keyword search returned no results, using LLM interpretation for: '{question}'")
        # Forget the concept type of any earlier question so it cannot
        # re-rank this one when the interpretation yields no concept type.
        self._last_concept_type = "unknown"
        try:
            interpreted_queries = self._llm_interpret_question(question)
            if interpreted_queries:
                search_method = "llm-interpreted"
                llm_hits: List[SearchResult] = []
                for query in interpreted_queries[:5]:  # Top 5 queries
                    llm_hits.extend(self.searcher.search(query, limit=3))

                # Same filtering/ordering as the keyword stage
                candidate_results = self._prioritize_results(llm_hits)

                logger.info(f"LLM interpretation generated {len(interpreted_queries)} queries, found {len(candidate_results)} results")

                # Rank results by importance if we have a concept type
                if self._last_concept_type != "unknown":
                    candidate_results = self._rank_by_importance(candidate_results, self._last_concept_type)
                    logger.info(f"Results re-ranked by importance (concept: {self._last_concept_type})")

        except Exception as e:
            # Best effort: a failed interpretation leaves us with no results.
            logger.warning(f"LLM interpretation failed: {e}")

    code_results, rationale_results = self._separate_rationale(candidate_results)

    # Build base answer from code, then append rationale evidence (if any)
    base_answer = self._build_code_answer(question, classification, code_results, search_method)
    base_answer, rationale_points, rationale_snippets = self._augment_with_rationale(base_answer, rationale_results, None)

    # Confidence: start from the classifier and raise it when results exist
    initial_confidence = classification.confidence
    if search_method == "llm-interpreted" and candidate_results:
        # LLM interpretation succeeded - clamp to the 0.6..0.8 band
        initial_confidence = max(0.6, min(0.8, 0.5 + (len(candidate_results) * 0.1)))
        logger.info(f"LLM interpretation found {len(candidate_results)} results, confidence boosted to {initial_confidence:.0%}")
    elif candidate_results:
        # Keyword search succeeded - use higher of classification confidence or result-based
        result_confidence = min(0.9, 0.5 + (len(candidate_results) * 0.08))
        initial_confidence = max(classification.confidence, result_confidence)

    combined_rationale_points = list(rationale_points)

    enhanced_result = EnhancedExplorationResult(
        question=question,
        answer=base_answer,
        question_type=classification.primary_type,
        confidence=initial_confidence,
        code_results=code_results,
        follow_up_suggestions=[classification.suggested_approach],
        entities_found=classification.entities,
        rationale_points=combined_rationale_points,
        rationale_results=rationale_results,
        rationale_snippets=rationale_snippets
    )

    # Search external context if available
    if self.context_manager.total_items > 0:
        context_results = self._search_context(question)
        if context_results:
            enhanced_answer, context_rationale = self._enhance_with_context(
                enhanced_result,
                context_results
            )
            enhanced_result.answer = enhanced_answer
            enhanced_result.context_items = [item for item, _ in context_results]
            enhanced_result.context_citations = self._extract_citations(context_results)
            combined_rationale_points.extend(context_rationale)

            # Supporting context raises confidence, capped at 0.9
            if enhanced_result.confidence < 0.9:
                enhanced_result.confidence = min(0.9, enhanced_result.confidence + 0.2)

    enhanced_result.rationale_points = combined_rationale_points

    # Update conversation history
    self.conversation_history.append(enhanced_result)

    # Log metrics for monitoring
    self.metrics.log_question(
        question=question,
        used_llm=(search_method == "llm-interpreted"),
        results_count=len(candidate_results),
        confidence=enhanced_result.confidence
    )

    return enhanced_result

def _prioritize_results(self, results: List[SearchResult]) -> List[SearchResult]:
    """Drop import-only chunks, order definition chunks first, and de-duplicate.

    Each kept result gets a ``boosted`` attribute: True when its content
    contains a class/function definition marker. Results are ordered boosted
    first, then by descending ``score``; only the first result for each
    (file_path, start_line) pair is kept.
    """
    kept: List[SearchResult] = []
    for r in results:
        # Pure import blocks carry little information
        if getattr(r, 'chunk_type', '') == 'import_block':
            continue
        content = r.content or ''
        r.boosted = any(marker in content for marker in ('class ', 'def ', 'async def '))
        kept.append(r)

    kept.sort(key=lambda x: (not getattr(x, 'boosted', False), -getattr(x, 'score', 0)))

    seen = set()
    unique_results: List[SearchResult] = []
    for r in kept:
        key = (r.file_path, r.start_line)
        if key not in seen:
            seen.add(key)
            unique_results.append(r)
    return unique_results
264
+
265
+ def _rank_by_importance(self, results: List[SearchResult], concept_type: str) -> List[SearchResult]:
266
+ """
267
+ Rank search results by importance to codebase.
268
+
269
+ Generic approach: Prioritizes core functionality over extension points
270
+ based on file path patterns that work across domains.
271
+
272
+ Args:
273
+ results: Search results to rank
274
+ concept_type: Type of concept (core_functionality, extension_point, etc.)
275
+
276
+ Returns:
277
+ Re-ranked results with core functionality prioritized
278
+ """
279
+ if not results:
280
+ return results
281
+
282
+ ranked = []
283
+
284
+ for result in results:
285
+ score = getattr(result, 'combined_score', 0.5)
286
+
287
+ # Boost core functionality files (generic patterns)
288
+ if concept_type == "core_functionality":
289
+ core_patterns = [
290
+ "/models/", "/core/", "/engine/", "/lib/",
291
+ "/trainer/", "/inference/", "/quantization/",
292
+ "/controllers/", "/operators/", "/api/",
293
+ "/src/main/", "/pkg/core/"
294
+ ]
295
+ if any(pattern in result.file_path for pattern in core_patterns):
296
+ score *= 1.5 # 50% boost for core files
297
+ logger.debug(f"Boosted core file: {result.file_path}")
298
+
299
+ # De-prioritize peripheral files
300
+ peripheral_patterns = [
301
+ "/examples/", "/tests/", "/test/",
302
+ "/plugins/", "/extensions/", "/contrib/",
303
+ "/demos/", "/samples/", "/benchmarks/"
304
+ ]
305
+ if any(pattern in result.file_path for pattern in peripheral_patterns):
306
+ score *= 0.7 # 30% penalty for peripheral files
307
+ logger.debug(f"De-prioritized peripheral file: {result.file_path}")
308
+
309
+ # Boost based on chunk type
310
+ chunk_type = getattr(result, 'chunk_type', '')
311
+ if chunk_type in ['class', 'function', 'method']:
312
+ score *= 1.2 # Prefer actual implementation over imports/comments
313
+
314
+ ranked.append((result, score))
315
+
316
+ # Sort by adjusted score
317
+ ranked.sort(key=lambda x: x[1], reverse=True)
318
+
319
+ return [r for r, _ in ranked]
320
+
321
def _llm_interpret_question(self, question: str) -> List[str]:
    """
    Use LLM to interpret a conceptual question and generate search queries.

    The LLM is asked for a JSON search strategy. Up to three primary and two
    secondary terms become the queries, and the reported concept type is
    stored in ``self._last_concept_type`` for later ranking. If the response
    is not valid JSON it is split on commas instead. ``_last_concept_type``
    is reset to "unknown" on entry, so it only ever reflects this question.

    Args:
        question: User's conceptual question

    Returns:
        List of at most five search queries; empty when the generator cannot
        be created or generation fails
    """
    import json

    # Never let an earlier question's concept type leak into this one
    # (e.g. when the response below turns out not to be a JSON object).
    self._last_concept_type = "unknown"

    # Lazy init LLM generator
    if self._llm_generator is None:
        try:
            self._llm_generator = create_generator()
        except Exception as e:
            logger.warning(f"Could not initialize LLM generator: {e}")
            return []

    system_prompt = """You are a code search expert who understands diverse technical domains.

Given a user's question, generate targeted search queries by understanding:
- Domain context (AI/ML, Kubernetes, web frameworks, CLI tools, etc.)
- Core functionality vs extension points
- Domain-specific terminology
- File organization patterns

Return JSON with structured search strategy:
{
"primary_terms": ["most_relevant_terms_for_domain"],
"secondary_terms": ["related_concepts"],
"file_hints": ["likely_file_patterns"],
"concept_type": "core_functionality|extension_point|utility|configuration",
"reasoning": "Brief explanation of search strategy"
}

Examples:

Q: "How does quantization work?"
A: {
"primary_terms": ["quantize", "quantization", "int8"],
"secondary_terms": ["precision", "calibration", "weight compression"],
"file_hints": ["**/quant*.py", "**/compress*.py"],
"concept_type": "core_functionality",
"reasoning": "Quantization is core ML optimization, typically in dedicated modules"
}

Q: "How do I configure the operator?"
A: {
"primary_terms": ["ConfigMap", "configuration", "config"],
"secondary_terms": ["environment", "settings", "parameters"],
"file_hints": ["config/samples/*.yaml", "**/config*.go"],
"concept_type": "configuration",
"reasoning": "K8s operators use ConfigMaps for configuration"
}

Q: "How do I add a custom plugin?"
A: {
"primary_terms": ["register", "plugin", "extension"],
"secondary_terms": ["hook", "interface", "registry"],
"file_hints": ["**/plugins/*.py", "**/registry.py"],
"concept_type": "extension_point",
"reasoning": "Plugin systems use registration patterns"
}

Return ONLY valid JSON, no markdown formatting."""

    user_prompt = f"Question: {question}\n\nGenerate search strategy JSON:"

    def _clean_terms(value: Any) -> List[str]:
        """Coerce a JSON field into a list of non-empty, stripped strings."""
        if isinstance(value, str):
            value = [value]
        if not isinstance(value, list):
            return []
        return [t.strip() for t in value if isinstance(t, str) and t.strip()]

    try:
        response = self._llm_generator.generate(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=0.3,
            max_tokens=300  # Increased for JSON response
        )

        # Clean response (remove markdown code fences if present)
        cleaned = response.strip()
        if cleaned.startswith("```json"):
            cleaned = cleaned[7:]
        if cleaned.startswith("```"):
            cleaned = cleaned[3:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        cleaned = cleaned.strip()

        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError as e:
            # Fallback to comma-separated parsing
            logger.warning(f"JSON parsing failed, falling back to comma-separated: {e}")
            queries = [q.strip() for q in response.split(',')]
            queries = [q for q in queries if q and len(q) > 2]
            logger.info(f"LLM generated queries (fallback): {queries}")
            return queries[:5]

        if isinstance(parsed, list):
            # Valid JSON but a bare array: treat its strings as the queries.
            queries = _clean_terms(parsed)
            logger.info(f"LLM generated queries (JSON array): {queries}")
            return queries[:5]

        if not isinstance(parsed, dict):
            logger.warning("LLM response was valid JSON but not an object; no queries extracted")
            return []

        # Extract queries with priority
        queries = _clean_terms(parsed.get("primary_terms"))[:3]
        queries.extend(_clean_terms(parsed.get("secondary_terms"))[:2])

        # Store metadata for ranking
        concept_type = parsed.get("concept_type")
        if isinstance(concept_type, str) and concept_type:
            self._last_concept_type = concept_type

        # Log reasoning for monitoring
        reasoning = parsed.get("reasoning", "No reasoning provided")
        logger.info("LLM search strategy:")
        logger.info(f" Concept type: {self._last_concept_type}")
        logger.info(f" Reasoning: {reasoning}")
        logger.info(f" Queries: {queries}")

        return queries[:5]

    except Exception as e:
        # Best effort: interpretation is optional, so report and return nothing.
        logger.warning(f"LLM query generation failed: {e}")
        return []
442
+
443
+ def _build_code_answer(self, question: str, classification, code_results: List[SearchResult], search_method: str = "keyword") -> str:
444
+ """
445
+ Build an answer based on code search results.
446
+
447
+ Args:
448
+ question: The original question
449
+ classification: Question classification
450
+ code_results: Code search results
451
+ search_method: How results were found ("keyword" or "llm-interpreted")
452
+
453
+ Returns:
454
+ Answer string
455
+ """
456
+ if not code_results:
457
+ suggestion = "Try rephrasing with more specific terms."
458
+ if search_method == "llm-interpreted":
459
+ suggestion = "Even with AI interpretation, I couldn't find relevant code. The codebase might not contain this functionality."
460
+ return f"I couldn't find specific code related to '{question}'. {suggestion}"
461
+
462
+ # Build answer based on question type
463
+ if classification.primary_type == QuestionType.DISCOVERY:
464
+ answer = "Based on the codebase:\n\n"
465
+ for result in code_results[:3]:
466
+ answer += f"• **{result.file_name}** ({result.language}): "
467
+ answer += f"Lines {result.start_line}-{result.end_line}\n"
468
+ if result.chunk_type:
469
+ answer += f" Type: {result.chunk_type}\n"
470
+
471
+ elif classification.primary_type == QuestionType.DEFINITION:
472
+ answer = f"Here's what I found:\n\n"
473
+ result = code_results[0]
474
+ answer += f"**{result.file_name}:{result.start_line}**\n"
475
+ answer += f"```{result.language}\n{result.content[:200]}...\n```"
476
+
477
+ elif classification.primary_type == QuestionType.RELATIONSHIP:
478
+ answer = "Code relationships found:\n\n"
479
+ for result in code_results[:3]:
480
+ answer += f"• {result.file_name}: {result.chunk_type or 'code'}\n"
481
+
482
+ else:
483
+ # Generic answer
484
+ answer = f"Found {len(code_results)} relevant code sections:\n\n"
485
+ for result in code_results[:3]:
486
+ answer += f"• {result.file_name}:{result.start_line}\n"
487
+
488
+ return answer
489
+
490
+ def _search_context(self, question: str) -> List[Tuple[ContextItem, float]]:
491
+ """
492
+ Search external context for relevant information.
493
+
494
+ Args:
495
+ question: Search query
496
+
497
+ Returns:
498
+ List of (ContextItem, relevance_score) tuples
499
+ """
500
+ try:
501
+ return self.context_manager.search_context(question, limit=3)
502
+ except Exception as e:
503
+ logger.warning(f"Context search failed: {e}")
504
+ return []
505
+
506
+ def _enhance_with_context(
507
+ self,
508
+ base_result: ExplorationResult,
509
+ context_results: List[Tuple[ContextItem, float]]
510
+ ) -> Tuple[str, List[Dict[str, Any]]]:
511
+ """
512
+ Enhance the base answer with context information.
513
+
514
+ Args:
515
+ base_result: Original code-based result
516
+ context_results: Relevant context items
517
+
518
+ Returns:
519
+ Tuple of (enhanced answer, rationale points)
520
+ """
521
+ enhanced = base_result.answer
522
+ rationale_points: List[Dict[str, Any]] = []
523
+
524
+ if context_results:
525
+ enhanced, rationale_points = self._restructure_answer_with_context(
526
+ base_result,
527
+ context_results
528
+ )
529
+
530
+ return enhanced, rationale_points
531
+
532
def _restructure_answer_with_context(
    self,
    base_result: ExplorationResult,
    context_results: List[Tuple[ContextItem, float]]
) -> Tuple[str, List[Dict[str, Any]]]:
    """
    Pick the answer builder that matches the question type and run it.

    Reasoning, discovery and relationship questions each have a dedicated
    builder; every other type gets the context appended to the end of the
    answer. A result without a ``question_type`` is treated as discovery.

    Args:
        base_result: Original result
        context_results: Context items to integrate

    Returns:
        Tuple of (restructured answer, rationale entries)
    """
    kind = getattr(base_result, 'question_type', QuestionType.DISCOVERY)

    if kind == QuestionType.REASONING:
        builder = self._build_reasoning_answer_with_context
    elif kind == QuestionType.DISCOVERY:
        builder = self._build_discovery_answer_with_context
    elif kind == QuestionType.RELATIONSHIP:
        builder = self._build_relationship_answer_with_context
    else:
        # For other types, append context at the end
        builder = self._append_context_to_answer

    return builder(base_result, context_results)
559
+
560
def _build_reasoning_answer_with_context(
    self,
    base_result: ExplorationResult,
    context_results: List[Tuple[ContextItem, float]]
) -> Tuple[str, List[Dict[str, Any]]]:
    """Compose a "why" answer: context items first, then implementation pointers.

    Each context item contributes a headline and one or more rationale
    entries depending on its type; up to two code results are then listed as
    the implementation. Rationale evidence from the result's own rationale
    search hits is appended last.

    Returns:
        Tuple of (answer text, rationale entries)
    """
    out = ["Based on code analysis and project context:\n"]
    evidence: List[Dict[str, Any]] = []

    for item, _ in context_results:
        if item.type == "requirements":
            out.append(f"šŸ“„ **Requirement**: {item.get_summary()}")
            # NOTE(review): the fallback entry below is taken to pair with this
            # "acceptance" test (not with the inner loop) - confirm.
            if "acceptance" in item.content.lower():
                for raw in item.content.split('\n'):
                    lowered = raw.lower()
                    if "acceptance" in lowered or "must" in lowered:
                        snippet = raw.strip()
                        out.append(f" → {snippet}")
                        evidence.append(self._make_rationale_entry(item, snippet))
            else:
                evidence.append(self._make_rationale_entry(item, "Requirement informing implementation"))

        elif item.type == "tickets":
            ticket_id = item.metadata.get("ticket_id", "Ticket")
            headline = item.get_summary()
            out.append(f"šŸŽ« **{ticket_id}**: {headline}")
            evidence.append(self._make_rationale_entry(item, headline))
            if "decision" in item.content.lower() or "chose" in item.content.lower():
                out.append(" → Decision documented in ticket")

        elif item.type == "design_docs":
            headline = item.get_summary()
            out.append(f"šŸ“ **Design Doc**: {headline}")
            evidence.append(self._make_rationale_entry(item, headline))

        elif item.type == "decisions":
            headline = item.get_summary()
            out.append(f"šŸ¤” **Decision**: {headline}")
            if "latency" in item.content.lower():
                detail = "Latency requirement referenced"
                out.append(f" → {detail}")
                evidence.append(self._make_rationale_entry(item, detail))
            else:
                evidence.append(self._make_rationale_entry(item, headline))

    # The section header is emitted even when there are no code results.
    out.append("\nšŸ’» **Implementation**:")
    if base_result.code_results:
        for hit in base_result.code_results[:2]:
            location = f"{hit.file_name}:{hit.start_line}"
            out.append(f" • {location} - implements the requirement")
            evidence.append({
                "source": f"{hit.file_path}:{hit.start_line}",
                "note": f"Implements context in {location}",
            })

    # Keep the original wording when it already states a reason.
    if "because" in base_result.answer.lower():
        out.append("\n" + base_result.answer)

    if isinstance(base_result, EnhancedExplorationResult):
        rationale_hits = base_result.rationale_results
    else:
        rationale_hits = []
    augmented, extra_points, _ = self._augment_with_rationale("\n".join(out), rationale_hits, out)
    evidence.extend(extra_points)
    return augmented, evidence
622
+
623
def _build_discovery_answer_with_context(
    self,
    base_result: ExplorationResult,
    context_results: List[Tuple[ContextItem, float]]
) -> Tuple[str, List[Dict[str, Any]]]:
    """Extend a discovery answer with requirement and ticket mentions.

    Requirements whose content mentions "feature" or "component", and
    tickets whose content mentions "implement", are listed under an
    "Additional Context" section; other items are ignored.

    Returns:
        Tuple of (answer text, rationale entries)
    """
    out = [base_result.answer]
    evidence: List[Dict[str, Any]] = []
    additions: List[str] = []

    for item, _ in context_results:
        if item.type == "requirements":
            text = item.content.lower()
            if "feature" in text or "component" in text:
                headline = item.get_summary()
                additions.append(f"šŸ“„ Requirements mention: {headline}")
                evidence.append(self._make_rationale_entry(item, headline))
        elif item.type == "tickets":
            if "implement" in item.content.lower():
                ticket_id = item.metadata.get("ticket_id", "Ticket")
                headline = item.get_summary()
                additions.append(f"šŸŽ« {ticket_id}: {headline}")
                evidence.append(self._make_rationale_entry(item, headline))

    if additions:
        out.append("\nšŸ“š **Additional Context**:")
        for addition in additions:
            out.append(f" {addition}")

    if isinstance(base_result, EnhancedExplorationResult):
        rationale_hits = base_result.rationale_results
    else:
        rationale_hits = []
    augmented, extra_points, _ = self._augment_with_rationale("\n".join(out), rationale_hits, out)
    evidence.extend(extra_points)
    return augmented, evidence
654
+
655
def _build_relationship_answer_with_context(
    self,
    base_result: ExplorationResult,
    context_results: List[Tuple[ContextItem, float]]
) -> Tuple[str, List[Dict[str, Any]]]:
    """Extend a relationship answer with design docs and integration tickets.

    Design docs are always listed; tickets only when their content mentions
    "integration" or "connect". The section header is added whenever any
    context items were supplied.

    Returns:
        Tuple of (answer text, rationale entries)
    """
    out = [base_result.answer]
    evidence: List[Dict[str, Any]] = []

    if context_results:
        out.append("\nšŸ“š **Context for these relationships**:")

        for item, _ in context_results:
            if item.type == "design_docs":
                headline = item.get_summary()
                out.append(f" šŸ“ Architecture: {headline}")
                evidence.append(self._make_rationale_entry(item, headline))
            elif item.type == "tickets":
                text = item.content.lower()
                if "integration" in text or "connect" in text:
                    ticket_id = item.metadata.get("ticket_id", "Ticket")
                    headline = item.get_summary()
                    out.append(f" šŸŽ« {ticket_id}: {headline}")
                    evidence.append(self._make_rationale_entry(item, headline))

    if isinstance(base_result, EnhancedExplorationResult):
        rationale_hits = base_result.rationale_results
    else:
        rationale_hits = []
    augmented, extra_points, _ = self._augment_with_rationale("\n".join(out), rationale_hits, out)
    evidence.extend(extra_points)
    return augmented, evidence
683
+
684
+ def _augment_with_rationale(
685
+ self,
686
+ base_answer: str,
687
+ rationale_results: List[SearchResult],
688
+ existing_lines: Optional[List[str]] = None
689
+ ) -> Tuple[str, List[Dict[str, Any]], List[str]]:
690
+ """Append rationale evidence from search results to the answer."""
691
+
692
+ if not rationale_results:
693
+ return base_answer, [], []
694
+
695
+ lines = (existing_lines[:] if existing_lines is not None else [base_answer.rstrip()])
696
+ if lines and lines[-1] != "":
697
+ lines.append("")
698
+ lines.append("🧠 **Rationale Evidence:**")
699
+ points: List[Dict[str, Any]] = []
700
+ snippets: List[str] = []
701
+
702
+ for result in rationale_results[:3]:
703
+ snippet = (result.content or "").strip().split("\n", 1)[0][:220]
704
+ if not snippet:
705
+ snippet = getattr(result, "chunk_name", "") or str(getattr(result, "chunk_type", "rationale"))
706
+ source = result.file_path or "rationale"
707
+ lines.append(f"• {source}: {snippet}")
708
+ points.append({
709
+ "source": f"{result.file_path}:{result.start_line}-{result.end_line}" if result.file_path else source,
710
+ "note": snippet,
711
+ "type": getattr(result, "chunk_type", "rationale"),
712
+ })
713
+ snippets.append(snippet)
714
+
715
+ return "\n".join(lines), points, snippets
716
+
717
+ def _separate_rationale(
718
+ self,
719
+ results: List[SearchResult]
720
+ ) -> Tuple[List[SearchResult], List[SearchResult]]:
721
+ """Split raw search results into code vs rationale buckets."""
722
+
723
+ code_results: List[SearchResult] = []
724
+ rationale_results: List[SearchResult] = []
725
+
726
+ for result in results or []:
727
+ if self._is_rationale_result(result):
728
+ rationale_results.append(result)
729
+ else:
730
+ code_results.append(result)
731
+
732
+ return code_results[:5], rationale_results[:3]
733
+
734
+ @staticmethod
735
+ def _is_rationale_result(result: SearchResult) -> bool:
736
+ chunk_type = str(getattr(result, "chunk_type", "") or "")
737
+ return chunk_type.startswith("rationale")
738
+
739
def _append_context_to_answer(
    self,
    base_result: ExplorationResult,
    context_results: List[Tuple[ContextItem, float]]
) -> Tuple[str, List[Dict[str, Any]]]:
    """Fallback builder: list the first two context items after the answer.

    Returns:
        Tuple of (answer text, rationale entries)
    """
    out = [base_result.answer]
    evidence: List[Dict[str, Any]] = []

    if context_results:
        out.append("\nšŸ“š **Related Context**:")
        for item, _ in context_results[:2]:
            label = self._format_context_source(item)
            headline = item.get_summary()
            out.append(f" • {label}: {headline}")
            evidence.append(self._make_rationale_entry(item, headline))

    if isinstance(base_result, EnhancedExplorationResult):
        rationale_hits = base_result.rationale_results
    else:
        rationale_hits = []
    augmented, extra_points, _ = self._augment_with_rationale("\n".join(out), rationale_hits, out)
    evidence.extend(extra_points)

    return augmented, evidence
761
+
762
+ def _format_context_source(self, item: ContextItem) -> str:
763
+ """Format the source of a context item for display."""
764
+ if item.type == "tickets":
765
+ return f"šŸŽ« {item.metadata.get('ticket_id', 'Ticket')}"
766
+ elif item.type == "requirements":
767
+ return "šŸ“„ Requirements"
768
+ elif item.type == "design_docs":
769
+ return "šŸ“ Design"
770
+ elif item.type == "decisions":
771
+ return "šŸ¤” Decision"
772
+ else:
773
+ return "šŸ“‹ Context"
774
+
775
+ def _extract_citations(self, context_results: List[Tuple[ContextItem, float]]) -> Dict[str, str]:
776
+ """
777
+ Extract citations from context results for reference.
778
+
779
+ Args:
780
+ context_results: Context items with scores
781
+
782
+ Returns:
783
+ Dictionary of source -> citation
784
+ """
785
+ citations = {}
786
+ for item, score in context_results:
787
+ source = self._format_context_source(item)
788
+
789
+ # Extract a meaningful citation
790
+ if item.type == "tickets" and "ticket_id" in item.metadata:
791
+ citations[item.metadata["ticket_id"]] = item.get_summary()
792
+ elif item.type == "requirements" and item.source:
793
+ citations[item.source] = item.get_summary()
794
+ else:
795
+ citations[f"{item.type}_{item.id[:8]}"] = item.get_summary()
796
+
797
+ return citations
798
+
799
+ def _make_rationale_entry(self, item: ContextItem, note: str) -> Dict[str, Any]:
800
+ """Create a normalized rationale entry leveraging context metadata."""
801
+ return {
802
+ "context_id": item.id,
803
+ "source": item.source or self._format_context_source(item),
804
+ "type": item.type,
805
+ "summary": item.get_summary(),
806
+ "note": note,
807
+ }
808
+
809
def add_context_interactive(self) -> bool:
    """
    Interactive method to add context during Q&A session.

    Prints a four-option menu, reads one choice from standard input and
    delegates to the matching ``_add_*_context`` helper.

    Returns:
        True if context was added, False if cancelled
    """
    print("\n📋 Add Context")
    print("1. Paste text directly")
    print("2. Add file/document")
    print("3. Add Jira ticket (manual entry)")
    print("4. Cancel")

    choice = input("\nChoice [1-4]: ").strip()

    if choice == "1":
        return self._add_text_context()
    if choice == "2":
        return self._add_file_context()
    if choice == "3":
        return self._add_ticket_context()
    # "4" and any unrecognised answer both cancel.
    return False
832
+
833
+ def _add_text_context(self) -> bool:
834
+ """Add context by pasting text."""
835
+ print("\nPaste your text (enter '###' on a new line when done):")
836
+ lines = []
837
+ while True:
838
+ line = input()
839
+ if line.strip() == "###":
840
+ break
841
+ lines.append(line)
842
+
843
+ if not lines:
844
+ print("No text provided.")
845
+ return False
846
+
847
+ content = "\n".join(lines)
848
+ title = input("Brief title for this context: ").strip()
849
+
850
+ context_type = self._select_context_type()
851
+
852
+ try:
853
+ context_id = self.context_manager.add_context(
854
+ context_type=context_type,
855
+ content=content,
856
+ metadata={"title": title} if title else {},
857
+ source="manual_entry"
858
+ )
859
+ print(f"āœ… Added context: {context_id}")
860
+ return True
861
+ except Exception as e:
862
+ print(f"āŒ Failed to add context: {e}")
863
+ return False
864
+
865
+ def _add_file_context(self) -> bool:
866
+ """Add context from a file."""
867
+ file_path = input("File path: ").strip()
868
+ context_type = self._select_context_type()
869
+
870
+ try:
871
+ context_id = self.context_manager.add_file(file_path, context_type)
872
+ print(f"āœ… Added file context: {context_id}")
873
+ return True
874
+ except Exception as e:
875
+ print(f"āŒ Failed to add file: {e}")
876
+ return False
877
+
878
+ def _add_ticket_context(self) -> bool:
879
+ """Add a ticket as context."""
880
+ ticket_id = input("Ticket ID (e.g., PROJ-123): ").strip()
881
+ summary = input("Summary: ").strip()
882
+ print("Description (enter '###' on a new line when done):")
883
+
884
+ lines = []
885
+ while True:
886
+ line = input()
887
+ if line.strip() == "###":
888
+ break
889
+ lines.append(line)
890
+
891
+ description = "\n".join(lines)
892
+
893
+ try:
894
+ context_id = self.context_manager.add_jira_ticket(
895
+ ticket_id=ticket_id,
896
+ summary=summary,
897
+ description=description
898
+ )
899
+ print(f"āœ… Added ticket: {context_id}")
900
+ return True
901
+ except Exception as e:
902
+ print(f"āŒ Failed to add ticket: {e}")
903
+ return False
904
+
905
+ def _select_context_type(self) -> str:
906
+ """Helper to select context type."""
907
+ print("\nContext type:")
908
+ print("1. Requirements")
909
+ print("2. Design document")
910
+ print("3. Decision record")
911
+ print("4. Custom/Other")
912
+
913
+ choice = input("Choice [1-4]: ").strip()
914
+
915
+ type_map = {
916
+ "1": "requirements",
917
+ "2": "design_docs",
918
+ "3": "decisions",
919
+ "4": "custom"
920
+ }
921
+
922
+ return type_map.get(choice, "custom")
923
+
924
def get_context_summary(self) -> Dict[str, Any]:
    """Return whatever summary the context manager reports for the loaded context."""
    manager = self.context_manager
    return manager.get_summary()
927
+
928
def get_query_metrics(self) -> QueryMetrics:
    """Expose the explorer's query-interpretation metrics object for monitoring."""
    collected = self.metrics
    return collected
931
+
932
def print_metrics_report(self):
    """Ask the metrics object to print its query-interpretation report."""
    metrics = self.metrics
    metrics.print_report()
935
+
936
def save_session(self, file_path: str):
    """
    Save the current session including context.

    Only the external context is written today; the conversation itself is
    not persisted by this method. The context goes to a companion file next
    to ``file_path``: a trailing ``.json`` is replaced by ``_context.json``,
    and any other path simply gets ``_context.json`` appended.

    Args:
        file_path: Path to save session to (``str`` or path-like)
    """
    session_path = str(file_path)
    # Rewrite only a trailing ".json". str.replace would also rewrite ".json"
    # inside directory names and would leave an extension-less path
    # unchanged, making the context file collide with the session path.
    if session_path.endswith(".json"):
        context_file = session_path[:-len(".json")] + "_context.json"
    else:
        context_file = session_path + "_context.json"

    # Save context
    self.context_manager.save_to_file(context_file)

    # Save conversation (could be extended)
    logger.info(f"Session saved to {file_path} and {context_file}")

def load_session(self, file_path: str):
    """
    Load a previous session including context.

    The companion context file is located with the same rule that
    ``save_session`` uses: a trailing ``.json`` becomes ``_context.json``,
    otherwise ``_context.json`` is appended. A missing context file is
    logged as a warning and is not an error.

    Args:
        file_path: Path to load session from (``str`` or path-like)
    """
    session_path = str(file_path)
    # Must stay in step with save_session so both resolve the same file.
    if session_path.endswith(".json"):
        context_file = session_path[:-len(".json")] + "_context.json"
    else:
        context_file = session_path + "_context.json"

    # Load context
    try:
        self.context_manager.load_from_file(context_file)
        logger.info(f"Session loaded from {file_path} and {context_file}")
    except FileNotFoundError:
        logger.warning(f"No saved context found at {context_file}")
964
+
965
+
966
if __name__ == "__main__":
    # Manual smoke test for the context-aware explorer: build it around a
    # stand-in searcher, load two context items, ask one question and print
    # the outcome.
    print("Testing InteractiveExplorerWithContext")
    print("=" * 50)

    # Stand-in for the real searcher: it never returns code hits, so the demo
    # needs no index. The real HybridSearcher used to be imported here but was
    # never referenced; a relative import at this point also fails when the
    # file is executed directly as a script.
    class MockSearcher:
        def search(self, query, limit=5):
            return []

    # Create explorer with context
    context_manager = ExternalContextManager()
    explorer = InteractiveExplorerWithContext(MockSearcher(), context_manager)

    # Add some test context
    context_manager.add_context(
        context_type="requirements",
        content="The system must support 10,000 concurrent users with Redis-based sessions",
        metadata={"title": "Scaling Requirements"},
        source="requirements.md"
    )

    context_manager.add_jira_ticket(
        ticket_id="PROJ-123",
        summary="Implement distributed session management",
        description="Need to support multiple server instances with shared session state",
        additional_fields={"priority": "High"}
    )

    # Test asking a question
    result = explorer.ask("Why do we use Redis for sessions?")
    print(f"\nQuestion: {result.question}")
    print(f"Answer: {result.format_with_context()}")
    print(f"Confidence: {result.confidence}")

    # Show context summary
    summary = explorer.get_context_summary()
    print(f"\nContext loaded: {summary['total_items']} items")

    print("\n✅ Enhanced explorer test complete!")