mcp-vector-search 1.0.3__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +48 -1
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +35 -0
  7. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  8. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  9. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  10. mcp_vector_search/analysis/collectors/smells.py +325 -0
  11. mcp_vector_search/analysis/debt.py +516 -0
  12. mcp_vector_search/analysis/interpretation.py +685 -0
  13. mcp_vector_search/analysis/metrics.py +74 -1
  14. mcp_vector_search/analysis/reporters/__init__.py +3 -1
  15. mcp_vector_search/analysis/reporters/console.py +424 -0
  16. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  17. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  18. mcp_vector_search/analysis/storage/__init__.py +93 -0
  19. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  20. mcp_vector_search/analysis/storage/schema.py +245 -0
  21. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  22. mcp_vector_search/analysis/trends.py +308 -0
  23. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  24. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  25. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  26. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  27. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  28. mcp_vector_search/cli/commands/analyze.py +665 -11
  29. mcp_vector_search/cli/commands/chat.py +193 -0
  30. mcp_vector_search/cli/commands/index.py +600 -2
  31. mcp_vector_search/cli/commands/index_background.py +467 -0
  32. mcp_vector_search/cli/commands/search.py +194 -1
  33. mcp_vector_search/cli/commands/setup.py +64 -13
  34. mcp_vector_search/cli/commands/status.py +302 -3
  35. mcp_vector_search/cli/commands/visualize/cli.py +26 -10
  36. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +8 -4
  37. mcp_vector_search/cli/commands/visualize/graph_builder.py +167 -234
  38. mcp_vector_search/cli/commands/visualize/server.py +304 -15
  39. mcp_vector_search/cli/commands/visualize/templates/base.py +60 -6
  40. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2100 -65
  41. mcp_vector_search/cli/commands/visualize/templates/styles.py +1297 -88
  42. mcp_vector_search/cli/didyoumean.py +5 -0
  43. mcp_vector_search/cli/main.py +16 -5
  44. mcp_vector_search/cli/output.py +134 -5
  45. mcp_vector_search/config/thresholds.py +89 -1
  46. mcp_vector_search/core/__init__.py +16 -0
  47. mcp_vector_search/core/database.py +39 -2
  48. mcp_vector_search/core/embeddings.py +24 -0
  49. mcp_vector_search/core/git.py +380 -0
  50. mcp_vector_search/core/indexer.py +445 -84
  51. mcp_vector_search/core/llm_client.py +9 -4
  52. mcp_vector_search/core/models.py +88 -1
  53. mcp_vector_search/core/relationships.py +473 -0
  54. mcp_vector_search/core/search.py +1 -1
  55. mcp_vector_search/mcp/server.py +795 -4
  56. mcp_vector_search/parsers/python.py +285 -5
  57. mcp_vector_search/utils/gitignore.py +0 -3
  58. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +3 -2
  59. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/RECORD +62 -39
  60. mcp_vector_search/cli/commands/visualize.py.original +0 -2536
  61. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +0 -0
  62. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +0 -0
  63. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0

mcp_vector_search/core/llm_client.py
@@ -15,7 +15,7 @@ from .exceptions import SearchError
 LLMProvider = Literal["openai", "openrouter"]
 
 # Type alias for intent
-IntentType = Literal["find", "answer"]
+IntentType = Literal["find", "answer", "analyze"]
 
 
 class LLMClient:
@@ -480,7 +480,7 @@ Select the top {top_n} most relevant results:"""
             query: User's natural language query
 
         Returns:
-            Intent type: "find" or "answer"
+            Intent type: "find", "answer", or "analyze"
 
         Raises:
             SearchError: If API call fails
@@ -493,7 +493,12 @@ Select the top {top_n} most relevant results:"""
 2. "answer" - User wants an explanation/answer about the codebase
 Examples: "what does this do", "how does X work", "explain the architecture", "why is X used"
 
-Return ONLY the word "find" or "answer" with no other text."""
+3. "analyze" - User wants analysis of code quality, metrics, complexity, or smells
+Examples: "what's complex", "code smells", "cognitive complexity", "quality issues",
+"dependencies", "coupling", "circular dependencies", "getting worse", "improving",
+"analyze the complexity", "find the worst code", "most complex functions"
+
+Return ONLY the word "find", "answer", or "analyze" with no other text."""
 
         user_prompt = f"""Query: {query}
 
@@ -512,7 +517,7 @@ Intent:"""
         )
         intent = content.strip().lower()
 
-        if intent not in ("find", "answer"):
+        if intent not in ("find", "answer", "analyze"):
             # Default to find if unclear
             logger.warning(
                 f"Unclear intent '{intent}' for query '{query}', defaulting to 'find'"

mcp_vector_search/core/models.py
@@ -151,6 +151,33 @@ class SearchResult(BaseModel):
         default=False, description="True if file no longer exists (stale index)"
     )
 
+    # Quality metrics (from structural analysis)
+    cognitive_complexity: int | None = Field(
+        default=None, description="Cognitive complexity score"
+    )
+    cyclomatic_complexity: int | None = Field(
+        default=None, description="Cyclomatic complexity score"
+    )
+    max_nesting_depth: int | None = Field(
+        default=None, description="Maximum nesting depth"
+    )
+    parameter_count: int | None = Field(
+        default=None, description="Number of function parameters"
+    )
+    lines_of_code: int | None = Field(
+        default=None, description="Lines of code in chunk"
+    )
+    complexity_grade: str | None = Field(
+        default=None, description="Complexity grade (A-F)"
+    )
+    code_smells: list[str] = Field(default=[], description="Detected code smells")
+    smell_count: int | None = Field(
+        default=None, description="Number of code smells detected"
+    )
+    quality_score: int | None = Field(
+        default=None, description="Overall quality score (0-100)"
+    )
+
     class Config:
         arbitrary_types_allowed = True
 
@@ -164,9 +191,47 @@ class SearchResult(BaseModel):
         """Get a human-readable location string."""
         return f"{self.file_path}:{self.start_line}-{self.end_line}"
 
+    def calculate_quality_score(self) -> int:
+        """Calculate quality score based on complexity grade and code smells.
+
+        Formula:
+        - Base: complexity_grade (A=100, B=80, C=60, D=40, F=20)
+        - Penalty: -10 per code smell
+        - Bonus: +20 if no smells (already factored into base if no smells)
+
+        Returns:
+            Quality score (0-100), or None if no quality metrics available
+        """
+        # If no quality metrics, return None (will be stored in quality_score field)
+        if self.complexity_grade is None:
+            return None
+
+        # Map complexity grade to base score
+        grade_scores = {
+            "A": 100,
+            "B": 80,
+            "C": 60,
+            "D": 40,
+            "F": 20,
+        }
+
+        base_score = grade_scores.get(self.complexity_grade, 0)
+
+        # Apply smell penalty
+        smell_count = self.smell_count or 0
+        penalty = smell_count * 10
+
+        # Calculate final score (with bonus for no smells already in base)
+        # Bonus: +20 if no smells (effectively makes A without smells = 100+20 = 120, capped at 100)
+        bonus = 20 if smell_count == 0 else 0
+        quality_score = base_score - penalty + bonus
+
+        # Clamp to 0-100 range
+        return max(0, min(100, quality_score))
+
     def to_dict(self) -> dict[str, Any]:
         """Convert to dictionary for serialization."""
-        return {
+        result = {
             "content": self.content,
             "file_path": str(self.file_path),
             "start_line": self.start_line,
@@ -184,6 +249,28 @@ class SearchResult(BaseModel):
             "line_count": self.line_count,
         }
 
+        # Add quality metrics if available
+        if self.cognitive_complexity is not None:
+            result["cognitive_complexity"] = self.cognitive_complexity
+        if self.cyclomatic_complexity is not None:
+            result["cyclomatic_complexity"] = self.cyclomatic_complexity
+        if self.max_nesting_depth is not None:
+            result["max_nesting_depth"] = self.max_nesting_depth
+        if self.parameter_count is not None:
+            result["parameter_count"] = self.parameter_count
+        if self.lines_of_code is not None:
+            result["lines_of_code"] = self.lines_of_code
+        if self.complexity_grade is not None:
+            result["complexity_grade"] = self.complexity_grade
+        if self.code_smells:
+            result["code_smells"] = self.code_smells
+        if self.smell_count is not None:
+            result["smell_count"] = self.smell_count
+        if self.quality_score is not None:
+            result["quality_score"] = self.quality_score
+
+        return result
+
 
 class IndexStats(BaseModel):
     """Statistics about the search index."""

mcp_vector_search/core/relationships.py (new file)
@@ -0,0 +1,473 @@
+"""Pre-computed relationship storage for instant visualization.
+
+This module handles computing and storing code chunk relationships at index time,
+eliminating the expensive computation during visualization startup.
+
+Relationships stored:
+- Semantic relationships: Which chunks are similar (based on embeddings)
+- Caller relationships: Which chunks call which (based on AST analysis)
+"""
+
+import ast
+import asyncio
+import json
+import time
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from loguru import logger
+from rich.console import Console
+from rich.progress import (
+    BarColumn,
+    Progress,
+    SpinnerColumn,
+    TaskProgressColumn,
+    TextColumn,
+)
+
+from .models import CodeChunk
+
+console = Console()
+
+
+def extract_function_calls(code: str) -> set[str]:
+    """Extract actual function calls from Python code using AST.
+
+    Returns set of function names that are actually called (not just mentioned).
+    Avoids false positives from comments, docstrings, and string literals.
+
+    Args:
+        code: Python source code to analyze
+
+    Returns:
+        Set of function names that are actually called in the code
+    """
+    calls = set()
+    try:
+        tree = ast.parse(code)
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Call):
+                # Handle direct calls: foo()
+                if isinstance(node.func, ast.Name):
+                    calls.add(node.func.id)
+                # Handle method calls: obj.foo() - extract 'foo'
+                elif isinstance(node.func, ast.Attribute):
+                    calls.add(node.func.attr)
+        return calls
+    except SyntaxError:
+        # If code can't be parsed (incomplete, etc.), fall back to empty set
+        # This is safer than false positives from naive substring matching
+        return set()
+
+
+def extract_chunk_name(content: str, fallback: str = "chunk") -> str:
+    """Extract first meaningful word from chunk content for labeling.
+
+    Args:
+        content: The chunk's code content
+        fallback: Fallback name if no meaningful word found
+
+    Returns:
+        First meaningful identifier found in the content
+    """
+    import re
+
+    # Skip common keywords that aren't meaningful as chunk labels
+    skip_words = {
+        "def",
+        "class",
+        "function",
+        "const",
+        "let",
+        "var",
+        "import",
+        "from",
+        "return",
+        "if",
+        "else",
+        "elif",
+        "for",
+        "while",
+        "try",
+        "except",
+        "finally",
+        "with",
+        "as",
+        "async",
+        "await",
+        "yield",
+        "self",
+        "this",
+        "true",
+        "false",
+        "none",
+        "null",
+        "undefined",
+        "public",
+        "private",
+        "protected",
+        "static",
+        "export",
+        "default",
+    }
+
+    # Find all words (alphanumeric + underscore, at least 2 chars)
+    words = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]+\b", content)
+
+    for word in words:
+        if word.lower() not in skip_words:
+            return word
+
+    return fallback
+
+
+class RelationshipStore:
+    """Store and load pre-computed chunk relationships.
+
+    Relationships are stored in .mcp-vector-search/relationships.json and include:
+    - Semantic links (similar chunks based on embeddings)
+    - Caller links (which chunks call which)
+    - Metadata (chunk count, computation time, version)
+    """
+
+    def __init__(self, project_root: Path):
+        """Initialize relationship store.
+
+        Args:
+            project_root: Root directory of the project
+        """
+        self.project_root = project_root
+        self.store_path = project_root / ".mcp-vector-search" / "relationships.json"
+
+    async def compute_and_store(
+        self,
+        chunks: list[CodeChunk],
+        database: Any,
+        max_concurrent_queries: int = 50,
+        background: bool = False,
+    ) -> dict[str, Any]:
+        """Compute relationships and save to disk.
+
+        This is called during indexing to pre-compute expensive relationships.
+        NOTE: Caller relationships are now lazy-loaded via /api/callers/{chunk_id}
+        to avoid the expensive O(n²) computation at startup.
+
+        Args:
+            chunks: List of all code chunks
+            database: Vector database instance for semantic search
+            max_concurrent_queries: Maximum number of concurrent database queries (default: 50)
+            background: If True, skip computation and return immediately (for background processing)
+
+        Returns:
+            Dictionary with relationship statistics
+        """
+        logger.info("Computing relationships for visualization...")
+        start_time = time.time()
+
+        # Filter to code chunks only
+        code_chunks = [
+            c for c in chunks if c.chunk_type in ["function", "method", "class"]
+        ]
+
+        # If background mode, create empty relationships file and return
+        # Actual computation will happen in background task
+        if background:
+            relationships = {
+                "version": "1.1",
+                "computed_at": datetime.now(UTC).isoformat(),
+                "chunk_count": len(chunks),
+                "code_chunk_count": len(code_chunks),
+                "computation_time_seconds": 0,
+                "semantic": [],
+                "callers": {},
+                "status": "pending",  # Mark as pending background computation
+            }
+
+            # Save empty file
+            self.store_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(self.store_path, "w") as f:
+                json.dump(relationships, f, indent=2)
+
+            logger.info("✓ Relationships marked for background computation")
+            return {
+                "semantic_links": 0,
+                "caller_relationships": 0,
+                "computation_time": 0,
+                "background": True,
+            }
+
+        # Compute semantic relationships only
+        # Caller relationships are lazy-loaded on-demand via API
+        logger.info(
+            f"Computing semantic relationships for {len(code_chunks)} chunks "
+            f"(max {max_concurrent_queries} concurrent queries)..."
+        )
+        semantic_links = await self._compute_semantic_relationships(
+            code_chunks, database, max_concurrent_queries
+        )
+
+        elapsed = time.time() - start_time
+
+        # Build relationship data (no caller_map - it's lazy loaded)
+        relationships = {
+            "version": "1.1",  # Version bump for lazy callers
+            "computed_at": datetime.now(UTC).isoformat(),
+            "chunk_count": len(chunks),
+            "code_chunk_count": len(code_chunks),
+            "computation_time_seconds": elapsed,
+            "semantic": semantic_links,
+            "callers": {},  # Empty - loaded on-demand via /api/callers/{chunk_id}
+            "status": "complete",
+        }
+
+        # Save to disk
+        self.store_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(self.store_path, "w") as f:
+            json.dump(relationships, f, indent=2)
+
+        logger.info(
+            f"✓ Computed {len(semantic_links)} semantic links in {elapsed:.1f}s "
+            "(callers lazy-loaded on-demand)"
+        )
+
+        return {
+            "semantic_links": len(semantic_links),
+            "caller_relationships": 0,  # Now lazy-loaded
+            "computation_time": elapsed,
+        }
+
+    async def _compute_semantic_relationships(
+        self,
+        code_chunks: list[CodeChunk],
+        database: Any,
+        max_concurrent_queries: int = 50,
+    ) -> list[dict[str, Any]]:
+        """Compute semantic similarity relationships between chunks using async parallel processing.
+
+        Args:
+            code_chunks: List of code chunks (functions, methods, classes)
+            database: Vector database for similarity search
+            max_concurrent_queries: Maximum number of concurrent database queries (default: 50)
+
+        Returns:
+            List of semantic link dictionaries
+        """
+        semantic_links = []
+        semaphore = asyncio.Semaphore(max_concurrent_queries)
+        completed_count = 0
+        total_chunks = len(code_chunks)
+
+        # Use Rich progress bar
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[cyan]Computing semantic relationships...[/cyan]"),
+            BarColumn(bar_width=40),
+            TaskProgressColumn(),
+            TextColumn("[dim]{task.completed}/{task.total} chunks[/dim]"),
+            console=console,
+            transient=False,
+        ) as progress:
+            task = progress.add_task("semantic", total=total_chunks)
+
+            async def process_chunk(chunk: CodeChunk) -> list[dict[str, Any]]:
+                """Process a single chunk and return its semantic links."""
+                nonlocal completed_count
+
+                async with semaphore:
+                    try:
+                        # Search for similar chunks
+                        similar_results = await database.search(
+                            query=chunk.content[:500],  # First 500 chars
+                            limit=6,  # Get 6 (exclude self = 5)
+                            similarity_threshold=0.3,
+                        )
+
+                        chunk_links = []
+                        source_chunk_id = chunk.chunk_id or chunk.id
+
+                        # Filter out self and create links
+                        for result in similar_results:
+                            target_chunk = next(
+                                (
+                                    c
+                                    for c in code_chunks
+                                    if str(c.file_path) == str(result.file_path)
+                                    and c.start_line == result.start_line
+                                    and c.end_line == result.end_line
+                                ),
+                                None,
+                            )
+
+                            if not target_chunk:
+                                continue
+
+                            target_chunk_id = target_chunk.chunk_id or target_chunk.id
+
+                            # Skip self-references
+                            if target_chunk_id == source_chunk_id:
+                                continue
+
+                            # Add semantic link
+                            if result.similarity_score >= 0.2:
+                                chunk_links.append(
+                                    {
+                                        "source": source_chunk_id,
+                                        "target": target_chunk_id,
+                                        "type": "semantic",
+                                        "similarity": result.similarity_score,
+                                    }
+                                )
+
+                            # Only keep top 5 per chunk
+                            if len(chunk_links) >= 5:
+                                break
+
+                        # Update progress
+                        completed_count += 1
+                        progress.update(task, completed=completed_count)
+
+                        return chunk_links
+
+                    except Exception as e:
+                        logger.debug(
+                            f"Failed to compute semantic for {chunk.chunk_id}: {e}"
+                        )
+                        completed_count += 1
+                        progress.update(task, completed=completed_count)
+                        return []
+
+            # Process all chunks in parallel
+            tasks = [process_chunk(chunk) for chunk in code_chunks]
+            results = await asyncio.gather(*tasks, return_exceptions=True)
+
+            # Flatten results and handle exceptions
+            for result in results:
+                if isinstance(result, Exception):
+                    logger.debug(f"Task failed with exception: {result}")
+                    continue
+                semantic_links.extend(result)
+
+        return semantic_links
+
+    def _compute_caller_relationships(
+        self, chunks: list[CodeChunk]
+    ) -> dict[str, list[dict[str, Any]]]:
+        """Compute which chunks call which other chunks.
+
+        Args:
+            chunks: List of all code chunks
+
+        Returns:
+            Map of chunk_id -> list of caller info
+        """
+        caller_map = {}
+
+        code_chunks = [
+            c for c in chunks if c.chunk_type in ["function", "method", "class"]
+        ]
+
+        logger.debug(f"Processing {len(code_chunks)} code chunks for callers...")
+
+        for chunk_idx, chunk in enumerate(code_chunks):
+            if chunk_idx % 50 == 0:  # Progress
+                logger.debug(f"Callers: {chunk_idx}/{len(code_chunks)} chunks")
+
+            chunk_id = chunk.chunk_id or chunk.id
+            file_path = str(chunk.file_path)
+            function_name = chunk.function_name or chunk.class_name
+
+            if not function_name:
+                continue
+
+            # Search other chunks that reference this function
+            for other_chunk in chunks:
+                other_file_path = str(other_chunk.file_path)
+
+                # Only track EXTERNAL callers (different file)
+                if other_file_path == file_path:
+                    continue
+
+                # Extract actual function calls using AST
+                actual_calls = extract_function_calls(other_chunk.content)
+
+                # Check if this function is actually called
+                if function_name in actual_calls:
+                    other_chunk_id = other_chunk.chunk_id or other_chunk.id
+
+                    # Generate meaningful caller name
+                    other_name = other_chunk.function_name or other_chunk.class_name
+                    if not other_name:
+                        other_name = extract_chunk_name(
+                            other_chunk.content,
+                            fallback=f"chunk_{other_chunk.start_line}",
+                        )
+
+                    # Skip __init__ functions as callers (noise)
+                    if other_name == "__init__":
+                        continue
+
+                    if chunk_id not in caller_map:
+                        caller_map[chunk_id] = []
+
+                    # Store caller information
+                    caller_map[chunk_id].append(
+                        {
+                            "file": other_file_path,
+                            "chunk_id": other_chunk_id,
+                            "name": other_name,
+                            "type": other_chunk.chunk_type,
+                        }
+                    )
+
+                    logger.debug(
+                        f"Found call: {other_name} ({other_file_path}) -> "
+                        f"{function_name} ({file_path})"
+                    )
+
+        return caller_map
+
+    def load(self) -> dict[str, Any]:
+        """Load pre-computed relationships from disk.
+
+        Returns:
+            Dictionary with semantic and caller relationships, or empty structure if not found
+        """
+        if not self.store_path.exists():
+            logger.warning(
+                f"No pre-computed relationships found at {self.store_path}. "
+                "Run 'mcp-vector-search index' to compute relationships."
+            )
+            return {"semantic": [], "callers": {}}
+
+        try:
+            with open(self.store_path) as f:
+                data = json.load(f)
+
+            logger.info(
+                f"✓ Loaded {len(data.get('semantic', []))} semantic links and "
+                f"{sum(len(callers) for callers in data.get('callers', {}).values())} "
+                f"caller relationships (computed {data.get('computed_at', 'unknown')})"
+            )
+
+            return data
+        except Exception as e:
+            logger.error(f"Failed to load relationships: {e}")
+            return {"semantic": [], "callers": {}}
+
+    def exists(self) -> bool:
+        """Check if pre-computed relationships exist.
+
+        Returns:
+            True if relationships file exists
+        """
+        return self.store_path.exists()
+
+    def invalidate(self) -> None:
+        """Delete stored relationships (called when index changes).
+
+        This forces re-computation on next full index.
+        """
+        if self.store_path.exists():
+            self.store_path.unlink()
+            logger.debug("Invalidated pre-computed relationships")

mcp_vector_search/core/search.py
@@ -217,7 +217,7 @@ class SemanticSearchEngine:
 
         except BaseException as e:
             # Re-raise system exceptions we should never catch
-            if isinstance(e, (KeyboardInterrupt, SystemExit, GeneratorExit)):
+            if isinstance(e, KeyboardInterrupt | SystemExit | GeneratorExit):
                 raise
 
             last_error = e
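
The search.py change replaces a tuple of exception types with a PEP 604 union. Since Python 3.10, isinstance() accepts X | Y unions directly, so the two spellings are equivalent:

    e = KeyboardInterrupt()
    assert isinstance(e, (KeyboardInterrupt, SystemExit, GeneratorExit))
    assert isinstance(e, KeyboardInterrupt | SystemExit | GeneratorExit)  # Python 3.10+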