mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +111 -0
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +74 -0
  7. mcp_vector_search/analysis/collectors/base.py +164 -0
  8. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  9. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  10. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  11. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  12. mcp_vector_search/analysis/collectors/smells.py +325 -0
  13. mcp_vector_search/analysis/debt.py +516 -0
  14. mcp_vector_search/analysis/interpretation.py +685 -0
  15. mcp_vector_search/analysis/metrics.py +414 -0
  16. mcp_vector_search/analysis/reporters/__init__.py +7 -0
  17. mcp_vector_search/analysis/reporters/console.py +646 -0
  18. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  19. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  20. mcp_vector_search/analysis/storage/__init__.py +93 -0
  21. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  22. mcp_vector_search/analysis/storage/schema.py +245 -0
  23. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  24. mcp_vector_search/analysis/trends.py +308 -0
  25. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  26. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  27. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  28. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  29. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  30. mcp_vector_search/cli/commands/analyze.py +1062 -0
  31. mcp_vector_search/cli/commands/chat.py +1455 -0
  32. mcp_vector_search/cli/commands/index.py +621 -5
  33. mcp_vector_search/cli/commands/index_background.py +467 -0
  34. mcp_vector_search/cli/commands/init.py +13 -0
  35. mcp_vector_search/cli/commands/install.py +597 -335
  36. mcp_vector_search/cli/commands/install_old.py +8 -4
  37. mcp_vector_search/cli/commands/mcp.py +78 -6
  38. mcp_vector_search/cli/commands/reset.py +68 -26
  39. mcp_vector_search/cli/commands/search.py +224 -8
  40. mcp_vector_search/cli/commands/setup.py +1184 -0
  41. mcp_vector_search/cli/commands/status.py +339 -5
  42. mcp_vector_search/cli/commands/uninstall.py +276 -357
  43. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  44. mcp_vector_search/cli/commands/visualize/cli.py +292 -0
  45. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  46. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  47. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
  48. mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
  49. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  50. mcp_vector_search/cli/commands/visualize/server.py +600 -0
  51. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  52. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  53. mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
  54. mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
  55. mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
  56. mcp_vector_search/cli/didyoumean.py +27 -2
  57. mcp_vector_search/cli/main.py +127 -160
  58. mcp_vector_search/cli/output.py +158 -13
  59. mcp_vector_search/config/__init__.py +4 -0
  60. mcp_vector_search/config/default_thresholds.yaml +52 -0
  61. mcp_vector_search/config/settings.py +12 -0
  62. mcp_vector_search/config/thresholds.py +273 -0
  63. mcp_vector_search/core/__init__.py +16 -0
  64. mcp_vector_search/core/auto_indexer.py +3 -3
  65. mcp_vector_search/core/boilerplate.py +186 -0
  66. mcp_vector_search/core/config_utils.py +394 -0
  67. mcp_vector_search/core/database.py +406 -94
  68. mcp_vector_search/core/embeddings.py +24 -0
  69. mcp_vector_search/core/exceptions.py +11 -0
  70. mcp_vector_search/core/git.py +380 -0
  71. mcp_vector_search/core/git_hooks.py +4 -4
  72. mcp_vector_search/core/indexer.py +632 -54
  73. mcp_vector_search/core/llm_client.py +756 -0
  74. mcp_vector_search/core/models.py +91 -1
  75. mcp_vector_search/core/project.py +17 -0
  76. mcp_vector_search/core/relationships.py +473 -0
  77. mcp_vector_search/core/scheduler.py +11 -11
  78. mcp_vector_search/core/search.py +179 -29
  79. mcp_vector_search/mcp/server.py +819 -9
  80. mcp_vector_search/parsers/python.py +285 -5
  81. mcp_vector_search/utils/__init__.py +2 -0
  82. mcp_vector_search/utils/gitignore.py +0 -3
  83. mcp_vector_search/utils/gitignore_updater.py +212 -0
  84. mcp_vector_search/utils/monorepo.py +66 -4
  85. mcp_vector_search/utils/timing.py +10 -6
  86. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
  87. mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
  88. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
  89. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
  90. mcp_vector_search/cli/commands/visualize.py +0 -1467
  91. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  92. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/core/models.py
@@ -147,6 +147,36 @@ class SearchResult(BaseModel):
     context_before: list[str] = Field(default=[], description="Lines before the match")
     context_after: list[str] = Field(default=[], description="Lines after the match")
     highlights: list[str] = Field(default=[], description="Highlighted terms")
+    file_missing: bool = Field(
+        default=False, description="True if file no longer exists (stale index)"
+    )
+
+    # Quality metrics (from structural analysis)
+    cognitive_complexity: int | None = Field(
+        default=None, description="Cognitive complexity score"
+    )
+    cyclomatic_complexity: int | None = Field(
+        default=None, description="Cyclomatic complexity score"
+    )
+    max_nesting_depth: int | None = Field(
+        default=None, description="Maximum nesting depth"
+    )
+    parameter_count: int | None = Field(
+        default=None, description="Number of function parameters"
+    )
+    lines_of_code: int | None = Field(
+        default=None, description="Lines of code in chunk"
+    )
+    complexity_grade: str | None = Field(
+        default=None, description="Complexity grade (A-F)"
+    )
+    code_smells: list[str] = Field(default=[], description="Detected code smells")
+    smell_count: int | None = Field(
+        default=None, description="Number of code smells detected"
+    )
+    quality_score: int | None = Field(
+        default=None, description="Overall quality score (0-100)"
+    )
 
     class Config:
         arbitrary_types_allowed = True
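
All of the new metrics default to None (or an empty list), so records indexed by pre-1.x versions still validate. A minimal standalone sketch of that pattern (the Metrics model below is hypothetical, not from the package):

    from pydantic import BaseModel, Field

    class Metrics(BaseModel):
        # Optional metrics: absent values deserialize as None instead of failing validation
        cognitive_complexity: int | None = Field(default=None, description="Cognitive complexity score")
        complexity_grade: str | None = Field(default=None, description="Complexity grade (A-F)")
        code_smells: list[str] = Field(default=[], description="Detected code smells")

    old_record = Metrics()  # a chunk indexed before quality analysis existed
    print(old_record.cognitive_complexity)  # None
    print(old_record.code_smells)           # []
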
@@ -161,9 +191,47 @@ class SearchResult(BaseModel):
         """Get a human-readable location string."""
         return f"{self.file_path}:{self.start_line}-{self.end_line}"
 
+    def calculate_quality_score(self) -> int | None:
+        """Calculate quality score based on complexity grade and code smells.
+
+        Formula:
+        - Base: complexity_grade (A=100, B=80, C=60, D=40, F=20)
+        - Penalty: -10 per code smell
+        - Bonus: +20 if no smells are detected
+
+        Returns:
+            Quality score (0-100), or None if no quality metrics are available.
+        """
+        # If no quality metrics, return None (stored in the quality_score field)
+        if self.complexity_grade is None:
+            return None
+
+        # Map complexity grade to base score
+        grade_scores = {
+            "A": 100,
+            "B": 80,
+            "C": 60,
+            "D": 40,
+            "F": 20,
+        }
+
+        base_score = grade_scores.get(self.complexity_grade, 0)
+
+        # Apply smell penalty
+        smell_count = self.smell_count or 0
+        penalty = smell_count * 10
+
+        # Calculate final score; the no-smell bonus can push it above 100
+        # (an A grade with no smells computes to 100 + 20 = 120 before clamping)
+        bonus = 20 if smell_count == 0 else 0
+        quality_score = base_score - penalty + bonus
+
+        # Clamp to 0-100 range
+        return max(0, min(100, quality_score))
+
     def to_dict(self) -> dict[str, Any]:
         """Convert to dictionary for serialization."""
-        return {
+        result = {
             "content": self.content,
             "file_path": str(self.file_path),
             "start_line": self.start_line,
@@ -181,6 +249,28 @@ class SearchResult(BaseModel):
             "line_count": self.line_count,
         }
 
+        # Add quality metrics if available
+        if self.cognitive_complexity is not None:
+            result["cognitive_complexity"] = self.cognitive_complexity
+        if self.cyclomatic_complexity is not None:
+            result["cyclomatic_complexity"] = self.cyclomatic_complexity
+        if self.max_nesting_depth is not None:
+            result["max_nesting_depth"] = self.max_nesting_depth
+        if self.parameter_count is not None:
+            result["parameter_count"] = self.parameter_count
+        if self.lines_of_code is not None:
+            result["lines_of_code"] = self.lines_of_code
+        if self.complexity_grade is not None:
+            result["complexity_grade"] = self.complexity_grade
+        if self.code_smells:
+            result["code_smells"] = self.code_smells
+        if self.smell_count is not None:
+            result["smell_count"] = self.smell_count
+        if self.quality_score is not None:
+            result["quality_score"] = self.quality_score
+
+        return result
+
 
 class IndexStats(BaseModel):
     """Statistics about the search index."""
mcp_vector_search/core/project.py
@@ -107,6 +107,23 @@ class ProjectManager:
         index_path = get_default_index_path(self.project_root)
         index_path.mkdir(parents=True, exist_ok=True)
 
+        # Ensure .mcp-vector-search/ is in .gitignore
+        # This is a non-critical operation - failures are logged but don't block initialization
+        try:
+            from ..utils.gitignore_updater import ensure_gitignore_entry
+
+            ensure_gitignore_entry(
+                self.project_root,
+                pattern=".mcp-vector-search/",
+                comment="MCP Vector Search index directory",
+            )
+        except Exception as e:
+            # Log warning but continue initialization
+            logger.warning(f"Could not update .gitignore: {e}")
+            logger.info(
+                "Please manually add '.mcp-vector-search/' to your .gitignore file"
+            )
+
         # Detect languages and files
         detected_languages = self.detect_languages()
         file_count = self.count_indexable_files(
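
ensure_gitignore_entry ships in the new mcp_vector_search/utils/gitignore_updater.py (file 83 above); its body isn't shown in this hunk, but an idempotent update along these lines would match the call site. The implementation below is an assumption, not the released code:

    from pathlib import Path

    def ensure_gitignore_entry(project_root: Path, pattern: str, comment: str | None = None) -> None:
        # Assumed sketch: append the pattern (with an optional comment) only if missing
        gitignore = project_root / ".gitignore"
        lines = gitignore.read_text().splitlines() if gitignore.exists() else []
        if pattern in (line.strip() for line in lines):
            return  # already ignored; keeps the call idempotent
        if comment:
            lines.append(f"# {comment}")
        lines.append(pattern)
        gitignore.write_text("\n".join(lines) + "\n")
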
mcp_vector_search/core/relationships.py (new file)
@@ -0,0 +1,473 @@
+"""Pre-computed relationship storage for instant visualization.
+
+This module handles computing and storing code chunk relationships at index time,
+eliminating the expensive computation during visualization startup.
+
+Relationships stored:
+- Semantic relationships: Which chunks are similar (based on embeddings)
+- Caller relationships: Which chunks call which (based on AST analysis)
+"""
+
+import ast
+import asyncio
+import json
+import time
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from loguru import logger
+from rich.console import Console
+from rich.progress import (
+    BarColumn,
+    Progress,
+    SpinnerColumn,
+    TaskProgressColumn,
+    TextColumn,
+)
+
+from .models import CodeChunk
+
+console = Console()
+
+
+def extract_function_calls(code: str) -> set[str]:
+    """Extract actual function calls from Python code using AST.
+
+    Returns set of function names that are actually called (not just mentioned).
+    Avoids false positives from comments, docstrings, and string literals.
+
+    Args:
+        code: Python source code to analyze
+
+    Returns:
+        Set of function names that are actually called in the code
+    """
+    calls = set()
+    try:
+        tree = ast.parse(code)
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Call):
+                # Handle direct calls: foo()
+                if isinstance(node.func, ast.Name):
+                    calls.add(node.func.id)
+                # Handle method calls: obj.foo() - extract 'foo'
+                elif isinstance(node.func, ast.Attribute):
+                    calls.add(node.func.attr)
+        return calls
+    except SyntaxError:
+        # If code can't be parsed (incomplete, etc.), fall back to empty set
+        # This is safer than false positives from naive substring matching
+        return set()
+
+
+def extract_chunk_name(content: str, fallback: str = "chunk") -> str:
+    """Extract first meaningful word from chunk content for labeling.
+
+    Args:
+        content: The chunk's code content
+        fallback: Fallback name if no meaningful word found
+
+    Returns:
+        First meaningful identifier found in the content
+    """
+    import re
+
+    # Skip common keywords that aren't meaningful as chunk labels
+    skip_words = {
+        "def",
+        "class",
+        "function",
+        "const",
+        "let",
+        "var",
+        "import",
+        "from",
+        "return",
+        "if",
+        "else",
+        "elif",
+        "for",
+        "while",
+        "try",
+        "except",
+        "finally",
+        "with",
+        "as",
+        "async",
+        "await",
+        "yield",
+        "self",
+        "this",
+        "true",
+        "false",
+        "none",
+        "null",
+        "undefined",
+        "public",
+        "private",
+        "protected",
+        "static",
+        "export",
+        "default",
+    }
+
+    # Find all words (alphanumeric + underscore, at least 2 chars)
+    words = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]+\b", content)
+
+    for word in words:
+        if word.lower() not in skip_words:
+            return word
+
+    return fallback
+
+
+class RelationshipStore:
+    """Store and load pre-computed chunk relationships.
+
+    Relationships are stored in .mcp-vector-search/relationships.json and include:
+    - Semantic links (similar chunks based on embeddings)
+    - Caller links (which chunks call which)
+    - Metadata (chunk count, computation time, version)
+    """
+
+    def __init__(self, project_root: Path):
+        """Initialize relationship store.
+
+        Args:
+            project_root: Root directory of the project
+        """
+        self.project_root = project_root
+        self.store_path = project_root / ".mcp-vector-search" / "relationships.json"
+
+    async def compute_and_store(
+        self,
+        chunks: list[CodeChunk],
+        database: Any,
+        max_concurrent_queries: int = 50,
+        background: bool = False,
+    ) -> dict[str, Any]:
+        """Compute relationships and save to disk.
+
+        This is called during indexing to pre-compute expensive relationships.
+        NOTE: Caller relationships are now lazy-loaded via /api/callers/{chunk_id}
+        to avoid the expensive O(n²) computation at startup.
+
+        Args:
+            chunks: List of all code chunks
+            database: Vector database instance for semantic search
+            max_concurrent_queries: Maximum number of concurrent database queries (default: 50)
+            background: If True, skip computation and return immediately (for background processing)
+
+        Returns:
+            Dictionary with relationship statistics
+        """
+        logger.info("Computing relationships for visualization...")
+        start_time = time.time()
+
+        # Filter to code chunks only
+        code_chunks = [
+            c for c in chunks if c.chunk_type in ["function", "method", "class"]
+        ]
+
+        # If background mode, create empty relationships file and return
+        # Actual computation will happen in background task
+        if background:
+            relationships = {
+                "version": "1.1",
+                "computed_at": datetime.now(UTC).isoformat(),
+                "chunk_count": len(chunks),
+                "code_chunk_count": len(code_chunks),
+                "computation_time_seconds": 0,
+                "semantic": [],
+                "callers": {},
+                "status": "pending",  # Mark as pending background computation
+            }
+
+            # Save empty file
+            self.store_path.parent.mkdir(parents=True, exist_ok=True)
+            with open(self.store_path, "w") as f:
+                json.dump(relationships, f, indent=2)
+
+            logger.info("✓ Relationships marked for background computation")
+            return {
+                "semantic_links": 0,
+                "caller_relationships": 0,
+                "computation_time": 0,
+                "background": True,
+            }
+
+        # Compute semantic relationships only
+        # Caller relationships are lazy-loaded on-demand via API
+        logger.info(
+            f"Computing semantic relationships for {len(code_chunks)} chunks "
+            f"(max {max_concurrent_queries} concurrent queries)..."
+        )
+        semantic_links = await self._compute_semantic_relationships(
+            code_chunks, database, max_concurrent_queries
+        )
+
+        elapsed = time.time() - start_time
+
+        # Build relationship data (no caller_map - it's lazy loaded)
+        relationships = {
+            "version": "1.1",  # Version bump for lazy callers
+            "computed_at": datetime.now(UTC).isoformat(),
+            "chunk_count": len(chunks),
+            "code_chunk_count": len(code_chunks),
+            "computation_time_seconds": elapsed,
+            "semantic": semantic_links,
+            "callers": {},  # Empty - loaded on-demand via /api/callers/{chunk_id}
+            "status": "complete",
+        }
+
+        # Save to disk
+        self.store_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(self.store_path, "w") as f:
+            json.dump(relationships, f, indent=2)
+
+        logger.info(
+            f"✓ Computed {len(semantic_links)} semantic links in {elapsed:.1f}s "
+            "(callers lazy-loaded on-demand)"
+        )
+
+        return {
+            "semantic_links": len(semantic_links),
+            "caller_relationships": 0,  # Now lazy-loaded
+            "computation_time": elapsed,
+        }
+
+    async def _compute_semantic_relationships(
+        self,
+        code_chunks: list[CodeChunk],
+        database: Any,
+        max_concurrent_queries: int = 50,
+    ) -> list[dict[str, Any]]:
+        """Compute semantic similarity relationships between chunks using async parallel processing.
+
+        Args:
+            code_chunks: List of code chunks (functions, methods, classes)
+            database: Vector database for similarity search
+            max_concurrent_queries: Maximum number of concurrent database queries (default: 50)
+
+        Returns:
+            List of semantic link dictionaries
+        """
+        semantic_links = []
+        semaphore = asyncio.Semaphore(max_concurrent_queries)
+        completed_count = 0
+        total_chunks = len(code_chunks)
+
+        # Use Rich progress bar
+        with Progress(
+            SpinnerColumn(),
+            TextColumn("[cyan]Computing semantic relationships...[/cyan]"),
+            BarColumn(bar_width=40),
+            TaskProgressColumn(),
+            TextColumn("[dim]{task.completed}/{task.total} chunks[/dim]"),
+            console=console,
+            transient=False,
+        ) as progress:
+            task = progress.add_task("semantic", total=total_chunks)
+
+            async def process_chunk(chunk: CodeChunk) -> list[dict[str, Any]]:
+                """Process a single chunk and return its semantic links."""
+                nonlocal completed_count
+
+                async with semaphore:
+                    try:
+                        # Search for similar chunks
+                        similar_results = await database.search(
+                            query=chunk.content[:500],  # First 500 chars
+                            limit=6,  # Get 6 (exclude self = 5)
+                            similarity_threshold=0.3,
+                        )
+
+                        chunk_links = []
+                        source_chunk_id = chunk.chunk_id or chunk.id
+
+                        # Filter out self and create links
+                        for result in similar_results:
+                            target_chunk = next(
+                                (
+                                    c
+                                    for c in code_chunks
+                                    if str(c.file_path) == str(result.file_path)
+                                    and c.start_line == result.start_line
+                                    and c.end_line == result.end_line
+                                ),
+                                None,
+                            )
+
+                            if not target_chunk:
+                                continue
+
+                            target_chunk_id = target_chunk.chunk_id or target_chunk.id
+
+                            # Skip self-references
+                            if target_chunk_id == source_chunk_id:
+                                continue
+
+                            # Add semantic link
+                            if result.similarity_score >= 0.2:
+                                chunk_links.append(
+                                    {
+                                        "source": source_chunk_id,
+                                        "target": target_chunk_id,
+                                        "type": "semantic",
+                                        "similarity": result.similarity_score,
+                                    }
+                                )
+
+                            # Only keep top 5 per chunk
+                            if len(chunk_links) >= 5:
+                                break
+
+                        # Update progress
+                        completed_count += 1
+                        progress.update(task, completed=completed_count)
+
+                        return chunk_links
+
+                    except Exception as e:
+                        logger.debug(
+                            f"Failed to compute semantic for {chunk.chunk_id}: {e}"
+                        )
+                        completed_count += 1
+                        progress.update(task, completed=completed_count)
+                        return []
+
+            # Process all chunks in parallel
+            tasks = [process_chunk(chunk) for chunk in code_chunks]
+            results = await asyncio.gather(*tasks, return_exceptions=True)
+
+            # Flatten results and handle exceptions
+            for result in results:
+                if isinstance(result, Exception):
+                    logger.debug(f"Task failed with exception: {result}")
+                    continue
+                semantic_links.extend(result)
+
+        return semantic_links
+
+    def _compute_caller_relationships(
+        self, chunks: list[CodeChunk]
+    ) -> dict[str, list[dict[str, Any]]]:
+        """Compute which chunks call which other chunks.
+
+        Args:
+            chunks: List of all code chunks
+
+        Returns:
+            Map of chunk_id -> list of caller info
+        """
+        caller_map = {}
+
+        code_chunks = [
+            c for c in chunks if c.chunk_type in ["function", "method", "class"]
+        ]
+
+        logger.debug(f"Processing {len(code_chunks)} code chunks for callers...")
+
+        for chunk_idx, chunk in enumerate(code_chunks):
+            if chunk_idx % 50 == 0:  # Progress
+                logger.debug(f"Callers: {chunk_idx}/{len(code_chunks)} chunks")
+
+            chunk_id = chunk.chunk_id or chunk.id
+            file_path = str(chunk.file_path)
+            function_name = chunk.function_name or chunk.class_name
+
+            if not function_name:
+                continue
+
+            # Search other chunks that reference this function
+            for other_chunk in chunks:
+                other_file_path = str(other_chunk.file_path)
+
+                # Only track EXTERNAL callers (different file)
+                if other_file_path == file_path:
+                    continue
+
+                # Extract actual function calls using AST
+                actual_calls = extract_function_calls(other_chunk.content)
+
+                # Check if this function is actually called
+                if function_name in actual_calls:
+                    other_chunk_id = other_chunk.chunk_id or other_chunk.id
+
+                    # Generate meaningful caller name
+                    other_name = other_chunk.function_name or other_chunk.class_name
+                    if not other_name:
+                        other_name = extract_chunk_name(
+                            other_chunk.content,
+                            fallback=f"chunk_{other_chunk.start_line}",
+                        )
+
+                    # Skip __init__ functions as callers (noise)
+                    if other_name == "__init__":
+                        continue
+
+                    if chunk_id not in caller_map:
+                        caller_map[chunk_id] = []
+
+                    # Store caller information
+                    caller_map[chunk_id].append(
+                        {
+                            "file": other_file_path,
+                            "chunk_id": other_chunk_id,
+                            "name": other_name,
+                            "type": other_chunk.chunk_type,
+                        }
+                    )
+
+                    logger.debug(
+                        f"Found call: {other_name} ({other_file_path}) -> "
+                        f"{function_name} ({file_path})"
+                    )
+
+        return caller_map
+
+    def load(self) -> dict[str, Any]:
+        """Load pre-computed relationships from disk.
+
+        Returns:
+            Dictionary with semantic and caller relationships, or empty structure if not found
+        """
+        if not self.store_path.exists():
+            logger.warning(
+                f"No pre-computed relationships found at {self.store_path}. "
+                "Run 'mcp-vector-search index' to compute relationships."
+            )
+            return {"semantic": [], "callers": {}}
+
+        try:
+            with open(self.store_path) as f:
+                data = json.load(f)
+
+            logger.info(
+                f"✓ Loaded {len(data.get('semantic', []))} semantic links and "
+                f"{sum(len(callers) for callers in data.get('callers', {}).values())} "
+                f"caller relationships (computed {data.get('computed_at', 'unknown')})"
+            )
+
+            return data
+        except Exception as e:
+            logger.error(f"Failed to load relationships: {e}")
+            return {"semantic": [], "callers": {}}
+
+    def exists(self) -> bool:
+        """Check if pre-computed relationships exist.
+
+        Returns:
+            True if relationships file exists
+        """
+        return self.store_path.exists()
+
+    def invalidate(self) -> None:
+        """Delete stored relationships (called when index changes).
+
+        This forces re-computation on next full index.
+        """
+        if self.store_path.exists():
+            self.store_path.unlink()
+            logger.debug("Invalidated pre-computed relationships")
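
Worth noting how strict the AST-based extraction is compared to substring matching; a small usage sketch (the import path comes from the file list above, the sample code is invented):

    from mcp_vector_search.core.relationships import extract_function_calls

    snippet = '''
    def handler(request):
        """Docstring mentioning cleanup() is ignored."""
        data = json.loads(request.body)  # attribute call -> 'loads'
        validate(data)                   # direct call -> 'validate'
        return Response(data)            # constructor calls count -> 'Response'
    '''

    print(sorted(extract_function_calls(snippet)))
    # ['Response', 'loads', 'validate'] -- 'cleanup' never appears because it
    # occurs only inside a string, which ast.walk never sees as a Call node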