mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +111 -0
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +74 -0
  7. mcp_vector_search/analysis/collectors/base.py +164 -0
  8. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  9. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  10. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  11. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  12. mcp_vector_search/analysis/collectors/smells.py +325 -0
  13. mcp_vector_search/analysis/debt.py +516 -0
  14. mcp_vector_search/analysis/interpretation.py +685 -0
  15. mcp_vector_search/analysis/metrics.py +414 -0
  16. mcp_vector_search/analysis/reporters/__init__.py +7 -0
  17. mcp_vector_search/analysis/reporters/console.py +646 -0
  18. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  19. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  20. mcp_vector_search/analysis/storage/__init__.py +93 -0
  21. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  22. mcp_vector_search/analysis/storage/schema.py +245 -0
  23. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  24. mcp_vector_search/analysis/trends.py +308 -0
  25. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  26. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  27. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  28. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  29. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  30. mcp_vector_search/cli/commands/analyze.py +1062 -0
  31. mcp_vector_search/cli/commands/chat.py +1455 -0
  32. mcp_vector_search/cli/commands/index.py +621 -5
  33. mcp_vector_search/cli/commands/index_background.py +467 -0
  34. mcp_vector_search/cli/commands/init.py +13 -0
  35. mcp_vector_search/cli/commands/install.py +597 -335
  36. mcp_vector_search/cli/commands/install_old.py +8 -4
  37. mcp_vector_search/cli/commands/mcp.py +78 -6
  38. mcp_vector_search/cli/commands/reset.py +68 -26
  39. mcp_vector_search/cli/commands/search.py +224 -8
  40. mcp_vector_search/cli/commands/setup.py +1184 -0
  41. mcp_vector_search/cli/commands/status.py +339 -5
  42. mcp_vector_search/cli/commands/uninstall.py +276 -357
  43. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  44. mcp_vector_search/cli/commands/visualize/cli.py +292 -0
  45. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  46. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  47. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
  48. mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
  49. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  50. mcp_vector_search/cli/commands/visualize/server.py +600 -0
  51. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  52. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  53. mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
  54. mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
  55. mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
  56. mcp_vector_search/cli/didyoumean.py +27 -2
  57. mcp_vector_search/cli/main.py +127 -160
  58. mcp_vector_search/cli/output.py +158 -13
  59. mcp_vector_search/config/__init__.py +4 -0
  60. mcp_vector_search/config/default_thresholds.yaml +52 -0
  61. mcp_vector_search/config/settings.py +12 -0
  62. mcp_vector_search/config/thresholds.py +273 -0
  63. mcp_vector_search/core/__init__.py +16 -0
  64. mcp_vector_search/core/auto_indexer.py +3 -3
  65. mcp_vector_search/core/boilerplate.py +186 -0
  66. mcp_vector_search/core/config_utils.py +394 -0
  67. mcp_vector_search/core/database.py +406 -94
  68. mcp_vector_search/core/embeddings.py +24 -0
  69. mcp_vector_search/core/exceptions.py +11 -0
  70. mcp_vector_search/core/git.py +380 -0
  71. mcp_vector_search/core/git_hooks.py +4 -4
  72. mcp_vector_search/core/indexer.py +632 -54
  73. mcp_vector_search/core/llm_client.py +756 -0
  74. mcp_vector_search/core/models.py +91 -1
  75. mcp_vector_search/core/project.py +17 -0
  76. mcp_vector_search/core/relationships.py +473 -0
  77. mcp_vector_search/core/scheduler.py +11 -11
  78. mcp_vector_search/core/search.py +179 -29
  79. mcp_vector_search/mcp/server.py +819 -9
  80. mcp_vector_search/parsers/python.py +285 -5
  81. mcp_vector_search/utils/__init__.py +2 -0
  82. mcp_vector_search/utils/gitignore.py +0 -3
  83. mcp_vector_search/utils/gitignore_updater.py +212 -0
  84. mcp_vector_search/utils/monorepo.py +66 -4
  85. mcp_vector_search/utils/timing.py +10 -6
  86. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
  87. mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
  88. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
  89. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
  90. mcp_vector_search/cli/commands/visualize.py +0 -1467
  91. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  92. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/cli/commands/visualize/graph_builder.py
@@ -0,0 +1,647 @@
+"""Graph data construction logic for code visualization.
+
+This module handles building the graph data structure from code chunks,
+including nodes, links, semantic relationships, and cycle detection.
+"""
+
+import json
+from pathlib import Path
+from typing import Any
+
+from loguru import logger
+from rich.console import Console
+
+from ....analysis.trends import TrendTracker
+from ....core.database import ChromaVectorDatabase
+from ....core.directory_index import DirectoryIndex
+from ....core.project import ProjectManager
+from .state_manager import VisualizationState
+
+console = Console()
+
+
+def extract_chunk_name(content: str, fallback: str = "chunk") -> str:
+    """Extract first meaningful word from chunk content for labeling.
+
+    Args:
+        content: The chunk's code content
+        fallback: Fallback name if no meaningful word found
+
+    Returns:
+        First meaningful identifier found in the content
+
+    Examples:
+        >>> extract_chunk_name("def calculate_total(...)")
+        'calculate_total'
+        >>> extract_chunk_name("class UserManager:")
+        'UserManager'
+        >>> extract_chunk_name("# Comment about users")
+        'users'
+        >>> extract_chunk_name("import pandas as pd")
+        'pandas'
+    """
+    import re
+
+    # Skip common keywords that aren't meaningful as chunk labels
+    skip_words = {
+        "def",
+        "class",
+        "function",
+        "const",
+        "let",
+        "var",
+        "import",
+        "from",
+        "return",
+        "if",
+        "else",
+        "elif",
+        "for",
+        "while",
+        "try",
+        "except",
+        "finally",
+        "with",
+        "as",
+        "async",
+        "await",
+        "yield",
+        "self",
+        "this",
+        "true",
+        "false",
+        "none",
+        "null",
+        "undefined",
+        "public",
+        "private",
+        "protected",
+        "static",
+        "export",
+        "default",
+    }
+
+    # Find all words (alphanumeric + underscore, at least 2 chars)
+    words = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]+\b", content)
+
+    for word in words:
+        if word.lower() not in skip_words:
+            return word
+
+    return fallback
+
+
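# A minimal usage sketch of extract_chunk_name (illustrative; assumes the
# module import path shown in the file list above). "async" and "def" are in
# skip_words, so the first real identifier wins; a line with no word of two or
# more characters falls back to the default label.
from mcp_vector_search.cli.commands.visualize.graph_builder import extract_chunk_name

assert extract_chunk_name("async def fetch_data():") == "fetch_data"
assert extract_chunk_name("x = 1") == "chunk"  # no 2+ char identifier -> fallback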
+def get_subproject_color(subproject_name: str, index: int) -> str:
+    """Get a consistent color for a subproject.
+
+    Args:
+        subproject_name: Name of the subproject
+        index: Index of the subproject in the list
+
+    Returns:
+        Hex color code
+    """
+    # Color palette for subprojects (GitHub-style colors)
+    colors = [
+        "#238636",  # Green
+        "#1f6feb",  # Blue
+        "#d29922",  # Yellow
+        "#8957e5",  # Purple
+        "#da3633",  # Red
+        "#bf8700",  # Orange
+        "#1a7f37",  # Dark green
+        "#0969da",  # Dark blue
+    ]
+    return colors[index % len(colors)]
+
+
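# Palette wrap-around sketch (illustrative): the ninth subproject (index 8)
# reuses the first color via the modulo above; the name itself is unused.
from mcp_vector_search.cli.commands.visualize.graph_builder import get_subproject_color

assert get_subproject_color("ninth-app", 8) == "#238636"  # same as index 0 (green)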
+def parse_project_dependencies(project_root: Path, subprojects: dict) -> list[dict]:
+    """Parse package.json files to find inter-project dependencies.
+
+    Args:
+        project_root: Root directory of the monorepo
+        subprojects: Dictionary of subproject information
+
+    Returns:
+        List of dependency links between subprojects
+    """
+    dependency_links = []
+
+    for sp_name, sp_data in subprojects.items():
+        package_json = project_root / sp_data["path"] / "package.json"
+
+        if not package_json.exists():
+            continue
+
+        try:
+            with open(package_json) as f:
+                package_data = json.load(f)
+
+            # Check all dependency types
+            all_deps = {}
+            for dep_type in ["dependencies", "devDependencies", "peerDependencies"]:
+                if dep_type in package_data:
+                    all_deps.update(package_data[dep_type])
+
+            # Find dependencies on other subprojects
+            for dep_name in all_deps.keys():
+                # Check if this dependency is another subproject
+                for other_sp_name in subprojects.keys():
+                    if other_sp_name != sp_name and dep_name == other_sp_name:
+                        # Found inter-project dependency
+                        dependency_links.append(
+                            {
+                                "source": f"subproject_{sp_name}",
+                                "target": f"subproject_{other_sp_name}",
+                                "type": "dependency",
+                            }
+                        )
+
+        except Exception as e:
+            logger.debug(f"Failed to parse {package_json}: {e}")
+            continue
+
+    return dependency_links
+
+
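# A runnable sketch (hypothetical package names) of the link format the parser
# emits when one subproject's package.json depends on another subproject:
import json
import tempfile
from pathlib import Path

from mcp_vector_search.cli.commands.visualize.graph_builder import (
    parse_project_dependencies,
)

root = Path(tempfile.mkdtemp())
(root / "packages" / "web").mkdir(parents=True)
(root / "packages" / "web" / "package.json").write_text(
    json.dumps({"dependencies": {"api": "workspace:*"}})
)
subprojects = {
    "web": {"name": "web", "path": "packages/web"},
    "api": {"name": "api", "path": "packages/api"},
}
print(parse_project_dependencies(root, subprojects))
# [{'source': 'subproject_web', 'target': 'subproject_api', 'type': 'dependency'}]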
+def detect_cycles(chunks: list, caller_map: dict) -> list[list[str]]:
+    """Detect TRUE cycles in the call graph using DFS with three-color marking.
+
+    Uses three-color marking to distinguish between:
+    - WHITE (0): Unvisited node, not yet explored
+    - GRAY (1): Currently exploring, node is in the current DFS path
+    - BLACK (2): Fully explored, all descendants processed
+
+    A cycle exists when we encounter a GRAY node during traversal, which means
+    we've found a back edge to a node currently in the exploration path.
+
+    Args:
+        chunks: List of code chunks
+        caller_map: Map of chunk_id to list of caller info
+
+    Returns:
+        List of cycles found, where each cycle is a list of node IDs in the cycle path
+    """
+    cycles_found = []
+    # Three-color constants for DFS cycle detection
+    white, gray, black = 0, 1, 2  # noqa: N806
+    color = {chunk.chunk_id or chunk.id: white for chunk in chunks}
+
+    def dfs(node_id: str, path: list) -> None:
+        """DFS with three-color marking for accurate cycle detection.
+
+        Args:
+            node_id: Current node ID being visited
+            path: List of node IDs in current path (for cycle reconstruction)
+        """
+        if color.get(node_id, white) == black:
+            # Already fully explored, no cycle here
+            return
+
+        if color.get(node_id, white) == gray:
+            # Found a TRUE cycle! Node is in current path
+            try:
+                cycle_start = path.index(node_id)
+                cycle_nodes = path[cycle_start:] + [node_id]  # Include back edge
+                # Only record if cycle length > 1 (avoid self-loops unless intentional)
+                if len(set(cycle_nodes)) > 1:
+                    cycles_found.append(cycle_nodes)
+            except ValueError:
+                pass  # Node not in path (shouldn't happen)
+            return
+
+        # Mark as currently exploring
+        color[node_id] = gray
+        path.append(node_id)
+
+        # Follow outgoing edges (external_callers → caller_id)
+        if node_id in caller_map:
+            for caller_info in caller_map[node_id]:
+                caller_id = caller_info["chunk_id"]
+                dfs(caller_id, path[:])  # Pass copy of path
+
+        # Mark as fully explored
+        path.pop()
+        color[node_id] = black
+
+    # Run DFS from each unvisited node
+    for chunk in chunks:
+        chunk_id = chunk.chunk_id or chunk.id
+        if color.get(chunk_id, white) == white:
+            dfs(chunk_id, [])
+
+    return cycles_found
+
+
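# A minimal sketch of detect_cycles on a two-node cycle (hypothetical chunks;
# only the chunk_id attribute is touched here). Edges follow caller_map, so
# a -> b -> a produces one back edge and one recorded cycle:
from types import SimpleNamespace

from mcp_vector_search.cli.commands.visualize.graph_builder import detect_cycles

chunks = [SimpleNamespace(chunk_id="a"), SimpleNamespace(chunk_id="b")]
caller_map = {"a": [{"chunk_id": "b"}], "b": [{"chunk_id": "a"}]}
print(detect_cycles(chunks, caller_map))  # [['a', 'b', 'a']] - path plus back edge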
+async def build_graph_data(
+    chunks: list,
+    database: ChromaVectorDatabase,
+    project_manager: ProjectManager,
+    code_only: bool = False,
+) -> dict[str, Any]:
+    """Build complete graph data structure from chunks.
+
+    Args:
+        chunks: List of code chunks from the database
+        database: Vector database instance (for semantic search)
+        project_manager: Project manager instance
+        code_only: If True, exclude documentation chunks
+
+    Returns:
+        Dictionary containing nodes, links, and metadata
+    """
+    # Collect subprojects for monorepo support
+    subprojects = {}
+    for chunk in chunks:
+        if chunk.subproject_name and chunk.subproject_name not in subprojects:
+            subprojects[chunk.subproject_name] = {
+                "name": chunk.subproject_name,
+                "path": chunk.subproject_path,
+                "color": get_subproject_color(chunk.subproject_name, len(subprojects)),
+            }
+
+    # Build graph data structure
+    nodes = []
+    links = []
+    chunk_id_map = {}  # Map chunk IDs to array indices
+    file_nodes = {}  # Track file nodes by path
+    dir_nodes = {}  # Track directory nodes by path
+
+    # Add subproject root nodes for monorepos
+    if subprojects:
+        console.print(
+            f"[cyan]Detected monorepo with {len(subprojects)} subprojects[/cyan]"
+        )
+        for sp_name, sp_data in subprojects.items():
+            node = {
+                "id": f"subproject_{sp_name}",
+                "name": sp_name,
+                "type": "subproject",
+                "file_path": sp_data["path"] or "",
+                "start_line": 0,
+                "end_line": 0,
+                "complexity": 0,
+                "color": sp_data["color"],
+                "depth": 0,
+            }
+            nodes.append(node)
+
+    # Load directory index for enhanced directory metadata
+    console.print("[cyan]Loading directory index...[/cyan]")
+    dir_index_path = (
+        project_manager.project_root / ".mcp-vector-search" / "directory_index.json"
+    )
+    dir_index = DirectoryIndex(dir_index_path)
+    dir_index.load()
+
+    # Create directory nodes from directory index
+    console.print(f"[green]✓[/green] Loaded {len(dir_index.directories)} directories")
+    for dir_path_str, directory in dir_index.directories.items():
+        dir_id = f"dir_{hash(dir_path_str) & 0xFFFFFFFF:08x}"
+
+        # Compute parent directory ID (convert Path to string for JSON serialization)
+        parent_dir_id = None
+        parent_path_str = str(directory.parent_path) if directory.parent_path else None
+        if parent_path_str:
+            parent_dir_id = f"dir_{hash(parent_path_str) & 0xFFFFFFFF:08x}"
+
+        dir_nodes[dir_path_str] = {
+            "id": dir_id,
+            "name": directory.name,
+            "type": "directory",
+            "file_path": dir_path_str,
+            "start_line": 0,
+            "end_line": 0,
+            "complexity": 0,
+            "depth": directory.depth,
+            "dir_path": dir_path_str,
+            "parent_id": parent_dir_id,  # Link to parent directory
+            "parent_path": parent_path_str,  # String for JSON serialization
+            "file_count": directory.file_count,
+            "subdirectory_count": directory.subdirectory_count,
+            "total_chunks": directory.total_chunks,
+            "languages": directory.languages or {},
+            "is_package": directory.is_package,
+            "last_modified": directory.last_modified,
+        }
+
+    # Create file nodes from chunks
+    # First pass: create file node entries
+    for chunk in chunks:
+        file_path_str = str(chunk.file_path)
+        file_path = Path(file_path_str)
+
+        # Create file node with parent directory reference
+        if file_path_str not in file_nodes:
+            file_id = f"file_{hash(file_path_str) & 0xFFFFFFFF:08x}"
+
+            # Convert absolute path to relative path for parent directory lookup
+            try:
+                relative_file_path = file_path.relative_to(project_manager.project_root)
+                parent_dir = relative_file_path.parent
+                # Use relative path for parent directory (matches directory_index)
+                parent_dir_str = str(parent_dir) if parent_dir != Path(".") else None
+            except ValueError:
+                # File is outside project root
+                parent_dir_str = None
+
+            # Look up parent directory ID from dir_nodes (must match exactly)
+            parent_dir_id = None
+            if parent_dir_str and parent_dir_str in dir_nodes:
+                parent_dir_id = dir_nodes[parent_dir_str]["id"]
+
+            file_nodes[file_path_str] = {
+                "id": file_id,
+                "name": file_path.name,
+                "type": "file",
+                "file_path": file_path_str,
+                "start_line": 0,
+                "end_line": 0,
+                "complexity": 0,
+                "depth": len(file_path.parts) - 1,
+                "parent_id": parent_dir_id,  # Consistent with directory nodes
+                "parent_path": parent_dir_str,
+                "chunk_count": 0,  # Will be computed below
+            }
+
+    # Second pass: count chunks per file (pre-compute for consistent sizing)
+    for chunk in chunks:
+        file_path_str = str(chunk.file_path)
+        if file_path_str in file_nodes:
+            file_nodes[file_path_str]["chunk_count"] += 1
+
+    # Add directory nodes to graph
+    for dir_node in dir_nodes.values():
+        nodes.append(dir_node)
+
+    # Add file nodes to graph
+    for file_node in file_nodes.values():
+        nodes.append(file_node)
+
+    # Link directories to their parent directories
+    for dir_node in dir_nodes.values():
+        if dir_node.get("parent_id"):
+            links.append(
+                {
+                    "source": dir_node["parent_id"],
+                    "target": dir_node["id"],
+                    "type": "dir_containment",
+                }
+            )
+
+    # Skip ALL relationship computation at startup for instant loading
+    # Relationships are lazy-loaded on-demand via /api/relationships/{chunk_id}
+    # This avoids the expensive 5+ minute semantic computation
+    caller_map: dict = {}  # Empty - callers lazy-loaded via API
+    console.print(
+        "[green]✓[/green] Skipping relationship computation (lazy-loaded on node expand)"
+    )
+
+    # Add chunk nodes
+    for chunk in chunks:
+        chunk_id = chunk.chunk_id or chunk.id
+
+        # Generate meaningful chunk name
+        chunk_name = chunk.function_name or chunk.class_name
+        if not chunk_name:
+            # Extract meaningful name from content
+            chunk_name = extract_chunk_name(
+                chunk.content, fallback=f"chunk_{chunk.start_line}"
+            )
+            logger.debug(
+                f"Generated chunk name '{chunk_name}' for {chunk.chunk_type} at {chunk.file_path}:{chunk.start_line}"
+            )
+
+        # Determine parent_id: use parent_chunk_id if it exists, else use file node ID
+        file_path_str = str(chunk.file_path)
+        parent_id = chunk.parent_chunk_id
+        if not parent_id and file_path_str in file_nodes:
+            # Top-level chunk: set parent to file node for proper tree structure
+            parent_id = file_nodes[file_path_str]["id"]
+
+        node = {
+            "id": chunk_id,
+            "name": chunk_name,
+            "type": chunk.chunk_type,
+            "file_path": file_path_str,
+            "start_line": chunk.start_line,
+            "end_line": chunk.end_line,
+            "complexity": chunk.complexity_score,
+            "parent_id": parent_id,  # Now properly set for all chunks
+            "depth": chunk.chunk_depth,
+            "content": chunk.content,  # Add content for code viewer
+            "docstring": chunk.docstring,
+            "language": chunk.language,
+        }
+
+        # Add structural analysis metrics if available
+        if (
+            hasattr(chunk, "cognitive_complexity")
+            and chunk.cognitive_complexity is not None
+        ):
+            node["cognitive_complexity"] = chunk.cognitive_complexity
+        if (
+            hasattr(chunk, "cyclomatic_complexity")
+            and chunk.cyclomatic_complexity is not None
+        ):
+            node["cyclomatic_complexity"] = chunk.cyclomatic_complexity
+        if hasattr(chunk, "complexity_grade") and chunk.complexity_grade is not None:
+            node["complexity_grade"] = chunk.complexity_grade
+        if hasattr(chunk, "code_smells") and chunk.code_smells:
+            node["smells"] = chunk.code_smells
+        if hasattr(chunk, "smell_count") and chunk.smell_count is not None:
+            node["smell_count"] = chunk.smell_count
+        if hasattr(chunk, "quality_score") and chunk.quality_score is not None:
+            node["quality_score"] = chunk.quality_score
+        if hasattr(chunk, "lines_of_code") and chunk.lines_of_code is not None:
+            node["lines_of_code"] = chunk.lines_of_code
+
+        # Add caller information if available
+        if chunk_id in caller_map:
+            node["callers"] = caller_map[chunk_id]
+
+        # Add subproject info for monorepos
+        if chunk.subproject_name:
+            node["subproject"] = chunk.subproject_name
+            node["color"] = subprojects[chunk.subproject_name]["color"]
+
+        nodes.append(node)
+        chunk_id_map[node["id"]] = len(nodes) - 1
+
+    # NOTE: Directory parent→child links already created above via dir_containment
+    # (removed duplicate dir_hierarchy link creation that caused duplicate paths)
+
+    # Link directories to subprojects in monorepos (simple flat structure)
+    if subprojects:
+        for dir_path_str, dir_node in dir_nodes.items():
+            for sp_name, sp_data in subprojects.items():
+                if dir_path_str.startswith(sp_data.get("path", "")):
+                    links.append(
+                        {
+                            "source": f"subproject_{sp_name}",
+                            "target": dir_node["id"],
+                            "type": "dir_containment",
+                        }
+                    )
+                    break
+
+    # Link files to their parent directories
+    for _file_path_str, file_node in file_nodes.items():
+        if file_node.get("parent_id"):
+            links.append(
+                {
+                    "source": file_node["parent_id"],
+                    "target": file_node["id"],
+                    "type": "dir_containment",
+                }
+            )
+
+    # Build hierarchical links from parent-child relationships
+    for chunk in chunks:
+        chunk_id = chunk.chunk_id or chunk.id
+        file_path = str(chunk.file_path)
+
+        # Link chunk to its file node if it has no parent (top-level chunks)
+        if not chunk.parent_chunk_id and file_path in file_nodes:
+            links.append(
+                {
+                    "source": file_nodes[file_path]["id"],
+                    "target": chunk_id,
+                    "type": "file_containment",
+                }
+            )
+
+        # Link to subproject root if in monorepo
+        if chunk.subproject_name and not chunk.parent_chunk_id:
+            links.append(
+                {
+                    "source": f"subproject_{chunk.subproject_name}",
+                    "target": chunk_id,
+                    "type": "subproject_containment",
+                }
+            )
+
+        # Link to parent chunk (class -> method hierarchy)
+        if chunk.parent_chunk_id and chunk.parent_chunk_id in chunk_id_map:
+            links.append(
+                {
+                    "source": chunk.parent_chunk_id,
+                    "target": chunk_id,
+                    "type": "chunk_hierarchy",  # Explicitly mark chunk parent-child relationships
+                }
+            )
+
+    # Semantic and caller relationships are lazy-loaded via /api/relationships/{chunk_id}
+    # No relationship links at startup for instant loading
+
+    # Parse inter-project dependencies for monorepos
+    if subprojects:
+        console.print("[cyan]Parsing inter-project dependencies...[/cyan]")
+        dep_links = parse_project_dependencies(
+            project_manager.project_root, subprojects
+        )
+        links.extend(dep_links)
+        if dep_links:
+            console.print(
+                f"[green]✓[/green] Found {len(dep_links)} inter-project dependencies"
+            )
+
+    # Get stats
+    stats = await database.get_stats()
+
+    # Load trend data for time series visualization
+    trend_tracker = TrendTracker(project_manager.project_root)
+    trend_summary = trend_tracker.get_trend_summary(days=90)  # Last 90 days
+
+    # Build final graph data
+    graph_data = {
+        "nodes": nodes,
+        "links": links,
+        "metadata": {
+            "total_chunks": len(chunks),
+            "total_files": stats.total_files,
+            "languages": stats.languages,
+            "is_monorepo": len(subprojects) > 0,
+            "subprojects": list(subprojects.keys()) if subprojects else [],
+        },
+        "trends": trend_summary,  # Include trend data for visualization
+    }
+
+    return graph_data
+
+
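# Shape of the value returned by build_graph_data (illustrative IDs; real ones
# are hash-derived as in the code above):
#
#   {
#       "nodes": [
#           {"id": "dir_0a1b2c3d", "type": "directory", "name": "src", ...},
#           {"id": "file_4e5f6a7b", "type": "file", "name": "app.py",
#            "parent_id": "dir_0a1b2c3d", "chunk_count": 1, ...},
#           {"id": "<chunk_id>", "type": "function", "name": "main",
#            "parent_id": "file_4e5f6a7b", "content": "...", ...},
#       ],
#       "links": [
#           {"source": "dir_0a1b2c3d", "target": "file_4e5f6a7b", "type": "dir_containment"},
#           {"source": "file_4e5f6a7b", "target": "<chunk_id>", "type": "file_containment"},
#       ],
#       "metadata": {"total_chunks": 1, "total_files": 1, "languages": {...},
#                    "is_monorepo": False, "subprojects": []},
#       "trends": {...},  # from TrendTracker.get_trend_summary(days=90)
#   }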
+def apply_state(graph_data: dict, state: VisualizationState) -> dict:
+    """Apply visualization state to graph data.
+
+    Filters nodes and edges based on current visualization state,
+    including visibility and AST-only edge filtering.
+
+    Args:
+        graph_data: Full graph data dictionary (nodes, links, metadata)
+        state: Current visualization state
+
+    Returns:
+        Filtered graph data with only visible nodes and edges
+
+    Example:
+        >>> state = VisualizationState()
+        >>> state.expand_node("dir1", "directory", ["file1", "file2"])
+        >>> filtered = apply_state(graph_data, state)
+        >>> len(filtered["nodes"]) < len(graph_data["nodes"])
+        True
+    """
+    # Get visible node IDs from state
+    visible_node_ids = set(state.get_visible_nodes())
+
+    # Filter nodes
+    filtered_nodes = [
+        node for node in graph_data["nodes"] if node["id"] in visible_node_ids
+    ]
+
+    # Build node ID to node data map for quick lookup
+    node_map = {node["id"]: node for node in graph_data["nodes"]}
+
+    # Get visible edges from state (AST calls only in FILE_DETAIL mode)
+    expanded_file_id = None
+    if state.view_mode.value == "file_detail" and state.expansion_path:
+        # Find the file node in expansion path
+        for node_id in reversed(state.expansion_path):
+            node = node_map.get(node_id)
+            if node and node.get("type") == "file":
+                expanded_file_id = node_id
+                break
+
+    visible_edge_ids = state.get_visible_edges(
+        graph_data["links"], expanded_file_id=expanded_file_id
+    )
+
+    # Filter links to only visible edges
+    filtered_links = []
+    for link in graph_data["links"]:
+        source_id = link.get("source")
+        target_id = link.get("target")
+
+        # Skip if either node not visible
+        if source_id not in visible_node_ids or target_id not in visible_node_ids:
+            continue
+
+        # In FILE_DETAIL mode, only show edges in visible_edge_ids
+        if state.view_mode.value == "file_detail":
+            if (source_id, target_id) in visible_edge_ids:
+                filtered_links.append(link)
+        elif state.view_mode.value in ("tree_root", "tree_expanded"):
+            # In tree modes, show containment edges only
+            # Must include file_containment to link code chunks to their parent files
+            if link.get("type") in (
+                "dir_containment",
+                "dir_hierarchy",
+                "file_containment",
+            ):
+                filtered_links.append(link)
+
+    return {
+        "nodes": filtered_nodes,
+        "links": filtered_links,
+        "metadata": graph_data.get("metadata", {}),
+        "state": state.to_dict(),  # Include serialized state
+    }