mcp-vector-search 0.12.6__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. mcp_vector_search/__init__.py +2 -2
  2. mcp_vector_search/analysis/__init__.py +64 -0
  3. mcp_vector_search/analysis/collectors/__init__.py +39 -0
  4. mcp_vector_search/analysis/collectors/base.py +164 -0
  5. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  6. mcp_vector_search/analysis/metrics.py +341 -0
  7. mcp_vector_search/analysis/reporters/__init__.py +5 -0
  8. mcp_vector_search/analysis/reporters/console.py +222 -0
  9. mcp_vector_search/cli/commands/analyze.py +408 -0
  10. mcp_vector_search/cli/commands/chat.py +1262 -0
  11. mcp_vector_search/cli/commands/index.py +21 -3
  12. mcp_vector_search/cli/commands/init.py +13 -0
  13. mcp_vector_search/cli/commands/install.py +597 -335
  14. mcp_vector_search/cli/commands/install_old.py +8 -4
  15. mcp_vector_search/cli/commands/mcp.py +78 -6
  16. mcp_vector_search/cli/commands/reset.py +68 -26
  17. mcp_vector_search/cli/commands/search.py +30 -7
  18. mcp_vector_search/cli/commands/setup.py +1133 -0
  19. mcp_vector_search/cli/commands/status.py +37 -2
  20. mcp_vector_search/cli/commands/uninstall.py +276 -357
  21. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  22. mcp_vector_search/cli/commands/visualize/cli.py +276 -0
  23. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  24. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  25. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  26. mcp_vector_search/cli/commands/visualize/graph_builder.py +714 -0
  27. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  28. mcp_vector_search/cli/commands/visualize/server.py +311 -0
  29. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  30. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  31. mcp_vector_search/cli/commands/visualize/templates/base.py +180 -0
  32. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2507 -0
  33. mcp_vector_search/cli/commands/visualize/templates/styles.py +1313 -0
  34. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  35. mcp_vector_search/cli/didyoumean.py +22 -2
  36. mcp_vector_search/cli/main.py +115 -159
  37. mcp_vector_search/cli/output.py +24 -8
  38. mcp_vector_search/config/__init__.py +4 -0
  39. mcp_vector_search/config/default_thresholds.yaml +52 -0
  40. mcp_vector_search/config/settings.py +12 -0
  41. mcp_vector_search/config/thresholds.py +185 -0
  42. mcp_vector_search/core/auto_indexer.py +3 -3
  43. mcp_vector_search/core/boilerplate.py +186 -0
  44. mcp_vector_search/core/config_utils.py +394 -0
  45. mcp_vector_search/core/database.py +369 -94
  46. mcp_vector_search/core/exceptions.py +11 -0
  47. mcp_vector_search/core/git_hooks.py +4 -4
  48. mcp_vector_search/core/indexer.py +221 -4
  49. mcp_vector_search/core/llm_client.py +751 -0
  50. mcp_vector_search/core/models.py +3 -0
  51. mcp_vector_search/core/project.py +17 -0
  52. mcp_vector_search/core/scheduler.py +11 -11
  53. mcp_vector_search/core/search.py +179 -29
  54. mcp_vector_search/mcp/server.py +24 -5
  55. mcp_vector_search/utils/__init__.py +2 -0
  56. mcp_vector_search/utils/gitignore_updater.py +212 -0
  57. mcp_vector_search/utils/monorepo.py +66 -4
  58. mcp_vector_search/utils/timing.py +10 -6
  59. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/METADATA +182 -52
  60. mcp_vector_search-1.0.3.dist-info/RECORD +97 -0
  61. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/WHEEL +1 -1
  62. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/entry_points.txt +1 -0
  63. mcp_vector_search/cli/commands/visualize.py +0 -1467
  64. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  65. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/cli/commands/visualize/graph_builder.py (new file)
@@ -0,0 +1,714 @@
+ """Graph data construction logic for code visualization.
+
+ This module handles building the graph data structure from code chunks,
+ including nodes, links, semantic relationships, and cycle detection.
+ """
+
+ import json
+ from pathlib import Path
+ from typing import Any
+
+ from loguru import logger
+ from rich.console import Console
+
+ from ....core.database import ChromaVectorDatabase
+ from ....core.directory_index import DirectoryIndex
+ from ....core.project import ProjectManager
+ from .state_manager import VisualizationState
+
+ console = Console()
+
+
+ def get_subproject_color(subproject_name: str, index: int) -> str:
+     """Get a consistent color for a subproject.
+
+     Args:
+         subproject_name: Name of the subproject
+         index: Index of the subproject in the list
+
+     Returns:
+         Hex color code
+     """
+     # Color palette for subprojects (GitHub-style colors)
+     colors = [
+         "#238636",  # Green
+         "#1f6feb",  # Blue
+         "#d29922",  # Yellow
+         "#8957e5",  # Purple
+         "#da3633",  # Red
+         "#bf8700",  # Orange
+         "#1a7f37",  # Dark green
+         "#0969da",  # Dark blue
+     ]
+     return colors[index % len(colors)]
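The palette cycles by index alone (the `subproject_name` argument is currently unused), so colors repeat after eight subprojects. A small sketch of that wrap-around, assuming the import path shown in this diff:

```python
# Sketch only; assumes the module is importable from the installed wheel.
from mcp_vector_search.cli.commands.visualize.graph_builder import get_subproject_color

colors = [get_subproject_color(f"pkg{i}", i) for i in range(10)]
assert colors[8] == colors[0]  # palette has 8 entries, so index 8 wraps to the first
assert colors[9] == colors[1]
```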
+
+
+ def parse_project_dependencies(project_root: Path, subprojects: dict) -> list[dict]:
+     """Parse package.json files to find inter-project dependencies.
+
+     Args:
+         project_root: Root directory of the monorepo
+         subprojects: Dictionary of subproject information
+
+     Returns:
+         List of dependency links between subprojects
+     """
+     dependency_links = []
+
+     for sp_name, sp_data in subprojects.items():
+         package_json = project_root / sp_data["path"] / "package.json"
+
+         if not package_json.exists():
+             continue
+
+         try:
+             with open(package_json) as f:
+                 package_data = json.load(f)
+
+             # Check all dependency types
+             all_deps = {}
+             for dep_type in ["dependencies", "devDependencies", "peerDependencies"]:
+                 if dep_type in package_data:
+                     all_deps.update(package_data[dep_type])
+
+             # Find dependencies on other subprojects
+             for dep_name in all_deps.keys():
+                 # Check if this dependency is another subproject
+                 for other_sp_name in subprojects.keys():
+                     if other_sp_name != sp_name and dep_name == other_sp_name:
+                         # Found inter-project dependency
+                         dependency_links.append(
+                             {
+                                 "source": f"subproject_{sp_name}",
+                                 "target": f"subproject_{other_sp_name}",
+                                 "type": "dependency",
+                             }
+                         )
+
+         except Exception as e:
+             logger.debug(f"Failed to parse {package_json}: {e}")
+             continue
+
+     return dependency_links
+
+
+ def detect_cycles(chunks: list, caller_map: dict) -> list[list[str]]:
+     """Detect TRUE cycles in the call graph using DFS with three-color marking.
+
+     Uses three-color marking to distinguish between:
+     - WHITE (0): Unvisited node, not yet explored
+     - GRAY (1): Currently exploring, node is in the current DFS path
+     - BLACK (2): Fully explored, all descendants processed
+
+     A cycle exists when we encounter a GRAY node during traversal, which means
+     we've found a back edge to a node currently in the exploration path.
+
+     Args:
+         chunks: List of code chunks
+         caller_map: Map of chunk_id to list of caller info
+
+     Returns:
+         List of cycles found, where each cycle is a list of node IDs in the cycle path
+     """
+     cycles_found = []
+     # Three-color constants for DFS cycle detection
+     white, gray, black = 0, 1, 2  # noqa: N806
+     color = {chunk.chunk_id or chunk.id: white for chunk in chunks}
+
+     def dfs(node_id: str, path: list) -> None:
+         """DFS with three-color marking for accurate cycle detection.
+
+         Args:
+             node_id: Current node ID being visited
+             path: List of node IDs in current path (for cycle reconstruction)
+         """
+         if color.get(node_id, white) == black:
+             # Already fully explored, no cycle here
+             return
+
+         if color.get(node_id, white) == gray:
+             # Found a TRUE cycle! Node is in current path
+             try:
+                 cycle_start = path.index(node_id)
+                 cycle_nodes = path[cycle_start:] + [node_id]  # Include back edge
+                 # Only record if cycle length > 1 (avoid self-loops unless intentional)
+                 if len(set(cycle_nodes)) > 1:
+                     cycles_found.append(cycle_nodes)
+             except ValueError:
+                 pass  # Node not in path (shouldn't happen)
+             return
+
+         # Mark as currently exploring
+         color[node_id] = gray
+         path.append(node_id)
+
+         # Follow outgoing edges (external_callers → caller_id)
+         if node_id in caller_map:
+             for caller_info in caller_map[node_id]:
+                 caller_id = caller_info["chunk_id"]
+                 dfs(caller_id, path[:])  # Pass copy of path
+
+         # Mark as fully explored
+         path.pop()
+         color[node_id] = black
+
+     # Run DFS from each unvisited node
+     for chunk in chunks:
+         chunk_id = chunk.chunk_id or chunk.id
+         if color.get(chunk_id, white) == white:
+             dfs(chunk_id, [])
+
+     return cycles_found
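A quick sketch of the detector on a synthetic two-node cycle; the stubs mimic only the `chunk_id`/`id` attributes the function actually reads:

```python
# Sketch only; real chunks come from the index.
from types import SimpleNamespace

from mcp_vector_search.cli.commands.visualize.graph_builder import detect_cycles

chunks = [SimpleNamespace(chunk_id=cid, id=cid) for cid in ("a", "b", "c")]
caller_map = {
    "a": [{"chunk_id": "b"}],  # a -> b
    "b": [{"chunk_id": "a"}],  # b -> a  (closes the cycle)
    "c": [{"chunk_id": "a"}],  # c -> a  (acyclic)
}

print(detect_cycles(chunks, caller_map))
# [['a', 'b', 'a']] - the repeated 'a' records the back edge that closes the loop
```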
+
+
+ async def build_graph_data(
+     chunks: list,
+     database: ChromaVectorDatabase,
+     project_manager: ProjectManager,
+     code_only: bool = False,
+ ) -> dict[str, Any]:
+     """Build complete graph data structure from chunks.
+
+     Args:
+         chunks: List of code chunks from the database
+         database: Vector database instance (for semantic search)
+         project_manager: Project manager instance
+         code_only: If True, exclude documentation chunks
+
+     Returns:
+         Dictionary containing nodes, links, and metadata
+     """
+     # Collect subprojects for monorepo support
+     subprojects = {}
+     for chunk in chunks:
+         if chunk.subproject_name and chunk.subproject_name not in subprojects:
+             subprojects[chunk.subproject_name] = {
+                 "name": chunk.subproject_name,
+                 "path": chunk.subproject_path,
+                 "color": get_subproject_color(chunk.subproject_name, len(subprojects)),
+             }
+
+     # Build graph data structure
+     nodes = []
+     links = []
+     chunk_id_map = {}  # Map chunk IDs to array indices
+     file_nodes = {}  # Track file nodes by path
+     dir_nodes = {}  # Track directory nodes by path
+
+     # Add subproject root nodes for monorepos
+     if subprojects:
+         console.print(
+             f"[cyan]Detected monorepo with {len(subprojects)} subprojects[/cyan]"
+         )
+         for sp_name, sp_data in subprojects.items():
+             node = {
+                 "id": f"subproject_{sp_name}",
+                 "name": sp_name,
+                 "type": "subproject",
+                 "file_path": sp_data["path"] or "",
+                 "start_line": 0,
+                 "end_line": 0,
+                 "complexity": 0,
+                 "color": sp_data["color"],
+                 "depth": 0,
+             }
+             nodes.append(node)
+
+     # Load directory index for enhanced directory metadata
+     console.print("[cyan]Loading directory index...[/cyan]")
+     dir_index_path = (
+         project_manager.project_root / ".mcp-vector-search" / "directory_index.json"
+     )
+     dir_index = DirectoryIndex(dir_index_path)
+     dir_index.load()
+
+     # Create directory nodes from directory index
+     console.print(f"[green]✓[/green] Loaded {len(dir_index.directories)} directories")
+     for dir_path_str, directory in dir_index.directories.items():
+         dir_id = f"dir_{hash(dir_path_str) & 0xFFFFFFFF:08x}"
+         dir_nodes[dir_path_str] = {
+             "id": dir_id,
+             "name": directory.name,
+             "type": "directory",
+             "file_path": dir_path_str,
+             "start_line": 0,
+             "end_line": 0,
+             "complexity": 0,
+             "depth": directory.depth,
+             "dir_path": dir_path_str,
+             "file_count": directory.file_count,
+             "subdirectory_count": directory.subdirectory_count,
+             "total_chunks": directory.total_chunks,
+             "languages": directory.languages or {},
+             "is_package": directory.is_package,
+             "last_modified": directory.last_modified,
+         }
+
+     # Create file nodes from chunks
+     for chunk in chunks:
+         file_path_str = str(chunk.file_path)
+         file_path = Path(file_path_str)
+
+         # Create file node with parent directory reference
+         if file_path_str not in file_nodes:
+             file_id = f"file_{hash(file_path_str) & 0xFFFFFFFF:08x}"
+
+             # Convert absolute path to relative path for parent directory lookup
+             try:
+                 relative_file_path = file_path.relative_to(project_manager.project_root)
+                 parent_dir = relative_file_path.parent
+                 # Use relative path for parent directory (matches directory_index)
+                 parent_dir_str = str(parent_dir) if parent_dir != Path(".") else None
+             except ValueError:
+                 # File is outside project root
+                 parent_dir_str = None
+
+             # Look up parent directory ID from dir_nodes (must match exactly)
+             parent_dir_id = None
+             if parent_dir_str and parent_dir_str in dir_nodes:
+                 parent_dir_id = dir_nodes[parent_dir_str]["id"]
+
+             file_nodes[file_path_str] = {
+                 "id": file_id,
+                 "name": file_path.name,
+                 "type": "file",
+                 "file_path": file_path_str,
+                 "start_line": 0,
+                 "end_line": 0,
+                 "complexity": 0,
+                 "depth": len(file_path.parts) - 1,
+                 "parent_dir_id": parent_dir_id,
+                 "parent_dir_path": parent_dir_str,
+             }
+
+     # Add directory nodes to graph
+     for dir_node in dir_nodes.values():
+         nodes.append(dir_node)
+
+     # Add file nodes to graph
+     for file_node in file_nodes.values():
+         nodes.append(file_node)
+
+     # Compute semantic relationships for code chunks
+     console.print("[cyan]Computing semantic relationships...[/cyan]")
+     code_chunks = [c for c in chunks if c.chunk_type in ["function", "method", "class"]]
+     semantic_links = []
+
+     # Pre-compute top 5 semantic relationships for each code chunk
+     for i, chunk in enumerate(code_chunks):
+         if i % 20 == 0:  # Progress indicator every 20 chunks
+             console.print(f"[dim]Processed {i}/{len(code_chunks)} chunks[/dim]")
+
+         try:
+             # Search for similar chunks using the chunk's content
+             similar_results = await database.search(
+                 query=chunk.content[:500],  # Use first 500 chars for query
+                 limit=6,  # Get 6 (exclude self = 5)
+                 similarity_threshold=0.3,  # Lower threshold to catch more relationships
+             )
+
+             # Filter out self and create semantic links
+             for result in similar_results:
+                 # Construct target chunk_id from file_path and line numbers
+                 target_chunk = next(
+                     (
+                         c
+                         for c in chunks
+                         if str(c.file_path) == str(result.file_path)
+                         and c.start_line == result.start_line
+                         and c.end_line == result.end_line
+                     ),
+                     None,
+                 )
+
+                 if not target_chunk:
+                     continue
+
+                 target_chunk_id = target_chunk.chunk_id or target_chunk.id
+
+                 # Skip self-references
+                 if target_chunk_id == (chunk.chunk_id or chunk.id):
+                     continue
+
+                 # Add semantic link with similarity score
+                 if result.similarity_score >= 0.2:
+                     semantic_links.append(
+                         {
+                             "source": chunk.chunk_id or chunk.id,
+                             "target": target_chunk_id,
+                             "type": "semantic",
+                             "similarity": result.similarity_score,
+                         }
+                     )
+
+                     # Only keep top 5
+                     if (
+                         len(
+                             [
+                                 link
+                                 for link in semantic_links
+                                 if link["source"] == (chunk.chunk_id or chunk.id)
+                             ]
+                         )
+                         >= 5
+                     ):
+                         break
+
+         except Exception as e:
+             logger.debug(
+                 f"Failed to compute semantic relationships for {chunk.chunk_id}: {e}"
+             )
+             continue
+
+     console.print(
+         f"[green]✓[/green] Computed {len(semantic_links)} semantic relationships"
+     )
+
+     def extract_function_calls(code: str) -> set[str]:
+         """Extract actual function calls from Python code using AST.
+
+         Returns set of function names that are actually called (not just mentioned).
+         Avoids false positives from comments, docstrings, and string literals.
+
+         Args:
+             code: Python source code to analyze
+
+         Returns:
+             Set of function names that are actually called in the code
+         """
+         import ast
+
+         calls = set()
+         try:
+             tree = ast.parse(code)
+             for node in ast.walk(tree):
+                 if isinstance(node, ast.Call):
+                     # Handle direct calls: foo()
+                     if isinstance(node.func, ast.Name):
+                         calls.add(node.func.id)
+                     # Handle method calls: obj.foo() - extract 'foo'
+                     elif isinstance(node.func, ast.Attribute):
+                         calls.add(node.func.attr)
+             return calls
+         except SyntaxError:
+             # If code can't be parsed (incomplete, etc.), fall back to empty set
+             # This is safer than false positives from naive substring matching
+             return set()
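The helper is nested inside `build_graph_data`, so it is not importable on its own; the same AST technique can be demonstrated standalone (a sketch of the idea, not the packaged API):

```python
import ast

def calls_in(code: str) -> set[str]:
    # Same idea as the nested extract_function_calls helper above: walk the
    # AST and collect names only from Call nodes, so mentions in comments,
    # docstrings, and string literals are ignored.
    found = set()
    try:
        tree = ast.parse(code)
    except SyntaxError:
        return found
    for node in ast.walk(tree):
        if isinstance(node, ast.Call):
            if isinstance(node.func, ast.Name):
                found.add(node.func.id)
            elif isinstance(node.func, ast.Attribute):
                found.add(node.func.attr)
    return found

sample = '''
def handler(payload):
    """Mentions validate() - but this docstring is ignored."""
    data = parse(payload)  # parse() is a real call
    return data.save()     # attribute call: 'save' is collected
'''
print(calls_in(sample))  # {'parse', 'save'}
```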
+
+     # Compute external caller relationships
+     console.print("[cyan]Computing external caller relationships...[/cyan]")
+     import time
+
+     start_time = time.time()
+     caller_map = {}  # Map chunk_id -> list of caller info
+
+     logger.info(f"Processing {len(code_chunks)} code chunks for external callers...")
+     for chunk_idx, chunk in enumerate(code_chunks):
+         if chunk_idx % 50 == 0:  # Progress every 50 chunks
+             elapsed = time.time() - start_time
+             logger.info(
+                 f"Progress: {chunk_idx}/{len(code_chunks)} chunks ({elapsed:.1f}s elapsed)"
+             )
+             console.print(
+                 f"[dim]Progress: {chunk_idx}/{len(code_chunks)} chunks ({elapsed:.1f}s)[/dim]"
+             )
+         chunk_id = chunk.chunk_id or chunk.id
+         file_path = str(chunk.file_path)
+         function_name = chunk.function_name or chunk.class_name
+
+         if not function_name:
+             continue
+
+         # Search for other chunks that reference this function/class name
+         other_chunks_count = 0
+         for other_chunk in chunks:
+             other_chunks_count += 1
+             if chunk_idx % 50 == 0 and other_chunks_count % 500 == 0:  # Inner progress
+                 logger.debug(
+                     f" Chunk {chunk_idx}: Scanning {other_chunks_count}/{len(chunks)} chunks"
+                 )
+             other_file_path = str(other_chunk.file_path)
+
+             # Only track EXTERNAL callers (different file)
+             if other_file_path == file_path:
+                 continue
+
+             # Extract actual function calls using AST (avoids false positives)
+             actual_calls = extract_function_calls(other_chunk.content)
+
+             # Check if this function is actually called (not just mentioned in comments)
+             if function_name in actual_calls:
+                 other_chunk_id = other_chunk.chunk_id or other_chunk.id
+                 other_name = (
+                     other_chunk.function_name
+                     or other_chunk.class_name
+                     or f"L{other_chunk.start_line}"
+                 )
+
+                 # Skip __init__ functions as callers - they are noise in "called by" lists
+                 # (every class calls __init__ when constructing objects)
+                 if other_name == "__init__":
+                     continue
+
+                 if chunk_id not in caller_map:
+                     caller_map[chunk_id] = []
+
+                 # Store caller information
+                 caller_map[chunk_id].append(
+                     {
+                         "file": other_file_path,
+                         "chunk_id": other_chunk_id,
+                         "name": other_name,
+                         "type": other_chunk.chunk_type,
+                     }
+                 )
+
+                 logger.debug(
+                     f"Found actual call: {other_name} ({other_file_path}) -> "
+                     f"{function_name} ({file_path})"
+                 )
+
+     # Count total caller relationships
+     total_callers = sum(len(callers) for callers in caller_map.values())
+     elapsed_total = time.time() - start_time
+     logger.info(f"Completed external caller computation in {elapsed_total:.1f}s")
+     console.print(
+         f"[green]✓[/green] Found {total_callers} external caller relationships ({elapsed_total:.1f}s)"
+     )
+
+     # Detect circular dependencies in caller relationships
+     console.print("[cyan]Detecting circular dependencies...[/cyan]")
+     cycles = detect_cycles(chunks, caller_map)
+
+     # Mark cycle links
+     cycle_links = []
+     if cycles:
+         console.print(f"[yellow]⚠ Found {len(cycles)} circular dependencies[/yellow]")
+
+         # For each cycle, create links marking the cycle
+         for cycle in cycles:
+             # Create links for the cycle path: A → B → C → A
+             for i in range(len(cycle)):
+                 source = cycle[i]
+                 target = cycle[(i + 1) % len(cycle)]  # Wrap around to form cycle
+                 cycle_links.append(
+                     {
+                         "source": source,
+                         "target": target,
+                         "type": "caller",
+                         "is_cycle": True,
+                     }
+                 )
+     else:
+         console.print("[green]✓[/green] No circular dependencies detected")
+
+     # Add chunk nodes
+     for chunk in chunks:
+         chunk_id = chunk.chunk_id or chunk.id
+         node = {
+             "id": chunk_id,
+             "name": chunk.function_name or chunk.class_name or f"L{chunk.start_line}",
+             "type": chunk.chunk_type,
+             "file_path": str(chunk.file_path),
+             "start_line": chunk.start_line,
+             "end_line": chunk.end_line,
+             "complexity": chunk.complexity_score,
+             "parent_id": chunk.parent_chunk_id,
+             "depth": chunk.chunk_depth,
+             "content": chunk.content,  # Add content for code viewer
+             "docstring": chunk.docstring,
+             "language": chunk.language,
+         }
+
+         # Add caller information if available
+         if chunk_id in caller_map:
+             node["callers"] = caller_map[chunk_id]
+
+         # Add subproject info for monorepos
+         if chunk.subproject_name:
+             node["subproject"] = chunk.subproject_name
+             node["color"] = subprojects[chunk.subproject_name]["color"]
+
+         nodes.append(node)
+         chunk_id_map[node["id"]] = len(nodes) - 1
+
+     # Link directories to their parent directories (hierarchical structure)
+     for dir_path_str, dir_info in dir_index.directories.items():
+         if dir_info.parent_path:
+             parent_path_str = str(dir_info.parent_path)
+             if parent_path_str in dir_nodes:
+                 parent_dir_id = f"dir_{hash(parent_path_str) & 0xFFFFFFFF:08x}"
+                 child_dir_id = f"dir_{hash(dir_path_str) & 0xFFFFFFFF:08x}"
+                 links.append(
+                     {
+                         "source": parent_dir_id,
+                         "target": child_dir_id,
+                         "type": "dir_hierarchy",
+                     }
+                 )
+
+     # Link directories to subprojects in monorepos (simple flat structure)
+     if subprojects:
+         for dir_path_str, dir_node in dir_nodes.items():
+             for sp_name, sp_data in subprojects.items():
+                 if dir_path_str.startswith(sp_data.get("path", "")):
+                     links.append(
+                         {
+                             "source": f"subproject_{sp_name}",
+                             "target": dir_node["id"],
+                             "type": "dir_containment",
+                         }
+                     )
+                     break
+
+     # Link files to their parent directories
+     for _file_path_str, file_node in file_nodes.items():
+         if file_node.get("parent_dir_id"):
+             links.append(
+                 {
+                     "source": file_node["parent_dir_id"],
+                     "target": file_node["id"],
+                     "type": "dir_containment",
+                 }
+             )
+
+     # Build hierarchical links from parent-child relationships
+     for chunk in chunks:
+         chunk_id = chunk.chunk_id or chunk.id
+         file_path = str(chunk.file_path)
+
+         # Link chunk to its file node if it has no parent (top-level chunks)
+         if not chunk.parent_chunk_id and file_path in file_nodes:
+             links.append(
+                 {
+                     "source": file_nodes[file_path]["id"],
+                     "target": chunk_id,
+                     "type": "file_containment",
+                 }
+             )
+
+         # Link to subproject root if in monorepo
+         if chunk.subproject_name and not chunk.parent_chunk_id:
+             links.append(
+                 {
+                     "source": f"subproject_{chunk.subproject_name}",
+                     "target": chunk_id,
+                 }
+             )
+
+         # Link to parent chunk
+         if chunk.parent_chunk_id and chunk.parent_chunk_id in chunk_id_map:
+             links.append(
+                 {
+                     "source": chunk.parent_chunk_id,
+                     "target": chunk_id,
+                 }
+             )
+
+     # Add semantic relationship links
+     links.extend(semantic_links)
+
+     # Add cycle links
+     links.extend(cycle_links)
+
+     # Parse inter-project dependencies for monorepos
+     if subprojects:
+         console.print("[cyan]Parsing inter-project dependencies...[/cyan]")
+         dep_links = parse_project_dependencies(
+             project_manager.project_root, subprojects
+         )
+         links.extend(dep_links)
+         if dep_links:
+             console.print(
+                 f"[green]✓[/green] Found {len(dep_links)} inter-project dependencies"
+             )
+
+     # Get stats
+     stats = await database.get_stats()
+
+     # Build final graph data
+     graph_data = {
+         "nodes": nodes,
+         "links": links,
+         "metadata": {
+             "total_chunks": len(chunks),
+             "total_files": stats.total_files,
+             "languages": stats.languages,
+             "is_monorepo": len(subprojects) > 0,
+             "subprojects": list(subprojects.keys()) if subprojects else [],
+         },
+     }
+
+     return graph_data
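The returned structure is plain dicts and lists, ready for the JSON/HTML exporters added in this release. A hedged sketch of the shape; every value below is illustrative, not taken from a real index:

```python
graph_data = {
    "nodes": [
        {"id": "dir_0a1b2c3d", "type": "directory", "name": "core", "depth": 1},
        {"id": "file_4e5f6a7b", "type": "file", "name": "search.py",
         "parent_dir_id": "dir_0a1b2c3d"},
        {"id": "chunk-123", "type": "function", "name": "semantic_search",
         "complexity": 4, "callers": [{"name": "run_query", "file": "cli/main.py"}]},
    ],
    "links": [
        {"source": "dir_0a1b2c3d", "target": "file_4e5f6a7b", "type": "dir_containment"},
        {"source": "file_4e5f6a7b", "target": "chunk-123", "type": "file_containment"},
        {"source": "chunk-123", "target": "chunk-456", "type": "semantic", "similarity": 0.41},
    ],
    "metadata": {"total_chunks": 2, "total_files": 1, "languages": {"python": 1},
                 "is_monorepo": False, "subprojects": []},
}
```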
+
+
+ def apply_state(graph_data: dict, state: VisualizationState) -> dict:
+     """Apply visualization state to graph data.
+
+     Filters nodes and edges based on current visualization state,
+     including visibility and AST-only edge filtering.
+
+     Args:
+         graph_data: Full graph data dictionary (nodes, links, metadata)
+         state: Current visualization state
+
+     Returns:
+         Filtered graph data with only visible nodes and edges
+
+     Example:
+         >>> state = VisualizationState()
+         >>> state.expand_node("dir1", "directory", ["file1", "file2"])
+         >>> filtered = apply_state(graph_data, state)
+         >>> len(filtered["nodes"]) < len(graph_data["nodes"])
+         True
+     """
+     # Get visible node IDs from state
+     visible_node_ids = set(state.get_visible_nodes())
+
+     # Filter nodes
+     filtered_nodes = [
+         node for node in graph_data["nodes"] if node["id"] in visible_node_ids
+     ]
+
+     # Build node ID to node data map for quick lookup
+     node_map = {node["id"]: node for node in graph_data["nodes"]}
+
+     # Get visible edges from state (AST calls only in FILE_DETAIL mode)
+     expanded_file_id = None
+     if state.view_mode.value == "file_detail" and state.expansion_path:
+         # Find the file node in expansion path
+         for node_id in reversed(state.expansion_path):
+             node = node_map.get(node_id)
+             if node and node.get("type") == "file":
+                 expanded_file_id = node_id
+                 break
+
+     visible_edge_ids = state.get_visible_edges(
+         graph_data["links"], expanded_file_id=expanded_file_id
+     )
+
+     # Filter links to only visible edges
+     filtered_links = []
+     for link in graph_data["links"]:
+         source_id = link.get("source")
+         target_id = link.get("target")
+
+         # Skip if either node not visible
+         if source_id not in visible_node_ids or target_id not in visible_node_ids:
+             continue
+
+         # In FILE_DETAIL mode, only show edges in visible_edge_ids
+         if state.view_mode.value == "file_detail":
+             if (source_id, target_id) in visible_edge_ids:
+                 filtered_links.append(link)
+         elif state.view_mode.value in ("tree_root", "tree_expanded"):
+             # In tree modes, show containment edges only
+             if link.get("type") in ("dir_containment", "dir_hierarchy"):
+                 filtered_links.append(link)
+
+     return {
+         "nodes": filtered_nodes,
+         "links": filtered_links,
+         "metadata": graph_data.get("metadata", {}),
+         "state": state.to_dict(),  # Include serialized state
+     }
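A sketch of the filtering round-trip, reusing the illustrative `graph_data` above. The `expand_node` call mirrors the docstring example; the no-argument `VisualizationState()` constructor is an assumption:

```python
# Sketch only; method names are taken from this diff.
from mcp_vector_search.cli.commands.visualize.graph_builder import apply_state
from mcp_vector_search.cli.commands.visualize.state_manager import VisualizationState

state = VisualizationState()
state.expand_node("dir_0a1b2c3d", "directory", ["file_4e5f6a7b"])

filtered = apply_state(graph_data, state)
# Nodes outside state.get_visible_nodes() are dropped; in tree modes only
# dir_containment / dir_hierarchy links between visible nodes survive.
print(len(filtered["nodes"]), len(filtered["links"]))
```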