mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_vector_search/__init__.py +3 -3
- mcp_vector_search/analysis/__init__.py +111 -0
- mcp_vector_search/analysis/baseline/__init__.py +68 -0
- mcp_vector_search/analysis/baseline/comparator.py +462 -0
- mcp_vector_search/analysis/baseline/manager.py +621 -0
- mcp_vector_search/analysis/collectors/__init__.py +74 -0
- mcp_vector_search/analysis/collectors/base.py +164 -0
- mcp_vector_search/analysis/collectors/cohesion.py +463 -0
- mcp_vector_search/analysis/collectors/complexity.py +743 -0
- mcp_vector_search/analysis/collectors/coupling.py +1162 -0
- mcp_vector_search/analysis/collectors/halstead.py +514 -0
- mcp_vector_search/analysis/collectors/smells.py +325 -0
- mcp_vector_search/analysis/debt.py +516 -0
- mcp_vector_search/analysis/interpretation.py +685 -0
- mcp_vector_search/analysis/metrics.py +414 -0
- mcp_vector_search/analysis/reporters/__init__.py +7 -0
- mcp_vector_search/analysis/reporters/console.py +646 -0
- mcp_vector_search/analysis/reporters/markdown.py +480 -0
- mcp_vector_search/analysis/reporters/sarif.py +377 -0
- mcp_vector_search/analysis/storage/__init__.py +93 -0
- mcp_vector_search/analysis/storage/metrics_store.py +762 -0
- mcp_vector_search/analysis/storage/schema.py +245 -0
- mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
- mcp_vector_search/analysis/trends.py +308 -0
- mcp_vector_search/analysis/visualizer/__init__.py +90 -0
- mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
- mcp_vector_search/analysis/visualizer/exporter.py +484 -0
- mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
- mcp_vector_search/analysis/visualizer/schemas.py +525 -0
- mcp_vector_search/cli/commands/analyze.py +1062 -0
- mcp_vector_search/cli/commands/chat.py +1455 -0
- mcp_vector_search/cli/commands/index.py +621 -5
- mcp_vector_search/cli/commands/index_background.py +467 -0
- mcp_vector_search/cli/commands/init.py +13 -0
- mcp_vector_search/cli/commands/install.py +597 -335
- mcp_vector_search/cli/commands/install_old.py +8 -4
- mcp_vector_search/cli/commands/mcp.py +78 -6
- mcp_vector_search/cli/commands/reset.py +68 -26
- mcp_vector_search/cli/commands/search.py +224 -8
- mcp_vector_search/cli/commands/setup.py +1184 -0
- mcp_vector_search/cli/commands/status.py +339 -5
- mcp_vector_search/cli/commands/uninstall.py +276 -357
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +292 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +600 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
- mcp_vector_search/cli/didyoumean.py +27 -2
- mcp_vector_search/cli/main.py +127 -160
- mcp_vector_search/cli/output.py +158 -13
- mcp_vector_search/config/__init__.py +4 -0
- mcp_vector_search/config/default_thresholds.yaml +52 -0
- mcp_vector_search/config/settings.py +12 -0
- mcp_vector_search/config/thresholds.py +273 -0
- mcp_vector_search/core/__init__.py +16 -0
- mcp_vector_search/core/auto_indexer.py +3 -3
- mcp_vector_search/core/boilerplate.py +186 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/database.py +406 -94
- mcp_vector_search/core/embeddings.py +24 -0
- mcp_vector_search/core/exceptions.py +11 -0
- mcp_vector_search/core/git.py +380 -0
- mcp_vector_search/core/git_hooks.py +4 -4
- mcp_vector_search/core/indexer.py +632 -54
- mcp_vector_search/core/llm_client.py +756 -0
- mcp_vector_search/core/models.py +91 -1
- mcp_vector_search/core/project.py +17 -0
- mcp_vector_search/core/relationships.py +473 -0
- mcp_vector_search/core/scheduler.py +11 -11
- mcp_vector_search/core/search.py +179 -29
- mcp_vector_search/mcp/server.py +819 -9
- mcp_vector_search/parsers/python.py +285 -5
- mcp_vector_search/utils/__init__.py +2 -0
- mcp_vector_search/utils/gitignore.py +0 -3
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +66 -4
- mcp_vector_search/utils/timing.py +10 -6
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
- mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
- mcp_vector_search/cli/commands/visualize.py +0 -1467
- mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/cli/commands/visualize/graph_builder.py (new file, +647 lines)
@@ -0,0 +1,647 @@
"""Graph data construction logic for code visualization.

This module handles building the graph data structure from code chunks,
including nodes, links, semantic relationships, and cycle detection.
"""

import json
from pathlib import Path
from typing import Any

from loguru import logger
from rich.console import Console

from ....analysis.trends import TrendTracker
from ....core.database import ChromaVectorDatabase
from ....core.directory_index import DirectoryIndex
from ....core.project import ProjectManager
from .state_manager import VisualizationState

console = Console()


def extract_chunk_name(content: str, fallback: str = "chunk") -> str:
    """Extract first meaningful word from chunk content for labeling.

    Args:
        content: The chunk's code content
        fallback: Fallback name if no meaningful word found

    Returns:
        First meaningful identifier found in the content

    Examples:
        >>> extract_chunk_name("def calculate_total(...)")
        'calculate_total'
        >>> extract_chunk_name("class UserManager:")
        'UserManager'
        >>> extract_chunk_name("# Comment about users")
        'users'
        >>> extract_chunk_name("import pandas as pd")
        'pandas'
    """
    import re

    # Skip common keywords that aren't meaningful as chunk labels
    skip_words = {
        "def",
        "class",
        "function",
        "const",
        "let",
        "var",
        "import",
        "from",
        "return",
        "if",
        "else",
        "elif",
        "for",
        "while",
        "try",
        "except",
        "finally",
        "with",
        "as",
        "async",
        "await",
        "yield",
        "self",
        "this",
        "true",
        "false",
        "none",
        "null",
        "undefined",
        "public",
        "private",
        "protected",
        "static",
        "export",
        "default",
    }

    # Find all words (alphanumeric + underscore, at least 2 chars)
    words = re.findall(r"\b[a-zA-Z_][a-zA-Z0-9_]+\b", content)

    for word in words:
        if word.lower() not in skip_words:
            return word

    return fallback
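
# Illustrative extra examples (inferred from the code above, not part of the
# released file): skip-word matching is case-insensitive, and identifiers must
# be at least two characters long.
#
#     >>> extract_chunk_name("async def fetch_users(db):")
#     'fetch_users'
#     >>> extract_chunk_name("x = 1")
#     'chunk'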


def get_subproject_color(subproject_name: str, index: int) -> str:
    """Get a consistent color for a subproject.

    Args:
        subproject_name: Name of the subproject
        index: Index of the subproject in the list

    Returns:
        Hex color code
    """
    # Color palette for subprojects (GitHub-style colors)
    colors = [
        "#238636",  # Green
        "#1f6feb",  # Blue
        "#d29922",  # Yellow
        "#8957e5",  # Purple
        "#da3633",  # Red
        "#bf8700",  # Orange
        "#1a7f37",  # Dark green
        "#0969da",  # Dark blue
    ]
    return colors[index % len(colors)]
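
# Illustrative note (inferred from the code above): the palette wraps modulo 8,
# so a ninth subproject reuses the first color, and assignments stay stable only
# as long as subproject discovery order is stable.
#
#     >>> get_subproject_color("api", 8)  # "api" is a hypothetical name
#     '#238636'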


def parse_project_dependencies(project_root: Path, subprojects: dict) -> list[dict]:
    """Parse package.json files to find inter-project dependencies.

    Args:
        project_root: Root directory of the monorepo
        subprojects: Dictionary of subproject information

    Returns:
        List of dependency links between subprojects
    """
    dependency_links = []

    for sp_name, sp_data in subprojects.items():
        package_json = project_root / sp_data["path"] / "package.json"

        if not package_json.exists():
            continue

        try:
            with open(package_json) as f:
                package_data = json.load(f)

            # Check all dependency types
            all_deps = {}
            for dep_type in ["dependencies", "devDependencies", "peerDependencies"]:
                if dep_type in package_data:
                    all_deps.update(package_data[dep_type])

            # Find dependencies on other subprojects
            for dep_name in all_deps.keys():
                # Check if this dependency is another subproject
                for other_sp_name in subprojects.keys():
                    if other_sp_name != sp_name and dep_name == other_sp_name:
                        # Found inter-project dependency
                        dependency_links.append(
                            {
                                "source": f"subproject_{sp_name}",
                                "target": f"subproject_{other_sp_name}",
                                "type": "dependency",
                            }
                        )

        except Exception as e:
            logger.debug(f"Failed to parse {package_json}: {e}")
            continue

    return dependency_links
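
# Illustrative sketch (hypothetical names, not in the original file): with
# subprojects {"web": {...}, "ui": {...}} and web/package.json containing
# {"dependencies": {"ui": "1.0.0"}}, the function yields a single link:
#     {"source": "subproject_web", "target": "subproject_ui", "type": "dependency"}
# Matching is by exact package name, so a scoped package like "@acme/ui" only
# matches a subproject literally named "@acme/ui".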


def detect_cycles(chunks: list, caller_map: dict) -> list[list[str]]:
    """Detect TRUE cycles in the call graph using DFS with three-color marking.

    Uses three-color marking to distinguish between:
    - WHITE (0): Unvisited node, not yet explored
    - GRAY (1): Currently exploring, node is in the current DFS path
    - BLACK (2): Fully explored, all descendants processed

    A cycle exists when we encounter a GRAY node during traversal, which means
    we've found a back edge to a node currently in the exploration path.

    Args:
        chunks: List of code chunks
        caller_map: Map of chunk_id to list of caller info

    Returns:
        List of cycles found, where each cycle is a list of node IDs in the cycle path
    """
    cycles_found = []
    # Three-color constants for DFS cycle detection
    white, gray, black = 0, 1, 2  # noqa: N806
    color = {chunk.chunk_id or chunk.id: white for chunk in chunks}

    def dfs(node_id: str, path: list) -> None:
        """DFS with three-color marking for accurate cycle detection.

        Args:
            node_id: Current node ID being visited
            path: List of node IDs in current path (for cycle reconstruction)
        """
        if color.get(node_id, white) == black:
            # Already fully explored, no cycle here
            return

        if color.get(node_id, white) == gray:
            # Found a TRUE cycle! Node is in current path
            try:
                cycle_start = path.index(node_id)
                cycle_nodes = path[cycle_start:] + [node_id]  # Include back edge
                # Only record if cycle length > 1 (avoid self-loops unless intentional)
                if len(set(cycle_nodes)) > 1:
                    cycles_found.append(cycle_nodes)
            except ValueError:
                pass  # Node not in path (shouldn't happen)
            return

        # Mark as currently exploring
        color[node_id] = gray
        path.append(node_id)

        # Follow outgoing edges (external_callers → caller_id)
        if node_id in caller_map:
            for caller_info in caller_map[node_id]:
                caller_id = caller_info["chunk_id"]
                dfs(caller_id, path[:])  # Pass copy of path

        # Mark as fully explored
        path.pop()
        color[node_id] = black

    # Run DFS from each unvisited node
    for chunk in chunks:
        chunk_id = chunk.chunk_id or chunk.id
        if color.get(chunk_id, white) == white:
            dfs(chunk_id, [])

    return cycles_found
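
# Illustrative worked example (hypothetical IDs, not in the original file):
# with two chunks "a" and "b" that call each other,
#     caller_map = {"a": [{"chunk_id": "b"}], "b": [{"chunk_id": "a"}]}
# the DFS from "a" walks a -> b -> a, finds "a" GRAY, and records the cycle
# path ["a", "b", "a"]; on an acyclic map it returns [].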


async def build_graph_data(
    chunks: list,
    database: ChromaVectorDatabase,
    project_manager: ProjectManager,
    code_only: bool = False,
) -> dict[str, Any]:
    """Build complete graph data structure from chunks.

    Args:
        chunks: List of code chunks from the database
        database: Vector database instance (for semantic search)
        project_manager: Project manager instance
        code_only: If True, exclude documentation chunks

    Returns:
        Dictionary containing nodes, links, and metadata
    """
    # Collect subprojects for monorepo support
    subprojects = {}
    for chunk in chunks:
        if chunk.subproject_name and chunk.subproject_name not in subprojects:
            subprojects[chunk.subproject_name] = {
                "name": chunk.subproject_name,
                "path": chunk.subproject_path,
                "color": get_subproject_color(chunk.subproject_name, len(subprojects)),
            }

    # Build graph data structure
    nodes = []
    links = []
    chunk_id_map = {}  # Map chunk IDs to array indices
    file_nodes = {}  # Track file nodes by path
    dir_nodes = {}  # Track directory nodes by path

    # Add subproject root nodes for monorepos
    if subprojects:
        console.print(
            f"[cyan]Detected monorepo with {len(subprojects)} subprojects[/cyan]"
        )
        for sp_name, sp_data in subprojects.items():
            node = {
                "id": f"subproject_{sp_name}",
                "name": sp_name,
                "type": "subproject",
                "file_path": sp_data["path"] or "",
                "start_line": 0,
                "end_line": 0,
                "complexity": 0,
                "color": sp_data["color"],
                "depth": 0,
            }
            nodes.append(node)

    # Load directory index for enhanced directory metadata
    console.print("[cyan]Loading directory index...[/cyan]")
    dir_index_path = (
        project_manager.project_root / ".mcp-vector-search" / "directory_index.json"
    )
    dir_index = DirectoryIndex(dir_index_path)
    dir_index.load()

    # Create directory nodes from directory index
    console.print(f"[green]✓[/green] Loaded {len(dir_index.directories)} directories")
    for dir_path_str, directory in dir_index.directories.items():
        dir_id = f"dir_{hash(dir_path_str) & 0xFFFFFFFF:08x}"

        # Compute parent directory ID (convert Path to string for JSON serialization)
        parent_dir_id = None
        parent_path_str = str(directory.parent_path) if directory.parent_path else None
        if parent_path_str:
            parent_dir_id = f"dir_{hash(parent_path_str) & 0xFFFFFFFF:08x}"

        dir_nodes[dir_path_str] = {
            "id": dir_id,
            "name": directory.name,
            "type": "directory",
            "file_path": dir_path_str,
            "start_line": 0,
            "end_line": 0,
            "complexity": 0,
            "depth": directory.depth,
            "dir_path": dir_path_str,
            "parent_id": parent_dir_id,  # Link to parent directory
            "parent_path": parent_path_str,  # String for JSON serialization
            "file_count": directory.file_count,
            "subdirectory_count": directory.subdirectory_count,
            "total_chunks": directory.total_chunks,
            "languages": directory.languages or {},
            "is_package": directory.is_package,
            "last_modified": directory.last_modified,
        }
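
    # NOTE (added observation, not in the original file): CPython salts str
    # hash() per process unless PYTHONHASHSEED is set, so these "dir_..." and
    # "file_..." IDs are stable within a single run but differ across runs.
    # That is safe here because file parent lookups hash the same strings in
    # the same process.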

    # Create file nodes from chunks
    # First pass: create file node entries
    for chunk in chunks:
        file_path_str = str(chunk.file_path)
        file_path = Path(file_path_str)

        # Create file node with parent directory reference
        if file_path_str not in file_nodes:
            file_id = f"file_{hash(file_path_str) & 0xFFFFFFFF:08x}"

            # Convert absolute path to relative path for parent directory lookup
            try:
                relative_file_path = file_path.relative_to(project_manager.project_root)
                parent_dir = relative_file_path.parent
                # Use relative path for parent directory (matches directory_index)
                parent_dir_str = str(parent_dir) if parent_dir != Path(".") else None
            except ValueError:
                # File is outside project root
                parent_dir_str = None

            # Look up parent directory ID from dir_nodes (must match exactly)
            parent_dir_id = None
            if parent_dir_str and parent_dir_str in dir_nodes:
                parent_dir_id = dir_nodes[parent_dir_str]["id"]

            file_nodes[file_path_str] = {
                "id": file_id,
                "name": file_path.name,
                "type": "file",
                "file_path": file_path_str,
                "start_line": 0,
                "end_line": 0,
                "complexity": 0,
                "depth": len(file_path.parts) - 1,
                "parent_id": parent_dir_id,  # Consistent with directory nodes
                "parent_path": parent_dir_str,
                "chunk_count": 0,  # Will be computed below
            }

    # Second pass: count chunks per file (pre-compute for consistent sizing)
    for chunk in chunks:
        file_path_str = str(chunk.file_path)
        if file_path_str in file_nodes:
            file_nodes[file_path_str]["chunk_count"] += 1

    # Add directory nodes to graph
    for dir_node in dir_nodes.values():
        nodes.append(dir_node)

    # Add file nodes to graph
    for file_node in file_nodes.values():
        nodes.append(file_node)

    # Link directories to their parent directories
    for dir_node in dir_nodes.values():
        if dir_node.get("parent_id"):
            links.append(
                {
                    "source": dir_node["parent_id"],
                    "target": dir_node["id"],
                    "type": "dir_containment",
                }
            )

    # Skip ALL relationship computation at startup for instant loading
    # Relationships are lazy-loaded on-demand via /api/relationships/{chunk_id}
    # This avoids the expensive 5+ minute semantic computation
    caller_map: dict = {}  # Empty - callers lazy-loaded via API
    console.print(
        "[green]✓[/green] Skipping relationship computation (lazy-loaded on node expand)"
    )
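
    # NOTE (added observation, not in the original file): because caller_map
    # stays empty here, the "callers" field below is never attached at startup
    # and detect_cycles() run on this map would report no cycles; call edges
    # appear only after the relationships endpoint populates them on expand.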

    # Add chunk nodes
    for chunk in chunks:
        chunk_id = chunk.chunk_id or chunk.id

        # Generate meaningful chunk name
        chunk_name = chunk.function_name or chunk.class_name
        if not chunk_name:
            # Extract meaningful name from content
            chunk_name = extract_chunk_name(
                chunk.content, fallback=f"chunk_{chunk.start_line}"
            )
            logger.debug(
                f"Generated chunk name '{chunk_name}' for {chunk.chunk_type} at {chunk.file_path}:{chunk.start_line}"
            )

        # Determine parent_id: use parent_chunk_id if it exists, else use file node ID
        file_path_str = str(chunk.file_path)
        parent_id = chunk.parent_chunk_id
        if not parent_id and file_path_str in file_nodes:
            # Top-level chunk: set parent to file node for proper tree structure
            parent_id = file_nodes[file_path_str]["id"]

        node = {
            "id": chunk_id,
            "name": chunk_name,
            "type": chunk.chunk_type,
            "file_path": file_path_str,
            "start_line": chunk.start_line,
            "end_line": chunk.end_line,
            "complexity": chunk.complexity_score,
            "parent_id": parent_id,  # Now properly set for all chunks
            "depth": chunk.chunk_depth,
            "content": chunk.content,  # Add content for code viewer
            "docstring": chunk.docstring,
            "language": chunk.language,
        }

        # Add structural analysis metrics if available
        if (
            hasattr(chunk, "cognitive_complexity")
            and chunk.cognitive_complexity is not None
        ):
            node["cognitive_complexity"] = chunk.cognitive_complexity
        if (
            hasattr(chunk, "cyclomatic_complexity")
            and chunk.cyclomatic_complexity is not None
        ):
            node["cyclomatic_complexity"] = chunk.cyclomatic_complexity
        if hasattr(chunk, "complexity_grade") and chunk.complexity_grade is not None:
            node["complexity_grade"] = chunk.complexity_grade
        if hasattr(chunk, "code_smells") and chunk.code_smells:
            node["smells"] = chunk.code_smells
        if hasattr(chunk, "smell_count") and chunk.smell_count is not None:
            node["smell_count"] = chunk.smell_count
        if hasattr(chunk, "quality_score") and chunk.quality_score is not None:
            node["quality_score"] = chunk.quality_score
        if hasattr(chunk, "lines_of_code") and chunk.lines_of_code is not None:
            node["lines_of_code"] = chunk.lines_of_code

        # Add caller information if available
        if chunk_id in caller_map:
            node["callers"] = caller_map[chunk_id]

        # Add subproject info for monorepos
        if chunk.subproject_name:
            node["subproject"] = chunk.subproject_name
            node["color"] = subprojects[chunk.subproject_name]["color"]

        nodes.append(node)
        chunk_id_map[node["id"]] = len(nodes) - 1
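
    # Illustrative shape of one resulting chunk node (hypothetical values,
    # not in the original file):
    #     {"id": "a1b2c3", "name": "calculate_total", "type": "function",
    #      "file_path": "src/billing.py", "start_line": 10, "end_line": 42,
    #      "complexity": 3.0, "parent_id": "file_0a1b2c3d", "depth": 0, ...}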

    # NOTE: Directory parent→child links already created above via dir_containment
    # (removed duplicate dir_hierarchy link creation that caused duplicate paths)

    # Link directories to subprojects in monorepos (simple flat structure)
    if subprojects:
        for dir_path_str, dir_node in dir_nodes.items():
            for sp_name, sp_data in subprojects.items():
                if dir_path_str.startswith(sp_data.get("path", "")):
                    links.append(
                        {
                            "source": f"subproject_{sp_name}",
                            "target": dir_node["id"],
                            "type": "dir_containment",
                        }
                    )
                    break

    # Link files to their parent directories
    for _file_path_str, file_node in file_nodes.items():
        if file_node.get("parent_id"):
            links.append(
                {
                    "source": file_node["parent_id"],
                    "target": file_node["id"],
                    "type": "dir_containment",
                }
            )

    # Build hierarchical links from parent-child relationships
    for chunk in chunks:
        chunk_id = chunk.chunk_id or chunk.id
        file_path = str(chunk.file_path)

        # Link chunk to its file node if it has no parent (top-level chunks)
        if not chunk.parent_chunk_id and file_path in file_nodes:
            links.append(
                {
                    "source": file_nodes[file_path]["id"],
                    "target": chunk_id,
                    "type": "file_containment",
                }
            )

        # Link to subproject root if in monorepo
        if chunk.subproject_name and not chunk.parent_chunk_id:
            links.append(
                {
                    "source": f"subproject_{chunk.subproject_name}",
                    "target": chunk_id,
                    "type": "subproject_containment",
                }
            )

        # Link to parent chunk (class -> method hierarchy)
        if chunk.parent_chunk_id and chunk.parent_chunk_id in chunk_id_map:
            links.append(
                {
                    "source": chunk.parent_chunk_id,
                    "target": chunk_id,
                    "type": "chunk_hierarchy",  # Explicitly mark chunk parent-child relationships
                }
            )

    # Semantic and caller relationships are lazy-loaded via /api/relationships/{chunk_id}
    # No relationship links at startup for instant loading

    # Parse inter-project dependencies for monorepos
    if subprojects:
        console.print("[cyan]Parsing inter-project dependencies...[/cyan]")
        dep_links = parse_project_dependencies(
            project_manager.project_root, subprojects
        )
        links.extend(dep_links)
        if dep_links:
            console.print(
                f"[green]✓[/green] Found {len(dep_links)} inter-project dependencies"
            )

    # Get stats
    stats = await database.get_stats()

    # Load trend data for time series visualization
    trend_tracker = TrendTracker(project_manager.project_root)
    trend_summary = trend_tracker.get_trend_summary(days=90)  # Last 90 days

    # Build final graph data
    graph_data = {
        "nodes": nodes,
        "links": links,
        "metadata": {
            "total_chunks": len(chunks),
            "total_files": stats.total_files,
            "languages": stats.languages,
            "is_monorepo": len(subprojects) > 0,
            "subprojects": list(subprojects.keys()) if subprojects else [],
        },
        "trends": trend_summary,  # Include trend data for visualization
    }

    return graph_data
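
# Illustrative call site (hypothetical setup, not in the original file);
# build_graph_data is async and must be awaited from an event loop:
#
#     graph_data = await build_graph_data(chunks, database, project_manager)
#     print(graph_data["metadata"]["total_chunks"], len(graph_data["links"]))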


def apply_state(graph_data: dict, state: VisualizationState) -> dict:
    """Apply visualization state to graph data.

    Filters nodes and edges based on current visualization state,
    including visibility and AST-only edge filtering.

    Args:
        graph_data: Full graph data dictionary (nodes, links, metadata)
        state: Current visualization state

    Returns:
        Filtered graph data with only visible nodes and edges

    Example:
        >>> state = VisualizationState()
        >>> state.expand_node("dir1", "directory", ["file1", "file2"])
        >>> filtered = apply_state(graph_data, state)
        >>> len(filtered["nodes"]) < len(graph_data["nodes"])
        True
    """
    # Get visible node IDs from state
    visible_node_ids = set(state.get_visible_nodes())

    # Filter nodes
    filtered_nodes = [
        node for node in graph_data["nodes"] if node["id"] in visible_node_ids
    ]

    # Build node ID to node data map for quick lookup
    node_map = {node["id"]: node for node in graph_data["nodes"]}

    # Get visible edges from state (AST calls only in FILE_DETAIL mode)
    expanded_file_id = None
    if state.view_mode.value == "file_detail" and state.expansion_path:
        # Find the file node in expansion path
        for node_id in reversed(state.expansion_path):
            node = node_map.get(node_id)
            if node and node.get("type") == "file":
                expanded_file_id = node_id
                break

    visible_edge_ids = state.get_visible_edges(
        graph_data["links"], expanded_file_id=expanded_file_id
    )

    # Filter links to only visible edges
    filtered_links = []
    for link in graph_data["links"]:
        source_id = link.get("source")
        target_id = link.get("target")

        # Skip if either node not visible
        if source_id not in visible_node_ids or target_id not in visible_node_ids:
            continue

        # In FILE_DETAIL mode, only show edges in visible_edge_ids
        if state.view_mode.value == "file_detail":
            if (source_id, target_id) in visible_edge_ids:
                filtered_links.append(link)
        elif state.view_mode.value in ("tree_root", "tree_expanded"):
            # In tree modes, show containment edges only
            # Must include file_containment to link code chunks to their parent files
            if link.get("type") in (
                "dir_containment",
                "dir_hierarchy",
                "file_containment",
            ):
                filtered_links.append(link)

    return {
        "nodes": filtered_nodes,
        "links": filtered_links,
        "metadata": graph_data.get("metadata", {}),
        "state": state.to_dict(),  # Include serialized state
    }
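
Note on apply_state (an inference from the code above, not part of the diff): in view modes other than "file_detail", "tree_root", and "tree_expanded", neither branch appends links, so all edges are filtered out even when both endpoints are visible. A minimal sanity check, assuming VisualizationState() starts in one of the tree modes:

    state = VisualizationState()
    filtered = apply_state(graph_data, state)
    assert all(
        link["type"] in ("dir_containment", "dir_hierarchy", "file_containment")
        for link in filtered["links"]
    )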