mcp_vector_search-0.15.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic.
- mcp_vector_search/__init__.py +10 -0
- mcp_vector_search/cli/__init__.py +1 -0
- mcp_vector_search/cli/commands/__init__.py +1 -0
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/chat.py +534 -0
- mcp_vector_search/cli/commands/config.py +393 -0
- mcp_vector_search/cli/commands/demo.py +358 -0
- mcp_vector_search/cli/commands/index.py +762 -0
- mcp_vector_search/cli/commands/init.py +658 -0
- mcp_vector_search/cli/commands/install.py +869 -0
- mcp_vector_search/cli/commands/install_old.py +700 -0
- mcp_vector_search/cli/commands/mcp.py +1254 -0
- mcp_vector_search/cli/commands/reset.py +393 -0
- mcp_vector_search/cli/commands/search.py +796 -0
- mcp_vector_search/cli/commands/setup.py +1133 -0
- mcp_vector_search/cli/commands/status.py +584 -0
- mcp_vector_search/cli/commands/uninstall.py +404 -0
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +265 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +201 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
- mcp_vector_search/cli/commands/visualize.py.original +2536 -0
- mcp_vector_search/cli/commands/watch.py +287 -0
- mcp_vector_search/cli/didyoumean.py +520 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +295 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +484 -0
- mcp_vector_search/cli/output.py +414 -0
- mcp_vector_search/cli/suggestions.py +375 -0
- mcp_vector_search/config/__init__.py +1 -0
- mcp_vector_search/config/constants.py +24 -0
- mcp_vector_search/config/defaults.py +200 -0
- mcp_vector_search/config/settings.py +146 -0
- mcp_vector_search/core/__init__.py +1 -0
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/connection_pool.py +360 -0
- mcp_vector_search/core/database.py +1237 -0
- mcp_vector_search/core/directory_index.py +318 -0
- mcp_vector_search/core/embeddings.py +294 -0
- mcp_vector_search/core/exceptions.py +89 -0
- mcp_vector_search/core/factory.py +318 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +1002 -0
- mcp_vector_search/core/llm_client.py +453 -0
- mcp_vector_search/core/models.py +294 -0
- mcp_vector_search/core/project.py +350 -0
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +952 -0
- mcp_vector_search/core/watcher.py +322 -0
- mcp_vector_search/mcp/__init__.py +5 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +752 -0
- mcp_vector_search/parsers/__init__.py +8 -0
- mcp_vector_search/parsers/base.py +296 -0
- mcp_vector_search/parsers/dart.py +605 -0
- mcp_vector_search/parsers/html.py +413 -0
- mcp_vector_search/parsers/javascript.py +643 -0
- mcp_vector_search/parsers/php.py +694 -0
- mcp_vector_search/parsers/python.py +502 -0
- mcp_vector_search/parsers/registry.py +223 -0
- mcp_vector_search/parsers/ruby.py +678 -0
- mcp_vector_search/parsers/text.py +186 -0
- mcp_vector_search/parsers/utils.py +265 -0
- mcp_vector_search/py.typed +1 -0
- mcp_vector_search/utils/__init__.py +42 -0
- mcp_vector_search/utils/gitignore.py +250 -0
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +339 -0
- mcp_vector_search/utils/timing.py +338 -0
- mcp_vector_search/utils/version.py +47 -0
- mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
- mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
- mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
- mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
- mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
mcp_vector_search/cli/commands/visualize/graph_builder.py (new file, +709 lines):

"""Graph data construction logic for code visualization.

This module handles building the graph data structure from code chunks,
including nodes, links, semantic relationships, and cycle detection.
"""

import json
from pathlib import Path
from typing import Any

from loguru import logger
from rich.console import Console

from ....core.database import ChromaVectorDatabase
from ....core.directory_index import DirectoryIndex
from ....core.project import ProjectManager
from .state_manager import VisualizationState

console = Console()


def get_subproject_color(subproject_name: str, index: int) -> str:
    """Get a consistent color for a subproject.

    Args:
        subproject_name: Name of the subproject
        index: Index of the subproject in the list

    Returns:
        Hex color code
    """
    # Color palette for subprojects (GitHub-style colors)
    colors = [
        "#238636",  # Green
        "#1f6feb",  # Blue
        "#d29922",  # Yellow
        "#8957e5",  # Purple
        "#da3633",  # Red
        "#bf8700",  # Orange
        "#1a7f37",  # Dark green
        "#0969da",  # Dark blue
    ]
    return colors[index % len(colors)]
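# Illustration (hypothetical values, not from the package): with the 8-color
# palette above, get_subproject_color("api", 0) returns "#238636" and index 9
# wraps around to colors[9 % 8] == "#1f6feb". Note that subproject_name itself
# does not affect the choice; only the index does.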

def parse_project_dependencies(project_root: Path, subprojects: dict) -> list[dict]:
    """Parse package.json files to find inter-project dependencies.

    Args:
        project_root: Root directory of the monorepo
        subprojects: Dictionary of subproject information

    Returns:
        List of dependency links between subprojects
    """
    dependency_links = []

    for sp_name, sp_data in subprojects.items():
        package_json = project_root / sp_data["path"] / "package.json"

        if not package_json.exists():
            continue

        try:
            with open(package_json) as f:
                package_data = json.load(f)

            # Check all dependency types
            all_deps = {}
            for dep_type in ["dependencies", "devDependencies", "peerDependencies"]:
                if dep_type in package_data:
                    all_deps.update(package_data[dep_type])

            # Find dependencies on other subprojects
            for dep_name in all_deps.keys():
                # Check if this dependency is another subproject
                for other_sp_name in subprojects.keys():
                    if other_sp_name != sp_name and dep_name == other_sp_name:
                        # Found inter-project dependency
                        dependency_links.append(
                            {
                                "source": f"subproject_{sp_name}",
                                "target": f"subproject_{other_sp_name}",
                                "type": "dependency",
                            }
                        )

        except Exception as e:
            logger.debug(f"Failed to parse {package_json}: {e}")
            continue

    return dependency_links
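# Shape sketch (illustrative names, not from the package): given
# subprojects = {"app": {"path": "packages/app"}, "ui": {"path": "packages/ui"}}
# and packages/app/package.json containing {"dependencies": {"ui": "^1.0.0"}},
# this returns [{"source": "subproject_app", "target": "subproject_ui",
# "type": "dependency"}]. Matching is on the bare package name, so a scoped
# dependency like "@org/ui" would not match a subproject named "ui".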

def detect_cycles(chunks: list, caller_map: dict) -> list[list[str]]:
    """Detect TRUE cycles in the call graph using DFS with three-color marking.

    Uses three-color marking to distinguish between:
    - WHITE (0): Unvisited node, not yet explored
    - GRAY (1): Currently exploring, node is in the current DFS path
    - BLACK (2): Fully explored, all descendants processed

    A cycle exists when we encounter a GRAY node during traversal, which means
    we've found a back edge to a node currently in the exploration path.

    Args:
        chunks: List of code chunks
        caller_map: Map of chunk_id to list of caller info

    Returns:
        List of cycles found, where each cycle is a list of node IDs in the cycle path
    """
    cycles_found = []
    # Three-color constants for DFS cycle detection
    white, gray, black = 0, 1, 2  # noqa: N806
    color = {chunk.chunk_id or chunk.id: white for chunk in chunks}

    def dfs(node_id: str, path: list) -> None:
        """DFS with three-color marking for accurate cycle detection.

        Args:
            node_id: Current node ID being visited
            path: List of node IDs in current path (for cycle reconstruction)
        """
        if color.get(node_id, white) == black:
            # Already fully explored, no cycle here
            return

        if color.get(node_id, white) == gray:
            # Found a TRUE cycle! Node is in current path
            try:
                cycle_start = path.index(node_id)
                cycle_nodes = path[cycle_start:] + [node_id]  # Include back edge
                # Only record if cycle length > 1 (avoid self-loops unless intentional)
                if len(set(cycle_nodes)) > 1:
                    cycles_found.append(cycle_nodes)
            except ValueError:
                pass  # Node not in path (shouldn't happen)
            return

        # Mark as currently exploring
        color[node_id] = gray
        path.append(node_id)

        # Follow outgoing edges (external_callers → caller_id)
        if node_id in caller_map:
            for caller_info in caller_map[node_id]:
                caller_id = caller_info["chunk_id"]
                dfs(caller_id, path[:])  # Pass copy of path

        # Mark as fully explored
        path.pop()
        color[node_id] = black

    # Run DFS from each unvisited node
    for chunk in chunks:
        chunk_id = chunk.chunk_id or chunk.id
        if color.get(chunk_id, white) == white:
            dfs(chunk_id, [])

    return cycles_found
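# Worked micro-example (hypothetical IDs): with caller_map =
# {"A": [{"chunk_id": "B"}], "B": [{"chunk_id": "A"}]}, the DFS from "A"
# walks A -> B -> A, re-encounters "A" while it is still GRAY, and records
# the cycle ["A", "B", "A"]. A self-loop ({"A": [{"chunk_id": "A"}]}) is
# discarded by the len(set(cycle_nodes)) > 1 guard.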

async def build_graph_data(
    chunks: list,
    database: ChromaVectorDatabase,
    project_manager: ProjectManager,
    code_only: bool = False,
) -> dict[str, Any]:
    """Build complete graph data structure from chunks.

    Args:
        chunks: List of code chunks from the database
        database: Vector database instance (for semantic search)
        project_manager: Project manager instance
        code_only: If True, exclude documentation chunks

    Returns:
        Dictionary containing nodes, links, and metadata
    """
    # Collect subprojects for monorepo support
    subprojects = {}
    for chunk in chunks:
        if chunk.subproject_name and chunk.subproject_name not in subprojects:
            subprojects[chunk.subproject_name] = {
                "name": chunk.subproject_name,
                "path": chunk.subproject_path,
                "color": get_subproject_color(chunk.subproject_name, len(subprojects)),
            }

    # Build graph data structure
    nodes = []
    links = []
    chunk_id_map = {}  # Map chunk IDs to array indices
    file_nodes = {}  # Track file nodes by path
    dir_nodes = {}  # Track directory nodes by path

    # Add subproject root nodes for monorepos
    if subprojects:
        console.print(
            f"[cyan]Detected monorepo with {len(subprojects)} subprojects[/cyan]"
        )
        for sp_name, sp_data in subprojects.items():
            node = {
                "id": f"subproject_{sp_name}",
                "name": sp_name,
                "type": "subproject",
                "file_path": sp_data["path"] or "",
                "start_line": 0,
                "end_line": 0,
                "complexity": 0,
                "color": sp_data["color"],
                "depth": 0,
            }
            nodes.append(node)

    # Load directory index for enhanced directory metadata
    console.print("[cyan]Loading directory index...[/cyan]")
    dir_index_path = (
        project_manager.project_root / ".mcp-vector-search" / "directory_index.json"
    )
    dir_index = DirectoryIndex(dir_index_path)
    dir_index.load()

    # Create directory nodes from directory index
    console.print(f"[green]✓[/green] Loaded {len(dir_index.directories)} directories")
    for dir_path_str, directory in dir_index.directories.items():
        dir_id = f"dir_{hash(dir_path_str) & 0xFFFFFFFF:08x}"
        dir_nodes[dir_path_str] = {
            "id": dir_id,
            "name": directory.name,
            "type": "directory",
            "file_path": dir_path_str,
            "start_line": 0,
            "end_line": 0,
            "complexity": 0,
            "depth": directory.depth,
            "dir_path": dir_path_str,
            "file_count": directory.file_count,
            "subdirectory_count": directory.subdirectory_count,
            "total_chunks": directory.total_chunks,
            "languages": directory.languages or {},
            "is_package": directory.is_package,
            "last_modified": directory.last_modified,
        }

    # Create file nodes from chunks
    for chunk in chunks:
        file_path_str = str(chunk.file_path)
        file_path = Path(file_path_str)

        # Create file node with parent directory reference
        if file_path_str not in file_nodes:
            file_id = f"file_{hash(file_path_str) & 0xFFFFFFFF:08x}"

            # Convert absolute path to relative path for parent directory lookup
            try:
                relative_file_path = file_path.relative_to(project_manager.project_root)
                parent_dir = relative_file_path.parent
                # Use relative path for parent directory (matches directory_index)
                parent_dir_str = str(parent_dir) if parent_dir != Path(".") else None
            except ValueError:
                # File is outside project root
                parent_dir_str = None

            # Look up parent directory ID from dir_nodes (must match exactly)
            parent_dir_id = None
            if parent_dir_str and parent_dir_str in dir_nodes:
                parent_dir_id = dir_nodes[parent_dir_str]["id"]

            file_nodes[file_path_str] = {
                "id": file_id,
                "name": file_path.name,
                "type": "file",
                "file_path": file_path_str,
                "start_line": 0,
                "end_line": 0,
                "complexity": 0,
                "depth": len(file_path.parts) - 1,
                "parent_dir_id": parent_dir_id,
                "parent_dir_path": parent_dir_str,
            }

    # Add directory nodes to graph
    for dir_node in dir_nodes.values():
        nodes.append(dir_node)

    # Add file nodes to graph
    for file_node in file_nodes.values():
        nodes.append(file_node)

    # Compute semantic relationships for code chunks
    console.print("[cyan]Computing semantic relationships...[/cyan]")
    code_chunks = [c for c in chunks if c.chunk_type in ["function", "method", "class"]]
    semantic_links = []

    # Pre-compute top 5 semantic relationships for each code chunk
    for i, chunk in enumerate(code_chunks):
        if i % 20 == 0:  # Progress indicator every 20 chunks
            console.print(f"[dim]Processed {i}/{len(code_chunks)} chunks[/dim]")

        try:
            # Search for similar chunks using the chunk's content
            similar_results = await database.search(
                query=chunk.content[:500],  # Use first 500 chars for query
                limit=6,  # Get 6 (exclude self = 5)
                similarity_threshold=0.3,  # Lower threshold to catch more relationships
            )

            # Filter out self and create semantic links
            for result in similar_results:
                # Construct target chunk_id from file_path and line numbers
                target_chunk = next(
                    (
                        c
                        for c in chunks
                        if str(c.file_path) == str(result.file_path)
                        and c.start_line == result.start_line
                        and c.end_line == result.end_line
                    ),
                    None,
                )

                if not target_chunk:
                    continue

                target_chunk_id = target_chunk.chunk_id or target_chunk.id

                # Skip self-references
                if target_chunk_id == (chunk.chunk_id or chunk.id):
                    continue

                # Add semantic link with similarity score
                if result.similarity_score >= 0.2:
                    semantic_links.append(
                        {
                            "source": chunk.chunk_id or chunk.id,
                            "target": target_chunk_id,
                            "type": "semantic",
                            "similarity": result.similarity_score,
                        }
                    )

                # Only keep top 5
                if (
                    len(
                        [
                            link
                            for link in semantic_links
                            if link["source"] == (chunk.chunk_id or chunk.id)
                        ]
                    )
                    >= 5
                ):
                    break

        except Exception as e:
            logger.debug(
                f"Failed to compute semantic relationships for {chunk.chunk_id}: {e}"
            )
            continue

    console.print(
        f"[green]✓[/green] Computed {len(semantic_links)} semantic relationships"
    )

    def extract_function_calls(code: str) -> set[str]:
        """Extract actual function calls from Python code using AST.

        Returns set of function names that are actually called (not just mentioned).
        Avoids false positives from comments, docstrings, and string literals.

        Args:
            code: Python source code to analyze

        Returns:
            Set of function names that are actually called in the code
        """
        import ast

        calls = set()
        try:
            tree = ast.parse(code)
            for node in ast.walk(tree):
                if isinstance(node, ast.Call):
                    # Handle direct calls: foo()
                    if isinstance(node.func, ast.Name):
                        calls.add(node.func.id)
                    # Handle method calls: obj.foo() - extract 'foo'
                    elif isinstance(node.func, ast.Attribute):
                        calls.add(node.func.attr)
            return calls
        except SyntaxError:
            # If code can't be parsed (incomplete, etc.), fall back to empty set
            # This is safer than false positives from naive substring matching
            return set()
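    # Sanity illustration for the helper above (not from the package):
    # extract_function_calls("import os\nos.path.join(a, b)\nfoo()") yields
    # {"join", "foo"} - attribute calls contribute only the attribute name,
    # so the caller scan below matches on bare function/method names.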

    # Compute external caller relationships
    console.print("[cyan]Computing external caller relationships...[/cyan]")
    import time

    start_time = time.time()
    caller_map = {}  # Map chunk_id -> list of caller info

    logger.info(f"Processing {len(code_chunks)} code chunks for external callers...")
    for chunk_idx, chunk in enumerate(code_chunks):
        if chunk_idx % 50 == 0:  # Progress every 50 chunks
            elapsed = time.time() - start_time
            logger.info(
                f"Progress: {chunk_idx}/{len(code_chunks)} chunks ({elapsed:.1f}s elapsed)"
            )
            console.print(
                f"[dim]Progress: {chunk_idx}/{len(code_chunks)} chunks ({elapsed:.1f}s)[/dim]"
            )
        chunk_id = chunk.chunk_id or chunk.id
        file_path = str(chunk.file_path)
        function_name = chunk.function_name or chunk.class_name

        if not function_name:
            continue

        # Search for other chunks that reference this function/class name
        other_chunks_count = 0
        for other_chunk in chunks:
            other_chunks_count += 1
            if chunk_idx % 50 == 0 and other_chunks_count % 500 == 0:  # Inner progress
                logger.debug(
                    f"  Chunk {chunk_idx}: Scanning {other_chunks_count}/{len(chunks)} chunks"
                )
            other_file_path = str(other_chunk.file_path)

            # Only track EXTERNAL callers (different file)
            if other_file_path == file_path:
                continue

            # Extract actual function calls using AST (avoids false positives)
            actual_calls = extract_function_calls(other_chunk.content)

            # Check if this function is actually called (not just mentioned in comments)
            if function_name in actual_calls:
                other_chunk_id = other_chunk.chunk_id or other_chunk.id
                other_name = (
                    other_chunk.function_name
                    or other_chunk.class_name
                    or f"L{other_chunk.start_line}"
                )

                if chunk_id not in caller_map:
                    caller_map[chunk_id] = []

                # Store caller information
                caller_map[chunk_id].append(
                    {
                        "file": other_file_path,
                        "chunk_id": other_chunk_id,
                        "name": other_name,
                        "type": other_chunk.chunk_type,
                    }
                )

                logger.debug(
                    f"Found actual call: {other_name} ({other_file_path}) -> "
                    f"{function_name} ({file_path})"
                )

    # Count total caller relationships
    total_callers = sum(len(callers) for callers in caller_map.values())
    elapsed_total = time.time() - start_time
    logger.info(f"Completed external caller computation in {elapsed_total:.1f}s")
    console.print(
        f"[green]✓[/green] Found {total_callers} external caller relationships ({elapsed_total:.1f}s)"
    )

    # Detect circular dependencies in caller relationships
    console.print("[cyan]Detecting circular dependencies...[/cyan]")
    cycles = detect_cycles(chunks, caller_map)

    # Mark cycle links
    cycle_links = []
    if cycles:
        console.print(f"[yellow]⚠ Found {len(cycles)} circular dependencies[/yellow]")

        # For each cycle, create links marking the cycle
        for cycle in cycles:
            # Create links for the cycle path: A → B → C → A
            for i in range(len(cycle)):
                source = cycle[i]
                target = cycle[(i + 1) % len(cycle)]  # Wrap around to form cycle
                cycle_links.append(
                    {
                        "source": source,
                        "target": target,
                        "type": "caller",
                        "is_cycle": True,
                    }
                )
    else:
        console.print("[green]✓[/green] No circular dependencies detected")

    # Add chunk nodes
    for chunk in chunks:
        chunk_id = chunk.chunk_id or chunk.id
        node = {
            "id": chunk_id,
            "name": chunk.function_name or chunk.class_name or f"L{chunk.start_line}",
            "type": chunk.chunk_type,
            "file_path": str(chunk.file_path),
            "start_line": chunk.start_line,
            "end_line": chunk.end_line,
            "complexity": chunk.complexity_score,
            "parent_id": chunk.parent_chunk_id,
            "depth": chunk.chunk_depth,
            "content": chunk.content,  # Add content for code viewer
            "docstring": chunk.docstring,
            "language": chunk.language,
        }

        # Add caller information if available
        if chunk_id in caller_map:
            node["callers"] = caller_map[chunk_id]

        # Add subproject info for monorepos
        if chunk.subproject_name:
            node["subproject"] = chunk.subproject_name
            node["color"] = subprojects[chunk.subproject_name]["color"]

        nodes.append(node)
        chunk_id_map[node["id"]] = len(nodes) - 1

    # Link directories to their parent directories (hierarchical structure)
    for dir_path_str, dir_info in dir_index.directories.items():
        if dir_info.parent_path:
            parent_path_str = str(dir_info.parent_path)
            if parent_path_str in dir_nodes:
                parent_dir_id = f"dir_{hash(parent_path_str) & 0xFFFFFFFF:08x}"
                child_dir_id = f"dir_{hash(dir_path_str) & 0xFFFFFFFF:08x}"
                links.append(
                    {
                        "source": parent_dir_id,
                        "target": child_dir_id,
                        "type": "dir_hierarchy",
                    }
                )

    # Link directories to subprojects in monorepos (simple flat structure)
    if subprojects:
        for dir_path_str, dir_node in dir_nodes.items():
            for sp_name, sp_data in subprojects.items():
                if dir_path_str.startswith(sp_data.get("path", "")):
                    links.append(
                        {
                            "source": f"subproject_{sp_name}",
                            "target": dir_node["id"],
                            "type": "dir_containment",
                        }
                    )
                    break

    # Link files to their parent directories
    for _file_path_str, file_node in file_nodes.items():
        if file_node.get("parent_dir_id"):
            links.append(
                {
                    "source": file_node["parent_dir_id"],
                    "target": file_node["id"],
                    "type": "dir_containment",
                }
            )

    # Build hierarchical links from parent-child relationships
    for chunk in chunks:
        chunk_id = chunk.chunk_id or chunk.id
        file_path = str(chunk.file_path)

        # Link chunk to its file node if it has no parent (top-level chunks)
        if not chunk.parent_chunk_id and file_path in file_nodes:
            links.append(
                {
                    "source": file_nodes[file_path]["id"],
                    "target": chunk_id,
                    "type": "file_containment",
                }
            )

        # Link to subproject root if in monorepo
        if chunk.subproject_name and not chunk.parent_chunk_id:
            links.append(
                {
                    "source": f"subproject_{chunk.subproject_name}",
                    "target": chunk_id,
                }
            )

        # Link to parent chunk
        if chunk.parent_chunk_id and chunk.parent_chunk_id in chunk_id_map:
            links.append(
                {
                    "source": chunk.parent_chunk_id,
                    "target": chunk_id,
                }
            )

    # Add semantic relationship links
    links.extend(semantic_links)

    # Add cycle links
    links.extend(cycle_links)

    # Parse inter-project dependencies for monorepos
    if subprojects:
        console.print("[cyan]Parsing inter-project dependencies...[/cyan]")
        dep_links = parse_project_dependencies(
            project_manager.project_root, subprojects
        )
        links.extend(dep_links)
        if dep_links:
            console.print(
                f"[green]✓[/green] Found {len(dep_links)} inter-project dependencies"
            )

    # Get stats
    stats = await database.get_stats()

    # Build final graph data
    graph_data = {
        "nodes": nodes,
        "links": links,
        "metadata": {
            "total_chunks": len(chunks),
            "total_files": stats.total_files,
            "languages": stats.languages,
            "is_monorepo": len(subprojects) > 0,
            "subprojects": list(subprojects.keys()) if subprojects else [],
        },
    }

    return graph_data


def apply_state(graph_data: dict, state: VisualizationState) -> dict:
    """Apply visualization state to graph data.

    Filters nodes and edges based on current visualization state,
    including visibility and AST-only edge filtering.

    Args:
        graph_data: Full graph data dictionary (nodes, links, metadata)
        state: Current visualization state

    Returns:
        Filtered graph data with only visible nodes and edges

    Example:
        >>> state = VisualizationState()
        >>> state.expand_node("dir1", "directory", ["file1", "file2"])
        >>> filtered = apply_state(graph_data, state)
        >>> len(filtered["nodes"]) < len(graph_data["nodes"])
        True
    """
    # Get visible node IDs from state
    visible_node_ids = set(state.get_visible_nodes())

    # Filter nodes
    filtered_nodes = [
        node for node in graph_data["nodes"] if node["id"] in visible_node_ids
    ]

    # Build node ID to node data map for quick lookup
    node_map = {node["id"]: node for node in graph_data["nodes"]}

    # Get visible edges from state (AST calls only in FILE_DETAIL mode)
    expanded_file_id = None
    if state.view_mode.value == "file_detail" and state.expansion_path:
        # Find the file node in expansion path
        for node_id in reversed(state.expansion_path):
            node = node_map.get(node_id)
            if node and node.get("type") == "file":
                expanded_file_id = node_id
                break

    visible_edge_ids = state.get_visible_edges(
        graph_data["links"], expanded_file_id=expanded_file_id
    )

    # Filter links to only visible edges
    filtered_links = []
    for link in graph_data["links"]:
        source_id = link.get("source")
        target_id = link.get("target")

        # Skip if either node not visible
        if source_id not in visible_node_ids or target_id not in visible_node_ids:
            continue

        # In FILE_DETAIL mode, only show edges in visible_edge_ids
        if state.view_mode.value == "file_detail":
            if (source_id, target_id) in visible_edge_ids:
                filtered_links.append(link)
        elif state.view_mode.value in ("tree_root", "tree_expanded"):
            # In tree modes, show containment edges only
            if link.get("type") in ("dir_containment", "dir_hierarchy"):
                filtered_links.append(link)

    return {
        "nodes": filtered_nodes,
        "links": filtered_links,
        "metadata": graph_data.get("metadata", {}),
        "state": state.to_dict(),  # Include serialized state
    }
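
For orientation, a minimal sketch of how the two entry points above fit together. This is an assumption-laden illustration, not code from the package: how the project manager, database, and chunks are obtained is left as placeholders, and only the import paths, signatures, and the bare VisualizationState() construction (shown in the apply_state docstring) come from the diff itself.

import asyncio

from mcp_vector_search.cli.commands.visualize.graph_builder import (
    apply_state,
    build_graph_data,
)
from mcp_vector_search.cli.commands.visualize.state_manager import VisualizationState

async def main(project_manager, database, chunks) -> None:
    # build_graph_data assembles nodes plus hierarchy, semantic, caller,
    # cycle, and dependency links from the indexed chunks
    graph = await build_graph_data(chunks, database, project_manager)

    # apply_state filters the full graph down to what the current view shows
    state = VisualizationState()
    filtered = apply_state(graph, state)
    print(f"{len(filtered['nodes'])} visible nodes, {len(filtered['links'])} visible links")

# asyncio.run(main(project_manager, database, chunks))
# with objects obtained from your own project setup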