codegraph-nav 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_nav/__init__.py +194 -0
- codegraph_nav/ast_grep_analyzer.py +448 -0
- codegraph_nav/cli.py +223 -0
- codegraph_nav/code_navigator.py +1328 -0
- codegraph_nav/code_search.py +1009 -0
- codegraph_nav/colors.py +209 -0
- codegraph_nav/completions.py +354 -0
- codegraph_nav/dart_analyzer.py +301 -0
- codegraph_nav/dependency_graph.py +814 -0
- codegraph_nav/domain/__init__.py +20 -0
- codegraph_nav/domain/routes.py +337 -0
- codegraph_nav/domain/schemas.py +229 -0
- codegraph_nav/domain/tags.py +87 -0
- codegraph_nav/exporters.py +563 -0
- codegraph_nav/go_analyzer.py +273 -0
- codegraph_nav/graph/__init__.py +72 -0
- codegraph_nav/graph/builder.py +409 -0
- codegraph_nav/graph/communities.py +402 -0
- codegraph_nav/graph/flows.py +311 -0
- codegraph_nav/graph/query.py +380 -0
- codegraph_nav/graph/schema.py +266 -0
- codegraph_nav/graph/search.py +257 -0
- codegraph_nav/graph/store.py +517 -0
- codegraph_nav/hints.py +195 -0
- codegraph_nav/import_resolver.py +891 -0
- codegraph_nav/js_ts_analyzer.py +564 -0
- codegraph_nav/line_reader.py +664 -0
- codegraph_nav/mcp/__init__.py +39 -0
- codegraph_nav/mcp/__main__.py +5 -0
- codegraph_nav/mcp/server.py +2228 -0
- codegraph_nav/py.typed +2 -0
- codegraph_nav/ruby_analyzer.py +259 -0
- codegraph_nav/rust_analyzer.py +379 -0
- codegraph_nav/token_efficient_renderer.py +743 -0
- codegraph_nav/watcher.py +382 -0
- codegraph_nav-0.1.0.dist-info/METADATA +487 -0
- codegraph_nav-0.1.0.dist-info/RECORD +41 -0
- codegraph_nav-0.1.0.dist-info/WHEEL +5 -0
- codegraph_nav-0.1.0.dist-info/entry_points.txt +4 -0
- codegraph_nav-0.1.0.dist-info/licenses/LICENSE +21 -0
- codegraph_nav-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,814 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""DependencyGraph - Architectural importance analysis using PageRank.
|
|
3
|
+
|
|
4
|
+
This module provides graph-based analysis of file dependencies to identify
|
|
5
|
+
architecturally critical files ("hubs") in a codebase. Unlike simple import
|
|
6
|
+
counting, PageRank propagates importance transitively, giving higher scores
|
|
7
|
+
to files imported by other important files.
|
|
8
|
+
|
|
9
|
+
Example:
|
|
10
|
+
>>> graph = DependencyGraph('/path/to/project')
|
|
11
|
+
>>> graph.build()
|
|
12
|
+
>>> critical = graph.get_critical_paths(top_n=10)
|
|
13
|
+
>>> for file, score in critical:
|
|
14
|
+
... print(f"{file}: {score:.4f}")
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import ast
|
|
18
|
+
import os
|
|
19
|
+
import re
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
import networkx as nx
|
|
26
|
+
|
|
27
|
+
HAS_NETWORKX = True
|
|
28
|
+
except ImportError:
|
|
29
|
+
HAS_NETWORKX = False
|
|
30
|
+
nx = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class FileNode:
    """One source file tracked by the dependency graph.

    Only ``path`` is required; every other field starts empty or zero and is
    filled in while the graph is being built.
    """

    # Path of the file relative to the project root.
    path: str
    # Language name detected for the file ("" when unknown).
    language: str = ""
    # Raw import strings exactly as extracted from the file's text.
    imports: list[str] = field(default_factory=list)
    # Project files that the raw imports were resolved to.
    resolved_imports: list[str] = field(default_factory=list)
    # Project files whose resolved imports include this file.
    importers: list[str] = field(default_factory=list)
    # PageRank score ("architectural importance") assigned to the file.
    pagerank: float = 0.0
    # Count of graph edges pointing at this file (files importing it).
    in_degree: int = 0
    # Count of graph edges leaving this file (files it imports).
    out_degree: int = 0
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class DependencyGraph:
|
|
59
|
+
"""Analyzes file-level dependencies and computes architectural importance.
|
|
60
|
+
|
|
61
|
+
This class builds a directed graph where nodes are files and edges represent
|
|
62
|
+
import relationships. It uses PageRank to compute "Architectural Importance"
|
|
63
|
+
scores, which are superior to simple import counting because:
|
|
64
|
+
|
|
65
|
+
1. **Transitive propagation**: If file A is imported by B and C, and B/C are
|
|
66
|
+
themselves highly important (imported by many important files), A gets
|
|
67
|
+
a higher score than if B/C were leaf nodes.
|
|
68
|
+
|
|
69
|
+
2. **Hub detection**: Identifies true architectural hubs - files that are
|
|
70
|
+
central to the codebase structure, not just frequently imported utilities.
|
|
71
|
+
|
|
72
|
+
3. **Noise resistance**: A file imported by many trivial test files won't
|
|
73
|
+
rank as high as one imported by core business logic modules.
|
|
74
|
+
|
|
75
|
+
Attributes:
|
|
76
|
+
root: Absolute path to the project root.
|
|
77
|
+
graph: NetworkX DiGraph representing file dependencies.
|
|
78
|
+
nodes: Dict mapping file paths to FileNode objects.
|
|
79
|
+
file_index: Index for fast import resolution.
|
|
80
|
+
module_name: Detected module name (from go.mod, pyproject.toml, etc.).
|
|
81
|
+
|
|
82
|
+
Example:
|
|
83
|
+
>>> dg = DependencyGraph('/my/project')
|
|
84
|
+
>>> dg.build()
|
|
85
|
+
>>>
|
|
86
|
+
>>> # Get top 10 most important files
|
|
87
|
+
>>> critical = dg.get_critical_paths(top_n=10)
|
|
88
|
+
>>>
|
|
89
|
+
>>> # Check if a specific file is a hub
|
|
90
|
+
>>> if dg.is_hub('src/core/config.py'):
|
|
91
|
+
... print("config.py is architecturally critical!")
|
|
92
|
+
>>>
|
|
93
|
+
>>> # Get all connected files
|
|
94
|
+
>>> connected = dg.get_connected_files('src/main.py')
|
|
95
|
+
"""
|
|
96
|
+
|
|
97
|
+
# Supported file extensions by language
|
|
98
|
+
LANGUAGE_EXTENSIONS = {
|
|
99
|
+
"python": [".py"],
|
|
100
|
+
"javascript": [".js", ".jsx", ".mjs"],
|
|
101
|
+
"typescript": [".ts", ".tsx"],
|
|
102
|
+
"go": [".go"],
|
|
103
|
+
"rust": [".rs"],
|
|
104
|
+
"java": [".java"],
|
|
105
|
+
"ruby": [".rb"],
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
# Directories to ignore
|
|
109
|
+
IGNORED_DIRS = {
|
|
110
|
+
"node_modules",
|
|
111
|
+
"__pycache__",
|
|
112
|
+
".git",
|
|
113
|
+
".svn",
|
|
114
|
+
"venv",
|
|
115
|
+
"env",
|
|
116
|
+
".env",
|
|
117
|
+
"dist",
|
|
118
|
+
"build",
|
|
119
|
+
".next",
|
|
120
|
+
"coverage",
|
|
121
|
+
"vendor",
|
|
122
|
+
"target",
|
|
123
|
+
".tox",
|
|
124
|
+
"eggs",
|
|
125
|
+
".pytest_cache",
|
|
126
|
+
".mypy_cache",
|
|
127
|
+
".ruff_cache",
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
# PageRank parameters
|
|
131
|
+
DEFAULT_DAMPING = 0.85 # Standard damping factor
|
|
132
|
+
DEFAULT_MAX_ITER = 100 # Maximum iterations for convergence
|
|
133
|
+
DEFAULT_TOL = 1e-06 # Convergence tolerance
|
|
134
|
+
|
|
135
|
+
def __init__(self, root: str, damping: float | None = None):
|
|
136
|
+
"""Initialize the dependency graph analyzer.
|
|
137
|
+
|
|
138
|
+
Args:
|
|
139
|
+
root: Path to the project root directory.
|
|
140
|
+
damping: PageRank damping factor (default: 0.85).
|
|
141
|
+
Higher values give more weight to direct imports.
|
|
142
|
+
|
|
143
|
+
Raises:
|
|
144
|
+
ImportError: If networkx is not installed.
|
|
145
|
+
ValueError: If root path doesn't exist.
|
|
146
|
+
"""
|
|
147
|
+
if not HAS_NETWORKX:
|
|
148
|
+
raise ImportError(
|
|
149
|
+
"networkx is required for DependencyGraph. " "Install with: pip install networkx"
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
self.root = Path(root).resolve()
|
|
153
|
+
if not self.root.exists():
|
|
154
|
+
raise ValueError(f"Root path does not exist: {self.root}")
|
|
155
|
+
|
|
156
|
+
self.damping = damping or self.DEFAULT_DAMPING
|
|
157
|
+
self.graph: nx.DiGraph = nx.DiGraph()
|
|
158
|
+
self.nodes: dict[str, FileNode] = {}
|
|
159
|
+
self.file_index: dict[str, dict[str, list[str]]] = {} # index type -> key -> file paths
|
|
160
|
+
self.module_name: str = ""
|
|
161
|
+
self._built = False
|
|
162
|
+
|
|
163
|
+
def build(self, languages: list[str] | None = None) -> "DependencyGraph":
|
|
164
|
+
"""Scan the project and build the dependency graph.
|
|
165
|
+
|
|
166
|
+
Args:
|
|
167
|
+
languages: List of languages to include (default: all supported).
|
|
168
|
+
|
|
169
|
+
Returns:
|
|
170
|
+
self, for method chaining.
|
|
171
|
+
|
|
172
|
+
Example:
|
|
173
|
+
>>> dg = DependencyGraph('/project').build()
|
|
174
|
+
>>> dg = DependencyGraph('/project').build(languages=['python', 'typescript'])
|
|
175
|
+
"""
|
|
176
|
+
# Detect module/package name
|
|
177
|
+
self.module_name = self._detect_module_name()
|
|
178
|
+
|
|
179
|
+
# Scan all source files
|
|
180
|
+
files = self._scan_files(languages)
|
|
181
|
+
|
|
182
|
+
# Build file index for fast import resolution
|
|
183
|
+
self._build_file_index(files)
|
|
184
|
+
|
|
185
|
+
# Extract imports from each file
|
|
186
|
+
for file_path in files:
|
|
187
|
+
self._analyze_file(file_path)
|
|
188
|
+
|
|
189
|
+
# Resolve imports to actual files
|
|
190
|
+
self._resolve_all_imports()
|
|
191
|
+
|
|
192
|
+
# Build the NetworkX graph
|
|
193
|
+
self._build_networkx_graph()
|
|
194
|
+
|
|
195
|
+
# Compute PageRank scores
|
|
196
|
+
self._compute_pagerank()
|
|
197
|
+
|
|
198
|
+
self._built = True
|
|
199
|
+
return self
|
|
200
|
+
|
|
201
|
+
def _detect_module_name(self) -> str:
|
|
202
|
+
"""Detect the module/package name from config files."""
|
|
203
|
+
# Try go.mod
|
|
204
|
+
go_mod = self.root / "go.mod"
|
|
205
|
+
if go_mod.exists():
|
|
206
|
+
try:
|
|
207
|
+
content = go_mod.read_text()
|
|
208
|
+
for line in content.splitlines():
|
|
209
|
+
if line.startswith("module "):
|
|
210
|
+
return line.split()[1]
|
|
211
|
+
except Exception:
|
|
212
|
+
pass
|
|
213
|
+
|
|
214
|
+
# Try pyproject.toml
|
|
215
|
+
pyproject = self.root / "pyproject.toml"
|
|
216
|
+
if pyproject.exists():
|
|
217
|
+
try:
|
|
218
|
+
content = pyproject.read_text()
|
|
219
|
+
# Simple regex for [project] name or [tool.poetry] name
|
|
220
|
+
match = re.search(r'name\s*=\s*["\']([^"\']+)["\']', content)
|
|
221
|
+
if match:
|
|
222
|
+
return match.group(1)
|
|
223
|
+
except Exception:
|
|
224
|
+
pass
|
|
225
|
+
|
|
226
|
+
# Try package.json
|
|
227
|
+
package_json = self.root / "package.json"
|
|
228
|
+
if package_json.exists():
|
|
229
|
+
try:
|
|
230
|
+
import json
|
|
231
|
+
|
|
232
|
+
data = json.loads(package_json.read_text())
|
|
233
|
+
name: str = data.get("name", "")
|
|
234
|
+
return name
|
|
235
|
+
except Exception:
|
|
236
|
+
pass
|
|
237
|
+
|
|
238
|
+
# Fallback to directory name
|
|
239
|
+
return self.root.name
|
|
240
|
+
|
|
241
|
+
def _scan_files(self, languages: list[str] | None = None) -> list[str]:
|
|
242
|
+
"""Scan directory for source files.
|
|
243
|
+
|
|
244
|
+
Args:
|
|
245
|
+
languages: Filter by languages (None = all).
|
|
246
|
+
|
|
247
|
+
Returns:
|
|
248
|
+
List of relative file paths.
|
|
249
|
+
"""
|
|
250
|
+
files = []
|
|
251
|
+
extensions = set()
|
|
252
|
+
|
|
253
|
+
if languages:
|
|
254
|
+
for lang in languages:
|
|
255
|
+
extensions.update(self.LANGUAGE_EXTENSIONS.get(lang, []))
|
|
256
|
+
else:
|
|
257
|
+
for exts in self.LANGUAGE_EXTENSIONS.values():
|
|
258
|
+
extensions.update(exts)
|
|
259
|
+
|
|
260
|
+
for dirpath, dirnames, filenames in os.walk(self.root):
|
|
261
|
+
# Filter out ignored directories in-place
|
|
262
|
+
dirnames[:] = [d for d in dirnames if d not in self.IGNORED_DIRS]
|
|
263
|
+
|
|
264
|
+
for filename in filenames:
|
|
265
|
+
if any(filename.endswith(ext) for ext in extensions):
|
|
266
|
+
full_path = Path(dirpath) / filename
|
|
267
|
+
rel_path = str(full_path.relative_to(self.root))
|
|
268
|
+
files.append(rel_path)
|
|
269
|
+
|
|
270
|
+
return files
|
|
271
|
+
|
|
272
|
+
def _build_file_index(self, files: list[str]) -> None:
|
|
273
|
+
"""Build multi-key index for fast import resolution.
|
|
274
|
+
|
|
275
|
+
Creates indexes by:
|
|
276
|
+
- Exact path
|
|
277
|
+
- Path without extension
|
|
278
|
+
- All path suffixes (for nested packages)
|
|
279
|
+
- Directory (for package imports)
|
|
280
|
+
"""
|
|
281
|
+
self.file_index = {
|
|
282
|
+
"exact": {}, # exact path -> [files]
|
|
283
|
+
"no_ext": {}, # path without extension -> [files]
|
|
284
|
+
"suffix": {}, # path suffix -> [files]
|
|
285
|
+
"dir": {}, # directory -> [files]
|
|
286
|
+
"basename": {}, # just filename -> [files]
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
for path in files:
|
|
290
|
+
# Exact match
|
|
291
|
+
self._add_to_index("exact", path, path)
|
|
292
|
+
|
|
293
|
+
# Without extension
|
|
294
|
+
no_ext = str(Path(path).with_suffix(""))
|
|
295
|
+
self._add_to_index("no_ext", no_ext, path)
|
|
296
|
+
|
|
297
|
+
# Basename
|
|
298
|
+
basename = Path(path).stem
|
|
299
|
+
self._add_to_index("basename", basename, path)
|
|
300
|
+
|
|
301
|
+
# Directory
|
|
302
|
+
dir_path = str(Path(path).parent)
|
|
303
|
+
if dir_path != ".":
|
|
304
|
+
self._add_to_index("dir", dir_path, path)
|
|
305
|
+
|
|
306
|
+
# All suffixes (for nested package resolution)
|
|
307
|
+
# e.g., "src/core/config.py" indexed as:
|
|
308
|
+
# - "core/config.py"
|
|
309
|
+
# - "config.py"
|
|
310
|
+
parts = Path(path).parts
|
|
311
|
+
for i in range(1, len(parts)):
|
|
312
|
+
suffix = str(Path(*parts[i:]))
|
|
313
|
+
self._add_to_index("suffix", suffix, path)
|
|
314
|
+
# Also without extension
|
|
315
|
+
suffix_no_ext = str(Path(*parts[i:]).with_suffix(""))
|
|
316
|
+
self._add_to_index("suffix", suffix_no_ext, path)
|
|
317
|
+
|
|
318
|
+
def _add_to_index(self, index_type: str, key: str, path: str) -> None:
|
|
319
|
+
"""Add a path to a specific index."""
|
|
320
|
+
if key not in self.file_index[index_type]:
|
|
321
|
+
self.file_index[index_type][key] = []
|
|
322
|
+
if path not in self.file_index[index_type][key]:
|
|
323
|
+
self.file_index[index_type][key].append(path)
|
|
324
|
+
|
|
325
|
+
def _analyze_file(self, rel_path: str) -> None:
|
|
326
|
+
"""Extract imports from a single file."""
|
|
327
|
+
full_path = self.root / rel_path
|
|
328
|
+
language = self._detect_language(rel_path)
|
|
329
|
+
|
|
330
|
+
node = FileNode(path=rel_path, language=language)
|
|
331
|
+
|
|
332
|
+
try:
|
|
333
|
+
content = full_path.read_text(encoding="utf-8", errors="ignore")
|
|
334
|
+
|
|
335
|
+
if language == "python":
|
|
336
|
+
node.imports = self._extract_python_imports(content)
|
|
337
|
+
elif language in ("javascript", "typescript"):
|
|
338
|
+
node.imports = self._extract_js_ts_imports(content)
|
|
339
|
+
elif language == "go":
|
|
340
|
+
node.imports = self._extract_go_imports(content)
|
|
341
|
+
elif language == "rust":
|
|
342
|
+
node.imports = self._extract_rust_imports(content)
|
|
343
|
+
else:
|
|
344
|
+
node.imports = self._extract_generic_imports(content)
|
|
345
|
+
|
|
346
|
+
except Exception:
|
|
347
|
+
pass # Skip files we can't read
|
|
348
|
+
|
|
349
|
+
self.nodes[rel_path] = node
|
|
350
|
+
|
|
351
|
+
def _detect_language(self, path: str) -> str:
|
|
352
|
+
"""Detect language from file extension."""
|
|
353
|
+
ext = Path(path).suffix.lower()
|
|
354
|
+
for lang, extensions in self.LANGUAGE_EXTENSIONS.items():
|
|
355
|
+
if ext in extensions:
|
|
356
|
+
return lang
|
|
357
|
+
return ""
|
|
358
|
+
|
|
359
|
+
def _extract_python_imports(self, content: str) -> list[str]:
|
|
360
|
+
"""Extract imports from Python code using AST."""
|
|
361
|
+
imports = []
|
|
362
|
+
try:
|
|
363
|
+
tree = ast.parse(content)
|
|
364
|
+
for node in ast.walk(tree):
|
|
365
|
+
if isinstance(node, ast.Import):
|
|
366
|
+
for alias in node.names:
|
|
367
|
+
imports.append(alias.name)
|
|
368
|
+
elif isinstance(node, ast.ImportFrom):
|
|
369
|
+
module = node.module or ""
|
|
370
|
+
if node.level > 0: # Relative import
|
|
371
|
+
imports.append("." * node.level + module)
|
|
372
|
+
else:
|
|
373
|
+
imports.append(module)
|
|
374
|
+
except SyntaxError:
|
|
375
|
+
pass
|
|
376
|
+
return imports
|
|
377
|
+
|
|
378
|
+
def _extract_js_ts_imports(self, content: str) -> list[str]:
|
|
379
|
+
"""Extract imports from JavaScript/TypeScript code."""
|
|
380
|
+
imports = []
|
|
381
|
+
# Match: import ... from 'path' or require('path')
|
|
382
|
+
patterns = [
|
|
383
|
+
r'import\s+.*?\s+from\s+[\'"]([^\'"]+)[\'"]',
|
|
384
|
+
r'import\s+[\'"]([^\'"]+)[\'"]',
|
|
385
|
+
r'require\s*\(\s*[\'"]([^\'"]+)[\'"]\s*\)',
|
|
386
|
+
r'export\s+.*?\s+from\s+[\'"]([^\'"]+)[\'"]',
|
|
387
|
+
]
|
|
388
|
+
for pattern in patterns:
|
|
389
|
+
imports.extend(re.findall(pattern, content))
|
|
390
|
+
return imports
|
|
391
|
+
|
|
392
|
+
def _extract_go_imports(self, content: str) -> list[str]:
|
|
393
|
+
"""Extract imports from Go code."""
|
|
394
|
+
imports = []
|
|
395
|
+
# Match single import: import "path"
|
|
396
|
+
imports.extend(re.findall(r'import\s+"([^"]+)"', content))
|
|
397
|
+
# Match grouped imports: import ( "path1" "path2" )
|
|
398
|
+
block_match = re.search(r"import\s*\((.*?)\)", content, re.DOTALL)
|
|
399
|
+
if block_match:
|
|
400
|
+
imports.extend(re.findall(r'"([^"]+)"', block_match.group(1)))
|
|
401
|
+
return imports
|
|
402
|
+
|
|
403
|
+
def _extract_rust_imports(self, content: str) -> list[str]:
|
|
404
|
+
"""Extract imports from Rust code."""
|
|
405
|
+
imports = []
|
|
406
|
+
# Match: use crate::path, use super::path, use path
|
|
407
|
+
imports.extend(re.findall(r"use\s+([\w:]+)", content))
|
|
408
|
+
# Match: mod name
|
|
409
|
+
imports.extend(re.findall(r"mod\s+(\w+)", content))
|
|
410
|
+
return imports
|
|
411
|
+
|
|
412
|
+
def _extract_generic_imports(self, content: str) -> list[str]:
|
|
413
|
+
"""Fallback import extraction using common patterns."""
|
|
414
|
+
imports = []
|
|
415
|
+
patterns = [
|
|
416
|
+
r'import\s+[\'"]([^\'"]+)[\'"]',
|
|
417
|
+
r'require\s*[\'"]([^\'"]+)[\'"]',
|
|
418
|
+
r'from\s+[\'"]([^\'"]+)[\'"]',
|
|
419
|
+
]
|
|
420
|
+
for pattern in patterns:
|
|
421
|
+
imports.extend(re.findall(pattern, content))
|
|
422
|
+
return imports
|
|
423
|
+
|
|
424
|
+
def _resolve_all_imports(self) -> None:
|
|
425
|
+
"""Resolve import strings to actual file paths."""
|
|
426
|
+
for path, node in self.nodes.items():
|
|
427
|
+
resolved = []
|
|
428
|
+
for imp in node.imports:
|
|
429
|
+
files = self._resolve_import(imp, path, node.language)
|
|
430
|
+
# Only count single-file resolutions (not package imports)
|
|
431
|
+
if len(files) == 1:
|
|
432
|
+
resolved.append(files[0])
|
|
433
|
+
|
|
434
|
+
node.resolved_imports = list(set(resolved))
|
|
435
|
+
|
|
436
|
+
# Build reverse map (importers)
|
|
437
|
+
for imported_file in node.resolved_imports:
|
|
438
|
+
if imported_file in self.nodes:
|
|
439
|
+
self.nodes[imported_file].importers.append(path)
|
|
440
|
+
|
|
441
|
+
def _resolve_import(self, imp: str, from_file: str, language: str) -> list[str]:
|
|
442
|
+
"""Resolve an import string to file path(s).
|
|
443
|
+
|
|
444
|
+
Uses multiple strategies in order:
|
|
445
|
+
1. Relative path resolution (./foo, ../bar)
|
|
446
|
+
2. Module-prefixed path (for Go/Python internal packages)
|
|
447
|
+
3. Exact match
|
|
448
|
+
4. Suffix match
|
|
449
|
+
"""
|
|
450
|
+
# Normalize the import
|
|
451
|
+
normalized = self._normalize_import(imp, language)
|
|
452
|
+
from_dir = str(Path(from_file).parent)
|
|
453
|
+
|
|
454
|
+
# Strategy 1: Relative imports
|
|
455
|
+
if imp.startswith("."):
|
|
456
|
+
return self._resolve_relative_import(imp, from_dir)
|
|
457
|
+
|
|
458
|
+
# Strategy 2: Module-prefixed (internal package)
|
|
459
|
+
if self.module_name and imp.startswith(self.module_name):
|
|
460
|
+
rest = imp[len(self.module_name) :].lstrip("/.")
|
|
461
|
+
candidates = self._try_exact_match(rest)
|
|
462
|
+
if candidates:
|
|
463
|
+
return candidates
|
|
464
|
+
|
|
465
|
+
# Strategy 3: Exact match
|
|
466
|
+
candidates = self._try_exact_match(normalized)
|
|
467
|
+
if candidates:
|
|
468
|
+
return candidates
|
|
469
|
+
|
|
470
|
+
# Strategy 4: Suffix match
|
|
471
|
+
candidates = self._try_suffix_match(normalized)
|
|
472
|
+
if candidates:
|
|
473
|
+
return candidates
|
|
474
|
+
|
|
475
|
+
return []
|
|
476
|
+
|
|
477
|
+
def _normalize_import(self, imp: str, language: str) -> str:
|
|
478
|
+
"""Convert import syntax to a path-like format."""
|
|
479
|
+
imp = imp.strip("\"'`")
|
|
480
|
+
|
|
481
|
+
# Python dots to slashes: app.core.config -> app/core/config
|
|
482
|
+
if language == "python" and "." in imp and "/" not in imp:
|
|
483
|
+
if not imp.startswith("."):
|
|
484
|
+
imp = imp.replace(".", "/")
|
|
485
|
+
|
|
486
|
+
# Rust :: to slashes
|
|
487
|
+
if language == "rust":
|
|
488
|
+
if imp.startswith("crate::"):
|
|
489
|
+
imp = imp[7:].replace("::", "/")
|
|
490
|
+
elif "::" in imp:
|
|
491
|
+
imp = imp.replace("::", "/")
|
|
492
|
+
|
|
493
|
+
return imp
|
|
494
|
+
|
|
495
|
+
def _resolve_relative_import(self, imp: str, from_dir: str) -> list[str]:
|
|
496
|
+
"""Resolve ./foo or ../bar style imports."""
|
|
497
|
+
# Count parent levels
|
|
498
|
+
levels = 0
|
|
499
|
+
rest = imp
|
|
500
|
+
while rest.startswith(".."):
|
|
501
|
+
levels += 1
|
|
502
|
+
rest = rest[2:].lstrip("/")
|
|
503
|
+
rest = rest.lstrip("./")
|
|
504
|
+
|
|
505
|
+
# Navigate up
|
|
506
|
+
target_dir = Path(from_dir)
|
|
507
|
+
for _ in range(levels):
|
|
508
|
+
target_dir = target_dir.parent
|
|
509
|
+
|
|
510
|
+
# Build candidate path
|
|
511
|
+
if str(target_dir) == ".":
|
|
512
|
+
candidate = rest
|
|
513
|
+
else:
|
|
514
|
+
candidate = str(target_dir / rest)
|
|
515
|
+
|
|
516
|
+
return self._try_exact_match(candidate)
|
|
517
|
+
|
|
518
|
+
def _try_exact_match(self, path: str) -> list[str]:
|
|
519
|
+
"""Try to match path exactly (with common extensions)."""
|
|
520
|
+
extensions = [
|
|
521
|
+
"",
|
|
522
|
+
".py",
|
|
523
|
+
".js",
|
|
524
|
+
".ts",
|
|
525
|
+
".tsx",
|
|
526
|
+
".jsx",
|
|
527
|
+
".go",
|
|
528
|
+
".rs",
|
|
529
|
+
"/index.js",
|
|
530
|
+
"/index.ts",
|
|
531
|
+
"/index.tsx",
|
|
532
|
+
"/__init__.py",
|
|
533
|
+
"/mod.rs",
|
|
534
|
+
]
|
|
535
|
+
|
|
536
|
+
for ext in extensions:
|
|
537
|
+
candidate = path + ext
|
|
538
|
+
if candidate in self.file_index["exact"]:
|
|
539
|
+
return self.file_index["exact"][candidate]
|
|
540
|
+
if candidate in self.file_index["no_ext"]:
|
|
541
|
+
return self.file_index["no_ext"][candidate]
|
|
542
|
+
|
|
543
|
+
return []
|
|
544
|
+
|
|
545
|
+
def _try_suffix_match(self, normalized: str) -> list[str]:
|
|
546
|
+
"""Find files where path ends with normalized import."""
|
|
547
|
+
extensions = ["", ".py", ".js", ".ts", ".tsx", ".jsx", ".go", ".rs"]
|
|
548
|
+
|
|
549
|
+
for ext in extensions:
|
|
550
|
+
candidate = normalized + ext
|
|
551
|
+
if candidate in self.file_index["suffix"]:
|
|
552
|
+
files = self.file_index["suffix"][candidate]
|
|
553
|
+
if len(files) == 1:
|
|
554
|
+
return files
|
|
555
|
+
|
|
556
|
+
return []
|
|
557
|
+
|
|
558
|
+
def _build_networkx_graph(self) -> None:
|
|
559
|
+
"""Build the NetworkX DiGraph from resolved imports."""
|
|
560
|
+
self.graph.clear()
|
|
561
|
+
|
|
562
|
+
# Add all nodes
|
|
563
|
+
for path in self.nodes:
|
|
564
|
+
self.graph.add_node(path)
|
|
565
|
+
|
|
566
|
+
# Add edges (importer -> imported)
|
|
567
|
+
# Direction: A imports B means edge from A to B
|
|
568
|
+
# PageRank will give higher scores to nodes with many incoming edges
|
|
569
|
+
for path, node in self.nodes.items():
|
|
570
|
+
for imported_file in node.resolved_imports:
|
|
571
|
+
if imported_file in self.nodes:
|
|
572
|
+
self.graph.add_edge(path, imported_file)
|
|
573
|
+
|
|
574
|
+
# Update degree stats
|
|
575
|
+
for path, node in self.nodes.items():
|
|
576
|
+
node.in_degree = self.graph.in_degree(path)
|
|
577
|
+
node.out_degree = self.graph.out_degree(path)
|
|
578
|
+
|
|
579
|
+
def _compute_pagerank(self) -> None:
|
|
580
|
+
"""Compute PageRank scores for all nodes."""
|
|
581
|
+
if len(self.graph) == 0:
|
|
582
|
+
return
|
|
583
|
+
|
|
584
|
+
try:
|
|
585
|
+
scores = nx.pagerank(
|
|
586
|
+
self.graph,
|
|
587
|
+
alpha=self.damping,
|
|
588
|
+
max_iter=self.DEFAULT_MAX_ITER,
|
|
589
|
+
tol=self.DEFAULT_TOL,
|
|
590
|
+
)
|
|
591
|
+
|
|
592
|
+
for path, score in scores.items():
|
|
593
|
+
if path in self.nodes:
|
|
594
|
+
self.nodes[path].pagerank = score
|
|
595
|
+
|
|
596
|
+
except (nx.NetworkXError, ImportError):
|
|
597
|
+
# Graph has issues (e.g., no edges), or networkx's pagerank backend
|
|
598
|
+
# (numpy/scipy) is not installed — assign uniform scores.
|
|
599
|
+
uniform = 1.0 / max(len(self.nodes), 1)
|
|
600
|
+
for node in self.nodes.values():
|
|
601
|
+
node.pagerank = uniform
|
|
602
|
+
|
|
603
|
+
def get_critical_paths(self, top_n: int = 10) -> list[tuple[str, float]]:
|
|
604
|
+
"""Get the top N architecturally important files.
|
|
605
|
+
|
|
606
|
+
Returns files ranked by PageRank score, which represents their
|
|
607
|
+
"Architectural Importance" - how central they are to the codebase
|
|
608
|
+
structure, considering transitive dependencies.
|
|
609
|
+
|
|
610
|
+
Args:
|
|
611
|
+
top_n: Number of top files to return.
|
|
612
|
+
|
|
613
|
+
Returns:
|
|
614
|
+
List of (file_path, pagerank_score) tuples, sorted by score descending.
|
|
615
|
+
|
|
616
|
+
Example:
|
|
617
|
+
>>> critical = dg.get_critical_paths(top_n=5)
|
|
618
|
+
>>> for path, score in critical:
|
|
619
|
+
... print(f"{path}: {score:.4f}")
|
|
620
|
+
src/core/config.py: 0.0842
|
|
621
|
+
src/utils/helpers.py: 0.0654
|
|
622
|
+
src/db/connection.py: 0.0521
|
|
623
|
+
"""
|
|
624
|
+
if not self._built:
|
|
625
|
+
raise RuntimeError("Graph not built. Call build() first.")
|
|
626
|
+
|
|
627
|
+
ranked = sorted(
|
|
628
|
+
[(path, node.pagerank) for path, node in self.nodes.items()],
|
|
629
|
+
key=lambda x: x[1],
|
|
630
|
+
reverse=True,
|
|
631
|
+
)
|
|
632
|
+
|
|
633
|
+
return ranked[:top_n]
|
|
634
|
+
|
|
635
|
+
def is_hub(self, path: str, threshold: int = 3) -> bool:
|
|
636
|
+
"""Check if a file is a hub (imported by many files).
|
|
637
|
+
|
|
638
|
+
Args:
|
|
639
|
+
path: Relative file path.
|
|
640
|
+
threshold: Minimum number of importers to be considered a hub.
|
|
641
|
+
|
|
642
|
+
Returns:
|
|
643
|
+
True if the file has >= threshold importers.
|
|
644
|
+
"""
|
|
645
|
+
if path not in self.nodes:
|
|
646
|
+
return False
|
|
647
|
+
return self.nodes[path].in_degree >= threshold
|
|
648
|
+
|
|
649
|
+
def get_hub_files(self, threshold: int = 3) -> list[str]:
|
|
650
|
+
"""Get all files that are imported by >= threshold other files.
|
|
651
|
+
|
|
652
|
+
Args:
|
|
653
|
+
threshold: Minimum importers to qualify as hub.
|
|
654
|
+
|
|
655
|
+
Returns:
|
|
656
|
+
List of file paths that are hubs.
|
|
657
|
+
"""
|
|
658
|
+
return [path for path, node in self.nodes.items() if node.in_degree >= threshold]
|
|
659
|
+
|
|
660
|
+
def get_connected_files(self, path: str) -> list[str]:
|
|
661
|
+
"""Get all files connected to the given file (imports + importers).
|
|
662
|
+
|
|
663
|
+
Args:
|
|
664
|
+
path: Relative file path.
|
|
665
|
+
|
|
666
|
+
Returns:
|
|
667
|
+
List of connected file paths.
|
|
668
|
+
"""
|
|
669
|
+
if path not in self.nodes:
|
|
670
|
+
return []
|
|
671
|
+
|
|
672
|
+
node = self.nodes[path]
|
|
673
|
+
connected = set(node.resolved_imports) | set(node.importers)
|
|
674
|
+
connected.discard(path)
|
|
675
|
+
return list(connected)
|
|
676
|
+
|
|
677
|
+
def get_dependency_chain(self, path: str, depth: int = 3) -> dict[str, Any]:
|
|
678
|
+
"""Get dependency chain (what this file imports, recursively).
|
|
679
|
+
|
|
680
|
+
Args:
|
|
681
|
+
path: Starting file path.
|
|
682
|
+
depth: Maximum depth to traverse.
|
|
683
|
+
|
|
684
|
+
Returns:
|
|
685
|
+
Nested dict representing the dependency tree.
|
|
686
|
+
"""
|
|
687
|
+
|
|
688
|
+
def _build_chain(current: str, remaining_depth: int, seen: set[str]) -> dict:
|
|
689
|
+
if remaining_depth <= 0 or current in seen or current not in self.nodes:
|
|
690
|
+
return {}
|
|
691
|
+
|
|
692
|
+
seen.add(current)
|
|
693
|
+
node = self.nodes[current]
|
|
694
|
+
|
|
695
|
+
return {
|
|
696
|
+
"imports": {
|
|
697
|
+
dep: _build_chain(dep, remaining_depth - 1, seen.copy())
|
|
698
|
+
for dep in node.resolved_imports
|
|
699
|
+
if dep in self.nodes
|
|
700
|
+
}
|
|
701
|
+
}
|
|
702
|
+
|
|
703
|
+
return {path: _build_chain(path, depth, set())}
|
|
704
|
+
|
|
705
|
+
def get_importers_chain(self, path: str, depth: int = 3) -> dict[str, Any]:
|
|
706
|
+
"""Get reverse dependency chain (what imports this file, recursively).
|
|
707
|
+
|
|
708
|
+
Args:
|
|
709
|
+
path: Starting file path.
|
|
710
|
+
depth: Maximum depth to traverse.
|
|
711
|
+
|
|
712
|
+
Returns:
|
|
713
|
+
Nested dict representing who imports this file.
|
|
714
|
+
"""
|
|
715
|
+
|
|
716
|
+
def _build_chain(current: str, remaining_depth: int, seen: set[str]) -> dict:
|
|
717
|
+
if remaining_depth <= 0 or current in seen or current not in self.nodes:
|
|
718
|
+
return {}
|
|
719
|
+
|
|
720
|
+
seen.add(current)
|
|
721
|
+
node = self.nodes[current]
|
|
722
|
+
|
|
723
|
+
return {
|
|
724
|
+
"imported_by": {
|
|
725
|
+
imp: _build_chain(imp, remaining_depth - 1, seen.copy())
|
|
726
|
+
for imp in node.importers
|
|
727
|
+
if imp in self.nodes
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
return {path: _build_chain(path, depth, set())}
|
|
732
|
+
|
|
733
|
+
def get_stats(self) -> dict[str, Any]:
|
|
734
|
+
"""Get statistics about the dependency graph.
|
|
735
|
+
|
|
736
|
+
Returns:
|
|
737
|
+
Dict with graph statistics.
|
|
738
|
+
"""
|
|
739
|
+
if not self._built:
|
|
740
|
+
return {"error": "Graph not built"}
|
|
741
|
+
|
|
742
|
+
hub_files = self.get_hub_files()
|
|
743
|
+
|
|
744
|
+
return {
|
|
745
|
+
"total_files": len(self.nodes),
|
|
746
|
+
"total_edges": self.graph.number_of_edges(),
|
|
747
|
+
"hub_files": len(hub_files),
|
|
748
|
+
"avg_imports_per_file": (
|
|
749
|
+
sum(n.out_degree for n in self.nodes.values()) / max(len(self.nodes), 1)
|
|
750
|
+
),
|
|
751
|
+
"avg_importers_per_file": (
|
|
752
|
+
sum(n.in_degree for n in self.nodes.values()) / max(len(self.nodes), 1)
|
|
753
|
+
),
|
|
754
|
+
"languages": dict(self._count_by_language()),
|
|
755
|
+
"isolated_files": len(
|
|
756
|
+
[n for n in self.nodes.values() if n.in_degree == 0 and n.out_degree == 0]
|
|
757
|
+
),
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
def _count_by_language(self) -> dict[str, int]:
|
|
761
|
+
"""Count files by language."""
|
|
762
|
+
counts: dict[str, int] = {}
|
|
763
|
+
for node in self.nodes.values():
|
|
764
|
+
lang = node.language or "unknown"
|
|
765
|
+
counts[lang] = counts.get(lang, 0) + 1
|
|
766
|
+
return counts
|
|
767
|
+
|
|
768
|
+
def to_dict(self) -> dict[str, Any]:
|
|
769
|
+
"""Export the graph as a serializable dictionary.
|
|
770
|
+
|
|
771
|
+
Returns:
|
|
772
|
+
Dict that can be serialized to JSON.
|
|
773
|
+
"""
|
|
774
|
+
return {
|
|
775
|
+
"root": str(self.root),
|
|
776
|
+
"module": self.module_name,
|
|
777
|
+
"stats": self.get_stats(),
|
|
778
|
+
"critical_paths": self.get_critical_paths(top_n=20),
|
|
779
|
+
"nodes": {
|
|
780
|
+
path: {
|
|
781
|
+
"language": node.language,
|
|
782
|
+
"pagerank": node.pagerank,
|
|
783
|
+
"in_degree": node.in_degree,
|
|
784
|
+
"out_degree": node.out_degree,
|
|
785
|
+
"imports": node.resolved_imports,
|
|
786
|
+
"importers": node.importers,
|
|
787
|
+
}
|
|
788
|
+
for path, node in self.nodes.items()
|
|
789
|
+
},
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def analyze_repository(root: str, top_n: int = 10) -> dict[str, Any]:
    """Build a dependency graph for a repository and summarize it.

    Args:
        root: Path to repository root.
        top_n: Number of critical paths to return.

    Returns:
        Dict with three keys: "critical_paths" (the top_n
        (file, score) pairs), "hub_files" (paths returned by
        get_hub_files with its default threshold) and "stats" (the
        get_stats dict).

    Example:
        >>> results = analyze_repository('/my/project')
        >>> print(results['critical_paths'])
    """
    # build() returns the graph itself, so construction and scan can be chained.
    dependency_graph = DependencyGraph(root).build()

    summary: dict[str, Any] = {}
    summary["critical_paths"] = dependency_graph.get_critical_paths(top_n=top_n)
    summary["hub_files"] = dependency_graph.get_hub_files()
    summary["stats"] = dependency_graph.get_stats()
    return summary
|