codegraph-nav 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. codegraph_nav/__init__.py +194 -0
  2. codegraph_nav/ast_grep_analyzer.py +448 -0
  3. codegraph_nav/cli.py +223 -0
  4. codegraph_nav/code_navigator.py +1328 -0
  5. codegraph_nav/code_search.py +1009 -0
  6. codegraph_nav/colors.py +209 -0
  7. codegraph_nav/completions.py +354 -0
  8. codegraph_nav/dart_analyzer.py +301 -0
  9. codegraph_nav/dependency_graph.py +814 -0
  10. codegraph_nav/domain/__init__.py +20 -0
  11. codegraph_nav/domain/routes.py +337 -0
  12. codegraph_nav/domain/schemas.py +229 -0
  13. codegraph_nav/domain/tags.py +87 -0
  14. codegraph_nav/exporters.py +563 -0
  15. codegraph_nav/go_analyzer.py +273 -0
  16. codegraph_nav/graph/__init__.py +72 -0
  17. codegraph_nav/graph/builder.py +409 -0
  18. codegraph_nav/graph/communities.py +402 -0
  19. codegraph_nav/graph/flows.py +311 -0
  20. codegraph_nav/graph/query.py +380 -0
  21. codegraph_nav/graph/schema.py +266 -0
  22. codegraph_nav/graph/search.py +257 -0
  23. codegraph_nav/graph/store.py +517 -0
  24. codegraph_nav/hints.py +195 -0
  25. codegraph_nav/import_resolver.py +891 -0
  26. codegraph_nav/js_ts_analyzer.py +564 -0
  27. codegraph_nav/line_reader.py +664 -0
  28. codegraph_nav/mcp/__init__.py +39 -0
  29. codegraph_nav/mcp/__main__.py +5 -0
  30. codegraph_nav/mcp/server.py +2228 -0
  31. codegraph_nav/py.typed +2 -0
  32. codegraph_nav/ruby_analyzer.py +259 -0
  33. codegraph_nav/rust_analyzer.py +379 -0
  34. codegraph_nav/token_efficient_renderer.py +743 -0
  35. codegraph_nav/watcher.py +382 -0
  36. codegraph_nav-0.1.0.dist-info/METADATA +487 -0
  37. codegraph_nav-0.1.0.dist-info/RECORD +41 -0
  38. codegraph_nav-0.1.0.dist-info/WHEEL +5 -0
  39. codegraph_nav-0.1.0.dist-info/entry_points.txt +4 -0
  40. codegraph_nav-0.1.0.dist-info/licenses/LICENSE +21 -0
  41. codegraph_nav-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,814 @@
1
+ #!/usr/bin/env python3
2
+ """DependencyGraph - Architectural importance analysis using PageRank.
3
+
4
+ This module provides graph-based analysis of file dependencies to identify
5
+ architecturally critical files ("hubs") in a codebase. Unlike simple import
6
+ counting, PageRank propagates importance transitively, giving higher scores
7
+ to files imported by other important files.
8
+
9
+ Example:
10
+ >>> graph = DependencyGraph('/path/to/project')
11
+ >>> graph.build()
12
+ >>> critical = graph.get_critical_paths(top_n=10)
13
+ >>> for file, score in critical:
14
+ ... print(f"{file}: {score:.4f}")
15
+ """
16
+
17
+ import ast
18
+ import os
19
+ import re
20
+ from dataclasses import dataclass, field
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+ try:
25
+ import networkx as nx
26
+
27
+ HAS_NETWORKX = True
28
+ except ImportError:
29
+ HAS_NETWORKX = False
30
+ nx = None
31
+
32
+
33
@dataclass
class FileNode:
    """A single source file tracked by the dependency graph.

    Attributes:
        path: Path of the file relative to the project root.
        language: Language detected for the file ("" when unknown).
        imports: Raw import strings extracted from the file.
        resolved_imports: Project files those import strings resolved to.
        importers: Project files whose resolved imports include this file.
        pagerank: PageRank score assigned to the file (0.0 until computed).
        in_degree: Count of incoming edges (files importing this one).
        out_degree: Count of outgoing edges (files this one imports).
    """

    # Identity
    path: str
    language: str = field(default="")

    # Import bookkeeping; each instance gets its own fresh lists.
    imports: list[str] = field(default_factory=list)
    resolved_imports: list[str] = field(default_factory=list)
    importers: list[str] = field(default_factory=list)

    # Graph metrics
    pagerank: float = field(default=0.0)
    in_degree: int = field(default=0)
    out_degree: int = field(default=0)
56
+
57
+
58
+ class DependencyGraph:
59
+ """Analyzes file-level dependencies and computes architectural importance.
60
+
61
+ This class builds a directed graph where nodes are files and edges represent
62
+ import relationships. It uses PageRank to compute "Architectural Importance"
63
+ scores, which are superior to simple import counting because:
64
+
65
+ 1. **Transitive propagation**: If file A is imported by B and C, and B/C are
66
+ themselves highly important (imported by many important files), A gets
67
+ a higher score than if B/C were leaf nodes.
68
+
69
+ 2. **Hub detection**: Identifies true architectural hubs - files that are
70
+ central to the codebase structure, not just frequently imported utilities.
71
+
72
+ 3. **Noise resistance**: A file imported by many trivial test files won't
73
+ rank as high as one imported by core business logic modules.
74
+
75
+ Attributes:
76
+ root: Absolute path to the project root.
77
+ graph: NetworkX DiGraph representing file dependencies.
78
+ nodes: Dict mapping file paths to FileNode objects.
79
+ file_index: Index for fast import resolution.
80
+ module_name: Detected module name (from go.mod, pyproject.toml, etc.).
81
+
82
+ Example:
83
+ >>> dg = DependencyGraph('/my/project')
84
+ >>> dg.build()
85
+ >>>
86
+ >>> # Get top 10 most important files
87
+ >>> critical = dg.get_critical_paths(top_n=10)
88
+ >>>
89
+ >>> # Check if a specific file is a hub
90
+ >>> if dg.is_hub('src/core/config.py'):
91
+ ... print("config.py is architecturally critical!")
92
+ >>>
93
+ >>> # Get all connected files
94
+ >>> connected = dg.get_connected_files('src/main.py')
95
+ """
96
+
97
+ # Supported file extensions by language
98
+ LANGUAGE_EXTENSIONS = {
99
+ "python": [".py"],
100
+ "javascript": [".js", ".jsx", ".mjs"],
101
+ "typescript": [".ts", ".tsx"],
102
+ "go": [".go"],
103
+ "rust": [".rs"],
104
+ "java": [".java"],
105
+ "ruby": [".rb"],
106
+ }
107
+
108
+ # Directories to ignore
109
+ IGNORED_DIRS = {
110
+ "node_modules",
111
+ "__pycache__",
112
+ ".git",
113
+ ".svn",
114
+ "venv",
115
+ "env",
116
+ ".env",
117
+ "dist",
118
+ "build",
119
+ ".next",
120
+ "coverage",
121
+ "vendor",
122
+ "target",
123
+ ".tox",
124
+ "eggs",
125
+ ".pytest_cache",
126
+ ".mypy_cache",
127
+ ".ruff_cache",
128
+ }
129
+
130
+ # PageRank parameters
131
+ DEFAULT_DAMPING = 0.85 # Standard damping factor
132
+ DEFAULT_MAX_ITER = 100 # Maximum iterations for convergence
133
+ DEFAULT_TOL = 1e-06 # Convergence tolerance
134
+
135
+ def __init__(self, root: str, damping: float | None = None):
136
+ """Initialize the dependency graph analyzer.
137
+
138
+ Args:
139
+ root: Path to the project root directory.
140
+ damping: PageRank damping factor (default: 0.85).
141
+ Higher values give more weight to direct imports.
142
+
143
+ Raises:
144
+ ImportError: If networkx is not installed.
145
+ ValueError: If root path doesn't exist.
146
+ """
147
+ if not HAS_NETWORKX:
148
+ raise ImportError(
149
+ "networkx is required for DependencyGraph. " "Install with: pip install networkx"
150
+ )
151
+
152
+ self.root = Path(root).resolve()
153
+ if not self.root.exists():
154
+ raise ValueError(f"Root path does not exist: {self.root}")
155
+
156
+ self.damping = damping or self.DEFAULT_DAMPING
157
+ self.graph: nx.DiGraph = nx.DiGraph()
158
+ self.nodes: dict[str, FileNode] = {}
159
+ self.file_index: dict[str, dict[str, list[str]]] = {} # index type -> key -> file paths
160
+ self.module_name: str = ""
161
+ self._built = False
162
+
163
+ def build(self, languages: list[str] | None = None) -> "DependencyGraph":
164
+ """Scan the project and build the dependency graph.
165
+
166
+ Args:
167
+ languages: List of languages to include (default: all supported).
168
+
169
+ Returns:
170
+ self, for method chaining.
171
+
172
+ Example:
173
+ >>> dg = DependencyGraph('/project').build()
174
+ >>> dg = DependencyGraph('/project').build(languages=['python', 'typescript'])
175
+ """
176
+ # Detect module/package name
177
+ self.module_name = self._detect_module_name()
178
+
179
+ # Scan all source files
180
+ files = self._scan_files(languages)
181
+
182
+ # Build file index for fast import resolution
183
+ self._build_file_index(files)
184
+
185
+ # Extract imports from each file
186
+ for file_path in files:
187
+ self._analyze_file(file_path)
188
+
189
+ # Resolve imports to actual files
190
+ self._resolve_all_imports()
191
+
192
+ # Build the NetworkX graph
193
+ self._build_networkx_graph()
194
+
195
+ # Compute PageRank scores
196
+ self._compute_pagerank()
197
+
198
+ self._built = True
199
+ return self
200
+
201
+ def _detect_module_name(self) -> str:
202
+ """Detect the module/package name from config files."""
203
+ # Try go.mod
204
+ go_mod = self.root / "go.mod"
205
+ if go_mod.exists():
206
+ try:
207
+ content = go_mod.read_text()
208
+ for line in content.splitlines():
209
+ if line.startswith("module "):
210
+ return line.split()[1]
211
+ except Exception:
212
+ pass
213
+
214
+ # Try pyproject.toml
215
+ pyproject = self.root / "pyproject.toml"
216
+ if pyproject.exists():
217
+ try:
218
+ content = pyproject.read_text()
219
+ # Simple regex for [project] name or [tool.poetry] name
220
+ match = re.search(r'name\s*=\s*["\']([^"\']+)["\']', content)
221
+ if match:
222
+ return match.group(1)
223
+ except Exception:
224
+ pass
225
+
226
+ # Try package.json
227
+ package_json = self.root / "package.json"
228
+ if package_json.exists():
229
+ try:
230
+ import json
231
+
232
+ data = json.loads(package_json.read_text())
233
+ name: str = data.get("name", "")
234
+ return name
235
+ except Exception:
236
+ pass
237
+
238
+ # Fallback to directory name
239
+ return self.root.name
240
+
241
+ def _scan_files(self, languages: list[str] | None = None) -> list[str]:
242
+ """Scan directory for source files.
243
+
244
+ Args:
245
+ languages: Filter by languages (None = all).
246
+
247
+ Returns:
248
+ List of relative file paths.
249
+ """
250
+ files = []
251
+ extensions = set()
252
+
253
+ if languages:
254
+ for lang in languages:
255
+ extensions.update(self.LANGUAGE_EXTENSIONS.get(lang, []))
256
+ else:
257
+ for exts in self.LANGUAGE_EXTENSIONS.values():
258
+ extensions.update(exts)
259
+
260
+ for dirpath, dirnames, filenames in os.walk(self.root):
261
+ # Filter out ignored directories in-place
262
+ dirnames[:] = [d for d in dirnames if d not in self.IGNORED_DIRS]
263
+
264
+ for filename in filenames:
265
+ if any(filename.endswith(ext) for ext in extensions):
266
+ full_path = Path(dirpath) / filename
267
+ rel_path = str(full_path.relative_to(self.root))
268
+ files.append(rel_path)
269
+
270
+ return files
271
+
272
+ def _build_file_index(self, files: list[str]) -> None:
273
+ """Build multi-key index for fast import resolution.
274
+
275
+ Creates indexes by:
276
+ - Exact path
277
+ - Path without extension
278
+ - All path suffixes (for nested packages)
279
+ - Directory (for package imports)
280
+ """
281
+ self.file_index = {
282
+ "exact": {}, # exact path -> [files]
283
+ "no_ext": {}, # path without extension -> [files]
284
+ "suffix": {}, # path suffix -> [files]
285
+ "dir": {}, # directory -> [files]
286
+ "basename": {}, # just filename -> [files]
287
+ }
288
+
289
+ for path in files:
290
+ # Exact match
291
+ self._add_to_index("exact", path, path)
292
+
293
+ # Without extension
294
+ no_ext = str(Path(path).with_suffix(""))
295
+ self._add_to_index("no_ext", no_ext, path)
296
+
297
+ # Basename
298
+ basename = Path(path).stem
299
+ self._add_to_index("basename", basename, path)
300
+
301
+ # Directory
302
+ dir_path = str(Path(path).parent)
303
+ if dir_path != ".":
304
+ self._add_to_index("dir", dir_path, path)
305
+
306
+ # All suffixes (for nested package resolution)
307
+ # e.g., "src/core/config.py" indexed as:
308
+ # - "core/config.py"
309
+ # - "config.py"
310
+ parts = Path(path).parts
311
+ for i in range(1, len(parts)):
312
+ suffix = str(Path(*parts[i:]))
313
+ self._add_to_index("suffix", suffix, path)
314
+ # Also without extension
315
+ suffix_no_ext = str(Path(*parts[i:]).with_suffix(""))
316
+ self._add_to_index("suffix", suffix_no_ext, path)
317
+
318
+ def _add_to_index(self, index_type: str, key: str, path: str) -> None:
319
+ """Add a path to a specific index."""
320
+ if key not in self.file_index[index_type]:
321
+ self.file_index[index_type][key] = []
322
+ if path not in self.file_index[index_type][key]:
323
+ self.file_index[index_type][key].append(path)
324
+
325
+ def _analyze_file(self, rel_path: str) -> None:
326
+ """Extract imports from a single file."""
327
+ full_path = self.root / rel_path
328
+ language = self._detect_language(rel_path)
329
+
330
+ node = FileNode(path=rel_path, language=language)
331
+
332
+ try:
333
+ content = full_path.read_text(encoding="utf-8", errors="ignore")
334
+
335
+ if language == "python":
336
+ node.imports = self._extract_python_imports(content)
337
+ elif language in ("javascript", "typescript"):
338
+ node.imports = self._extract_js_ts_imports(content)
339
+ elif language == "go":
340
+ node.imports = self._extract_go_imports(content)
341
+ elif language == "rust":
342
+ node.imports = self._extract_rust_imports(content)
343
+ else:
344
+ node.imports = self._extract_generic_imports(content)
345
+
346
+ except Exception:
347
+ pass # Skip files we can't read
348
+
349
+ self.nodes[rel_path] = node
350
+
351
+ def _detect_language(self, path: str) -> str:
352
+ """Detect language from file extension."""
353
+ ext = Path(path).suffix.lower()
354
+ for lang, extensions in self.LANGUAGE_EXTENSIONS.items():
355
+ if ext in extensions:
356
+ return lang
357
+ return ""
358
+
359
+ def _extract_python_imports(self, content: str) -> list[str]:
360
+ """Extract imports from Python code using AST."""
361
+ imports = []
362
+ try:
363
+ tree = ast.parse(content)
364
+ for node in ast.walk(tree):
365
+ if isinstance(node, ast.Import):
366
+ for alias in node.names:
367
+ imports.append(alias.name)
368
+ elif isinstance(node, ast.ImportFrom):
369
+ module = node.module or ""
370
+ if node.level > 0: # Relative import
371
+ imports.append("." * node.level + module)
372
+ else:
373
+ imports.append(module)
374
+ except SyntaxError:
375
+ pass
376
+ return imports
377
+
378
+ def _extract_js_ts_imports(self, content: str) -> list[str]:
379
+ """Extract imports from JavaScript/TypeScript code."""
380
+ imports = []
381
+ # Match: import ... from 'path' or require('path')
382
+ patterns = [
383
+ r'import\s+.*?\s+from\s+[\'"]([^\'"]+)[\'"]',
384
+ r'import\s+[\'"]([^\'"]+)[\'"]',
385
+ r'require\s*\(\s*[\'"]([^\'"]+)[\'"]\s*\)',
386
+ r'export\s+.*?\s+from\s+[\'"]([^\'"]+)[\'"]',
387
+ ]
388
+ for pattern in patterns:
389
+ imports.extend(re.findall(pattern, content))
390
+ return imports
391
+
392
+ def _extract_go_imports(self, content: str) -> list[str]:
393
+ """Extract imports from Go code."""
394
+ imports = []
395
+ # Match single import: import "path"
396
+ imports.extend(re.findall(r'import\s+"([^"]+)"', content))
397
+ # Match grouped imports: import ( "path1" "path2" )
398
+ block_match = re.search(r"import\s*\((.*?)\)", content, re.DOTALL)
399
+ if block_match:
400
+ imports.extend(re.findall(r'"([^"]+)"', block_match.group(1)))
401
+ return imports
402
+
403
+ def _extract_rust_imports(self, content: str) -> list[str]:
404
+ """Extract imports from Rust code."""
405
+ imports = []
406
+ # Match: use crate::path, use super::path, use path
407
+ imports.extend(re.findall(r"use\s+([\w:]+)", content))
408
+ # Match: mod name
409
+ imports.extend(re.findall(r"mod\s+(\w+)", content))
410
+ return imports
411
+
412
+ def _extract_generic_imports(self, content: str) -> list[str]:
413
+ """Fallback import extraction using common patterns."""
414
+ imports = []
415
+ patterns = [
416
+ r'import\s+[\'"]([^\'"]+)[\'"]',
417
+ r'require\s*[\'"]([^\'"]+)[\'"]',
418
+ r'from\s+[\'"]([^\'"]+)[\'"]',
419
+ ]
420
+ for pattern in patterns:
421
+ imports.extend(re.findall(pattern, content))
422
+ return imports
423
+
424
+ def _resolve_all_imports(self) -> None:
425
+ """Resolve import strings to actual file paths."""
426
+ for path, node in self.nodes.items():
427
+ resolved = []
428
+ for imp in node.imports:
429
+ files = self._resolve_import(imp, path, node.language)
430
+ # Only count single-file resolutions (not package imports)
431
+ if len(files) == 1:
432
+ resolved.append(files[0])
433
+
434
+ node.resolved_imports = list(set(resolved))
435
+
436
+ # Build reverse map (importers)
437
+ for imported_file in node.resolved_imports:
438
+ if imported_file in self.nodes:
439
+ self.nodes[imported_file].importers.append(path)
440
+
441
+ def _resolve_import(self, imp: str, from_file: str, language: str) -> list[str]:
442
+ """Resolve an import string to file path(s).
443
+
444
+ Uses multiple strategies in order:
445
+ 1. Relative path resolution (./foo, ../bar)
446
+ 2. Module-prefixed path (for Go/Python internal packages)
447
+ 3. Exact match
448
+ 4. Suffix match
449
+ """
450
+ # Normalize the import
451
+ normalized = self._normalize_import(imp, language)
452
+ from_dir = str(Path(from_file).parent)
453
+
454
+ # Strategy 1: Relative imports
455
+ if imp.startswith("."):
456
+ return self._resolve_relative_import(imp, from_dir)
457
+
458
+ # Strategy 2: Module-prefixed (internal package)
459
+ if self.module_name and imp.startswith(self.module_name):
460
+ rest = imp[len(self.module_name) :].lstrip("/.")
461
+ candidates = self._try_exact_match(rest)
462
+ if candidates:
463
+ return candidates
464
+
465
+ # Strategy 3: Exact match
466
+ candidates = self._try_exact_match(normalized)
467
+ if candidates:
468
+ return candidates
469
+
470
+ # Strategy 4: Suffix match
471
+ candidates = self._try_suffix_match(normalized)
472
+ if candidates:
473
+ return candidates
474
+
475
+ return []
476
+
477
+ def _normalize_import(self, imp: str, language: str) -> str:
478
+ """Convert import syntax to a path-like format."""
479
+ imp = imp.strip("\"'`")
480
+
481
+ # Python dots to slashes: app.core.config -> app/core/config
482
+ if language == "python" and "." in imp and "/" not in imp:
483
+ if not imp.startswith("."):
484
+ imp = imp.replace(".", "/")
485
+
486
+ # Rust :: to slashes
487
+ if language == "rust":
488
+ if imp.startswith("crate::"):
489
+ imp = imp[7:].replace("::", "/")
490
+ elif "::" in imp:
491
+ imp = imp.replace("::", "/")
492
+
493
+ return imp
494
+
495
+ def _resolve_relative_import(self, imp: str, from_dir: str) -> list[str]:
496
+ """Resolve ./foo or ../bar style imports."""
497
+ # Count parent levels
498
+ levels = 0
499
+ rest = imp
500
+ while rest.startswith(".."):
501
+ levels += 1
502
+ rest = rest[2:].lstrip("/")
503
+ rest = rest.lstrip("./")
504
+
505
+ # Navigate up
506
+ target_dir = Path(from_dir)
507
+ for _ in range(levels):
508
+ target_dir = target_dir.parent
509
+
510
+ # Build candidate path
511
+ if str(target_dir) == ".":
512
+ candidate = rest
513
+ else:
514
+ candidate = str(target_dir / rest)
515
+
516
+ return self._try_exact_match(candidate)
517
+
518
+ def _try_exact_match(self, path: str) -> list[str]:
519
+ """Try to match path exactly (with common extensions)."""
520
+ extensions = [
521
+ "",
522
+ ".py",
523
+ ".js",
524
+ ".ts",
525
+ ".tsx",
526
+ ".jsx",
527
+ ".go",
528
+ ".rs",
529
+ "/index.js",
530
+ "/index.ts",
531
+ "/index.tsx",
532
+ "/__init__.py",
533
+ "/mod.rs",
534
+ ]
535
+
536
+ for ext in extensions:
537
+ candidate = path + ext
538
+ if candidate in self.file_index["exact"]:
539
+ return self.file_index["exact"][candidate]
540
+ if candidate in self.file_index["no_ext"]:
541
+ return self.file_index["no_ext"][candidate]
542
+
543
+ return []
544
+
545
+ def _try_suffix_match(self, normalized: str) -> list[str]:
546
+ """Find files where path ends with normalized import."""
547
+ extensions = ["", ".py", ".js", ".ts", ".tsx", ".jsx", ".go", ".rs"]
548
+
549
+ for ext in extensions:
550
+ candidate = normalized + ext
551
+ if candidate in self.file_index["suffix"]:
552
+ files = self.file_index["suffix"][candidate]
553
+ if len(files) == 1:
554
+ return files
555
+
556
+ return []
557
+
558
+ def _build_networkx_graph(self) -> None:
559
+ """Build the NetworkX DiGraph from resolved imports."""
560
+ self.graph.clear()
561
+
562
+ # Add all nodes
563
+ for path in self.nodes:
564
+ self.graph.add_node(path)
565
+
566
+ # Add edges (importer -> imported)
567
+ # Direction: A imports B means edge from A to B
568
+ # PageRank will give higher scores to nodes with many incoming edges
569
+ for path, node in self.nodes.items():
570
+ for imported_file in node.resolved_imports:
571
+ if imported_file in self.nodes:
572
+ self.graph.add_edge(path, imported_file)
573
+
574
+ # Update degree stats
575
+ for path, node in self.nodes.items():
576
+ node.in_degree = self.graph.in_degree(path)
577
+ node.out_degree = self.graph.out_degree(path)
578
+
579
+ def _compute_pagerank(self) -> None:
580
+ """Compute PageRank scores for all nodes."""
581
+ if len(self.graph) == 0:
582
+ return
583
+
584
+ try:
585
+ scores = nx.pagerank(
586
+ self.graph,
587
+ alpha=self.damping,
588
+ max_iter=self.DEFAULT_MAX_ITER,
589
+ tol=self.DEFAULT_TOL,
590
+ )
591
+
592
+ for path, score in scores.items():
593
+ if path in self.nodes:
594
+ self.nodes[path].pagerank = score
595
+
596
+ except (nx.NetworkXError, ImportError):
597
+ # Graph has issues (e.g., no edges), or networkx's pagerank backend
598
+ # (numpy/scipy) is not installed — assign uniform scores.
599
+ uniform = 1.0 / max(len(self.nodes), 1)
600
+ for node in self.nodes.values():
601
+ node.pagerank = uniform
602
+
603
+ def get_critical_paths(self, top_n: int = 10) -> list[tuple[str, float]]:
604
+ """Get the top N architecturally important files.
605
+
606
+ Returns files ranked by PageRank score, which represents their
607
+ "Architectural Importance" - how central they are to the codebase
608
+ structure, considering transitive dependencies.
609
+
610
+ Args:
611
+ top_n: Number of top files to return.
612
+
613
+ Returns:
614
+ List of (file_path, pagerank_score) tuples, sorted by score descending.
615
+
616
+ Example:
617
+ >>> critical = dg.get_critical_paths(top_n=5)
618
+ >>> for path, score in critical:
619
+ ... print(f"{path}: {score:.4f}")
620
+ src/core/config.py: 0.0842
621
+ src/utils/helpers.py: 0.0654
622
+ src/db/connection.py: 0.0521
623
+ """
624
+ if not self._built:
625
+ raise RuntimeError("Graph not built. Call build() first.")
626
+
627
+ ranked = sorted(
628
+ [(path, node.pagerank) for path, node in self.nodes.items()],
629
+ key=lambda x: x[1],
630
+ reverse=True,
631
+ )
632
+
633
+ return ranked[:top_n]
634
+
635
+ def is_hub(self, path: str, threshold: int = 3) -> bool:
636
+ """Check if a file is a hub (imported by many files).
637
+
638
+ Args:
639
+ path: Relative file path.
640
+ threshold: Minimum number of importers to be considered a hub.
641
+
642
+ Returns:
643
+ True if the file has >= threshold importers.
644
+ """
645
+ if path not in self.nodes:
646
+ return False
647
+ return self.nodes[path].in_degree >= threshold
648
+
649
+ def get_hub_files(self, threshold: int = 3) -> list[str]:
650
+ """Get all files that are imported by >= threshold other files.
651
+
652
+ Args:
653
+ threshold: Minimum importers to qualify as hub.
654
+
655
+ Returns:
656
+ List of file paths that are hubs.
657
+ """
658
+ return [path for path, node in self.nodes.items() if node.in_degree >= threshold]
659
+
660
+ def get_connected_files(self, path: str) -> list[str]:
661
+ """Get all files connected to the given file (imports + importers).
662
+
663
+ Args:
664
+ path: Relative file path.
665
+
666
+ Returns:
667
+ List of connected file paths.
668
+ """
669
+ if path not in self.nodes:
670
+ return []
671
+
672
+ node = self.nodes[path]
673
+ connected = set(node.resolved_imports) | set(node.importers)
674
+ connected.discard(path)
675
+ return list(connected)
676
+
677
+ def get_dependency_chain(self, path: str, depth: int = 3) -> dict[str, Any]:
678
+ """Get dependency chain (what this file imports, recursively).
679
+
680
+ Args:
681
+ path: Starting file path.
682
+ depth: Maximum depth to traverse.
683
+
684
+ Returns:
685
+ Nested dict representing the dependency tree.
686
+ """
687
+
688
+ def _build_chain(current: str, remaining_depth: int, seen: set[str]) -> dict:
689
+ if remaining_depth <= 0 or current in seen or current not in self.nodes:
690
+ return {}
691
+
692
+ seen.add(current)
693
+ node = self.nodes[current]
694
+
695
+ return {
696
+ "imports": {
697
+ dep: _build_chain(dep, remaining_depth - 1, seen.copy())
698
+ for dep in node.resolved_imports
699
+ if dep in self.nodes
700
+ }
701
+ }
702
+
703
+ return {path: _build_chain(path, depth, set())}
704
+
705
+ def get_importers_chain(self, path: str, depth: int = 3) -> dict[str, Any]:
706
+ """Get reverse dependency chain (what imports this file, recursively).
707
+
708
+ Args:
709
+ path: Starting file path.
710
+ depth: Maximum depth to traverse.
711
+
712
+ Returns:
713
+ Nested dict representing who imports this file.
714
+ """
715
+
716
+ def _build_chain(current: str, remaining_depth: int, seen: set[str]) -> dict:
717
+ if remaining_depth <= 0 or current in seen or current not in self.nodes:
718
+ return {}
719
+
720
+ seen.add(current)
721
+ node = self.nodes[current]
722
+
723
+ return {
724
+ "imported_by": {
725
+ imp: _build_chain(imp, remaining_depth - 1, seen.copy())
726
+ for imp in node.importers
727
+ if imp in self.nodes
728
+ }
729
+ }
730
+
731
+ return {path: _build_chain(path, depth, set())}
732
+
733
+ def get_stats(self) -> dict[str, Any]:
734
+ """Get statistics about the dependency graph.
735
+
736
+ Returns:
737
+ Dict with graph statistics.
738
+ """
739
+ if not self._built:
740
+ return {"error": "Graph not built"}
741
+
742
+ hub_files = self.get_hub_files()
743
+
744
+ return {
745
+ "total_files": len(self.nodes),
746
+ "total_edges": self.graph.number_of_edges(),
747
+ "hub_files": len(hub_files),
748
+ "avg_imports_per_file": (
749
+ sum(n.out_degree for n in self.nodes.values()) / max(len(self.nodes), 1)
750
+ ),
751
+ "avg_importers_per_file": (
752
+ sum(n.in_degree for n in self.nodes.values()) / max(len(self.nodes), 1)
753
+ ),
754
+ "languages": dict(self._count_by_language()),
755
+ "isolated_files": len(
756
+ [n for n in self.nodes.values() if n.in_degree == 0 and n.out_degree == 0]
757
+ ),
758
+ }
759
+
760
+ def _count_by_language(self) -> dict[str, int]:
761
+ """Count files by language."""
762
+ counts: dict[str, int] = {}
763
+ for node in self.nodes.values():
764
+ lang = node.language or "unknown"
765
+ counts[lang] = counts.get(lang, 0) + 1
766
+ return counts
767
+
768
+ def to_dict(self) -> dict[str, Any]:
769
+ """Export the graph as a serializable dictionary.
770
+
771
+ Returns:
772
+ Dict that can be serialized to JSON.
773
+ """
774
+ return {
775
+ "root": str(self.root),
776
+ "module": self.module_name,
777
+ "stats": self.get_stats(),
778
+ "critical_paths": self.get_critical_paths(top_n=20),
779
+ "nodes": {
780
+ path: {
781
+ "language": node.language,
782
+ "pagerank": node.pagerank,
783
+ "in_degree": node.in_degree,
784
+ "out_degree": node.out_degree,
785
+ "imports": node.resolved_imports,
786
+ "importers": node.importers,
787
+ }
788
+ for path, node in self.nodes.items()
789
+ },
790
+ }
791
+
792
+
793
def analyze_repository(root: str, top_n: int = 10) -> dict[str, Any]:
    """Build a dependency graph for a repository and summarize it.

    Args:
        root: Path to repository root.
        top_n: How many of the highest-ranked files to report.

    Returns:
        Dict with three keys: "critical_paths" (top-ranked files with their
        scores), "hub_files" (files at or above the default hub threshold)
        and "stats" (graph statistics).

    Example:
        >>> results = analyze_repository('/my/project')
        >>> print(results['critical_paths'])
    """
    # build() returns the graph itself, so construction and scan chain together.
    dep_graph = DependencyGraph(root).build()

    summary: dict[str, Any] = {}
    summary["critical_paths"] = dep_graph.get_critical_paths(top_n=top_n)
    summary["hub_files"] = dep_graph.get_hub_files()
    summary["stats"] = dep_graph.get_stats()
    return summary