mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_vector_search/__init__.py +3 -3
- mcp_vector_search/analysis/__init__.py +111 -0
- mcp_vector_search/analysis/baseline/__init__.py +68 -0
- mcp_vector_search/analysis/baseline/comparator.py +462 -0
- mcp_vector_search/analysis/baseline/manager.py +621 -0
- mcp_vector_search/analysis/collectors/__init__.py +74 -0
- mcp_vector_search/analysis/collectors/base.py +164 -0
- mcp_vector_search/analysis/collectors/cohesion.py +463 -0
- mcp_vector_search/analysis/collectors/complexity.py +743 -0
- mcp_vector_search/analysis/collectors/coupling.py +1162 -0
- mcp_vector_search/analysis/collectors/halstead.py +514 -0
- mcp_vector_search/analysis/collectors/smells.py +325 -0
- mcp_vector_search/analysis/debt.py +516 -0
- mcp_vector_search/analysis/interpretation.py +685 -0
- mcp_vector_search/analysis/metrics.py +414 -0
- mcp_vector_search/analysis/reporters/__init__.py +7 -0
- mcp_vector_search/analysis/reporters/console.py +646 -0
- mcp_vector_search/analysis/reporters/markdown.py +480 -0
- mcp_vector_search/analysis/reporters/sarif.py +377 -0
- mcp_vector_search/analysis/storage/__init__.py +93 -0
- mcp_vector_search/analysis/storage/metrics_store.py +762 -0
- mcp_vector_search/analysis/storage/schema.py +245 -0
- mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
- mcp_vector_search/analysis/trends.py +308 -0
- mcp_vector_search/analysis/visualizer/__init__.py +90 -0
- mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
- mcp_vector_search/analysis/visualizer/exporter.py +484 -0
- mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
- mcp_vector_search/analysis/visualizer/schemas.py +525 -0
- mcp_vector_search/cli/commands/analyze.py +1062 -0
- mcp_vector_search/cli/commands/chat.py +1455 -0
- mcp_vector_search/cli/commands/index.py +621 -5
- mcp_vector_search/cli/commands/index_background.py +467 -0
- mcp_vector_search/cli/commands/init.py +13 -0
- mcp_vector_search/cli/commands/install.py +597 -335
- mcp_vector_search/cli/commands/install_old.py +8 -4
- mcp_vector_search/cli/commands/mcp.py +78 -6
- mcp_vector_search/cli/commands/reset.py +68 -26
- mcp_vector_search/cli/commands/search.py +224 -8
- mcp_vector_search/cli/commands/setup.py +1184 -0
- mcp_vector_search/cli/commands/status.py +339 -5
- mcp_vector_search/cli/commands/uninstall.py +276 -357
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +292 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +600 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
- mcp_vector_search/cli/didyoumean.py +27 -2
- mcp_vector_search/cli/main.py +127 -160
- mcp_vector_search/cli/output.py +158 -13
- mcp_vector_search/config/__init__.py +4 -0
- mcp_vector_search/config/default_thresholds.yaml +52 -0
- mcp_vector_search/config/settings.py +12 -0
- mcp_vector_search/config/thresholds.py +273 -0
- mcp_vector_search/core/__init__.py +16 -0
- mcp_vector_search/core/auto_indexer.py +3 -3
- mcp_vector_search/core/boilerplate.py +186 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/database.py +406 -94
- mcp_vector_search/core/embeddings.py +24 -0
- mcp_vector_search/core/exceptions.py +11 -0
- mcp_vector_search/core/git.py +380 -0
- mcp_vector_search/core/git_hooks.py +4 -4
- mcp_vector_search/core/indexer.py +632 -54
- mcp_vector_search/core/llm_client.py +756 -0
- mcp_vector_search/core/models.py +91 -1
- mcp_vector_search/core/project.py +17 -0
- mcp_vector_search/core/relationships.py +473 -0
- mcp_vector_search/core/scheduler.py +11 -11
- mcp_vector_search/core/search.py +179 -29
- mcp_vector_search/mcp/server.py +819 -9
- mcp_vector_search/parsers/python.py +285 -5
- mcp_vector_search/utils/__init__.py +2 -0
- mcp_vector_search/utils/gitignore.py +0 -3
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +66 -4
- mcp_vector_search/utils/timing.py +10 -6
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
- mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
- mcp_vector_search/cli/commands/visualize.py +0 -1467
- mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,1162 @@
|
|
|
1
|
+
"""Coupling metric collectors for structural code analysis.
|
|
2
|
+
|
|
3
|
+
This module provides collectors for measuring coupling metrics:
|
|
4
|
+
- EfferentCouplingCollector: Counts outgoing dependencies (imports from this file)
|
|
5
|
+
- AfferentCouplingCollector: Counts incoming dependencies (files that import this file)
|
|
6
|
+
- InstabilityCalculator: Calculates instability metrics across the project
|
|
7
|
+
- CircularDependencyDetector: Detects circular/cyclic dependencies in import graph
|
|
8
|
+
|
|
9
|
+
Coupling metrics help identify architectural dependencies and potential refactoring needs.
|
|
10
|
+
Circular dependencies can lead to:
|
|
11
|
+
- Initialization issues and import errors
|
|
12
|
+
- Tight coupling and reduced maintainability
|
|
13
|
+
- Difficulty in testing and refactoring
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import sys
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import TYPE_CHECKING, Any
|
|
23
|
+
|
|
24
|
+
from .base import CollectorContext, MetricCollector
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from tree_sitter import Node
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# =============================================================================
|
|
31
|
+
# Circular Dependency Detection Types
|
|
32
|
+
# =============================================================================
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class NodeColor(Enum):
    """Node colors for DFS-based cycle detection.

    Standard three-color graph traversal marking:
    - WHITE: Node not yet visited
    - GRAY: Node currently being processed (on the current DFS path)
    - BLACK: Node fully processed (all descendants visited)

    Cycle detection rule: encountering a GRAY node during DFS means the
    current path loops back on itself, i.e. a cycle exists.
    """

    WHITE = "white"  # Unvisited node
    GRAY = "gray"  # On the current DFS path (revisit => cycle)
    BLACK = "black"  # Fully processed; safe to skip
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class ImportGraph:
    """Directed graph of import relationships between files.

    Each node is a file path; an edge A -> B records that A imports B.

    Attributes:
        adjacency_list: Maps each file to the list of files it imports.

    Example:
        graph = ImportGraph()
        graph.add_edge("main.py", "utils.py")
        graph.add_edge("utils.py", "helpers.py")
        # main.py → utils.py → helpers.py
    """

    adjacency_list: dict[str, list[str]] = field(default_factory=dict)

    def add_edge(self, from_file: str, to_file: str) -> None:
        """Record that *from_file* imports *to_file* (duplicate edges ignored).

        Args:
            from_file: Source file that contains the import.
            to_file: Target file being imported.
        """
        targets = self.adjacency_list.setdefault(from_file, [])
        if to_file not in targets:
            targets.append(to_file)

    def add_node(self, file_path: str) -> None:
        """Ensure *file_path* exists in the graph even with no outgoing edges.

        Useful so isolated files are still tracked.

        Args:
            file_path: Path of the file to register as a node.
        """
        self.adjacency_list.setdefault(file_path, [])

    def get_neighbors(self, file_path: str) -> list[str]:
        """Return the files that *file_path* imports (empty list if unknown).

        Args:
            file_path: File whose imports are requested.

        Returns:
            List of file paths imported by *file_path*.
        """
        return self.adjacency_list.get(file_path, [])

    def get_all_files(self) -> list[str]:
        """Return every file appearing anywhere in the graph, sorted.

        Includes files that only ever appear as import targets and never
        import anything themselves.

        Returns:
            Sorted list of all file paths (nodes).
        """
        seen: set[str] = set(self.adjacency_list)
        for targets in self.adjacency_list.values():
            seen.update(targets)
        return sorted(seen)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass
class CircularDependency:
    """A single detected circular dependency cycle.

    Attributes:
        cycle_chain: Files forming the cycle; the first entry is repeated
            as the last entry to close the loop.

    Example:
        cycle = CircularDependency(cycle_chain=["a.py", "b.py", "c.py", "a.py"])
        assert cycle.cycle_length == 3
        assert cycle.format_chain() == "a.py → b.py → c.py → a.py"
    """

    cycle_chain: list[str]

    @property
    def cycle_length(self) -> int:
        """Count of distinct files in the cycle (closing duplicate excluded)."""
        n = len(self.cycle_chain)
        return n - 1 if n > 1 else 0

    def format_chain(self) -> str:
        """Render the cycle as an arrow-separated chain.

        Returns:
            Formatted cycle string such as "A → B → C → A".
        """
        return " → ".join(self.cycle_chain)

    def get_affected_files(self) -> list[str]:
        """Return the unique files participating in this cycle.

        Returns:
            Sorted list of unique file paths in the cycle.
        """
        # Drop the closing duplicate (last element repeats the first).
        members = self.cycle_chain[:-1] if len(self.cycle_chain) > 1 else self.cycle_chain
        return sorted(set(members))
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class CircularDependencyDetector:
    """Finds circular dependencies in an import graph via three-color DFS.

    Every node carries a color during traversal:
    - WHITE: not yet visited
    - GRAY: on the current DFS path
    - BLACK: fully explored

    Meeting a GRAY node again means the current path loops back on itself,
    i.e. a circular dependency. Runs in O(V + E) time with O(V) extra space.

    Notes:
    - The explicit path stack exists so cycles can be reported as readable
      "A → B → C → A" chains rather than just flagged.
    - The same underlying cycle may be recorded more than once when it is
      reachable from several starting points; deduplication, if wanted, is
      left to the caller.

    Example:
        detector = CircularDependencyDetector(import_graph)
        cycles = detector.detect_cycles()

        if detector.has_cycles():
            for cycle in cycles:
                print(f"Cycle detected: {cycle.format_chain()}")
    """

    def __init__(self, import_graph: ImportGraph) -> None:
        """Store the graph to analyze and reset traversal state.

        Args:
            import_graph: Graph of import dependencies to analyze.
        """
        self.graph = import_graph
        self._found: list[CircularDependency] = []
        self._color: dict[str, NodeColor] = {}
        self._trail: list[str] = []  # Current DFS path, used to rebuild cycles

    def detect_cycles(self) -> list[CircularDependency]:
        """Run DFS from every unvisited file and collect all cycles found.

        Returns:
            List of CircularDependency objects, one per detected cycle.

        Complexity:
            Time O(V + E), space O(V) for the color map and path stack.
        """
        files = self.graph.get_all_files()
        self._found = []
        self._color = {f: NodeColor.WHITE for f in files}
        self._trail = []

        for start in files:
            if self._color[start] is NodeColor.WHITE:
                self._visit(start)

        return self._found

    def _visit(self, current: str) -> None:
        """Recursive DFS step; records a cycle when a GRAY node reappears.

        Args:
            current: File currently being visited.
        """
        self._color[current] = NodeColor.GRAY
        self._trail.append(current)

        for target in self.graph.get_neighbors(current):
            state = self._color[target]
            if state is NodeColor.GRAY:
                # Target is already on the trail -> cycle found.
                self._capture(target)
            elif state is NodeColor.WHITE:
                self._visit(target)

        self._trail.pop()
        self._color[current] = NodeColor.BLACK

    def _capture(self, origin: str) -> None:
        """Slice the trail from *origin* onward and store it as a cycle.

        Args:
            origin: File where the cycle begins (the GRAY node just met).
        """
        if origin not in self._trail:
            # Defensive: the GRAY invariant guarantees presence.
            return
        begin = self._trail.index(origin)
        # Close the loop: [origin, ..., current_node, origin]
        chain = self._trail[begin:] + [origin]
        self._found.append(CircularDependency(cycle_chain=chain))

    def has_cycles(self) -> bool:
        """Report whether any cycles were found (call detect_cycles() first).

        Returns:
            True if at least one cycle was recorded, False otherwise.
        """
        return bool(self._found)

    def get_cycle_chains(self) -> list[str]:
        """Return human-readable chains for every recorded cycle.

        Returns:
            List of formatted cycle strings (e.g., ["A → B → C → A"]).
        """
        return [cycle.format_chain() for cycle in self._found]

    def get_affected_files(self) -> list[str]:
        """Return all unique files involved in any recorded cycle.

        Returns:
            Sorted list of unique file paths participating in cycles.
        """
        involved: set[str] = set()
        for cycle in self._found:
            involved.update(cycle.get_affected_files())
        return sorted(involved)
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def build_import_graph_from_dict(file_imports: dict[str, list[str]]) -> ImportGraph:
    """Construct an ImportGraph from a mapping of file -> imported files.

    Args:
        file_imports: Maps each file path to the list of files it imports.

    Returns:
        A populated ImportGraph with every node and edge added.

    Example:
        imports = {
            "main.py": ["utils.py", "config.py"],
            "utils.py": ["helpers.py"],
            "helpers.py": []
        }
        graph = build_import_graph_from_dict(imports)
    """
    result = ImportGraph()

    # Register every source file first so files with no imports still
    # appear as isolated nodes.
    for source in file_imports:
        result.add_node(source)

    # Then wire up the import edges.
    for source, targets in file_imports.items():
        for target in targets:
            result.add_edge(source, target)

    return result
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
# =============================================================================
|
|
354
|
+
# Multi-language Import Statement Mappings
|
|
355
|
+
# =============================================================================
|
|
356
|
+
|
|
357
|
+
# Maps language -> category -> tree-sitter node type names used when
# walking a parse tree to find import statements.
# Categories:
#   "import": node types representing a whole import statement
#   "module_name": node types carrying the imported module's name
#   "require_call" / "import_type": language-specific extras (JS/TS)
IMPORT_NODE_TYPES = {
    "python": {
        "import": ["import_statement", "import_from_statement"],
        "module_name": ["dotted_name", "aliased_import"],
    },
    "javascript": {
        "import": ["import_statement"],
        "module_name": ["string", "import_clause"],
        "require_call": ["call_expression"],  # require('module')
    },
    "typescript": {
        "import": ["import_statement"],
        "module_name": ["string", "import_clause"],
        "import_type": ["import_statement"],  # import type { T } from 'mod'
        "require_call": ["call_expression"],
    },
    "java": {
        "import": ["import_declaration"],
        "module_name": ["scoped_identifier"],
    },
    "rust": {
        "import": ["use_declaration"],
        "module_name": ["scoped_identifier"],
    },
    "php": {
        "import": ["namespace_use_declaration"],
        "module_name": ["qualified_name"],
    },
    "ruby": {
        "import": ["call"],  # require, require_relative (method calls)
        "module_name": ["string"],
    },
}
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def get_import_node_types(language: str, category: str) -> list[str]:
    """Look up tree-sitter node type names for a language's imports.

    Args:
        language: Programming language identifier (e.g., "python", "javascript")
        category: Category of import node ("import", "module_name", etc.)

    Returns:
        List of node type names for this language/category.
        Returns empty list if the category is not found.

    Examples:
        >>> get_import_node_types("python", "import")
        ["import_statement", "import_from_statement"]

        >>> get_import_node_types("javascript", "import")
        ["import_statement"]
    """
    # Unknown languages fall back to the Python mapping.
    try:
        mapping = IMPORT_NODE_TYPES[language]
    except KeyError:
        mapping = IMPORT_NODE_TYPES["python"]
    return mapping.get(category, [])
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def is_stdlib_module(module_name: str, language: str) -> bool:
    """Check if a module is from the standard library.

    Args:
        module_name: Module name (e.g., "os", "sys", "fs")
        language: Programming language

    Returns:
        True if module is standard library, False otherwise.
        Unknown languages always return False.

    Examples:
        >>> is_stdlib_module("os", "python")
        True

        >>> is_stdlib_module("requests", "python")
        False

        >>> is_stdlib_module("fs", "javascript")
        True
    """
    if language == "python":
        # Only the top-level package decides stdlib membership ("os.path" -> "os").
        top_level = module_name.split(".")[0]

        # Authoritative check on Python 3.10+.
        if hasattr(sys, "stdlib_module_names"):
            return top_level in sys.stdlib_module_names

        # Fallback for older interpreters: a curated set of common stdlib
        # modules. FIX: "pytest" was previously in this set, but pytest is a
        # third-party package, not stdlib — the 3.10+ branch above correctly
        # classifies it as non-stdlib, so the fallback now agrees.
        common_stdlib = {
            "os",
            "sys",
            "re",
            "json",
            "math",
            "time",
            "datetime",
            "collections",
            "itertools",
            "functools",
            "pathlib",
            "typing",
            "dataclasses",
            "asyncio",
            "contextlib",
            "abc",
            "io",
            "logging",
            "unittest",
        }
        return top_level in common_stdlib

    if language in ("javascript", "typescript"):
        # Node.js built-in modules. Modern Node also accepts the explicit
        # "node:" scheme (e.g. "node:fs"), so strip it before matching.
        name = module_name
        if name.startswith("node:"):
            name = name[5:]
        nodejs_builtins = {
            "fs",
            "path",
            "http",
            "https",
            "url",
            "os",
            "util",
            "events",
            "stream",
            "buffer",
            "crypto",
            "child_process",
            "cluster",
            "dns",
            "net",
            "tls",
            "dgram",
            "readline",
            "zlib",
            "process",
            "console",
            "assert",
            "timers",
        }
        # Submodule imports like "fs/promises" match on the package root.
        return name.split("/")[0] in nodejs_builtins

    # No stdlib knowledge for other languages.
    return False
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def is_relative_import(module_name: str, language: str) -> bool:
    """Check if import is relative to the current file.

    Args:
        module_name: Module path
        language: Programming language

    Returns:
        True if import is relative, False otherwise. Languages without a
        relative-import notion always return False.

    Examples:
        >>> is_relative_import("./utils", "javascript")
        True

        >>> is_relative_import("lodash", "javascript")
        False

        >>> is_relative_import(".utils", "python")
        True
    """
    if language == "python":
        # Python relative imports begin with one or more dots.
        return module_name.startswith(".")
    if language in ("javascript", "typescript"):
        # JS/TS relative specifiers begin with "./" or "../".
        return module_name.startswith(("./", "../"))
    return False
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
class EfferentCouplingCollector(MetricCollector):
|
|
528
|
+
"""Collects efferent coupling metrics (outgoing dependencies).
|
|
529
|
+
|
|
530
|
+
Efferent coupling (Ce) measures how many external modules/files a file
|
|
531
|
+
depends on. Higher Ce indicates fragility - changes to dependencies can
|
|
532
|
+
break this file.
|
|
533
|
+
|
|
534
|
+
Tracks:
|
|
535
|
+
- Total unique dependencies (efferent_coupling score)
|
|
536
|
+
- All imported modules
|
|
537
|
+
- Internal vs. external imports
|
|
538
|
+
- Standard library vs. third-party imports
|
|
539
|
+
|
|
540
|
+
Example:
|
|
541
|
+
# Python file with Ce = 3
|
|
542
|
+
import os # stdlib
|
|
543
|
+
from typing import List # stdlib (not counted, same base module)
|
|
544
|
+
import requests # external
|
|
545
|
+
from .utils import helper # internal
|
|
546
|
+
|
|
547
|
+
# Ce = 3 (os, requests, .utils)
|
|
548
|
+
"""
|
|
549
|
+
|
|
550
|
+
def __init__(self) -> None:
    """Initialize empty tracking sets for this file's imports.

    NOTE(review): does not call super().__init__(); MetricCollector's
    initializer is not visible here — confirm it needs no setup.
    """
    self._imports: set[str] = set()  # Every unique module imported by the file
    self._internal_imports: set[str] = set()  # Relative / project-local imports
    self._external_imports: set[str] = set()  # Stdlib and third-party imports
|
|
555
|
+
|
|
556
|
+
@property
def name(self) -> str:
    """Collector identifier used to key this collector's output.

    Returns:
        The literal string "efferent_coupling".
    """
    return "efferent_coupling"
|
|
564
|
+
|
|
565
|
+
def collect_node(self, node: Node, context: CollectorContext, depth: int) -> None:
    """Inspect one AST node and record any import it represents.

    Args:
        node: Current tree-sitter AST node
        context: Shared context with language and file info
        depth: Current depth in AST (unused)
    """
    lang = context.language
    kind = node.type

    if kind in get_import_node_types(lang, "import"):
        self._extract_import(node, context)
        return

    # Only the JS/TS grammars model require('module') as a call expression.
    if lang in ("javascript", "typescript") and kind in get_import_node_types(
        lang, "require_call"
    ):
        self._extract_require_call(node, context)
|
|
583
|
+
|
|
584
|
+
def _extract_import(self, node: Node, context: CollectorContext) -> None:
    r"""Extract module name from import statement.

    Handles:
    - Python: import module, from module import X
    - JavaScript/TypeScript: import ... from 'module'
    - Java: import com.example.Class
    - Rust: use std::collections::HashMap
    - PHP: use MyNamespace\MyClass
    - Ruby: require "module"

    Args:
        node: Import statement node
        context: Collector context
    """
    language = context.language

    if language == "python":
        # Python: import os, from os import path
        # Prefer the grammar's module_name field when present.
        module_node = node.child_by_field_name("module_name")
        if module_node:
            module_name = module_node.text.decode("utf-8")
            self._add_import(module_name, context)
        else:
            # No field: scan direct children for the module reference.
            for child in node.children:
                if child.type == "dotted_name":
                    module_name = child.text.decode("utf-8")
                    self._add_import(module_name, context)
                elif child.type == "aliased_import":
                    # import os as operating_system — the first dotted_name
                    # child holds the real module name.
                    for subchild in child.children:
                        if subchild.type == "dotted_name":
                            module_name = subchild.text.decode("utf-8")
                            self._add_import(module_name, context)
                            break
                elif child.type == "relative_import":
                    # Relative import (from . import X) — record the dots.
                    dots = child.text.decode("utf-8")
                    self._add_import(dots, context)
                    break

    elif language in ("javascript", "typescript"):
        # JavaScript/TypeScript: import ... from 'module'
        # The string child is the module specifier; strip its quotes.
        for child in node.children:
            if child.type == "string":
                module_str = child.text.decode("utf-8")
                module_name = module_str.strip("\"'")
                self._add_import(module_name, context)

    elif language == "java":
        # import com.example.Class — fully-qualified name node.
        for child in node.children:
            if child.type == "scoped_identifier":
                module_name = child.text.decode("utf-8")
                self._add_import(module_name, context)

    elif language == "rust":
        # use std::collections::HashMap — scoped path node.
        for child in node.children:
            if child.type == "scoped_identifier":
                module_name = child.text.decode("utf-8")
                self._add_import(module_name, context)

    elif language == "php":
        # use MyNamespace\MyClass — qualified name node.
        for child in node.children:
            if child.type == "qualified_name":
                module_name = child.text.decode("utf-8")
                self._add_import(module_name, context)

    elif language == "ruby":
        # Ruby uses method calls (require / require_relative) for imports.
        if node.type == "call":
            method_child = node.child_by_field_name("method")
            if method_child and method_child.text.decode("utf-8") in [
                "require",
                "require_relative",
            ]:
                args_child = node.child_by_field_name("arguments")
                if args_child:
                    for child in args_child.children:
                        if child.type == "string":
                            module_str = child.text.decode("utf-8")
                            module_name = module_str.strip("\"'")
                            self._add_import(module_name, context)
|
|
668
|
+
|
|
669
|
+
def _extract_require_call(self, node: Node, context: CollectorContext) -> None:
    """Extract module name from a require('module') call.

    Handles:
    - JavaScript/TypeScript: const x = require('module')

    Args:
        node: Call expression node
        context: Collector context
    """
    # Guard: must be a plain identifier call named exactly "require".
    fn = node.child_by_field_name("function")
    if not fn or fn.type != "identifier":
        return
    if fn.text.decode("utf-8") != "require":
        return

    args = node.child_by_field_name("arguments")
    if not args:
        return

    # Record every string argument, stripped of surrounding quotes.
    for arg in args.children:
        if arg.type == "string":
            raw = arg.text.decode("utf-8")
            self._add_import(raw.strip("\"'"), context)
|
|
691
|
+
|
|
692
|
+
def _add_import(self, module_name: str, context: CollectorContext) -> None:
    """Track an imported module and bucket it as internal or external.

    Args:
        module_name: Imported module name
        context: Collector context with language info
    """
    lang = context.language

    # Every import lands in the master set regardless of classification.
    self._imports.add(module_name)

    if is_relative_import(module_name, lang):
        # Relative imports always point inside the project.
        self._internal_imports.add(module_name)
    elif is_stdlib_module(module_name, lang):
        # Standard library counts as external (though not third-party).
        self._external_imports.add(module_name)
    else:
        # Everything else is treated as external for now; project-root
        # detection could later reclassify some of these as internal.
        self._external_imports.add(module_name)
|
|
716
|
+
|
|
717
|
+
def get_imported_modules(self) -> set[str]:
    """Return every module name imported by this file.

    Returns:
        Set of module names imported by this file (a defensive copy, so
        callers cannot mutate the collector's internal state)
    """
    return set(self._imports)
|
|
724
|
+
|
|
725
|
+
def finalize_function(
    self, node: Node, context: CollectorContext
) -> dict[str, Any]:
    """Return an empty dict - coupling is file-level, not function-level.

    Efferent coupling is aggregated at file scope (see get_file_metrics),
    so there is nothing to report per function.

    Args:
        node: Function definition node
        context: Shared context

    Returns:
        Empty dictionary (no function-level coupling metrics)
    """
    return {}
|
|
740
|
+
|
|
741
|
+
def get_file_metrics(self) -> dict[str, Any]:
    """Summarize file-level efferent coupling.

    Returns:
        Dictionary with efferent coupling count plus sorted lists of all,
        internal, and external imports
    """
    metrics: dict[str, Any] = {
        "efferent_coupling": len(self._imports),
        "imports": sorted(self._imports),
        "internal_imports": sorted(self._internal_imports),
        "external_imports": sorted(self._external_imports),
    }
    return metrics
|
|
753
|
+
|
|
754
|
+
def reset(self) -> None:
    """Clear all accumulated import state so the next file starts fresh."""
    for bucket in (self._imports, self._internal_imports, self._external_imports):
        bucket.clear()
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
class AfferentCouplingCollector(MetricCollector):
    """Tracks afferent coupling (Ca) - incoming dependencies.

    Afferent coupling counts how many other files import this file. The
    higher the count, the more load-bearing the file: edits here ripple
    out to every dependent.

    Interpretation:
    - 0-2: Low coupling, changes affect few files
    - 3-5: Moderate coupling, shared utility
    - 6-10: High coupling, critical component
    - 11+: Very high coupling, core infrastructure

    Example:
        # File A is imported by files B, C, D
        # Afferent Coupling (Ca) = 3

    Note: Afferent coupling requires project-wide import graph analysis.
    Use build_import_graph() to construct the graph before creating this collector.
    """

    def __init__(self, import_graph: dict[str, set[str]] | None = None) -> None:
        """Initialize afferent coupling collector.

        Args:
            import_graph: Pre-built import graph mapping module_name → set of
                importing files. If None, afferent coupling will always be 0.
        """
        # Fall back to an empty graph so lookups never need a None check.
        self._import_graph = import_graph or {}
        self._current_file: str | None = None

    @property
    def name(self) -> str:
        """Return collector identifier.

        Returns:
            Collector name "afferent_coupling"
        """
        return "afferent_coupling"

    def collect_node(self, node: Node, context: CollectorContext, depth: int) -> None:
        """Latch the file path; per-node traversal is otherwise a no-op.

        Afferent coupling is derived from the import graph, not from AST
        nodes, so the only work here is remembering the file path once.

        Args:
            node: Current tree-sitter AST node (unused)
            context: Shared context with file path
            depth: Current depth in AST (unused)
        """
        if not self._current_file and context.file_path:
            self._current_file = context.file_path

    def get_afferent_coupling(self, file_path: str) -> int:
        """Count the files that import this file.

        Args:
            file_path: Path to the file to check

        Returns:
            Number of files that import this file
        """
        dependents = self._import_graph.get(self._normalize_path(file_path))
        return len(dependents) if dependents else 0

    def get_dependents(self, file_path: str) -> list[str]:
        """List the files that depend on this file.

        Args:
            file_path: Path to the file to check

        Returns:
            Sorted list of file paths that import this file
        """
        return sorted(self._import_graph.get(self._normalize_path(file_path), ()))

    def _normalize_path(self, file_path: str) -> str:
        """Normalize file path for consistent graph lookups.

        Note: no resolution is performed — relative paths pass through
        unchanged, so callers must supply paths consistently (typically
        project-relative, matching build_import_graph's keys).

        Args:
            file_path: File path to normalize

        Returns:
            Normalized file path
        """
        # Round-trip through Path to canonicalize separators; absolute and
        # relative paths are otherwise returned as-is.
        return str(Path(file_path))

    def finalize_function(
        self, node: Node, context: CollectorContext
    ) -> dict[str, Any]:
        """Report afferent coupling for the current file.

        Note: called per function, but afferent coupling is a file-level
        metric, so the result depends only on context.file_path.

        Args:
            node: Function definition node
            context: Shared context with file path

        Returns:
            Dictionary with afferent_coupling count and dependents list
        """
        target = context.file_path
        return {
            "afferent_coupling": self.get_afferent_coupling(target),
            "dependents": self.get_dependents(target),
        }

    def reset(self) -> None:
        """Reset collector state for next file."""
        self._current_file = None
|
|
889
|
+
|
|
890
|
+
|
|
891
|
+
def build_import_graph(
    project_root: Path, files: list[Path], language: str = "python"
) -> dict[str, set[str]]:
    """Build project-wide import graph for afferent coupling analysis.

    Analyzes all files in the project to construct a reverse dependency graph
    mapping each module to the set of files that import it.

    Args:
        project_root: Root directory of the project
        files: List of file paths to analyze
        language: Programming language (default: "python")

    Returns:
        Dictionary mapping module_name → set of file paths that import it

    Example:
        >>> files = [Path("a.py"), Path("b.py"), Path("c.py")]
        >>> graph = build_import_graph(Path("/project"), files)
        >>> graph["module_x"]
        {"a.py", "c.py"}  # Both a.py and c.py import module_x
    """
    import_graph: dict[str, set[str]] = {}

    # Tree-sitter is an optional dependency; without it (or without a
    # grammar for this language) we cannot parse, so return an empty graph
    # rather than failing the whole analysis.
    try:
        from tree_sitter import Parser

        language_obj = _get_tree_sitter_language(language)
        if not language_obj:
            return import_graph

        parser = Parser()
        parser.set_language(language_obj)

    except ImportError:
        return import_graph

    # One efferent collector reused across files (reset per file).
    efferent_collector = EfferentCouplingCollector()

    for file_path in files:
        # Skip non-existent files
        if not file_path.exists():
            continue

        # Read file content; unreadable files are skipped, not fatal.
        try:
            source_code = file_path.read_bytes()
        except OSError:
            continue

        # Parse with tree-sitter
        tree = parser.parse(source_code)
        if not tree or not tree.root_node:
            continue

        # Key files by project-relative path when possible. Previously
        # relative_to() was called unguarded (twice), so a single file
        # outside project_root raised ValueError and aborted the entire
        # graph build; now such files fall back to their original path.
        try:
            file_key = str(file_path.relative_to(project_root))
        except ValueError:
            file_key = str(file_path)

        # Create context for this file
        context = CollectorContext(
            file_path=file_key,
            source_code=source_code,
            language=language,
        )

        # Traverse AST and collect imports
        efferent_collector.reset()
        _traverse_tree(tree.root_node, context, efferent_collector)

        # Update import graph (reverse mapping: module → importing files)
        for module_name in efferent_collector.get_imported_modules():
            import_graph.setdefault(module_name, set()).add(file_key)

    return import_graph
|
|
973
|
+
|
|
974
|
+
|
|
975
|
+
def _traverse_tree(
    node: Node, context: CollectorContext, collector: EfferentCouplingCollector
) -> None:
    """Walk the AST depth-first, feeding every node to the collector.

    Args:
        node: Current AST node
        context: Collector context
        collector: Efferent coupling collector to accumulate imports
    """
    # Iterative pre-order traversal; children are pushed in reverse so they
    # pop left-to-right, matching the recursive formulation exactly.
    stack = [node]
    while stack:
        current = stack.pop()
        collector.collect_node(current, context, depth=0)
        stack.extend(reversed(current.children))
|
|
991
|
+
|
|
992
|
+
|
|
993
|
+
def _get_tree_sitter_language(language: str) -> Any: # noqa: ARG001
|
|
994
|
+
"""Get tree-sitter Language object for the given language.
|
|
995
|
+
|
|
996
|
+
Args:
|
|
997
|
+
language: Programming language identifier
|
|
998
|
+
|
|
999
|
+
Returns:
|
|
1000
|
+
Tree-sitter Language object, or None if not available
|
|
1001
|
+
"""
|
|
1002
|
+
try:
|
|
1003
|
+
# Language loading depends on tree-sitter installation
|
|
1004
|
+
# This is a simplified version - actual implementation should handle
|
|
1005
|
+
# loading compiled language libraries properly
|
|
1006
|
+
# In a real implementation, this would load the compiled language library
|
|
1007
|
+
# For now, return None to indicate unsupported
|
|
1008
|
+
return None
|
|
1009
|
+
|
|
1010
|
+
except ImportError:
|
|
1011
|
+
return None
|
|
1012
|
+
|
|
1013
|
+
|
|
1014
|
+
class InstabilityCalculator:
|
|
1015
|
+
"""Calculator for instability metrics across the project.
|
|
1016
|
+
|
|
1017
|
+
Instability (I) = Ce / (Ce + Ca) measures how much a file depends on others
|
|
1018
|
+
vs. how much others depend on it.
|
|
1019
|
+
|
|
1020
|
+
Interpretation:
|
|
1021
|
+
- I = 0.0-0.3: Stable (maximally stable at 0.0)
|
|
1022
|
+
- I = 0.3-0.7: Balanced
|
|
1023
|
+
- I = 0.7-1.0: Unstable (maximally unstable at 1.0)
|
|
1024
|
+
|
|
1025
|
+
Stable files should contain abstractions and core logic.
|
|
1026
|
+
Unstable files should contain concrete implementations and glue code.
|
|
1027
|
+
"""
|
|
1028
|
+
|
|
1029
|
+
def __init__(
    self,
    efferent_collector: EfferentCouplingCollector,
    afferent_collector: AfferentCouplingCollector,
) -> None:
    """Initialize instability calculator.

    Args:
        efferent_collector: Collector for outgoing dependencies
        afferent_collector: Collector for incoming dependencies
    """
    # Hold both collectors: Ce comes from the efferent side,
    # Ca from the afferent side.
    self._afferent_collector = afferent_collector
    self._efferent_collector = efferent_collector
|
|
1042
|
+
|
|
1043
|
+
def calculate_instability(self, file_path: str) -> float:
    """Compute I = Ce / (Ce + Ca) for a single file.

    Args:
        file_path: Path to the file

    Returns:
        Instability value from 0.0 (stable) to 1.0 (unstable)
    """
    efferent = len(self._efferent_collector.get_imported_modules())
    afferent = self._afferent_collector.get_afferent_coupling(file_path)

    denominator = efferent + afferent
    # A file with no couplings at all is defined as maximally stable.
    return efferent / denominator if denominator else 0.0
|
|
1060
|
+
|
|
1061
|
+
def calculate_project_instability(
    self, file_metrics: dict[str, Any]
) -> dict[str, float]:
    """Calculate instability for every file carrying coupling metrics.

    Files whose metrics lack a "coupling" entry are skipped.

    Args:
        file_metrics: Dictionary mapping file_path → file metrics

    Returns:
        Dictionary mapping file_path → instability value
    """
    results: dict[str, float] = {}

    for path, metrics in file_metrics.items():
        if "coupling" not in metrics:
            continue
        coupling = metrics["coupling"]
        ce = coupling.get("efferent_coupling", 0)
        ca = coupling.get("afferent_coupling", 0)

        total = ce + ca
        # Zero total couplings is defined as maximally stable (0.0).
        results[path] = (ce / total) if total else 0.0

    return results
|
|
1090
|
+
|
|
1091
|
+
def get_stability_grade(self, instability: float) -> str:
    """Map an instability value onto a letter grade.

    Args:
        instability: Instability value (0.0-1.0)

    Returns:
        Letter grade from A to F

    Grade thresholds:
    - A: 0.0-0.2 (very stable)
    - B: 0.2-0.4 (stable)
    - C: 0.4-0.6 (balanced)
    - D: 0.6-0.8 (unstable)
    - F: 0.8-1.0 (very unstable)
    """
    # Ordered upper bounds; the first bucket the value fits in wins.
    thresholds = ((0.2, "A"), (0.4, "B"), (0.6, "C"), (0.8, "D"))
    for upper_bound, grade in thresholds:
        if instability <= upper_bound:
            return grade
    return "F"
|
|
1117
|
+
|
|
1118
|
+
def get_stability_category(self, instability: float) -> str:
    """Get stability category for instability value.

    Args:
        instability: Instability value (0.0-1.0)

    Returns:
        Category: "Stable", "Balanced", or "Unstable"
    """
    # Guard from the unstable end downward: >0.7 unstable, >0.3 balanced,
    # everything at or below 0.3 stable.
    if instability > 0.7:
        return "Unstable"
    if instability > 0.3:
        return "Balanced"
    return "Stable"
|
|
1133
|
+
|
|
1134
|
+
def get_most_stable_files(
    self, instability_map: dict[str, float], limit: int = 10
) -> list[tuple[str, float]]:
    """Return up to `limit` files with the lowest instability.

    Args:
        instability_map: Dictionary mapping file_path → instability
        limit: Maximum number of files to return

    Returns:
        List of (file_path, instability) tuples, sorted by stability
    """
    # Ascending instability puts the most stable files first.
    ranked = sorted(instability_map.items(), key=lambda entry: entry[1])
    return ranked[:limit]
|
|
1148
|
+
|
|
1149
|
+
def get_most_unstable_files(
    self, instability_map: dict[str, float], limit: int = 10
) -> list[tuple[str, float]]:
    """Return up to `limit` files with the highest instability.

    Args:
        instability_map: Dictionary mapping file_path → instability
        limit: Maximum number of files to return

    Returns:
        List of (file_path, instability) tuples, sorted by instability (descending)
    """
    # Descending instability puts the most unstable files first.
    ranked = sorted(instability_map.items(), key=lambda entry: entry[1], reverse=True)
    return ranked[:limit]
|