mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +111 -0
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +74 -0
  7. mcp_vector_search/analysis/collectors/base.py +164 -0
  8. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  9. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  10. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  11. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  12. mcp_vector_search/analysis/collectors/smells.py +325 -0
  13. mcp_vector_search/analysis/debt.py +516 -0
  14. mcp_vector_search/analysis/interpretation.py +685 -0
  15. mcp_vector_search/analysis/metrics.py +414 -0
  16. mcp_vector_search/analysis/reporters/__init__.py +7 -0
  17. mcp_vector_search/analysis/reporters/console.py +646 -0
  18. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  19. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  20. mcp_vector_search/analysis/storage/__init__.py +93 -0
  21. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  22. mcp_vector_search/analysis/storage/schema.py +245 -0
  23. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  24. mcp_vector_search/analysis/trends.py +308 -0
  25. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  26. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  27. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  28. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  29. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  30. mcp_vector_search/cli/commands/analyze.py +1062 -0
  31. mcp_vector_search/cli/commands/chat.py +1455 -0
  32. mcp_vector_search/cli/commands/index.py +621 -5
  33. mcp_vector_search/cli/commands/index_background.py +467 -0
  34. mcp_vector_search/cli/commands/init.py +13 -0
  35. mcp_vector_search/cli/commands/install.py +597 -335
  36. mcp_vector_search/cli/commands/install_old.py +8 -4
  37. mcp_vector_search/cli/commands/mcp.py +78 -6
  38. mcp_vector_search/cli/commands/reset.py +68 -26
  39. mcp_vector_search/cli/commands/search.py +224 -8
  40. mcp_vector_search/cli/commands/setup.py +1184 -0
  41. mcp_vector_search/cli/commands/status.py +339 -5
  42. mcp_vector_search/cli/commands/uninstall.py +276 -357
  43. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  44. mcp_vector_search/cli/commands/visualize/cli.py +292 -0
  45. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  46. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  47. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
  48. mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
  49. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  50. mcp_vector_search/cli/commands/visualize/server.py +600 -0
  51. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  52. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  53. mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
  54. mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
  55. mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
  56. mcp_vector_search/cli/didyoumean.py +27 -2
  57. mcp_vector_search/cli/main.py +127 -160
  58. mcp_vector_search/cli/output.py +158 -13
  59. mcp_vector_search/config/__init__.py +4 -0
  60. mcp_vector_search/config/default_thresholds.yaml +52 -0
  61. mcp_vector_search/config/settings.py +12 -0
  62. mcp_vector_search/config/thresholds.py +273 -0
  63. mcp_vector_search/core/__init__.py +16 -0
  64. mcp_vector_search/core/auto_indexer.py +3 -3
  65. mcp_vector_search/core/boilerplate.py +186 -0
  66. mcp_vector_search/core/config_utils.py +394 -0
  67. mcp_vector_search/core/database.py +406 -94
  68. mcp_vector_search/core/embeddings.py +24 -0
  69. mcp_vector_search/core/exceptions.py +11 -0
  70. mcp_vector_search/core/git.py +380 -0
  71. mcp_vector_search/core/git_hooks.py +4 -4
  72. mcp_vector_search/core/indexer.py +632 -54
  73. mcp_vector_search/core/llm_client.py +756 -0
  74. mcp_vector_search/core/models.py +91 -1
  75. mcp_vector_search/core/project.py +17 -0
  76. mcp_vector_search/core/relationships.py +473 -0
  77. mcp_vector_search/core/scheduler.py +11 -11
  78. mcp_vector_search/core/search.py +179 -29
  79. mcp_vector_search/mcp/server.py +819 -9
  80. mcp_vector_search/parsers/python.py +285 -5
  81. mcp_vector_search/utils/__init__.py +2 -0
  82. mcp_vector_search/utils/gitignore.py +0 -3
  83. mcp_vector_search/utils/gitignore_updater.py +212 -0
  84. mcp_vector_search/utils/monorepo.py +66 -4
  85. mcp_vector_search/utils/timing.py +10 -6
  86. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
  87. mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
  88. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
  89. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
  90. mcp_vector_search/cli/commands/visualize.py +0 -1467
  91. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  92. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1162 @@
1
+ """Coupling metric collectors for structural code analysis.
2
+
3
+ This module provides collectors for measuring coupling metrics:
4
+ - EfferentCouplingCollector: Counts outgoing dependencies (imports from this file)
5
+ - AfferentCouplingCollector: Counts incoming dependencies (files that import this file)
6
+ - InstabilityCalculator: Calculates instability metrics across the project
7
+ - CircularDependencyDetector: Detects circular/cyclic dependencies in import graph
8
+
9
+ Coupling metrics help identify architectural dependencies and potential refactoring needs.
10
+ Circular dependencies can lead to:
11
+ - Initialization issues and import errors
12
+ - Tight coupling and reduced maintainability
13
+ - Difficulty in testing and refactoring
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import sys
19
+ from dataclasses import dataclass, field
20
+ from enum import Enum
21
+ from pathlib import Path
22
+ from typing import TYPE_CHECKING, Any
23
+
24
+ from .base import CollectorContext, MetricCollector
25
+
26
+ if TYPE_CHECKING:
27
+ from tree_sitter import Node
28
+
29
+
30
+ # =============================================================================
31
+ # Circular Dependency Detection Types
32
+ # =============================================================================
33
+
34
+
35
class NodeColor(Enum):
    """Traversal state of a node during DFS-based cycle detection.

    Classic three-color scheme:
    - WHITE: not yet visited
    - GRAY: on the current DFS path; reaching a GRAY node again means a cycle
    - BLACK: fully explored (all descendants processed)
    """

    WHITE = "white"
    GRAY = "gray"
    BLACK = "black"
49
+
50
+
51
@dataclass
class ImportGraph:
    """Directed graph of import relationships between files.

    Each node is a file path; an edge A -> B records that A imports B.

    Attributes:
        adjacency_list: Maps each file to the list of files it imports.

    Example:
        graph = ImportGraph()
        graph.add_edge("main.py", "utils.py")
        graph.add_edge("utils.py", "helpers.py")
        # main.py → utils.py → helpers.py
    """

    adjacency_list: dict[str, list[str]] = field(default_factory=dict)

    def add_edge(self, from_file: str, to_file: str) -> None:
        """Record that from_file imports to_file (directed edge).

        Args:
            from_file: Source file that contains the import.
            to_file: Target file being imported.
        """
        targets = self.adjacency_list.setdefault(from_file, [])
        # Keep the edge list duplicate-free.
        if to_file not in targets:
            targets.append(to_file)

    def add_node(self, file_path: str) -> None:
        """Register a file as a node even if it has no edges.

        Useful for ensuring isolated files are tracked.

        Args:
            file_path: Path to file to add as node.
        """
        self.adjacency_list.setdefault(file_path, [])

    def get_neighbors(self, file_path: str) -> list[str]:
        """Return the files that file_path imports (empty if unknown).

        Args:
            file_path: File to get imports for.

        Returns:
            List of files imported by file_path.
        """
        return self.adjacency_list.get(file_path, [])

    def get_all_files(self) -> list[str]:
        """Return every file mentioned in the graph, sorted.

        Includes files that appear only as import targets and therefore
        have no adjacency-list entry of their own.

        Returns:
            Sorted list of all file paths (nodes) in the graph.
        """
        files = set(self.adjacency_list)
        for targets in self.adjacency_list.values():
            files.update(targets)
        return sorted(files)
115
+
116
+
117
@dataclass
class CircularDependency:
    """A detected circular import chain.

    Attributes:
        cycle_chain: Files forming the cycle; the first entry is repeated
            as the last entry to close the loop.

    Example:
        cycle = CircularDependency(
            cycle_chain=["a.py", "b.py", "c.py", "a.py"]
        )
        assert cycle.cycle_length == 3
        assert cycle.format_chain() == "a.py → b.py → c.py → a.py"
    """

    cycle_chain: list[str]

    @property
    def cycle_length(self) -> int:
        """Number of distinct files in the cycle (start/end counted once)."""
        if len(self.cycle_chain) <= 1:
            return 0
        return len(self.cycle_chain) - 1

    def format_chain(self) -> str:
        """Render the cycle as an arrow-separated chain.

        Returns:
            Formatted cycle string (e.g., "A → B → C → A").
        """
        return " → ".join(self.cycle_chain)

    def get_affected_files(self) -> list[str]:
        """Return the unique files involved in this cycle, sorted.

        Returns:
            Sorted list of unique file paths in cycle.
        """
        # The chain's last element repeats the first; drop it when present.
        if len(self.cycle_chain) > 1:
            members = set(self.cycle_chain[:-1])
        else:
            members = set(self.cycle_chain)
        return sorted(members)
161
+
162
+
163
+ class CircularDependencyDetector:
164
+ """Detects circular dependencies in import graphs using DFS-based cycle detection.
165
+
166
+ Uses three-color DFS algorithm (Tarjan-inspired):
167
+ - WHITE: Unvisited node
168
+ - GRAY: Node in current DFS path (cycle if we revisit a GRAY node)
169
+ - BLACK: Fully processed node
170
+
171
+ This algorithm efficiently detects all elementary cycles in O(V+E) time.
172
+
173
+ Design Decisions:
174
+ - **Algorithm Choice**: DFS with color marking chosen over Tarjan's SCC because:
175
+ - Simpler implementation and easier to understand
176
+ - Directly provides cycle paths (not just strongly connected components)
177
+ - O(V+E) time complexity (same as Tarjan's)
178
+ - Better for reporting individual cycles to developers
179
+
180
+ - **Path Tracking**: Maintains explicit path stack during DFS to reconstruct cycles
181
+ - Enables user-friendly "A → B → C → A" output
182
+ - Memory overhead acceptable for typical codebases (<10K files)
183
+
184
+ - **Duplicate Cycle Handling**: Detects and reports all unique cycle instances
185
+ - Same cycle may be discovered multiple times from different starting points
186
+ - Deduplication handled by caller if needed
187
+
188
+ Trade-offs:
189
+ - **Simplicity vs. Optimization**: Chose simpler DFS over complex SCC algorithms
190
+ - Performance: Acceptable for codebases up to ~50K files
191
+ - Maintainability: Easier to debug and extend
192
+ - **Memory vs. Clarity**: Stores full path during DFS for clear error messages
193
+ - Alternative: Store only parent pointers (saves memory but harder to debug)
194
+
195
+ Example:
196
+ detector = CircularDependencyDetector(import_graph)
197
+ cycles = detector.detect_cycles()
198
+
199
+ if detector.has_cycles():
200
+ for cycle in cycles:
201
+ print(f"Cycle detected: {cycle.format_chain()}")
202
+ """
203
+
204
+ def __init__(self, import_graph: ImportGraph) -> None:
205
+ """Initialize detector with import graph.
206
+
207
+ Args:
208
+ import_graph: Graph of import dependencies to analyze
209
+ """
210
+ self.graph = import_graph
211
+ self._cycles: list[CircularDependency] = []
212
+ self._colors: dict[str, NodeColor] = {}
213
+ self._path: list[str] = [] # Current DFS path for cycle reconstruction
214
+
215
+ def detect_cycles(self) -> list[CircularDependency]:
216
+ """Detect all circular dependencies in the import graph.
217
+
218
+ Uses DFS with three-color marking:
219
+ 1. WHITE: Node not yet visited
220
+ 2. GRAY: Node in current DFS path (cycle if revisited)
221
+ 3. BLACK: Node fully processed
222
+
223
+ Returns:
224
+ List of CircularDependency objects for all detected cycles
225
+
226
+ Complexity:
227
+ Time: O(V + E) where V = files, E = import edges
228
+ Space: O(V) for color map and path stack
229
+ """
230
+ self._cycles = []
231
+ self._colors = dict.fromkeys(self.graph.get_all_files(), NodeColor.WHITE)
232
+ self._path = []
233
+
234
+ # Run DFS from each unvisited node
235
+ for file in self.graph.get_all_files():
236
+ if self._colors[file] == NodeColor.WHITE:
237
+ self._dfs(file)
238
+
239
+ return self._cycles
240
+
241
+ def _dfs(self, node: str) -> None:
242
+ """Depth-first search to detect cycles.
243
+
244
+ Core cycle detection logic:
245
+ - Mark node GRAY (in current path)
246
+ - Visit all neighbors
247
+ - If neighbor is GRAY → cycle detected (it's in current path)
248
+ - If neighbor is WHITE → recurse
249
+ - Mark node BLACK after processing all neighbors
250
+
251
+ Args:
252
+ node: Current file being visited
253
+ """
254
+ self._colors[node] = NodeColor.GRAY
255
+ self._path.append(node)
256
+
257
+ # Visit all files that this file imports
258
+ for neighbor in self.graph.get_neighbors(node):
259
+ if self._colors[neighbor] == NodeColor.GRAY:
260
+ # Found cycle! Neighbor is in current path
261
+ self._record_cycle(neighbor)
262
+ elif self._colors[neighbor] == NodeColor.WHITE:
263
+ # Unvisited node, continue DFS
264
+ self._dfs(neighbor)
265
+
266
+ # Finished processing this node
267
+ self._path.pop()
268
+ self._colors[node] = NodeColor.BLACK
269
+
270
+ def _record_cycle(self, cycle_start: str) -> None:
271
+ """Record detected cycle by extracting path from cycle_start to current node.
272
+
273
+ When we detect a cycle (encounter GRAY node), we extract the cycle from
274
+ the current DFS path stack.
275
+
276
+ Args:
277
+ cycle_start: File where cycle begins (GRAY node we just encountered)
278
+ """
279
+ # Find cycle_start in current path
280
+ try:
281
+ start_index = self._path.index(cycle_start)
282
+ except ValueError:
283
+ # Should not happen if algorithm is correct
284
+ return
285
+
286
+ # Extract cycle: [cycle_start, ..., current_node, cycle_start]
287
+ cycle_chain = self._path[start_index:] + [cycle_start]
288
+ self._cycles.append(CircularDependency(cycle_chain=cycle_chain))
289
+
290
+ def has_cycles(self) -> bool:
291
+ """Check if any cycles were detected.
292
+
293
+ Note: Must call detect_cycles() first.
294
+
295
+ Returns:
296
+ True if cycles exist, False otherwise
297
+ """
298
+ return len(self._cycles) > 0
299
+
300
+ def get_cycle_chains(self) -> list[str]:
301
+ """Get human-readable cycle chains.
302
+
303
+ Returns:
304
+ List of formatted cycle strings (e.g., ["A → B → C → A"])
305
+ """
306
+ return [cycle.format_chain() for cycle in self._cycles]
307
+
308
+ def get_affected_files(self) -> list[str]:
309
+ """Get all unique files involved in any cycle.
310
+
311
+ Returns:
312
+ Sorted list of unique file paths involved in cycles
313
+ """
314
+ affected = set()
315
+ for cycle in self._cycles:
316
+ affected.update(cycle.get_affected_files())
317
+ return sorted(affected)
318
+
319
+
320
def build_import_graph_from_dict(file_imports: dict[str, list[str]]) -> ImportGraph:
    """Construct an ImportGraph from a mapping of parsed import data.

    Args:
        file_imports: Dictionary mapping file paths to lists of imported files

    Returns:
        ImportGraph with every file registered as a node and one edge per import

    Example:
        imports = {
            "main.py": ["utils.py", "config.py"],
            "utils.py": ["helpers.py"],
            "helpers.py": []
        }
        graph = build_import_graph_from_dict(imports)
    """
    graph = ImportGraph()

    # Register every file up front so isolated files still appear as nodes.
    for source in file_imports:
        graph.add_node(source)

    # Wire up one directed edge per recorded import.
    for source, targets in file_imports.items():
        for target in targets:
            graph.add_edge(source, target)

    return graph
351
+
352
+
353
+ # =============================================================================
354
+ # Multi-language Import Statement Mappings
355
+ # =============================================================================
356
+
357
# Tree-sitter node-type names used to recognize import constructs, per language.
IMPORT_NODE_TYPES = {
    "python": {
        "import": ["import_statement", "import_from_statement"],
        "module_name": ["dotted_name", "aliased_import"],
    },
    "javascript": {
        "import": ["import_statement"],
        "module_name": ["string", "import_clause"],
        # CommonJS: require('module')
        "require_call": ["call_expression"],
    },
    "typescript": {
        "import": ["import_statement"],
        "module_name": ["string", "import_clause"],
        # import type { T } from 'mod'
        "import_type": ["import_statement"],
        "require_call": ["call_expression"],
    },
    "java": {
        "import": ["import_declaration"],
        "module_name": ["scoped_identifier"],
    },
    "rust": {
        "import": ["use_declaration"],
        "module_name": ["scoped_identifier"],
    },
    "php": {
        "import": ["namespace_use_declaration"],
        "module_name": ["qualified_name"],
    },
    "ruby": {
        # require / require_relative are plain method calls in Ruby
        "import": ["call"],
        "module_name": ["string"],
    },
}


def get_import_node_types(language: str, category: str) -> list[str]:
    """Look up tree-sitter node types for import handling in a language.

    Args:
        language: Programming language identifier (e.g., "python", "javascript")
        category: Category of import node ("import", "module_name", etc.)

    Returns:
        Node-type names for this language/category. Unknown languages fall
        back to the Python mapping; an unknown category yields an empty list.

    Examples:
        >>> get_import_node_types("python", "import")
        ["import_statement", "import_from_statement"]

        >>> get_import_node_types("javascript", "import")
        ["import_statement"]
    """
    mapping = IMPORT_NODE_TYPES.get(language, IMPORT_NODE_TYPES["python"])
    return mapping.get(category, [])
413
+
414
+
415
def is_stdlib_module(module_name: str, language: str) -> bool:
    """Check if a module is from the standard library.

    Args:
        module_name: Module name (e.g., "os", "sys", "fs")
        language: Programming language

    Returns:
        True if module is standard library, False otherwise

    Examples:
        >>> is_stdlib_module("os", "python")
        True

        >>> is_stdlib_module("requests", "python")
        False

        >>> is_stdlib_module("fs", "javascript")
        True
    """
    if language == "python":
        # Only the top-level package decides stdlib membership
        # (e.g. "os.path" -> "os").
        top_level = module_name.split(".")[0]
        # sys.stdlib_module_names is available on Python 3.10+
        if hasattr(sys, "stdlib_module_names"):
            return top_level in sys.stdlib_module_names
        # Fallback for older interpreters: common stdlib modules.
        # NOTE: "pytest" was removed from this set -- it is a third-party
        # package, and the sys.stdlib_module_names path above correctly
        # classifies it as non-stdlib.
        common_stdlib = {
            "os",
            "sys",
            "re",
            "json",
            "math",
            "time",
            "datetime",
            "collections",
            "itertools",
            "functools",
            "pathlib",
            "typing",
            "dataclasses",
            "asyncio",
            "contextlib",
            "abc",
            "io",
            "logging",
            "unittest",
        }
        return top_level in common_stdlib

    if language in ("javascript", "typescript"):
        # Node.js built-in modules; subpaths like "fs/promises" also count.
        nodejs_builtins = {
            "fs",
            "path",
            "http",
            "https",
            "url",
            "os",
            "util",
            "events",
            "stream",
            "buffer",
            "crypto",
            "child_process",
            "cluster",
            "dns",
            "net",
            "tls",
            "dgram",
            "readline",
            "zlib",
            "process",
            "console",
            "assert",
            "timers",
        }
        return module_name.split("/")[0] in nodejs_builtins

    # Other languages: no stdlib classification available.
    return False
496
+
497
+
498
def is_relative_import(module_name: str, language: str) -> bool:
    """Check if import is relative to current file.

    Args:
        module_name: Module path
        language: Programming language

    Returns:
        True if import is relative, False otherwise

    Examples:
        >>> is_relative_import("./utils", "javascript")
        True

        >>> is_relative_import("lodash", "javascript")
        False

        >>> is_relative_import(".utils", "python")
        True
    """
    if language == "python":
        # Python relative imports begin with one or more dots.
        return module_name.startswith(".")
    if language in ("javascript", "typescript"):
        # JS/TS relative paths begin with "./" or "../".
        return module_name.startswith(("./", "../"))
    # Other languages: no relative-import notion handled here.
    return False
525
+
526
+
527
class EfferentCouplingCollector(MetricCollector):
    """Collects efferent coupling metrics (outgoing dependencies).

    Efferent coupling (Ce) is the number of distinct modules a file imports.
    A high Ce means the file is fragile: a change in any dependency can
    break it.

    Tracks:
    - Total unique dependencies (efferent_coupling score)
    - All imported modules
    - Internal vs. external imports
    - Standard library vs. third-party imports

    Example:
        # Python file with Ce = 3
        import os                  # stdlib
        from typing import List   # stdlib (not counted, same base module)
        import requests            # external
        from .utils import helper  # internal

        # Ce = 3 (os, requests, .utils)
    """

    def __init__(self) -> None:
        """Initialize empty import-tracking sets."""
        self._imports: set[str] = set()  # every unique import seen
        self._internal_imports: set[str] = set()
        self._external_imports: set[str] = set()

    @property
    def name(self) -> str:
        """Return collector identifier.

        Returns:
            Collector name "efferent_coupling"
        """
        return "efferent_coupling"

    @staticmethod
    def _node_text(node: Node) -> str:
        """Decode a tree-sitter node's raw bytes as UTF-8 text."""
        return node.text.decode("utf-8")

    def collect_node(self, node: Node, context: CollectorContext, depth: int) -> None:
        """Inspect one AST node and record any import it represents.

        Args:
            node: Current tree-sitter AST node
            context: Shared context with language and file info
            depth: Current depth in AST (unused)
        """
        lang = context.language
        if node.type in get_import_node_types(lang, "import"):
            self._extract_import(node, context)
        elif lang in ("javascript", "typescript") and node.type in get_import_node_types(
            lang, "require_call"
        ):
            # CommonJS-style require() calls in JS/TS
            self._extract_require_call(node, context)

    def _extract_import(self, node: Node, context: CollectorContext) -> None:
        r"""Pull the module name out of an import statement node.

        Handles:
        - Python: import module, from module import X
        - JavaScript/TypeScript: import ... from 'module'
        - Java: import com.example.Class
        - Rust: use std::collections::HashMap
        - PHP: use MyNamespace\MyClass
        - Ruby: require "module"

        Args:
            node: Import statement node
            context: Collector context
        """
        lang = context.language

        if lang == "python":
            self._extract_python_import(node, context)
        elif lang in ("javascript", "typescript"):
            # The module name is the quoted string child of the statement.
            for child in node.children:
                if child.type == "string":
                    self._add_import(self._node_text(child).strip("\"'"), context)
        elif lang in ("java", "rust"):
            # Both grammars expose the module path as a scoped_identifier.
            for child in node.children:
                if child.type == "scoped_identifier":
                    self._add_import(self._node_text(child), context)
        elif lang == "php":
            for child in node.children:
                if child.type == "qualified_name":
                    self._add_import(self._node_text(child), context)
        elif lang == "ruby":
            self._extract_ruby_require(node, context)

    def _extract_python_import(self, node: Node, context: CollectorContext) -> None:
        """Handle Python import_statement / import_from_statement nodes.

        Args:
            node: Import statement node
            context: Collector context
        """
        mod_field = node.child_by_field_name("module_name")
        if mod_field:
            self._add_import(self._node_text(mod_field), context)
            return
        # No module_name field: scan children for the module reference.
        for child in node.children:
            if child.type == "dotted_name":
                self._add_import(self._node_text(child), context)
            elif child.type == "aliased_import":
                # e.g. "import os as operating_system": record the real name
                for inner in child.children:
                    if inner.type == "dotted_name":
                        self._add_import(self._node_text(inner), context)
                        break
            elif child.type == "relative_import":
                # e.g. "from . import X": record the leading dots
                self._add_import(self._node_text(child), context)
                break

    def _extract_ruby_require(self, node: Node, context: CollectorContext) -> None:
        """Handle Ruby require / require_relative call nodes.

        Args:
            node: Candidate call node
            context: Collector context
        """
        if node.type != "call":
            return
        method = node.child_by_field_name("method")
        if not method or self._node_text(method) not in ("require", "require_relative"):
            return
        args = node.child_by_field_name("arguments")
        if not args:
            return
        for child in args.children:
            if child.type == "string":
                self._add_import(self._node_text(child).strip("\"'"), context)

    def _extract_require_call(self, node: Node, context: CollectorContext) -> None:
        """Extract module name from require('module') call.

        Handles:
        - JavaScript/TypeScript: const x = require('module')

        Args:
            node: Call expression node
            context: Collector context
        """
        callee = node.child_by_field_name("function")
        if not (callee and callee.type == "identifier"):
            return
        if self._node_text(callee) != "require":
            return
        args = node.child_by_field_name("arguments")
        if not args:
            return
        for child in args.children:
            if child.type == "string":
                self._add_import(self._node_text(child).strip("\"'"), context)

    def _add_import(self, module_name: str, context: CollectorContext) -> None:
        """Record an import and classify it as internal or external.

        Args:
            module_name: Imported module name
            context: Collector context with language info
        """
        self._imports.add(module_name)

        if is_relative_import(module_name, context.language):
            # Relative imports always point inside the project.
            self._internal_imports.add(module_name)
        else:
            # Stdlib and third-party alike are treated as external for now;
            # project-root detection could refine this later.
            self._external_imports.add(module_name)

    def get_imported_modules(self) -> set[str]:
        """Get set of all imported module names.

        Returns:
            Copy of the set of module names imported by this file
        """
        return self._imports.copy()

    def finalize_function(
        self, node: Node, context: CollectorContext
    ) -> dict[str, Any]:
        """Return empty dict - coupling is file-level, not function-level.

        Coupling metrics are computed at file level during finalization.

        Args:
            node: Function definition node
            context: Shared context

        Returns:
            Empty dictionary (no function-level coupling metrics)
        """
        return {}

    def get_file_metrics(self) -> dict[str, Any]:
        """Summarize file-level efferent coupling.

        Returns:
            Dictionary with the coupling count and sorted import lists
        """
        return {
            "efferent_coupling": len(self._imports),
            "imports": sorted(self._imports),
            "internal_imports": sorted(self._internal_imports),
            "external_imports": sorted(self._external_imports),
        }

    def reset(self) -> None:
        """Clear all state so the collector can process the next file."""
        self._imports.clear()
        self._internal_imports.clear()
        self._external_imports.clear()
759
+
760
+
761
class AfferentCouplingCollector(MetricCollector):
    """Tracks afferent coupling (Ca) - incoming dependencies.

    Afferent coupling measures how many other files depend on this file
    (i.e., how many files import this file). Higher Ca indicates this
    file is more load-bearing - changes will affect many other files.

    Interpretation:
        - 0-2: Low coupling, changes affect few files
        - 3-5: Moderate coupling, shared utility
        - 6-10: High coupling, critical component
        - 11+: Very high coupling, core infrastructure

    Example:
        # File A is imported by files B, C, D
        # Afferent Coupling (Ca) = 3

    Note: Afferent coupling requires project-wide import graph analysis.
    Use build_import_graph() to construct the graph before creating this collector.
    """

    def __init__(self, import_graph: dict[str, set[str]] | None = None) -> None:
        """Initialize afferent coupling collector.

        Args:
            import_graph: Pre-built import graph mapping module_name -> set of
                importing files. If None, afferent coupling will always be 0.
        """
        self._import_graph = import_graph or {}
        # File path captured from the first context seen during traversal;
        # used as a fallback when finalize_function gets no file path.
        self._current_file: str | None = None

    @property
    def name(self) -> str:
        """Return collector identifier.

        Returns:
            Collector name "afferent_coupling"
        """
        return "afferent_coupling"

    def collect_node(self, node: Node, context: CollectorContext, depth: int) -> None:
        """Process node (near no-op for afferent coupling).

        Afferent coupling is computed from the import graph, not by traversing
        nodes; this only remembers the current file path for later lookup.

        Args:
            node: Current tree-sitter AST node (unused)
            context: Shared context with file path
            depth: Current depth in AST (unused)
        """
        # Store current file path for lookup at finalize time
        if context.file_path and not self._current_file:
            self._current_file = context.file_path

    def get_afferent_coupling(self, file_path: str) -> int:
        """Get count of files that import this file.

        Args:
            file_path: Path to the file to check

        Returns:
            Number of files that import this file (0 if unknown)
        """
        normalized_path = self._normalize_path(file_path)
        # Single dict lookup; an absent or empty entry both mean 0.
        dependents = self._import_graph.get(normalized_path)
        return len(dependents) if dependents else 0

    def get_dependents(self, file_path: str) -> list[str]:
        """Get list of files that depend on this file.

        Args:
            file_path: Path to the file to check

        Returns:
            Sorted list of file paths that import this file
        """
        normalized_path = self._normalize_path(file_path)
        dependents = self._import_graph.get(normalized_path)
        return sorted(dependents) if dependents else []

    def _normalize_path(self, file_path: str) -> str:
        """Normalize file path for consistent lookup.

        Relative paths are NOT resolved to absolute ones, so callers must be
        consistent about which form they store in the import graph.

        Args:
            file_path: File path to normalize

        Returns:
            Normalized file path
        """
        # The absolute and relative branches of the previous implementation
        # returned the same value; one Path round-trip preserves behavior.
        return str(Path(file_path))

    def finalize_function(
        self, node: Node, context: CollectorContext
    ) -> dict[str, Any]:
        """Return final afferent coupling metrics.

        Note: This is called per function, but afferent coupling is a
        file-level metric.

        Args:
            node: Function definition node
            context: Shared context with file path

        Returns:
            Dictionary with afferent_coupling count and dependents list
        """
        # Fall back to the path captured in collect_node. Previously a
        # missing file path flowed into Path(None) and raised TypeError.
        file_path = context.file_path or self._current_file
        if not file_path:
            return {"afferent_coupling": 0, "dependents": []}
        return {
            "afferent_coupling": self.get_afferent_coupling(file_path),
            "dependents": self.get_dependents(file_path),
        }

    def reset(self) -> None:
        """Reset collector state for next file."""
        self._current_file = None
889
+
890
+
891
def build_import_graph(
    project_root: Path, files: list[Path], language: str = "python"
) -> dict[str, set[str]]:
    """Build project-wide import graph for afferent coupling analysis.

    Parses every given file with tree-sitter, extracts its imports via an
    EfferentCouplingCollector, and records the reverse mapping from each
    imported module to the set of files importing it.

    Args:
        project_root: Root directory of the project
        files: List of file paths to analyze
        language: Programming language (default: "python")

    Returns:
        Dictionary mapping module_name -> set of file paths that import it

    Example:
        >>> files = [Path("a.py"), Path("b.py"), Path("c.py")]
        >>> graph = build_import_graph(Path("/project"), files)
        >>> graph["module_x"]
        {"a.py", "c.py"}  # Both a.py and c.py import module_x
    """
    graph: dict[str, set[str]] = {}

    # Set up a tree-sitter parser. Without tree-sitter itself, or without a
    # grammar for this language, nothing can be analyzed: return empty graph.
    try:
        from tree_sitter import Parser

        grammar = _get_tree_sitter_language(language)
        if not grammar:
            return graph

        parser = Parser()
        parser.set_language(grammar)

    except ImportError:
        return graph

    # One collector instance is shared across files and reset per file.
    collector = EfferentCouplingCollector()

    for candidate in files:
        if not candidate.exists():
            continue

        try:
            source = candidate.read_bytes()
        except OSError:
            continue

        tree = parser.parse(source)
        if not tree or not tree.root_node:
            continue

        relative_path = str(candidate.relative_to(project_root))
        context = CollectorContext(
            file_path=relative_path,
            source_code=source,
            language=language,
        )

        collector.reset()
        _traverse_tree(tree.root_node, context, collector)

        # Reverse mapping: every imported module points back at this file.
        for module_name in collector.get_imported_modules():
            graph.setdefault(module_name, set()).add(relative_path)

    return graph
973
+
974
+
975
def _traverse_tree(
    node: "Node", context: "CollectorContext", collector: "EfferentCouplingCollector"
) -> None:
    """Traverse a tree-sitter AST in preorder and collect imports.

    Uses an explicit stack instead of recursion so that deeply nested ASTs
    cannot exhaust Python's recursion limit (the previous recursive version
    raised RecursionError past ~1000 levels of nesting).

    Visit order is identical to the recursive version: each node first, then
    its children left-to-right (preorder depth-first).

    Args:
        node: Root AST node to traverse
        context: Collector context
        collector: Efferent coupling collector to accumulate imports
    """
    stack = [node]
    while stack:
        current = stack.pop()
        # depth=0 for every node matches the original implementation, which
        # passed a constant depth regardless of nesting level.
        collector.collect_node(current, context, depth=0)
        # Push children reversed so the leftmost child is popped first,
        # preserving document order.
        stack.extend(reversed(current.children))
991
+
992
+
993
def _get_tree_sitter_language(language: str) -> Any:  # noqa: ARG001
    """Get tree-sitter Language object for the given language.

    This is currently a stub: loading compiled grammar libraries is not
    implemented, so it always returns None and build_import_graph falls
    back to an empty import graph.

    Args:
        language: Programming language identifier (currently unused)

    Returns:
        None - tree-sitter grammar loading is not implemented yet
    """
    # NOTE: the previous version wrapped this in try/except ImportError,
    # but the body performed no imports, so the handler was unreachable
    # dead code. Behavior is unchanged: always None.
    return None
1012
+
1013
+
1014
+ class InstabilityCalculator:
1015
+ """Calculator for instability metrics across the project.
1016
+
1017
+ Instability (I) = Ce / (Ce + Ca) measures how much a file depends on others
1018
+ vs. how much others depend on it.
1019
+
1020
+ Interpretation:
1021
+ - I = 0.0-0.3: Stable (maximally stable at 0.0)
1022
+ - I = 0.3-0.7: Balanced
1023
+ - I = 0.7-1.0: Unstable (maximally unstable at 1.0)
1024
+
1025
+ Stable files should contain abstractions and core logic.
1026
+ Unstable files should contain concrete implementations and glue code.
1027
+ """
1028
+
1029
+ def __init__(
1030
+ self,
1031
+ efferent_collector: EfferentCouplingCollector,
1032
+ afferent_collector: AfferentCouplingCollector,
1033
+ ) -> None:
1034
+ """Initialize instability calculator.
1035
+
1036
+ Args:
1037
+ efferent_collector: Collector for outgoing dependencies
1038
+ afferent_collector: Collector for incoming dependencies
1039
+ """
1040
+ self._efferent_collector = efferent_collector
1041
+ self._afferent_collector = afferent_collector
1042
+
1043
+ def calculate_instability(self, file_path: str) -> float:
1044
+ """Calculate instability for a single file.
1045
+
1046
+ Args:
1047
+ file_path: Path to the file
1048
+
1049
+ Returns:
1050
+ Instability value from 0.0 (stable) to 1.0 (unstable)
1051
+ """
1052
+ ce = len(self._efferent_collector.get_imported_modules())
1053
+ ca = self._afferent_collector.get_afferent_coupling(file_path)
1054
+
1055
+ total = ce + ca
1056
+ if total == 0:
1057
+ return 0.0
1058
+
1059
+ return ce / total
1060
+
1061
+ def calculate_project_instability(
1062
+ self, file_metrics: dict[str, Any]
1063
+ ) -> dict[str, float]:
1064
+ """Calculate instability for all files in the project.
1065
+
1066
+ Args:
1067
+ file_metrics: Dictionary mapping file_path → file metrics
1068
+
1069
+ Returns:
1070
+ Dictionary mapping file_path → instability value
1071
+ """
1072
+ instability_map: dict[str, float] = {}
1073
+
1074
+ for file_path in file_metrics:
1075
+ # Get coupling metrics from file_metrics
1076
+ if "coupling" in file_metrics[file_path]:
1077
+ coupling = file_metrics[file_path]["coupling"]
1078
+ ce = coupling.get("efferent_coupling", 0)
1079
+ ca = coupling.get("afferent_coupling", 0)
1080
+
1081
+ total = ce + ca
1082
+ if total == 0:
1083
+ instability = 0.0
1084
+ else:
1085
+ instability = ce / total
1086
+
1087
+ instability_map[file_path] = instability
1088
+
1089
+ return instability_map
1090
+
1091
+ def get_stability_grade(self, instability: float) -> str:
1092
+ """Get letter grade for instability value.
1093
+
1094
+ Args:
1095
+ instability: Instability value (0.0-1.0)
1096
+
1097
+ Returns:
1098
+ Letter grade from A to F
1099
+
1100
+ Grade thresholds:
1101
+ - A: 0.0-0.2 (very stable)
1102
+ - B: 0.2-0.4 (stable)
1103
+ - C: 0.4-0.6 (balanced)
1104
+ - D: 0.6-0.8 (unstable)
1105
+ - F: 0.8-1.0 (very unstable)
1106
+ """
1107
+ if instability <= 0.2:
1108
+ return "A"
1109
+ elif instability <= 0.4:
1110
+ return "B"
1111
+ elif instability <= 0.6:
1112
+ return "C"
1113
+ elif instability <= 0.8:
1114
+ return "D"
1115
+ else:
1116
+ return "F"
1117
+
1118
+ def get_stability_category(self, instability: float) -> str:
1119
+ """Get stability category for instability value.
1120
+
1121
+ Args:
1122
+ instability: Instability value (0.0-1.0)
1123
+
1124
+ Returns:
1125
+ Category: "Stable", "Balanced", or "Unstable"
1126
+ """
1127
+ if instability <= 0.3:
1128
+ return "Stable"
1129
+ elif instability <= 0.7:
1130
+ return "Balanced"
1131
+ else:
1132
+ return "Unstable"
1133
+
1134
+ def get_most_stable_files(
1135
+ self, instability_map: dict[str, float], limit: int = 10
1136
+ ) -> list[tuple[str, float]]:
1137
+ """Get most stable files (lowest instability).
1138
+
1139
+ Args:
1140
+ instability_map: Dictionary mapping file_path → instability
1141
+ limit: Maximum number of files to return
1142
+
1143
+ Returns:
1144
+ List of (file_path, instability) tuples, sorted by stability
1145
+ """
1146
+ sorted_files = sorted(instability_map.items(), key=lambda x: x[1])
1147
+ return sorted_files[:limit]
1148
+
1149
+ def get_most_unstable_files(
1150
+ self, instability_map: dict[str, float], limit: int = 10
1151
+ ) -> list[tuple[str, float]]:
1152
+ """Get most unstable files (highest instability).
1153
+
1154
+ Args:
1155
+ instability_map: Dictionary mapping file_path → instability
1156
+ limit: Maximum number of files to return
1157
+
1158
+ Returns:
1159
+ List of (file_path, instability) tuples, sorted by instability (descending)
1160
+ """
1161
+ sorted_files = sorted(instability_map.items(), key=lambda x: x[1], reverse=True)
1162
+ return sorted_files[:limit]