mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_vector_search/__init__.py +3 -3
- mcp_vector_search/analysis/__init__.py +111 -0
- mcp_vector_search/analysis/baseline/__init__.py +68 -0
- mcp_vector_search/analysis/baseline/comparator.py +462 -0
- mcp_vector_search/analysis/baseline/manager.py +621 -0
- mcp_vector_search/analysis/collectors/__init__.py +74 -0
- mcp_vector_search/analysis/collectors/base.py +164 -0
- mcp_vector_search/analysis/collectors/cohesion.py +463 -0
- mcp_vector_search/analysis/collectors/complexity.py +743 -0
- mcp_vector_search/analysis/collectors/coupling.py +1162 -0
- mcp_vector_search/analysis/collectors/halstead.py +514 -0
- mcp_vector_search/analysis/collectors/smells.py +325 -0
- mcp_vector_search/analysis/debt.py +516 -0
- mcp_vector_search/analysis/interpretation.py +685 -0
- mcp_vector_search/analysis/metrics.py +414 -0
- mcp_vector_search/analysis/reporters/__init__.py +7 -0
- mcp_vector_search/analysis/reporters/console.py +646 -0
- mcp_vector_search/analysis/reporters/markdown.py +480 -0
- mcp_vector_search/analysis/reporters/sarif.py +377 -0
- mcp_vector_search/analysis/storage/__init__.py +93 -0
- mcp_vector_search/analysis/storage/metrics_store.py +762 -0
- mcp_vector_search/analysis/storage/schema.py +245 -0
- mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
- mcp_vector_search/analysis/trends.py +308 -0
- mcp_vector_search/analysis/visualizer/__init__.py +90 -0
- mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
- mcp_vector_search/analysis/visualizer/exporter.py +484 -0
- mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
- mcp_vector_search/analysis/visualizer/schemas.py +525 -0
- mcp_vector_search/cli/commands/analyze.py +1062 -0
- mcp_vector_search/cli/commands/chat.py +1455 -0
- mcp_vector_search/cli/commands/index.py +621 -5
- mcp_vector_search/cli/commands/index_background.py +467 -0
- mcp_vector_search/cli/commands/init.py +13 -0
- mcp_vector_search/cli/commands/install.py +597 -335
- mcp_vector_search/cli/commands/install_old.py +8 -4
- mcp_vector_search/cli/commands/mcp.py +78 -6
- mcp_vector_search/cli/commands/reset.py +68 -26
- mcp_vector_search/cli/commands/search.py +224 -8
- mcp_vector_search/cli/commands/setup.py +1184 -0
- mcp_vector_search/cli/commands/status.py +339 -5
- mcp_vector_search/cli/commands/uninstall.py +276 -357
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +292 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +600 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
- mcp_vector_search/cli/didyoumean.py +27 -2
- mcp_vector_search/cli/main.py +127 -160
- mcp_vector_search/cli/output.py +158 -13
- mcp_vector_search/config/__init__.py +4 -0
- mcp_vector_search/config/default_thresholds.yaml +52 -0
- mcp_vector_search/config/settings.py +12 -0
- mcp_vector_search/config/thresholds.py +273 -0
- mcp_vector_search/core/__init__.py +16 -0
- mcp_vector_search/core/auto_indexer.py +3 -3
- mcp_vector_search/core/boilerplate.py +186 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/database.py +406 -94
- mcp_vector_search/core/embeddings.py +24 -0
- mcp_vector_search/core/exceptions.py +11 -0
- mcp_vector_search/core/git.py +380 -0
- mcp_vector_search/core/git_hooks.py +4 -4
- mcp_vector_search/core/indexer.py +632 -54
- mcp_vector_search/core/llm_client.py +756 -0
- mcp_vector_search/core/models.py +91 -1
- mcp_vector_search/core/project.py +17 -0
- mcp_vector_search/core/relationships.py +473 -0
- mcp_vector_search/core/scheduler.py +11 -11
- mcp_vector_search/core/search.py +179 -29
- mcp_vector_search/mcp/server.py +819 -9
- mcp_vector_search/parsers/python.py +285 -5
- mcp_vector_search/utils/__init__.py +2 -0
- mcp_vector_search/utils/gitignore.py +0 -3
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +66 -4
- mcp_vector_search/utils/timing.py +10 -6
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
- mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
- mcp_vector_search/cli/commands/visualize.py +0 -1467
- mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Base collector interface for metric collection during AST traversal."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from tree_sitter import Node
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class CollectorContext:
    """Mutable traversal state handed to every collector.

    One instance is shared by all collectors while a tree-sitter AST is
    walked, so any collector may read or update the current position and
    scope as traversal proceeds.

    Attributes:
        file_path: Path to the file being analyzed
        source_code: Raw source code as bytes (required by tree-sitter)
        language: Programming language identifier (e.g., "python", "javascript")
        current_function: Name of the function currently being analyzed
            (defaults to None)
        current_class: Name of the class currently being analyzed
            (defaults to None)
        nesting_stack: Stack tracking nested scopes (for depth calculation);
            every instance starts with its own empty list

    Example:
        context = CollectorContext(
            file_path="/path/to/file.py",
            source_code=b"def foo(): pass",
            language="python"
        )

        # During traversal
        context.current_function = "foo"
        context.nesting_stack.append("function")
        depth = len(context.nesting_stack)
    """

    # Required identification of the file under analysis.
    file_path: str
    source_code: bytes
    language: str

    # Scope markers for the function/class currently being analyzed.
    current_function: str | None = None
    current_class: str | None = None

    # Scope stack; default_factory gives each instance its own list.
    nesting_stack: list[str] = field(default_factory=list)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class MetricCollector(ABC):
    """Visitor-style base class for per-function metric collectors.

    A collector is fed every tree-sitter node during AST traversal and is
    responsible for one family of metrics (e.g., complexity, nesting).

    Lifecycle:
        1. collect_node() called for each AST node during traversal
        2. Collector accumulates state during traversal
        3. finalize_function() called when exiting a function/method
        4. reset() called to prepare for next function

    Subclasses must implement:
        - name: Unique identifier for the collector
        - collect_node(): Process individual AST nodes
        - finalize_function(): Return final metrics for completed function

    Example Implementation:
        class ComplexityCollector(MetricCollector):
            def __init__(self):
                self._complexity = 0

            @property
            def name(self) -> str:
                return "complexity"

            def collect_node(self, node: Node, context: CollectorContext, depth: int) -> None:
                if node.type in ("if_statement", "while_statement"):
                    self._complexity += 1

            def finalize_function(self, node: Node, context: CollectorContext) -> dict[str, Any]:
                return {"cognitive_complexity": self._complexity}

            def reset(self) -> None:
                self._complexity = 0
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the unique identifier of this collector.

        Returns:
            Collector name (e.g., "complexity", "nesting", "parameters")
        """

    @abstractmethod
    def collect_node(self, node: Node, context: CollectorContext, depth: int) -> None:
        """Inspect one AST node and update the collector's internal state.

        Invoked for each node during traversal.

        Args:
            node: Current tree-sitter AST node being visited
            context: Shared context with file info and current scope
            depth: Current nesting depth in the AST

        Example:
            def collect_node(self, node, context, depth):
                if node.type == "if_statement":
                    self._if_count += 1
                elif node.type == "for_statement":
                    self._loop_count += 1
        """

    @abstractmethod
    def finalize_function(
        self, node: Node, context: CollectorContext
    ) -> dict[str, Any]:
        """Produce the metrics gathered for the function being exited.

        Args:
            node: The function/method definition node
            context: Shared context with file info and scope

        Returns:
            Dictionary of metric names to values
            Example: {"cognitive_complexity": 5, "max_nesting": 3}

        Example:
            def finalize_function(self, node, context):
                return {
                    "cognitive_complexity": self._complexity,
                    "nesting_depth": self._max_depth
                }
        """

    def reset(self) -> None:
        """Clear per-function state so the next function starts fresh.

        Called after finalize_function(). The base implementation is a
        no-op; override it when the collector keeps state between nodes.

        Example:
            def reset(self):
                self._complexity = 0
                self._nesting_stack.clear()
                self._max_depth = 0
        """
|
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
"""LCOM4 cohesion metric collector.
|
|
2
|
+
|
|
3
|
+
LCOM4 (Lack of Cohesion of Methods version 4) measures class cohesion
|
|
4
|
+
by counting connected components in the method-attribute graph.
|
|
5
|
+
|
|
6
|
+
A class is cohesive when its methods work together using shared attributes.
|
|
7
|
+
LCOM4 counts how many disconnected groups of methods exist:
|
|
8
|
+
- LCOM4 = 1: Perfect cohesion (all methods connected)
|
|
9
|
+
- LCOM4 > 1: Poor cohesion (class should potentially be split)
|
|
10
|
+
|
|
11
|
+
Example:
|
|
12
|
+
# Cohesive class (LCOM4 = 1)
|
|
13
|
+
class GoodClass:
|
|
14
|
+
def method_a(self):
|
|
15
|
+
return self.x + self.y
|
|
16
|
+
|
|
17
|
+
def method_b(self):
|
|
18
|
+
return self.x * self.y # Shares x, y with method_a
|
|
19
|
+
|
|
20
|
+
# Incohesive class (LCOM4 = 2)
|
|
21
|
+
class BadClass:
|
|
22
|
+
def method_a(self):
|
|
23
|
+
return self.x + self.y # Group 1
|
|
24
|
+
|
|
25
|
+
def method_c(self):
|
|
26
|
+
return self.z + self.w # Group 2 (no shared attributes)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
from dataclasses import dataclass, field
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
from typing import TYPE_CHECKING
|
|
34
|
+
|
|
35
|
+
from loguru import logger
|
|
36
|
+
|
|
37
|
+
if TYPE_CHECKING:
|
|
38
|
+
from tree_sitter import Node
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class MethodAttributeAccess:
    """Record of the instance attributes a single method touches.

    Attributes:
        method_name: Name of the method
        attributes: Set of instance attributes accessed (e.g., {"x", "y"});
            defaults to a fresh empty set per instance
    """

    method_name: str
    # default_factory avoids sharing one mutable set between instances.
    attributes: set[str] = field(default_factory=set)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class ClassCohesion:
    """Outcome of an LCOM4 computation for one class.

    Attributes:
        class_name: Name of the class
        lcom4: Number of connected components (1=cohesive, >1=incohesive)
        method_count: Total number of methods in class
        attribute_count: Total number of instance attributes accessed
        method_attributes: Detailed mapping of method names to their
            attributes; defaults to a fresh empty dict per instance
    """

    class_name: str
    lcom4: int
    method_count: int
    attribute_count: int
    # default_factory avoids sharing one mutable dict between instances.
    method_attributes: dict[str, set[str]] = field(default_factory=dict)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
class FileCohesion:
    """Aggregated cohesion results for every class found in one file.

    Attributes:
        file_path: Path to the analyzed file
        classes: List of per-class cohesion results (empty by default)
        avg_lcom4: Average LCOM4 across all classes (0.0 by default)
        max_lcom4: Maximum LCOM4 value, i.e. worst cohesion (0 by default)
    """

    file_path: Path
    # default_factory avoids sharing one mutable list between instances.
    classes: list[ClassCohesion] = field(default_factory=list)
    avg_lcom4: float = 0.0
    max_lcom4: int = 0
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class UnionFind:
    """Disjoint-set forest used to count connected components.

    Every item starts in its own set; union() merges sets, and
    count_components() reports how many distinct sets remain.

    Example:
        uf = UnionFind(["method_a", "method_b", "method_c"])
        uf.union("method_a", "method_b")  # Connect a and b
        uf.count_components()  # Returns 2 (groups: {a,b}, {c})
    """

    def __init__(self, items: list[str]) -> None:
        """Create one singleton set per item.

        Args:
            items: List of method names to track
        """
        # Each item is initially its own root, with rank 0.
        self.parent = dict(zip(items, items))
        self.rank = {name: 0 for name in items}

    def find(self, item: str) -> str:
        """Return the representative (root) of the set containing item.

        Every node visited on the way to the root is re-pointed directly
        at the root (path compression).

        Args:
            item: Method name to find root for

        Returns:
            Root of the set containing item
        """
        # First pass: walk up to the root.
        root = item
        while self.parent[root] != root:
            root = self.parent[root]

        # Second pass: point every node on the path straight at the root.
        cursor = item
        while self.parent[cursor] != root:
            above = self.parent[cursor]
            self.parent[cursor] = root
            cursor = above

        return root

    def union(self, item1: str, item2: str) -> None:
        """Merge the sets containing item1 and item2.

        The root with the lower rank is attached beneath the other one
        (union by rank).

        Args:
            item1: First method name
            item2: Second method name
        """
        keep, absorb = self.find(item1), self.find(item2)
        if keep == absorb:
            return  # Already in the same set

        # Make `keep` the root with the higher (or equal) rank.
        if self.rank[keep] < self.rank[absorb]:
            keep, absorb = absorb, keep

        self.parent[absorb] = keep
        # Equal ranks: the merged tree is one level taller.
        if self.rank[keep] == self.rank[absorb]:
            self.rank[keep] += 1

    def count_components(self) -> int:
        """Count the disjoint sets currently tracked.

        Returns:
            Number of disjoint sets (LCOM4 value)
        """
        roots = set(map(self.find, self.parent))
        return len(roots)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class LCOM4Calculator:
    """Calculate LCOM4 cohesion metric for Python classes.

    Algorithm:
        1. For each class, extract methods and their attribute accesses
        2. Build undirected graph: nodes=methods, edges=shared attributes
        3. Count connected components using Union-Find
        4. LCOM4 = number of components (1=cohesive, >1=potentially split)

    Notes:
        - Only accesses spelled ``self.<name>`` are recorded; a receiver
          with any other name is not recognized.
        - Methods decorated with ``@staticmethod``/``@classmethod`` and
          methods that touch no ``self`` attribute do not take part in the
          component graph (they still count towards ``method_count``).

    Example:
        calculator = LCOM4Calculator()
        result = calculator.calculate_file_cohesion(
            Path("my_file.py"),
            "class MyClass:\\n    def foo(self): return self.x\\n"
        )
        print(f"LCOM4: {result.classes[0].lcom4}")
    """

    def __init__(self) -> None:
        """Initialize LCOM4 calculator with tree-sitter parser."""
        self._parser = None
        self._language = None
        self._initialize_parser()

    def _initialize_parser(self) -> None:
        """Initialize Tree-sitter parser for Python.

        On any failure the parser and language are left as None, which
        makes calculate_file_cohesion() return an empty result.
        """
        try:
            from tree_sitter_language_pack import get_language, get_parser

            self._language = get_language("python")
            self._parser = get_parser("python")
            logger.debug("Python Tree-sitter parser initialized for LCOM4")
        except Exception as e:
            # Deliberately broad: a missing or broken tree-sitter install
            # must degrade to "no metrics", not crash the caller.
            logger.warning(f"Tree-sitter initialization failed: {e}")
            self._parser = None
            self._language = None

    def calculate_file_cohesion(self, file_path: Path, content: str) -> FileCohesion:
        """Calculate LCOM4 for all classes in a file.

        Args:
            file_path: Path to the file (for reporting)
            content: Source code content as string

        Returns:
            FileCohesion with per-class LCOM4 results. The result is empty
            when the parser is unavailable; classes without methods are
            omitted.
        """
        if not self._parser:
            logger.warning("Tree-sitter parser not available, returning empty result")
            return FileCohesion(file_path=file_path)

        tree = self._parser.parse(content.encode("utf8"))

        class_cohesions = []
        for class_node in self._find_classes(tree.root_node):
            cohesion = self._calculate_class_cohesion(class_node, content)
            if cohesion is not None:
                class_cohesions.append(cohesion)

        # Aggregate metrics; both stay at zero when no class was measured.
        avg_lcom4 = 0.0
        max_lcom4 = 0
        if class_cohesions:
            scores = [c.lcom4 for c in class_cohesions]
            avg_lcom4 = sum(scores) / len(scores)
            max_lcom4 = max(scores)

        return FileCohesion(
            file_path=file_path,
            classes=class_cohesions,
            avg_lcom4=avg_lcom4,
            max_lcom4=max_lcom4,
        )

    @staticmethod
    def _node_text(node: Node, content: str) -> str:
        """Return the source text covered by a tree-sitter node.

        ``start_byte``/``end_byte`` are offsets into the UTF-8 encoded
        source, so they must never be used to slice the ``str`` directly:
        any non-ASCII character earlier in the file would shift the result.

        Args:
            node: AST node whose text is wanted
            content: Source code the tree was parsed from

        Returns:
            Decoded text of the node
        """
        raw = getattr(node, "text", None)
        if raw is None:
            # Fallback when the node carries no text: slice the bytes.
            raw = content.encode("utf8")[node.start_byte : node.end_byte]
        return raw.decode("utf8", errors="replace")

    def _find_classes(self, root: Node) -> list[Node]:
        """Find all class definitions in the AST.

        Nested classes are included. The walk is iterative so deeply
        nested trees cannot exhaust the interpreter's recursion limit.

        Args:
            root: Root AST node

        Returns:
            List of class_definition nodes in document (pre-order) order
        """
        classes = []
        pending = [root]
        while pending:
            node = pending.pop()
            if node.type == "class_definition":
                classes.append(node)
            # Reversed so the left-most child is popped (visited) first.
            pending.extend(reversed(node.children))
        return classes

    def _calculate_class_cohesion(
        self, class_node: Node, content: str
    ) -> ClassCohesion | None:
        """Calculate LCOM4 for a single class.

        Args:
            class_node: AST node for class definition
            content: Source code (for extracting text)

        Returns:
            ClassCohesion result, or None if class has no methods
        """
        class_name = self._get_class_name(class_node, content)
        methods = self._extract_methods(class_node)

        if not methods:
            logger.debug(f"Class {class_name} has no methods, skipping LCOM4")
            return None

        # Extract attribute accesses for each instance method.
        method_attributes: dict[str, set[str]] = {}
        for method_node in methods:
            # Static/class methods have no `self`, so they are left out.
            if self._is_static_or_class_method(method_node):
                continue

            method_name = self._get_method_name(method_node, content)
            attributes = self._find_attribute_accesses(method_node, content)
            if method_name and attributes:
                # Merge rather than overwrite: several definitions may
                # share one name (e.g. a property getter and its setter).
                method_attributes.setdefault(method_name, set()).update(attributes)

        if not method_attributes:
            # No method touches any instance attribute: every method
            # (including static/class methods) counts as its own component.
            return ClassCohesion(
                class_name=class_name,
                lcom4=len(methods),
                method_count=len(methods),
                attribute_count=0,
                method_attributes={},
            )

        all_attributes = set().union(*method_attributes.values())

        return ClassCohesion(
            class_name=class_name,
            lcom4=self._calculate_lcom4(method_attributes),
            method_count=len(methods),
            attribute_count=len(all_attributes),
            method_attributes=method_attributes,
        )

    def _get_class_name(self, class_node: Node, content: str) -> str:
        """Extract class name from class definition node.

        Args:
            class_node: Class definition AST node
            content: Source code

        Returns:
            Class name or "UnknownClass"
        """
        name_node = class_node.child_by_field_name("name")
        if name_node is None:
            return "UnknownClass"
        return self._node_text(name_node, content)

    def _extract_methods(self, class_node: Node) -> list[Node]:
        """Extract method nodes from a class.

        Only direct members of the class body are considered, either bare
        or wrapped in a decorated_definition.

        Args:
            class_node: Class definition AST node

        Returns:
            List of function_definition nodes that are methods
        """
        methods = []
        body = class_node.child_by_field_name("body")
        if body is None:
            return methods

        for child in body.children:
            if child.type == "function_definition":
                methods.append(child)
            elif child.type == "decorated_definition":
                # Decorated methods: @decorator\ndef method(...)
                # Take the first function_definition inside the wrapper.
                for subchild in child.children:
                    if subchild.type == "function_definition":
                        methods.append(subchild)
                        break

        return methods

    def _get_method_name(self, method_node: Node, content: str) -> str | None:
        """Extract method name from function definition.

        Args:
            method_node: Function definition AST node
            content: Source code

        Returns:
            Method name or None
        """
        name_node = method_node.child_by_field_name("name")
        if name_node is None:
            return None
        return self._node_text(name_node, content)

    @staticmethod
    def _is_static_or_class_decorator(decorator_text: str) -> bool:
        """Tell whether decorator source text names staticmethod/classmethod.

        The decorator's name is compared exactly (ignoring any dotted
        prefix, call arguments or trailing comment), so look-alikes such as
        ``@staticmethod_like`` are not misclassified.

        Args:
            decorator_text: Source text of a decorator, e.g. "@staticmethod"

        Returns:
            True if the decorator is staticmethod or classmethod
        """
        expr = decorator_text.strip()
        if not expr.startswith("@"):
            return False
        head = expr[1:].split("(", 1)[0].split("#", 1)[0].split()
        if not head:
            return False
        return head[0].rsplit(".", 1)[-1] in ("staticmethod", "classmethod")

    def _is_static_or_class_method(self, method_node: Node) -> bool:
        """Check if method is @staticmethod or @classmethod.

        Args:
            method_node: Function definition AST node

        Returns:
            True if method is static or class method
        """
        # Decorators normally hang off the enclosing decorated_definition;
        # the method's own children are checked too in case the tree is
        # shaped differently.
        candidates = list(method_node.children)
        parent = method_node.parent
        if parent is not None and parent.type == "decorated_definition":
            candidates = list(parent.children) + candidates

        for child in candidates:
            if child.type != "decorator":
                continue
            decorator_text = (child.text or b"").decode("utf-8", errors="replace")
            if self._is_static_or_class_decorator(decorator_text):
                return True

        return False

    def _find_attribute_accesses(self, method_node: Node, content: str) -> set[str]:
        """Find all self.attribute accesses in a method.

        Every ``attribute`` node whose object is the identifier ``self`` is
        recorded, including nodes inside nested functions. The walk is
        iterative so deeply nested expressions cannot exhaust the
        recursion limit.

        Args:
            method_node: Function definition AST node
            content: Source code

        Returns:
            Set of attribute names accessed via self
        """
        attributes = set()
        pending = [method_node]
        while pending:
            node = pending.pop()
            if node.type == "attribute":
                obj_node = node.child_by_field_name("object")
                if (
                    obj_node is not None
                    and obj_node.type == "identifier"
                    and self._node_text(obj_node, content) == "self"
                ):
                    attr_node = node.child_by_field_name("attribute")
                    if attr_node is not None:
                        attributes.add(self._node_text(attr_node, content))
            # Visit order does not matter: the result is a set.
            pending.extend(node.children)

        return attributes

    def _calculate_lcom4(self, method_attributes: dict[str, set[str]]) -> int:
        """Calculate LCOM4 using connected components.

        Two methods are connected when they share at least one attribute.
        Instead of intersecting every pair of methods, each attribute
        remembers the first method that used it and every later user is
        merged with that method; the resulting components are the same.

        Args:
            method_attributes: Mapping of method names to their attributes

        Returns:
            LCOM4 value (number of connected components)
        """
        if not method_attributes:
            return 0

        # Edge case: a single method is trivially one component.
        if len(method_attributes) == 1:
            return 1

        uf = UnionFind(list(method_attributes))

        first_user: dict[str, str] = {}
        for method, attrs in method_attributes.items():
            for attr in attrs:
                owner = first_user.setdefault(attr, method)
                if owner != method:
                    uf.union(owner, method)

        return uf.count_components()
|