mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl
This diff compares the content of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- mcp_vector_search/__init__.py +3 -3
- mcp_vector_search/analysis/__init__.py +111 -0
- mcp_vector_search/analysis/baseline/__init__.py +68 -0
- mcp_vector_search/analysis/baseline/comparator.py +462 -0
- mcp_vector_search/analysis/baseline/manager.py +621 -0
- mcp_vector_search/analysis/collectors/__init__.py +74 -0
- mcp_vector_search/analysis/collectors/base.py +164 -0
- mcp_vector_search/analysis/collectors/cohesion.py +463 -0
- mcp_vector_search/analysis/collectors/complexity.py +743 -0
- mcp_vector_search/analysis/collectors/coupling.py +1162 -0
- mcp_vector_search/analysis/collectors/halstead.py +514 -0
- mcp_vector_search/analysis/collectors/smells.py +325 -0
- mcp_vector_search/analysis/debt.py +516 -0
- mcp_vector_search/analysis/interpretation.py +685 -0
- mcp_vector_search/analysis/metrics.py +414 -0
- mcp_vector_search/analysis/reporters/__init__.py +7 -0
- mcp_vector_search/analysis/reporters/console.py +646 -0
- mcp_vector_search/analysis/reporters/markdown.py +480 -0
- mcp_vector_search/analysis/reporters/sarif.py +377 -0
- mcp_vector_search/analysis/storage/__init__.py +93 -0
- mcp_vector_search/analysis/storage/metrics_store.py +762 -0
- mcp_vector_search/analysis/storage/schema.py +245 -0
- mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
- mcp_vector_search/analysis/trends.py +308 -0
- mcp_vector_search/analysis/visualizer/__init__.py +90 -0
- mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
- mcp_vector_search/analysis/visualizer/exporter.py +484 -0
- mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
- mcp_vector_search/analysis/visualizer/schemas.py +525 -0
- mcp_vector_search/cli/commands/analyze.py +1062 -0
- mcp_vector_search/cli/commands/chat.py +1455 -0
- mcp_vector_search/cli/commands/index.py +621 -5
- mcp_vector_search/cli/commands/index_background.py +467 -0
- mcp_vector_search/cli/commands/init.py +13 -0
- mcp_vector_search/cli/commands/install.py +597 -335
- mcp_vector_search/cli/commands/install_old.py +8 -4
- mcp_vector_search/cli/commands/mcp.py +78 -6
- mcp_vector_search/cli/commands/reset.py +68 -26
- mcp_vector_search/cli/commands/search.py +224 -8
- mcp_vector_search/cli/commands/setup.py +1184 -0
- mcp_vector_search/cli/commands/status.py +339 -5
- mcp_vector_search/cli/commands/uninstall.py +276 -357
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +292 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +600 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
- mcp_vector_search/cli/didyoumean.py +27 -2
- mcp_vector_search/cli/main.py +127 -160
- mcp_vector_search/cli/output.py +158 -13
- mcp_vector_search/config/__init__.py +4 -0
- mcp_vector_search/config/default_thresholds.yaml +52 -0
- mcp_vector_search/config/settings.py +12 -0
- mcp_vector_search/config/thresholds.py +273 -0
- mcp_vector_search/core/__init__.py +16 -0
- mcp_vector_search/core/auto_indexer.py +3 -3
- mcp_vector_search/core/boilerplate.py +186 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/database.py +406 -94
- mcp_vector_search/core/embeddings.py +24 -0
- mcp_vector_search/core/exceptions.py +11 -0
- mcp_vector_search/core/git.py +380 -0
- mcp_vector_search/core/git_hooks.py +4 -4
- mcp_vector_search/core/indexer.py +632 -54
- mcp_vector_search/core/llm_client.py +756 -0
- mcp_vector_search/core/models.py +91 -1
- mcp_vector_search/core/project.py +17 -0
- mcp_vector_search/core/relationships.py +473 -0
- mcp_vector_search/core/scheduler.py +11 -11
- mcp_vector_search/core/search.py +179 -29
- mcp_vector_search/mcp/server.py +819 -9
- mcp_vector_search/parsers/python.py +285 -5
- mcp_vector_search/utils/__init__.py +2 -0
- mcp_vector_search/utils/gitignore.py +0 -3
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +66 -4
- mcp_vector_search/utils/timing.py +10 -6
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
- mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
- mcp_vector_search/cli/commands/visualize.py +0 -1467
- mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/analysis/baseline/manager.py (new file)
@@ -0,0 +1,621 @@
"""Baseline storage and retrieval manager.

This module provides the BaselineManager class for persisting and loading
metric snapshots (baselines) to/from JSON files.

Design Decisions:
    - JSON format for human readability and simplicity (Phase 2)
    - Storage location: ~/.mcp-vector-search/baselines/ by default
    - Includes git metadata (commit, branch) for traceability
    - Includes tool version for compatibility validation
    - Atomic writes with temp file + rename for data integrity

Storage Format:
    Baselines are stored as JSON files with structure:
    {
        "version": "1.0",
        "baseline_name": "main-branch",
        "created_at": "2025-12-11T15:30:00Z",
        "tool_version": "v0.18.0",
        "git_info": {"commit": "abc123", "branch": "main"},
        "project": {"path": "/path/to/project", "file_count": 42},
        "aggregate_metrics": {...},
        "files": {...}
    }

Error Handling:
    - BaselineNotFoundError: Baseline doesn't exist
    - BaselineExistsError: Baseline already exists (use overwrite=True)
    - BaselineCorruptedError: JSON parsing failed or invalid structure
    - OSError: Filesystem permission issues (propagated with clear message)

Performance:
    - Save: O(n) where n=files, typically 50-100ms for 100 files
    - Load: O(n), typically 20-50ms for 100 files
    - List: O(k) where k=number of baselines, <10ms typical
"""

from __future__ import annotations

import json
import subprocess
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import Any

from loguru import logger

from ...utils.version import get_version_string
from ..metrics import ProjectMetrics


class BaselineError(Exception):
    """Base exception for baseline-related errors."""

    pass


class BaselineNotFoundError(BaselineError):
    """Baseline file does not exist."""

    pass


class BaselineExistsError(BaselineError):
    """Baseline file already exists."""

    pass


class BaselineCorruptedError(BaselineError):
    """Baseline file is corrupted or invalid."""

    pass


@dataclass
class GitInfo:
    """Git repository information for baseline traceability.

    Attributes:
        commit: Git commit hash (full SHA-1)
        branch: Current branch name (None if detached HEAD)
        remote: Remote repository name (e.g., "origin")
    """

    commit: str | None = None
    branch: str | None = None
    remote: str | None = None


@dataclass
class BaselineMetadata:
    """Metadata for a baseline snapshot.

    Attributes:
        baseline_name: Human-readable identifier
        created_at: ISO timestamp when baseline was created
        tool_version: Version of mcp-vector-search used
        git_info: Git repository information
        project_path: Absolute path to project root
        file_count: Number of files in baseline
        function_count: Total number of functions analyzed
    """

    baseline_name: str
    created_at: str
    tool_version: str
    git_info: GitInfo
    project_path: str
    file_count: int
    function_count: int


class BaselineManager:
    """Manage baseline snapshot storage and retrieval.

    This class handles persisting ProjectMetrics to JSON files and loading
    them back for comparison. Baselines are stored in a user-specific
    directory for easy access across projects.

    Storage Strategy:
    - Primary: ~/.mcp-vector-search/baselines/
    - File naming: {baseline_name}.json
    - Atomic writes: temp file + rename

    Example:
        >>> manager = BaselineManager()
        >>> metrics = ProjectMetrics(project_root="/path/to/project")
        >>> manager.save_baseline("main-branch", metrics)
        >>> baseline = manager.load_baseline("main-branch")
        >>> print(f"Baseline has {baseline.total_files} files")
    """

    BASELINE_VERSION = "1.0"

    def __init__(self, storage_dir: Path | None = None):
        """Initialize baseline manager.

        Args:
            storage_dir: Optional custom storage directory.
                Defaults to ~/.mcp-vector-search/baselines/
        """
        if storage_dir is None:
            # Default storage location
            storage_dir = Path.home() / ".mcp-vector-search" / "baselines"

        self.storage_dir = storage_dir.resolve()

        # Ensure storage directory exists
        self.storage_dir.mkdir(parents=True, exist_ok=True)
        logger.debug(f"Baseline storage directory: {self.storage_dir}")

    def get_baseline_path(self, baseline_name: str) -> Path:
        """Get path to baseline file.

        Args:
            baseline_name: Baseline identifier

        Returns:
            Path to baseline JSON file
        """
        # Sanitize baseline name (alphanumeric + hyphens/underscores)
        safe_name = "".join(
            c if c.isalnum() or c in "-_" else "_" for c in baseline_name
        )
        return self.storage_dir / f"{safe_name}.json"

    def save_baseline(
        self,
        baseline_name: str,
        metrics: ProjectMetrics,
        overwrite: bool = False,
        description: str | None = None,
    ) -> Path:
        """Save metrics as a baseline snapshot.

        Args:
            baseline_name: Human-readable identifier (e.g., "main-branch", "v1.2.0")
            metrics: ProjectMetrics to save
            overwrite: Allow overwriting existing baseline (default: False)
            description: Optional description for baseline

        Returns:
            Path to saved baseline file

        Raises:
            BaselineExistsError: If baseline exists and overwrite=False
            OSError: If filesystem write fails

        Performance: O(n) where n is number of files, typically 50-100ms

        Example:
            >>> manager = BaselineManager()
            >>> metrics = ProjectMetrics(project_root="/path/to/project")
            >>> path = manager.save_baseline("main-branch", metrics)
            >>> print(f"Saved to {path}")
        """
        baseline_path = self.get_baseline_path(baseline_name)

        # Check if baseline exists
        if baseline_path.exists() and not overwrite:
            raise BaselineExistsError(
                f"Baseline '{baseline_name}' already exists at {baseline_path}. "
                f"Use overwrite=True to replace it."
            )

        # Collect git information
        git_info = self._get_git_info(Path(metrics.project_root))

        # Build baseline data structure
        baseline_data = {
            "version": self.BASELINE_VERSION,
            "baseline_name": baseline_name,
            "created_at": datetime.now().isoformat(),
            "tool_version": get_version_string(include_build=True),
            "description": description,
            "git_info": asdict(git_info),
            "project": {
                "path": metrics.project_root,
                "file_count": metrics.total_files,
                "function_count": metrics.total_functions,
                "class_count": metrics.total_classes,
            },
            # Serialize ProjectMetrics
            "aggregate_metrics": self._serialize_aggregate_metrics(metrics),
            "files": self._serialize_files(metrics),
        }

        # Atomic write: write to temp file, then rename
        temp_path = baseline_path.with_suffix(".tmp")
        try:
            with temp_path.open("w", encoding="utf-8") as f:
                json.dump(baseline_data, f, indent=2, ensure_ascii=False)

            # Atomic rename (POSIX guarantees atomicity)
            temp_path.replace(baseline_path)

            file_size = baseline_path.stat().st_size
            logger.info(
                f"Saved baseline '{baseline_name}' to {baseline_path} "
                f"({file_size // 1024} KB)"
            )

            return baseline_path

        except Exception as e:
            # Clean up temp file on error
            if temp_path.exists():
                temp_path.unlink()
            logger.error(f"Failed to save baseline: {e}")
            raise

    def load_baseline(self, baseline_name: str) -> ProjectMetrics:
        """Load baseline from storage.

        Args:
            baseline_name: Baseline identifier

        Returns:
            ProjectMetrics restored from baseline

        Raises:
            BaselineNotFoundError: If baseline doesn't exist
            BaselineCorruptedError: If JSON is invalid or missing required fields

        Performance: O(n) where n is number of files, typically 20-50ms

        Example:
            >>> manager = BaselineManager()
            >>> baseline = manager.load_baseline("main-branch")
            >>> print(f"Baseline from {baseline.analyzed_at}")
        """
        baseline_path = self.get_baseline_path(baseline_name)

        if not baseline_path.exists():
            # Provide helpful error with available baselines
            available = self.list_baselines()
            available_str = ", ".join(b.baseline_name for b in available[:5])
            raise BaselineNotFoundError(
                f"Baseline '{baseline_name}' not found at {baseline_path}. "
                f"Available baselines: {available_str or 'none'}"
            )

        try:
            with baseline_path.open("r", encoding="utf-8") as f:
                data = json.load(f)

            # Validate baseline structure
            self._validate_baseline(data)

            # Deserialize back to ProjectMetrics
            metrics = self._deserialize_project_metrics(data)

            logger.info(
                f"Loaded baseline '{baseline_name}' "
                f"({metrics.total_files} files, {metrics.total_functions} functions)"
            )

            return metrics

        except json.JSONDecodeError as e:
            logger.error(f"Baseline file is corrupted: {e}")
            raise BaselineCorruptedError(
                f"Baseline '{baseline_name}' is corrupted: {e}"
            )
        except KeyError as e:
            logger.error(f"Baseline missing required field: {e}")
            raise BaselineCorruptedError(
                f"Baseline '{baseline_name}' is missing required field: {e}"
            )

    def list_baselines(self) -> list[BaselineMetadata]:
        """List all available baselines.

        Returns:
            List of baseline metadata sorted by creation time (newest first)

        Performance: O(k) where k is number of baselines, typically <10ms

        Example:
            >>> manager = BaselineManager()
            >>> baselines = manager.list_baselines()
            >>> for baseline in baselines:
            ...     print(f"{baseline.baseline_name}: {baseline.file_count} files")
        """
        baselines = []

        # Scan storage directory for .json files
        for baseline_path in self.storage_dir.glob("*.json"):
            try:
                with baseline_path.open("r", encoding="utf-8") as f:
                    data = json.load(f)

                # Extract metadata
                metadata = BaselineMetadata(
                    baseline_name=data.get("baseline_name", baseline_path.stem),
                    created_at=data.get("created_at", "unknown"),
                    tool_version=data.get("tool_version", "unknown"),
                    git_info=GitInfo(**data.get("git_info", {})),
                    project_path=data.get("project", {}).get("path", "unknown"),
                    file_count=data.get("project", {}).get("file_count", 0),
                    function_count=data.get("project", {}).get("function_count", 0),
                )

                baselines.append(metadata)

            except (json.JSONDecodeError, KeyError) as e:
                # Skip corrupted baselines
                logger.warning(f"Skipping corrupted baseline {baseline_path}: {e}")
                continue

        # Sort by creation time (newest first)
        baselines.sort(key=lambda b: b.created_at, reverse=True)

        logger.debug(f"Found {len(baselines)} baselines")
        return baselines

    def delete_baseline(self, baseline_name: str) -> None:
        """Delete a baseline.

        Args:
            baseline_name: Baseline identifier

        Raises:
            BaselineNotFoundError: If baseline doesn't exist

        Example:
            >>> manager = BaselineManager()
            >>> manager.delete_baseline("old-baseline")
        """
        baseline_path = self.get_baseline_path(baseline_name)

        if not baseline_path.exists():
            raise BaselineNotFoundError(
                f"Baseline '{baseline_name}' not found at {baseline_path}"
            )

        baseline_path.unlink()
        logger.info(f"Deleted baseline '{baseline_name}' from {baseline_path}")

    def _get_git_info(self, project_root: Path) -> GitInfo:
        """Extract git information from project repository.

        Args:
            project_root: Project root directory

        Returns:
            GitInfo with commit, branch, and remote (if available)

        Note: Does not raise exceptions. Returns GitInfo with None values if git unavailable.
        """
        git_info = GitInfo()

        try:
            # Get commit hash
            result = subprocess.run(
                ["git", "rev-parse", "HEAD"],
                cwd=project_root,
                capture_output=True,
                text=True,
                check=True,
                timeout=5,
            )
            git_info.commit = result.stdout.strip()

            # Get branch name
            result = subprocess.run(
                ["git", "rev-parse", "--abbrev-ref", "HEAD"],
                cwd=project_root,
                capture_output=True,
                text=True,
                check=True,
                timeout=5,
            )
            branch = result.stdout.strip()
            git_info.branch = branch if branch != "HEAD" else None

            # Get remote name (if exists)
            result = subprocess.run(
                ["git", "remote"],
                cwd=project_root,
                capture_output=True,
                text=True,
                check=True,
                timeout=5,
            )
            remotes = result.stdout.strip().split("\n")
            git_info.remote = remotes[0] if remotes and remotes[0] else None

        except (
            subprocess.CalledProcessError,
            FileNotFoundError,
            subprocess.TimeoutExpired,
        ):
            # Git not available or not a git repo
            logger.debug("Git information unavailable")

        return git_info

    def _serialize_aggregate_metrics(self, metrics: ProjectMetrics) -> dict[str, Any]:
        """Serialize project-level aggregate metrics.

        Args:
            metrics: ProjectMetrics to serialize

        Returns:
            Dictionary of aggregate metrics
        """
        # Compute grade distribution
        grade_dist = dict.fromkeys(["A", "B", "C", "D", "F"], 0)
        for file_metrics in metrics.files.values():
            for chunk in file_metrics.chunks:
                grade_dist[chunk.complexity_grade] += 1

        # Collect all complexity values for statistics
        all_cc = [
            chunk.cognitive_complexity
            for file_metrics in metrics.files.values()
            for chunk in file_metrics.chunks
        ]

        all_cyc = [
            chunk.cyclomatic_complexity
            for file_metrics in metrics.files.values()
            for chunk in file_metrics.chunks
        ]

        all_nesting = [
            chunk.max_nesting_depth
            for file_metrics in metrics.files.values()
            for chunk in file_metrics.chunks
        ]

        all_params = [
            chunk.parameter_count
            for file_metrics in metrics.files.values()
            for chunk in file_metrics.chunks
        ]

        return {
            "cognitive_complexity": {
                "sum": sum(all_cc),
                "avg": sum(all_cc) / len(all_cc) if all_cc else 0.0,
                "max": max(all_cc) if all_cc else 0,
                "grade_distribution": grade_dist,
            },
            "cyclomatic_complexity": {
                "sum": sum(all_cyc),
                "avg": sum(all_cyc) / len(all_cyc) if all_cyc else 0.0,
                "max": max(all_cyc) if all_cyc else 0,
            },
            "nesting_depth": {
                "max": max(all_nesting) if all_nesting else 0,
                "avg": sum(all_nesting) / len(all_nesting) if all_nesting else 0.0,
            },
            "parameter_count": {
                "max": max(all_params) if all_params else 0,
                "avg": sum(all_params) / len(all_params) if all_params else 0.0,
            },
        }

    def _serialize_files(self, metrics: ProjectMetrics) -> dict[str, Any]:
        """Serialize file-level metrics.

        Args:
            metrics: ProjectMetrics to serialize

        Returns:
            Dictionary mapping file paths to serialized FileMetrics
        """
        files_data = {}

        for file_path, file_metrics in metrics.files.items():
            files_data[file_path] = {
                "file_path": file_metrics.file_path,
                "total_lines": file_metrics.total_lines,
                "code_lines": file_metrics.code_lines,
                "comment_lines": file_metrics.comment_lines,
                "blank_lines": file_metrics.blank_lines,
                "function_count": file_metrics.function_count,
                "class_count": file_metrics.class_count,
                "method_count": file_metrics.method_count,
                "total_complexity": file_metrics.total_complexity,
                "avg_complexity": file_metrics.avg_complexity,
                "max_complexity": file_metrics.max_complexity,
                "chunks": [
                    {
                        "cognitive_complexity": chunk.cognitive_complexity,
                        "cyclomatic_complexity": chunk.cyclomatic_complexity,
                        "max_nesting_depth": chunk.max_nesting_depth,
                        "parameter_count": chunk.parameter_count,
                        "lines_of_code": chunk.lines_of_code,
                        "smells": chunk.smells,
                        "complexity_grade": chunk.complexity_grade,
                    }
                    for chunk in file_metrics.chunks
                ],
            }

        return files_data

    def _deserialize_project_metrics(self, data: dict[str, Any]) -> ProjectMetrics:
        """Deserialize JSON data back to ProjectMetrics.

        Args:
            data: JSON data from baseline file

        Returns:
            ProjectMetrics instance
        """
        from ..metrics import ChunkMetrics, FileMetrics

        # Deserialize files
        files = {}
        for file_path, file_data in data["files"].items():
            # Deserialize chunks
            chunks = [
                ChunkMetrics(
                    cognitive_complexity=chunk_data["cognitive_complexity"],
                    cyclomatic_complexity=chunk_data["cyclomatic_complexity"],
                    max_nesting_depth=chunk_data["max_nesting_depth"],
                    parameter_count=chunk_data["parameter_count"],
                    lines_of_code=chunk_data["lines_of_code"],
                    smells=chunk_data.get("smells", []),
                )
                for chunk_data in file_data["chunks"]
            ]

            file_metrics = FileMetrics(
                file_path=file_data["file_path"],
                total_lines=file_data["total_lines"],
                code_lines=file_data["code_lines"],
                comment_lines=file_data["comment_lines"],
                blank_lines=file_data["blank_lines"],
                function_count=file_data["function_count"],
                class_count=file_data["class_count"],
                method_count=file_data["method_count"],
                total_complexity=file_data["total_complexity"],
                avg_complexity=file_data["avg_complexity"],
                max_complexity=file_data["max_complexity"],
                chunks=chunks,
            )

            files[file_path] = file_metrics

        # Create ProjectMetrics
        metrics = ProjectMetrics(
            project_root=data["project"]["path"],
            analyzed_at=datetime.fromisoformat(data["created_at"]),
            total_files=data["project"]["file_count"],
            total_functions=data["project"]["function_count"],
            total_classes=data["project"].get("class_count", 0),
            files=files,
        )

        # Recompute aggregates
        metrics.compute_aggregates()

        return metrics

    def _validate_baseline(self, data: dict[str, Any]) -> None:
        """Validate baseline data structure.

        Args:
            data: JSON data from baseline file

        Raises:
            BaselineCorruptedError: If required fields are missing
        """
        required_fields = ["version", "baseline_name", "created_at", "project", "files"]

        for field in required_fields:
            if field not in data:
                raise BaselineCorruptedError(f"Missing required field: {field}")

        # Validate version compatibility
        if data["version"] != self.BASELINE_VERSION:
            logger.warning(
                f"Baseline version mismatch: {data['version']} vs {self.BASELINE_VERSION}"
            )
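Read as a whole, manager.py supports a save/list/load round trip. The following is a minimal usage sketch, not part of the diff: the import paths follow the package layout in the manifest above, and constructing ProjectMetrics directly mirrors the docstring examples (in practice the instance would come from the analysis pipeline).

from pathlib import Path

from mcp_vector_search.analysis.baseline.manager import (
    BaselineExistsError,
    BaselineManager,
)
from mcp_vector_search.analysis.metrics import ProjectMetrics

# Store baselines in a project-local directory instead of the
# default ~/.mcp-vector-search/baselines/.
manager = BaselineManager(storage_dir=Path(".baselines"))

# Normally produced by the analyzer; constructed bare here as in the docstrings.
metrics = ProjectMetrics(project_root="/path/to/project")

try:
    manager.save_baseline("main-branch", metrics, description="pre-refactor snapshot")
except BaselineExistsError:
    # Saving is refuse-by-default; opt in to replacement explicitly.
    manager.save_baseline("main-branch", metrics, overwrite=True)

# Enumerate stored snapshots (newest first) and restore one.
for meta in manager.list_baselines():
    print(meta.baseline_name, meta.created_at, meta.file_count)

baseline = manager.load_baseline("main-branch")
print(f"{baseline.total_files} files, {baseline.total_functions} functions")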
mcp_vector_search/analysis/collectors/__init__.py (new file)
@@ -0,0 +1,74 @@
"""Metric collector implementations.

This module provides the base interface and context for implementing
metric collectors that traverse AST nodes during code analysis.

Example:
    from mcp_vector_search.analysis.collectors import MetricCollector, CollectorContext

    class MyCollector(MetricCollector):
        @property
        def name(self) -> str:
            return "my_collector"

        def collect_node(self, node, context, depth):
            # Process node
            pass

        def finalize_function(self, node, context):
            return {"my_metric": 42}
"""

from .base import CollectorContext, MetricCollector
from .cohesion import (
    ClassCohesion,
    FileCohesion,
    LCOM4Calculator,
    MethodAttributeAccess,
    UnionFind,
)
from .complexity import (
    CognitiveComplexityCollector,
    CyclomaticComplexityCollector,
    MethodCountCollector,
    NestingDepthCollector,
    ParameterCountCollector,
)
from .coupling import (
    AfferentCouplingCollector,
    CircularDependency,
    CircularDependencyDetector,
    EfferentCouplingCollector,
    ImportGraph,
    InstabilityCalculator,
    NodeColor,
    build_import_graph,
    build_import_graph_from_dict,
)
from .halstead import HalsteadCollector, HalsteadMetrics

__all__ = [
    "CollectorContext",
    "MetricCollector",
    "CognitiveComplexityCollector",
    "CyclomaticComplexityCollector",
    "NestingDepthCollector",
    "ParameterCountCollector",
    "MethodCountCollector",
    "EfferentCouplingCollector",
    "AfferentCouplingCollector",
    "InstabilityCalculator",
    "build_import_graph",
    "build_import_graph_from_dict",
    "ImportGraph",
    "CircularDependency",
    "CircularDependencyDetector",
    "NodeColor",
    "ClassCohesion",
    "FileCohesion",
    "LCOM4Calculator",
    "MethodAttributeAccess",
    "UnionFind",
]
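The module docstring above sketches the collector contract: a `name` property, `collect_node(node, context, depth)` called during traversal, and `finalize_function(node, context)` returning a metrics dict. A slightly fuller hypothetical collector built on only those hooks is sketched below; the node's `type` attribute is an assumption (tree-sitter-style AST), and MetricCollector may define additional abstract hooks not shown in the docstring.

from mcp_vector_search.analysis.collectors import CollectorContext, MetricCollector


class ReturnCountCollector(MetricCollector):
    """Hypothetical collector counting return statements per function."""

    def __init__(self) -> None:
        self._returns = 0

    @property
    def name(self) -> str:
        return "return_count"

    def collect_node(self, node, context: CollectorContext, depth: int):
        # Assumes nodes expose a `type` string, as tree-sitter nodes do.
        if node.type == "return_statement":
            self._returns += 1

    def finalize_function(self, node, context: CollectorContext):
        # Emit the tally for this function and reset for the next one.
        count, self._returns = self._returns, 0
        return {"return_count": count}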