mcp-vector-search 1.0.3__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +48 -1
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +35 -0
  7. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  8. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  9. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  10. mcp_vector_search/analysis/collectors/smells.py +325 -0
  11. mcp_vector_search/analysis/debt.py +516 -0
  12. mcp_vector_search/analysis/interpretation.py +685 -0
  13. mcp_vector_search/analysis/metrics.py +74 -1
  14. mcp_vector_search/analysis/reporters/__init__.py +3 -1
  15. mcp_vector_search/analysis/reporters/console.py +424 -0
  16. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  17. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  18. mcp_vector_search/analysis/storage/__init__.py +93 -0
  19. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  20. mcp_vector_search/analysis/storage/schema.py +245 -0
  21. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  22. mcp_vector_search/analysis/trends.py +308 -0
  23. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  24. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  25. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  26. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  27. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  28. mcp_vector_search/cli/commands/analyze.py +665 -11
  29. mcp_vector_search/cli/commands/chat.py +193 -0
  30. mcp_vector_search/cli/commands/index.py +600 -2
  31. mcp_vector_search/cli/commands/index_background.py +467 -0
  32. mcp_vector_search/cli/commands/search.py +194 -1
  33. mcp_vector_search/cli/commands/setup.py +64 -13
  34. mcp_vector_search/cli/commands/status.py +302 -3
  35. mcp_vector_search/cli/commands/visualize/cli.py +26 -10
  36. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +8 -4
  37. mcp_vector_search/cli/commands/visualize/graph_builder.py +167 -234
  38. mcp_vector_search/cli/commands/visualize/server.py +304 -15
  39. mcp_vector_search/cli/commands/visualize/templates/base.py +60 -6
  40. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2100 -65
  41. mcp_vector_search/cli/commands/visualize/templates/styles.py +1297 -88
  42. mcp_vector_search/cli/didyoumean.py +5 -0
  43. mcp_vector_search/cli/main.py +16 -5
  44. mcp_vector_search/cli/output.py +134 -5
  45. mcp_vector_search/config/thresholds.py +89 -1
  46. mcp_vector_search/core/__init__.py +16 -0
  47. mcp_vector_search/core/database.py +39 -2
  48. mcp_vector_search/core/embeddings.py +24 -0
  49. mcp_vector_search/core/git.py +380 -0
  50. mcp_vector_search/core/indexer.py +445 -84
  51. mcp_vector_search/core/llm_client.py +9 -4
  52. mcp_vector_search/core/models.py +88 -1
  53. mcp_vector_search/core/relationships.py +473 -0
  54. mcp_vector_search/core/search.py +1 -1
  55. mcp_vector_search/mcp/server.py +795 -4
  56. mcp_vector_search/parsers/python.py +285 -5
  57. mcp_vector_search/utils/gitignore.py +0 -3
  58. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +3 -2
  59. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/RECORD +62 -39
  60. mcp_vector_search/cli/commands/visualize.py.original +0 -2536
  61. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +0 -0
  62. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +0 -0
  63. {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/analysis/reporters/sarif.py (new file)
@@ -0,0 +1,377 @@
+ """SARIF 2.1.0 output format for code analysis results.
+
+ This module provides SARIF (Static Analysis Results Interchange Format) 2.1.0
+ compliant output for code smells and structural analysis results. SARIF is an
+ OASIS standard format for sharing static analysis results between tools.
+
+ SARIF Specification: https://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html
+
+ Example:
+     >>> from pathlib import Path
+     >>> from ..collectors.smells import CodeSmell, SmellSeverity
+     >>> reporter = SARIFReporter()
+     >>> smells = [CodeSmell(...)]
+     >>> reporter.write_sarif(smells, Path("report.sarif"))
+ """
+
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ from dataclasses import dataclass
+ from datetime import UTC, datetime
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Any
+
+ if TYPE_CHECKING:
+     from ..collectors.smells import CodeSmell, SmellSeverity
+
+
+ @dataclass
+ class SARIFReporter:
+     """Generate SARIF 2.1.0 formatted reports for code analysis.
+
+     SARIF (Static Analysis Results Interchange Format) is an industry-standard
+     JSON-based format for static analysis tool output, enabling integration with
+     IDEs, CI/CD systems, and security tools.
+
+     Attributes:
+         tool_name: Name of the analysis tool (default: "MCP Vector Search")
+         tool_version: Version of the tool (default: from package)
+         tool_uri: URI to tool documentation/homepage
+         include_help_text: Include help text for each rule (default: True)
+         include_fingerprints: Include result fingerprints for deduplication (default: True)
+
+     Example:
+         >>> reporter = SARIFReporter()
+         >>> sarif_doc = reporter.generate_sarif(code_smells, base_path=Path("/project"))
+         >>> reporter.write_sarif(code_smells, Path("report.sarif"))
+     """
+
+     tool_name: str = "MCP Vector Search"
+     tool_version: str = "1.0.3"
+     tool_uri: str = "https://github.com/bobmatnyc/mcp-vector-search"
+     include_help_text: bool = True
+     include_fingerprints: bool = True
+
+     def generate_sarif(
+         self, smells: list[CodeSmell], base_path: Path | None = None
+     ) -> dict[str, Any]:
+         """Generate SARIF 2.1.0 document from code smells.
+
+         Creates a complete SARIF document with tool metadata, rules, and results.
+         All file paths are made relative to base_path if provided.
+
+         Args:
+             smells: List of detected code smells to report
+             base_path: Base directory for making paths relative (optional).
+                 If None, uses absolute paths.
+
+         Returns:
+             Dictionary containing SARIF 2.1.0 compliant document structure
+
+         Example:
+             >>> smells = [CodeSmell(name="Long Method", ...)]
+             >>> sarif = reporter.generate_sarif(smells, Path.cwd())
+             >>> print(json.dumps(sarif, indent=2))
+         """
+         # Build unique rules from all smells
+         rules = self._build_rules(smells)
+
+         # Convert smells to SARIF results
+         results = [self._smell_to_result(smell, base_path) for smell in smells]
+
+         # Build complete SARIF document
+         sarif_doc = {
+             "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
+             "version": "2.1.0",
+             "runs": [
+                 {
+                     "tool": {
+                         "driver": {
+                             "name": self.tool_name,
+                             "version": self.tool_version,
+                             "informationUri": self.tool_uri,
+                             "rules": rules,
+                         }
+                     },
+                     "results": results,
+                     "invocations": [
+                         {
+                             "executionSuccessful": True,
+                             "endTimeUtc": datetime.now(UTC).isoformat(),
+                         }
+                     ],
+                 }
+             ],
+         }
+
+         return sarif_doc
+
+     def write_sarif(
+         self,
+         smells: list[CodeSmell],
+         output_path: Path,
+         base_path: Path | None = None,
+         indent: int = 2,
+     ) -> None:
+         """Write SARIF report to file.
+
+         Generates the SARIF document and writes it to the specified path with
+         pretty-printing for readability.
+
+         Args:
+             smells: List of code smells to report
+             output_path: Path where the SARIF file should be written
+             base_path: Base directory for relative paths (optional)
+             indent: JSON indentation level (default: 2, 0 for compact)
+
+         Raises:
+             IOError: If the file cannot be written
+             OSError: If the directory cannot be created
+
+         Example:
+             >>> reporter.write_sarif(smells, Path("report.sarif"), indent=2)
+         """
+         sarif_doc = self.generate_sarif(smells, base_path)
+
+         # Ensure parent directory exists
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+
+         # Write with pretty-printing
+         with open(output_path, "w", encoding="utf-8") as f:
+             if indent > 0:
+                 json.dump(sarif_doc, f, indent=indent, ensure_ascii=False)
+             else:
+                 json.dump(sarif_doc, f, ensure_ascii=False)
+
+     def _severity_to_level(self, severity: SmellSeverity) -> str:
+         """Map SmellSeverity to SARIF level.
+
+         SARIF defines three result levels: error, warning, note.
+         We map our SmellSeverity enum to these levels.
+
+         Args:
+             severity: SmellSeverity enum value
+
+         Returns:
+             SARIF level string ("error", "warning", or "note")
+
+         Mapping:
+             ERROR -> "error" (requires immediate attention)
+             WARNING -> "warning" (should be addressed)
+             INFO -> "note" (informational)
+         """
+         from ..collectors.smells import SmellSeverity
+
+         mapping = {
+             SmellSeverity.ERROR: "error",
+             SmellSeverity.WARNING: "warning",
+             SmellSeverity.INFO: "note",
+         }
+         return mapping.get(severity, "warning")
+
+     def _smell_to_rule_id(self, smell_name: str) -> str:
+         """Convert smell name to SARIF rule ID (kebab-case).
+
+         Transforms human-readable smell names to kebab-case IDs suitable
+         for use as SARIF rule identifiers.
+
+         Args:
+             smell_name: Human-readable smell name (e.g., "Long Method")
+
+         Returns:
+             Kebab-case rule ID (e.g., "long-method")
+
+         Examples:
+             >>> reporter._smell_to_rule_id("Long Method")
+             'long-method'
+             >>> reporter._smell_to_rule_id("God Class")
+             'god-class'
+             >>> reporter._smell_to_rule_id("Deep_Nesting")
+             'deep-nesting'
+         """
+         return smell_name.lower().replace(" ", "-").replace("_", "-")
+
+     def _build_rules(self, smells: list[CodeSmell]) -> list[dict[str, Any]]:
+         """Generate unique rules from code smells.
+
+         Creates SARIF rule definitions for all unique smell types found.
+         Each rule includes ID, description, and help text.
+
+         Args:
+             smells: List of code smells to extract rules from
+
+         Returns:
+             List of SARIF rule objects with metadata
+
+         SARIF Rule Structure:
+             - id: Unique rule identifier (kebab-case)
+             - shortDescription: Brief rule description
+             - help: Detailed help text with suggestions
+             - properties: Additional metadata (optional)
+         """
+         # Track unique smell types
+         unique_smells: dict[str, CodeSmell] = {}
+         for smell in smells:
+             rule_id = self._smell_to_rule_id(smell.name)
+             if rule_id not in unique_smells:
+                 unique_smells[rule_id] = smell
+
+         # Build rule definitions
+         rules = []
+         for rule_id, smell in unique_smells.items():
+             rule = {
+                 "id": rule_id,
+                 "shortDescription": {"text": smell.name},
+                 "fullDescription": {"text": self._get_smell_description(smell.name)},
+             }
+
+             # Add help text if enabled
+             if self.include_help_text and smell.suggestion:
+                 rule["help"] = {"text": smell.suggestion}
+
+             # Add default severity configuration
+             rule["defaultConfiguration"] = {
+                 "level": self._severity_to_level(smell.severity)
+             }
+
+             rules.append(rule)
+
+         return rules
+
+     def _get_smell_description(self, smell_name: str) -> str:
+         """Get detailed description for code smell type.
+
+         Provides comprehensive descriptions for each smell type to help
+         developers understand what the issue is and why it matters.
+
+         Args:
+             smell_name: Name of the code smell
+
+         Returns:
+             Detailed description explaining the smell and its impact
+         """
+         descriptions = {
+             "Long Method": "Method or function exceeds recommended length thresholds, making it harder to understand, test, and maintain. Long methods often indicate that the function is doing too much and violates the Single Responsibility Principle.",
+             "Deep Nesting": "Code has excessive nesting depth (nested if/for/while blocks), reducing readability and increasing cognitive complexity. Deep nesting makes it harder to understand control flow and increases the likelihood of bugs.",
+             "Long Parameter List": "Function or method has too many parameters, making the API difficult to use and understand. Consider using parameter objects, builder pattern, or decomposing the function into smaller pieces.",
+             "God Class": "Class has too many responsibilities, indicated by high method count and large size. This violates the Single Responsibility Principle and makes the class difficult to maintain, test, and reason about.",
+             "Complex Method": "Method has high cyclomatic complexity (many decision points), making it difficult to test and prone to bugs. High complexity indicates complex control flow that should be simplified or decomposed.",
+         }
+
+         return descriptions.get(
+             smell_name,
+             f"Code smell detected: {smell_name}. Consider refactoring to improve maintainability.",
+         )
+
+     def _smell_to_result(
+         self, smell: CodeSmell, base_path: Path | None = None
+     ) -> dict[str, Any]:
+         """Convert CodeSmell to SARIF result object.
+
+         Transforms a CodeSmell into SARIF result format with location,
+         message, and optional fingerprint for deduplication.
+
+         Args:
+             smell: Code smell to convert
+             base_path: Base path for making file paths relative (optional)
+
+         Returns:
+             SARIF result object with location and message
+
+         SARIF Result Structure:
+             - ruleId: Reference to rule definition
+             - level: Severity level (error/warning/note)
+             - message: Human-readable message
+             - locations: Where the issue was found
+             - partialFingerprints: For result deduplication (optional)
+         """
+         # Parse location string (format: "file:line" or "file")
+         location_parts = smell.location.rsplit(":", 1)
+         file_path = location_parts[0]
+         line_number = int(location_parts[1]) if len(location_parts) > 1 else None
+
+         # Make path relative if base_path provided
+         if base_path:
+             try:
+                 file_path_obj = Path(file_path)
+                 if file_path_obj.is_absolute():
+                     file_path = str(file_path_obj.relative_to(base_path))
+             except (ValueError, OSError):
+                 # Keep original path if relative_to fails
+                 pass
+
+         # Build SARIF result
+         result: dict[str, Any] = {
+             "ruleId": self._smell_to_rule_id(smell.name),
+             "level": self._severity_to_level(smell.severity),
+             "message": {
+                 "text": (
+                     f"{smell.name}: {smell.description}"
+                     if smell.description
+                     else smell.name
+                 )
+             },
+             "locations": [
+                 {
+                     "physicalLocation": {
+                         "artifactLocation": {
+                             "uri": file_path,
+                         }
+                     }
+                 }
+             ],
+         }
+
+         # Add region (line number) if available
+         if line_number is not None:
+             result["locations"][0]["physicalLocation"]["region"] = {
+                 "startLine": line_number
+             }
+
+         # Add fingerprint for deduplication if enabled
+         if self.include_fingerprints:
+             result["partialFingerprints"] = {
+                 "primaryLocationLineHash": self._compute_fingerprint(smell)
+             }
+
+         # Add additional properties
+         result["properties"] = {
+             "metricValue": smell.metric_value,
+             "threshold": smell.threshold,
+         }
+
+         return result
+
+     def _compute_fingerprint(self, smell: CodeSmell) -> str:
+         """Generate stable fingerprint for result deduplication.
+
+         Creates a SHA-256 hash of key smell attributes to enable
+         deduplication across runs and comparison of results over time.
+
+         Args:
+             smell: Code smell to fingerprint
+
+         Returns:
+             16-character hex string fingerprint
+
+         Fingerprint Includes:
+             - Smell name (type of issue)
+             - Location (file:line)
+             - Metric value (normalized to string)
+
+         Example:
+             >>> smell = CodeSmell(name="Long Method", location="file.py:10", ...)
+             >>> fingerprint = reporter._compute_fingerprint(smell)
+             >>> len(fingerprint)
+             16
+         """
+         # Normalize metric value to avoid floating point differences
+         normalized_metric = f"{smell.metric_value:.1f}"
+
+         # Build fingerprint content
+         content = f"{smell.name}:{smell.location}:{normalized_metric}"
+
+         # Compute SHA-256 and take first 16 characters
+         return hashlib.sha256(content.encode()).hexdigest()[:16]
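
For orientation, here is a minimal usage sketch of the new reporter. The reporter names and signatures come directly from the hunk above; the CodeSmell keyword arguments are an assumption based on the attributes the reporter reads (name, location, description, suggestion, severity, metric_value, threshold), since the actual constructor lives in collectors/smells.py and is not part of this diff.

    from pathlib import Path

    from mcp_vector_search.analysis.collectors.smells import CodeSmell, SmellSeverity
    from mcp_vector_search.analysis.reporters.sarif import SARIFReporter

    # Hypothetical smell instance: field names mirror what the reporter reads,
    # but CodeSmell's real constructor is defined outside this hunk.
    smell = CodeSmell(
        name="Long Method",
        location="src/app.py:42",  # "file:line" format parsed by _smell_to_result()
        description="Function spans 120 lines",
        suggestion="Extract helper functions",
        severity=SmellSeverity.WARNING,
        metric_value=120.0,
        threshold=50.0,
    )

    reporter = SARIFReporter()
    # base_path makes URIs in the report relative, keeping it portable across
    # checkouts and CI runners; rule IDs become kebab-case ("long-method").
    reporter.write_sarif([smell], Path("report.sarif"), base_path=Path.cwd())

The resulting report.sarif can be consumed by any SARIF 2.1.0 aware tool, and the partialFingerprints emitted by _compute_fingerprint let such tools deduplicate recurring findings across runs.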
mcp_vector_search/analysis/storage/__init__.py (new file)
@@ -0,0 +1,93 @@
+ """Storage module for metrics persistence and historical tracking.
+
+ This module provides SQLite-based storage for code metrics, enabling:
+ - Historical tracking of file and project metrics over time
+ - Trend analysis to identify improving/degrading code quality
+ - Snapshot comparison for release-to-release analysis
+ - Code smell tracking and remediation monitoring
+
+ Public API:
+     - MetricsStore: Main storage interface
+     - ProjectSnapshot: Project-wide metrics at a point in time
+     - TrendData: Trend analysis results
+     - GitInfo: Git metadata for traceability
+
+ Exceptions:
+     - MetricsStoreError: Base exception for storage errors
+     - DatabaseLockedError: Database locked by another process
+     - DuplicateEntryError: Attempted duplicate entry
+
+ Example Usage:
+     >>> from mcp_vector_search.analysis.storage import MetricsStore
+     >>> from mcp_vector_search.analysis.metrics import ProjectMetrics
+     >>>
+     >>> # Initialize store (uses default ~/.mcp-vector-search/metrics.db)
+     >>> store = MetricsStore()
+     >>>
+     >>> # Save complete snapshot
+     >>> metrics = ProjectMetrics(project_root="/path/to/project")
+     >>> # ... populate metrics ...
+     >>> snapshot_id = store.save_complete_snapshot(metrics)
+     >>>
+     >>> # Query history
+     >>> history = store.get_project_history("/path/to/project", limit=10)
+     >>> for snapshot in history:
+     ...     print(f"{snapshot.timestamp}: {snapshot.avg_complexity:.2f}")
+     >>>
+     >>> # Analyze trends
+     >>> trends = store.get_trends("/path/to/project", days=30)
+     >>> if trends.improving:
+     ...     print(f"Complexity improving at {abs(trends.change_rate):.4f}/day")
+     ... else:
+     ...     print(f"Complexity degrading at {trends.change_rate:.4f}/day")
+     >>>
+     >>> store.close()
+
+ Context Manager Usage:
+     >>> with MetricsStore() as store:
+     ...     snapshot_id = store.save_complete_snapshot(metrics)
+     ...     # Connection automatically closed
+
+ See Also:
+     - schema.py: Database schema definitions
+     - metrics_store.py: MetricsStore implementation
+ """
+
+ from .metrics_store import (
+     DatabaseLockedError,
+     DuplicateEntryError,
+     GitInfo,
+     MetricsStore,
+     MetricsStoreError,
+     ProjectSnapshot,
+     TrendData,
+ )
+ from .schema import SCHEMA_VERSION
+ from .trend_tracker import (
+     FileRegression,
+     TrendDirection,
+     TrendTracker,
+ )
+ from .trend_tracker import (
+     TrendData as EnhancedTrendData,
+ )
+
+ __all__ = [
+     # Main storage class
+     "MetricsStore",
+     # Trend tracking
+     "TrendTracker",
+     "TrendDirection",
+     "FileRegression",
+     # Data classes
+     "ProjectSnapshot",
+     "TrendData",
+     "EnhancedTrendData",
+     "GitInfo",
+     # Exceptions
+     "MetricsStoreError",
+     "DatabaseLockedError",
+     "DuplicateEntryError",
+     # Schema version
+     "SCHEMA_VERSION",
+ ]
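
A short sketch of how this re-exported API might be driven, based only on the names and docstring in the hunk above. Note that trend_tracker.TrendData is re-exported under the alias EnhancedTrendData so it does not shadow metrics_store.TrendData; the get_trends() call and the improving/change_rate attributes are taken from the module docstring and are not verified against metrics_store.py itself.

    from mcp_vector_search.analysis.storage import MetricsStore

    # Context-manager form from the docstring: the connection is closed on exit.
    with MetricsStore() as store:
        trends = store.get_trends("/path/to/project", days=30)
        direction = "improving" if trends.improving else "degrading"
        print(f"Complexity {direction} at {abs(trends.change_rate):.4f}/day")

Re-importing trend_tracker a second time under an alias is the conventional way to expose two same-named classes from one package; callers that need the richer trend object import EnhancedTrendData explicitly.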