mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (92)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +111 -0
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +74 -0
  7. mcp_vector_search/analysis/collectors/base.py +164 -0
  8. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  9. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  10. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  11. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  12. mcp_vector_search/analysis/collectors/smells.py +325 -0
  13. mcp_vector_search/analysis/debt.py +516 -0
  14. mcp_vector_search/analysis/interpretation.py +685 -0
  15. mcp_vector_search/analysis/metrics.py +414 -0
  16. mcp_vector_search/analysis/reporters/__init__.py +7 -0
  17. mcp_vector_search/analysis/reporters/console.py +646 -0
  18. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  19. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  20. mcp_vector_search/analysis/storage/__init__.py +93 -0
  21. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  22. mcp_vector_search/analysis/storage/schema.py +245 -0
  23. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  24. mcp_vector_search/analysis/trends.py +308 -0
  25. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  26. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  27. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  28. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  29. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  30. mcp_vector_search/cli/commands/analyze.py +1062 -0
  31. mcp_vector_search/cli/commands/chat.py +1455 -0
  32. mcp_vector_search/cli/commands/index.py +621 -5
  33. mcp_vector_search/cli/commands/index_background.py +467 -0
  34. mcp_vector_search/cli/commands/init.py +13 -0
  35. mcp_vector_search/cli/commands/install.py +597 -335
  36. mcp_vector_search/cli/commands/install_old.py +8 -4
  37. mcp_vector_search/cli/commands/mcp.py +78 -6
  38. mcp_vector_search/cli/commands/reset.py +68 -26
  39. mcp_vector_search/cli/commands/search.py +224 -8
  40. mcp_vector_search/cli/commands/setup.py +1184 -0
  41. mcp_vector_search/cli/commands/status.py +339 -5
  42. mcp_vector_search/cli/commands/uninstall.py +276 -357
  43. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  44. mcp_vector_search/cli/commands/visualize/cli.py +292 -0
  45. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  46. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  47. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
  48. mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
  49. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  50. mcp_vector_search/cli/commands/visualize/server.py +600 -0
  51. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  52. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  53. mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
  54. mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
  55. mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
  56. mcp_vector_search/cli/didyoumean.py +27 -2
  57. mcp_vector_search/cli/main.py +127 -160
  58. mcp_vector_search/cli/output.py +158 -13
  59. mcp_vector_search/config/__init__.py +4 -0
  60. mcp_vector_search/config/default_thresholds.yaml +52 -0
  61. mcp_vector_search/config/settings.py +12 -0
  62. mcp_vector_search/config/thresholds.py +273 -0
  63. mcp_vector_search/core/__init__.py +16 -0
  64. mcp_vector_search/core/auto_indexer.py +3 -3
  65. mcp_vector_search/core/boilerplate.py +186 -0
  66. mcp_vector_search/core/config_utils.py +394 -0
  67. mcp_vector_search/core/database.py +406 -94
  68. mcp_vector_search/core/embeddings.py +24 -0
  69. mcp_vector_search/core/exceptions.py +11 -0
  70. mcp_vector_search/core/git.py +380 -0
  71. mcp_vector_search/core/git_hooks.py +4 -4
  72. mcp_vector_search/core/indexer.py +632 -54
  73. mcp_vector_search/core/llm_client.py +756 -0
  74. mcp_vector_search/core/models.py +91 -1
  75. mcp_vector_search/core/project.py +17 -0
  76. mcp_vector_search/core/relationships.py +473 -0
  77. mcp_vector_search/core/scheduler.py +11 -11
  78. mcp_vector_search/core/search.py +179 -29
  79. mcp_vector_search/mcp/server.py +819 -9
  80. mcp_vector_search/parsers/python.py +285 -5
  81. mcp_vector_search/utils/__init__.py +2 -0
  82. mcp_vector_search/utils/gitignore.py +0 -3
  83. mcp_vector_search/utils/gitignore_updater.py +212 -0
  84. mcp_vector_search/utils/monorepo.py +66 -4
  85. mcp_vector_search/utils/timing.py +10 -6
  86. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
  87. mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
  88. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
  89. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
  90. mcp_vector_search/cli/commands/visualize.py +0 -1467
  91. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  92. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/analysis/metrics.py
@@ -0,0 +1,414 @@
+"""Metric dataclasses for structural code analysis."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from ..config.thresholds import ThresholdConfig
+
+
+@dataclass
+class ChunkMetrics:
+    """Metrics for a single code chunk (function/class/method).
+
+    Tracks complexity metrics, code smells, and computes quality grades
+    for individual code chunks.
+
+    Attributes:
+        cognitive_complexity: Cognitive complexity score (how hard to understand)
+        cyclomatic_complexity: Cyclomatic complexity (number of decision paths)
+        max_nesting_depth: Maximum nesting level (if/for/while/try depth)
+        parameter_count: Number of function parameters
+        lines_of_code: Total lines in the chunk
+        smells: List of detected code smells (e.g., "too_many_parameters")
+        complexity_grade: Computed A-F grade based on cognitive complexity
+    """
+
+    cognitive_complexity: int = 0
+    cyclomatic_complexity: int = 0
+    max_nesting_depth: int = 0
+    parameter_count: int = 0
+    lines_of_code: int = 0
+
+    # Halstead metrics (Phase 4)
+    halstead_volume: float | None = None
+    halstead_difficulty: float | None = None
+    halstead_effort: float | None = None
+    halstead_bugs: float | None = None
+
+    # Code smells detected
+    smells: list[str] = field(default_factory=list)
+
+    # Computed grades (A-F scale)
+    complexity_grade: str = field(init=False, default="A")
+
+    def __post_init__(self) -> None:
+        """Initialize computed fields after dataclass initialization."""
+        self.complexity_grade = self._compute_grade()
+
+    def _compute_grade(self, thresholds: ThresholdConfig | None = None) -> str:
+        """Compute A-F grade based on cognitive complexity.
+
+        Args:
+            thresholds: Optional custom threshold configuration.
+                If None, uses default thresholds.
+
+        Grade thresholds (defaults):
+        - A: 0-5 (excellent)
+        - B: 6-10 (good)
+        - C: 11-20 (acceptable)
+        - D: 21-30 (needs improvement)
+        - F: 31+ (refactor recommended)
+
+        Returns:
+            Letter grade from A to F
+        """
+        if thresholds is None:
+            # Use default thresholds
+            if self.cognitive_complexity <= 5:
+                return "A"
+            elif self.cognitive_complexity <= 10:
+                return "B"
+            elif self.cognitive_complexity <= 20:
+                return "C"
+            elif self.cognitive_complexity <= 30:
+                return "D"
+            else:
+                return "F"
+        else:
+            # Use custom thresholds
+            return thresholds.get_grade(self.cognitive_complexity)
+
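
The default grade bands are easy to sanity-check in isolation. A minimal sketch, assuming mcp-vector-search 1.1.22 is installed and the module is importable under the path shown in the file list:

```python
from mcp_vector_search.analysis.metrics import ChunkMetrics

# __post_init__ grades against the default bands, so no ThresholdConfig
# is needed for the common case; these pairs exercise each boundary.
for cc, expected in [(0, "A"), (5, "A"), (6, "B"), (20, "C"), (30, "D"), (31, "F")]:
    chunk = ChunkMetrics(cognitive_complexity=cc)
    assert chunk.complexity_grade == expected, (cc, chunk.complexity_grade)
print("boundary values grade as documented")
```

Note that `_compute_grade` accepts a `ThresholdConfig`, but `__post_init__` calls it with no argument, so custom thresholds from `config/thresholds.py` only take effect when the grade is recomputed explicitly.
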
+    def to_metadata(self) -> dict[str, Any]:
+        """Flatten metrics for ChromaDB metadata storage.
+
+        ChromaDB supports: str, int, float, bool.
+        Lists are converted to JSON strings for compatibility.
+
+        Returns:
+            Dictionary of flattened metrics compatible with ChromaDB
+        """
+        import json
+
+        metadata = {
+            "cognitive_complexity": self.cognitive_complexity,
+            "cyclomatic_complexity": self.cyclomatic_complexity,
+            "max_nesting_depth": self.max_nesting_depth,
+            "parameter_count": self.parameter_count,
+            "lines_of_code": self.lines_of_code,
+            "complexity_grade": self.complexity_grade,
+            "code_smells": json.dumps(self.smells),  # Convert list to JSON string
+            "smell_count": len(self.smells),
+        }
+
+        # Add Halstead metrics if available
+        if self.halstead_volume is not None:
+            metadata["halstead_volume"] = self.halstead_volume
+        if self.halstead_difficulty is not None:
+            metadata["halstead_difficulty"] = self.halstead_difficulty
+        if self.halstead_effort is not None:
+            metadata["halstead_effort"] = self.halstead_effort
+        if self.halstead_bugs is not None:
+            metadata["halstead_bugs"] = self.halstead_bugs
+
+        return metadata
+
+
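
Since ChromaDB metadata values must be scalars, the list-to-JSON round trip above is the detail worth seeing in action. A short sketch with made-up metric values:

```python
import json

from mcp_vector_search.analysis.metrics import ChunkMetrics

chunk = ChunkMetrics(
    cognitive_complexity=12,
    parameter_count=6,
    lines_of_code=45,
    smells=["too_many_parameters"],
)
meta = chunk.to_metadata()

print(meta["complexity_grade"])         # "C": 12 falls in the 11-20 band
print(meta["code_smells"])              # '["too_many_parameters"]', a JSON string
print(json.loads(meta["code_smells"]))  # ['too_many_parameters'] round-trips back
print("halstead_volume" in meta)        # False: unset Halstead metrics are omitted
```
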
+@dataclass
+class CouplingMetrics:
+    """Coupling metrics for a file.
+
+    Tracks dependencies between files to measure coupling.
+
+    Attributes:
+        efferent_coupling: Number of files this file depends on (outgoing dependencies)
+        afferent_coupling: Number of files that depend on this file (incoming dependencies)
+        imports: List of all imported modules
+        internal_imports: Imports from same project
+        external_imports: Third-party and standard library imports
+        dependents: List of files that import this file
+        instability: Ratio Ce / (Ce + Ca), measures resistance to change (0-1)
+    """
+
+    efferent_coupling: int = 0  # Ce - outgoing dependencies
+    afferent_coupling: int = 0  # Ca - incoming dependencies
+    imports: list[str] = field(default_factory=list)
+    internal_imports: list[str] = field(default_factory=list)
+    external_imports: list[str] = field(default_factory=list)
+    dependents: list[str] = field(default_factory=list)
+
+    @property
+    def instability(self) -> float:
+        """Calculate instability metric (0-1).
+
+        Instability = Ce / (Ce + Ca)
+
+        Interpretation:
+        - 0.0: Maximally stable (many incoming, few outgoing)
+        - 0.5: Balanced (equal incoming and outgoing)
+        - 1.0: Maximally unstable (many outgoing, few incoming)
+
+        Returns:
+            Instability ratio from 0.0 to 1.0
+        """
+        total = self.efferent_coupling + self.afferent_coupling
+        if total == 0:
+            return 0.0
+        return self.efferent_coupling / total
+
+
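
The property implements the classic instability ratio I = Ce / (Ce + Ca). A worked example, including the zero-dependency guard:

```python
from mcp_vector_search.analysis.metrics import CouplingMetrics

# Three outgoing dependencies (Ce) against one incoming (Ca):
coupling = CouplingMetrics(efferent_coupling=3, afferent_coupling=1)
print(coupling.instability)  # 3 / (3 + 1) = 0.75, leaning unstable

# No recorded dependencies short-circuits to 0.0 instead of dividing by zero:
print(CouplingMetrics().instability)  # 0.0
```
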
+@dataclass
+class FileMetrics:
+    """Aggregated metrics for an entire file.
+
+    Tracks file-level statistics and aggregates chunk metrics for all
+    functions/classes within the file.
+
+    Attributes:
+        file_path: Relative or absolute path to the file
+        total_lines: Total lines in file (including blank/comments)
+        code_lines: Lines containing code
+        comment_lines: Lines containing comments
+        blank_lines: Blank lines
+        function_count: Number of top-level functions
+        class_count: Number of classes
+        method_count: Number of methods (functions inside classes)
+        total_complexity: Sum of cognitive complexity across all chunks
+        avg_complexity: Average cognitive complexity per chunk
+        max_complexity: Maximum cognitive complexity in any chunk
+        chunks: List of chunk metrics for each function/class
+        coupling: Coupling metrics for this file
+    """
+
+    file_path: str
+    total_lines: int = 0
+    code_lines: int = 0
+    comment_lines: int = 0
+    blank_lines: int = 0
+
+    function_count: int = 0
+    class_count: int = 0
+    method_count: int = 0
+
+    # Aggregated complexity
+    total_complexity: int = 0
+    avg_complexity: float = 0.0
+    max_complexity: int = 0
+
+    # Coupling metrics (Phase 3)
+    efferent_coupling: int = 0  # Outgoing dependencies
+    imports: list[str] = field(default_factory=list)  # All imported modules
+    internal_imports: list[str] = field(default_factory=list)  # Same-project imports
+    external_imports: list[str] = field(
+        default_factory=list
+    )  # Third-party/stdlib imports
+
+    # Chunk metrics for each function/class
+    chunks: list[ChunkMetrics] = field(default_factory=list)
+
+    # Coupling metrics
+    coupling: CouplingMetrics = field(default_factory=CouplingMetrics)
+
+    def compute_aggregates(self) -> None:
+        """Compute aggregate metrics from chunk metrics.
+
+        Calculates total_complexity, avg_complexity, and max_complexity
+        by aggregating values from all chunks.
+        """
+        if not self.chunks:
+            self.total_complexity = 0
+            self.avg_complexity = 0.0
+            self.max_complexity = 0
+            return
+
+        # Compute complexity aggregates
+        complexities = [chunk.cognitive_complexity for chunk in self.chunks]
+        self.total_complexity = sum(complexities)
+        self.avg_complexity = self.total_complexity / len(self.chunks)
+        self.max_complexity = max(complexities)
+
+    @property
+    def health_score(self) -> float:
+        """Calculate 0.0-1.0 health score based on metrics.
+
+        Health score considers:
+        - Average complexity (lower is better)
+        - Code smells count (fewer is better)
+        - Comment ratio (balanced is better)
+
+        Returns:
+            Health score from 0.0 (poor) to 1.0 (excellent)
+        """
+        score = 1.0
+
+        # Penalty for high average complexity (A=0%, B=-10%, C=-20%, D=-30%, F=-50%)
+        if self.avg_complexity > 30:
+            score -= 0.5
+        elif self.avg_complexity > 20:
+            score -= 0.3
+        elif self.avg_complexity > 10:
+            score -= 0.2
+        elif self.avg_complexity > 5:
+            score -= 0.1
+
+        # Penalty for code smells (up to -30%)
+        total_smells = sum(len(chunk.smells) for chunk in self.chunks)
+        smell_penalty = min(0.3, total_smells * 0.05)  # 5% per smell, max 30%
+        score -= smell_penalty
+
+        # Penalty for poor comment ratio (ideal: 10-30%)
+        if self.total_lines > 0:
+            comment_ratio = self.comment_lines / self.total_lines
+            if comment_ratio < 0.1:  # Too few comments
+                score -= 0.1
+            elif comment_ratio > 0.5:  # Too many comments (suspicious)
+                score -= 0.1
+
+        return max(0.0, score)  # Clamp to 0.0 minimum
+
+
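
`compute_aggregates()` and `health_score` are meant to be used together, since the score reads `avg_complexity`. A worked sketch; the file path and smell labels are illustrative placeholders:

```python
from mcp_vector_search.analysis.metrics import ChunkMetrics, FileMetrics

fm = FileMetrics(
    file_path="src/example.py",  # placeholder path
    total_lines=100,
    comment_lines=20,  # 20% comment ratio sits inside the ideal 10-30% band
    chunks=[
        ChunkMetrics(cognitive_complexity=8),
        ChunkMetrics(
            cognitive_complexity=16,
            smells=["too_many_parameters", "deep_nesting"],  # illustrative labels
        ),
    ],
)
fm.compute_aggregates()
print(fm.total_complexity, fm.avg_complexity, fm.max_complexity)  # 24 12.0 16

# Penalties: avg_complexity 12 lands in the >10 band (-0.2), two smells
# cost 2 * 0.05 (-0.1), and the comment ratio costs nothing, so:
# 1.0 - 0.2 - 0.1 = 0.7 (up to float rounding).
print(round(fm.health_score, 2))  # 0.7
```
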
+@dataclass
+class ProjectMetrics:
+    """Project-wide metric aggregates.
+
+    Tracks project-level statistics and identifies complexity hotspots
+    across the entire codebase.
+
+    Attributes:
+        project_root: Root directory of the project
+        analyzed_at: Timestamp when analysis was performed
+        total_files: Total number of analyzed files
+        total_lines: Total lines across all files
+        total_functions: Total number of functions
+        total_classes: Total number of classes
+        files: Dictionary mapping file paths to FileMetrics
+        avg_file_complexity: Average complexity across all files
+        hotspots: List of file paths with highest complexity (top 10)
+    """
+
+    project_root: str
+    analyzed_at: datetime = field(default_factory=datetime.now)
+
+    total_files: int = 0
+    total_lines: int = 0
+    total_functions: int = 0
+    total_classes: int = 0
+
+    # File metrics indexed by path
+    files: dict[str, FileMetrics] = field(default_factory=dict)
+
+    # Project-wide aggregates
+    avg_file_complexity: float = 0.0
+    hotspots: list[str] = field(default_factory=list)  # Top 10 complex files
+
+    def compute_aggregates(self) -> None:
+        """Compute project-wide aggregates from file metrics.
+
+        Calculates:
+        - Total files, lines, functions, classes
+        - Average file complexity
+        - Identifies complexity hotspots
+        """
+        if not self.files:
+            self.total_files = 0
+            self.total_lines = 0
+            self.total_functions = 0
+            self.total_classes = 0
+            self.avg_file_complexity = 0.0
+            self.hotspots = []
+            return
+
+        # Compute totals
+        self.total_files = len(self.files)
+        self.total_lines = sum(f.total_lines for f in self.files.values())
+        self.total_functions = sum(f.function_count for f in self.files.values())
+        self.total_classes = sum(f.class_count for f in self.files.values())
+
+        # Compute average file complexity
+        file_complexities = [f.avg_complexity for f in self.files.values() if f.chunks]
+        if file_complexities:
+            self.avg_file_complexity = sum(file_complexities) / len(file_complexities)
+        else:
+            self.avg_file_complexity = 0.0
+
+        # Identify hotspots (top 10 most complex files)
+        hotspot_files = self.get_hotspots(limit=10)
+        self.hotspots = [f.file_path for f in hotspot_files]
+
+    def get_hotspots(self, limit: int = 10) -> list[FileMetrics]:
+        """Return top N most complex files.
+
+        Complexity is determined by average cognitive complexity per chunk.
+        Files with no chunks are excluded.
+
+        Args:
+            limit: Maximum number of hotspots to return
+
+        Returns:
+            List of FileMetrics sorted by complexity (highest first)
+        """
+        # Filter files with chunks and sort by avg complexity
+        files_with_complexity = [f for f in self.files.values() if f.chunks]
+        sorted_files = sorted(
+            files_with_complexity, key=lambda f: f.avg_complexity, reverse=True
+        )
+        return sorted_files[:limit]
+
+    def to_summary(self) -> dict[str, Any]:
+        """Generate summary dict for reporting.
+
+        Returns:
+            Dictionary containing project summary with key metrics
+        """
+        return {
+            "project_root": self.project_root,
+            "analyzed_at": self.analyzed_at.isoformat(),
+            "total_files": self.total_files,
+            "total_lines": self.total_lines,
+            "total_functions": self.total_functions,
+            "total_classes": self.total_classes,
+            "avg_file_complexity": round(self.avg_file_complexity, 2),
+            "hotspots": self.hotspots,
+            "complexity_distribution": self._compute_grade_distribution(),
+            "health_metrics": {
+                "avg_health_score": self._compute_avg_health_score(),
+                "files_needing_attention": self._count_files_needing_attention(),
+            },
+        }
+
+    def _compute_grade_distribution(self) -> dict[str, int]:
+        """Compute distribution of complexity grades across all chunks.
+
+        Returns:
+            Dictionary mapping grade (A-F) to count of chunks
+        """
+        distribution: dict[str, int] = {"A": 0, "B": 0, "C": 0, "D": 0, "F": 0}
+
+        for file_metrics in self.files.values():
+            for chunk in file_metrics.chunks:
+                distribution[chunk.complexity_grade] += 1
+
+        return distribution
+
+    def _compute_avg_health_score(self) -> float:
+        """Compute average health score across all files.
+
+        Returns:
+            Average health score from 0.0 to 1.0
+        """
+        if not self.files:
+            return 1.0
+
+        health_scores = [f.health_score for f in self.files.values()]
+        return sum(health_scores) / len(health_scores)
+
+    def _count_files_needing_attention(self) -> int:
+        """Count files with health score below 0.7.
+
+        Returns:
+            Number of files that need attention
+        """
+        return sum(1 for f in self.files.values() if f.health_score < 0.7)
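
At the project level the same pattern repeats one layer up: populate `files`, call `compute_aggregates()`, then read `hotspots` and `to_summary()`. A sketch with three synthetic files (paths and complexities invented for illustration):

```python
from mcp_vector_search.analysis.metrics import (
    ChunkMetrics,
    FileMetrics,
    ProjectMetrics,
)


def make_file(path: str, complexity: int) -> FileMetrics:
    """Build a one-chunk FileMetrics with its aggregates precomputed."""
    fm = FileMetrics(
        file_path=path,
        total_lines=50,
        comment_lines=10,
        chunks=[ChunkMetrics(cognitive_complexity=complexity)],
    )
    fm.compute_aggregates()
    return fm


project = ProjectMetrics(project_root="/tmp/demo")
for path, cc in [("a.py", 4), ("b.py", 25), ("c.py", 9)]:
    project.files[path] = make_file(path, cc)
project.compute_aggregates()

print(project.hotspots[0])  # b.py (highest average complexity)
summary = project.to_summary()
print(summary["avg_file_complexity"])      # 12.67
print(summary["complexity_distribution"])  # {'A': 1, 'B': 1, 'C': 0, 'D': 1, 'F': 0}
```
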
mcp_vector_search/analysis/reporters/__init__.py
@@ -0,0 +1,7 @@
+"""Analysis reporters for outputting metrics in various formats."""
+
+from .console import ConsoleReporter
+from .markdown import MarkdownReporter
+from .sarif import SARIFReporter
+
+__all__ = ["ConsoleReporter", "MarkdownReporter", "SARIFReporter"]
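
Only the public names are visible in this hunk; the reporter implementations live in console.py, markdown.py, and sarif.py, all new in this release per the file list. SARIF is the Static Analysis Results Interchange Format, so the third reporter suggests the analysis output can feed standard static-analysis tooling. The safe takeaway is just the import surface; no constructor or method signature is assumed here:

```python
# Public surface confirmed by __all__ above. How each class consumes a
# ProjectMetrics is defined in modules not shown in this diff, so nothing
# beyond the imports is assumed.
from mcp_vector_search.analysis.reporters import (
    ConsoleReporter,
    MarkdownReporter,
    SARIFReporter,
)

print([cls.__name__ for cls in (ConsoleReporter, MarkdownReporter, SARIFReporter)])
```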