mcp-vector-search 0.12.6__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. mcp_vector_search/__init__.py +2 -2
  2. mcp_vector_search/analysis/__init__.py +64 -0
  3. mcp_vector_search/analysis/collectors/__init__.py +39 -0
  4. mcp_vector_search/analysis/collectors/base.py +164 -0
  5. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  6. mcp_vector_search/analysis/metrics.py +341 -0
  7. mcp_vector_search/analysis/reporters/__init__.py +5 -0
  8. mcp_vector_search/analysis/reporters/console.py +222 -0
  9. mcp_vector_search/cli/commands/analyze.py +408 -0
  10. mcp_vector_search/cli/commands/chat.py +1262 -0
  11. mcp_vector_search/cli/commands/index.py +21 -3
  12. mcp_vector_search/cli/commands/init.py +13 -0
  13. mcp_vector_search/cli/commands/install.py +597 -335
  14. mcp_vector_search/cli/commands/install_old.py +8 -4
  15. mcp_vector_search/cli/commands/mcp.py +78 -6
  16. mcp_vector_search/cli/commands/reset.py +68 -26
  17. mcp_vector_search/cli/commands/search.py +30 -7
  18. mcp_vector_search/cli/commands/setup.py +1133 -0
  19. mcp_vector_search/cli/commands/status.py +37 -2
  20. mcp_vector_search/cli/commands/uninstall.py +276 -357
  21. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  22. mcp_vector_search/cli/commands/visualize/cli.py +276 -0
  23. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  24. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  25. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  26. mcp_vector_search/cli/commands/visualize/graph_builder.py +714 -0
  27. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  28. mcp_vector_search/cli/commands/visualize/server.py +311 -0
  29. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  30. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  31. mcp_vector_search/cli/commands/visualize/templates/base.py +180 -0
  32. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2507 -0
  33. mcp_vector_search/cli/commands/visualize/templates/styles.py +1313 -0
  34. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  35. mcp_vector_search/cli/didyoumean.py +22 -2
  36. mcp_vector_search/cli/main.py +115 -159
  37. mcp_vector_search/cli/output.py +24 -8
  38. mcp_vector_search/config/__init__.py +4 -0
  39. mcp_vector_search/config/default_thresholds.yaml +52 -0
  40. mcp_vector_search/config/settings.py +12 -0
  41. mcp_vector_search/config/thresholds.py +185 -0
  42. mcp_vector_search/core/auto_indexer.py +3 -3
  43. mcp_vector_search/core/boilerplate.py +186 -0
  44. mcp_vector_search/core/config_utils.py +394 -0
  45. mcp_vector_search/core/database.py +369 -94
  46. mcp_vector_search/core/exceptions.py +11 -0
  47. mcp_vector_search/core/git_hooks.py +4 -4
  48. mcp_vector_search/core/indexer.py +221 -4
  49. mcp_vector_search/core/llm_client.py +751 -0
  50. mcp_vector_search/core/models.py +3 -0
  51. mcp_vector_search/core/project.py +17 -0
  52. mcp_vector_search/core/scheduler.py +11 -11
  53. mcp_vector_search/core/search.py +179 -29
  54. mcp_vector_search/mcp/server.py +24 -5
  55. mcp_vector_search/utils/__init__.py +2 -0
  56. mcp_vector_search/utils/gitignore_updater.py +212 -0
  57. mcp_vector_search/utils/monorepo.py +66 -4
  58. mcp_vector_search/utils/timing.py +10 -6
  59. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/METADATA +182 -52
  60. mcp_vector_search-1.0.3.dist-info/RECORD +97 -0
  61. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/WHEEL +1 -1
  62. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/entry_points.txt +1 -0
  63. mcp_vector_search/cli/commands/visualize.py +0 -1467
  64. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  65. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/licenses/LICENSE +0 -0
@@ -126,7 +126,7 @@ class GitHookManager:
126
126
  python_path = sys.executable
127
127
  project_root = str(self.project_root)
128
128
 
129
- script = f'''#!/bin/bash
129
+ script = f"""#!/bin/bash
130
130
  # MCP Vector Search Hook - {hook_type}
131
131
  # Auto-generated - do not edit manually
132
132
 
@@ -158,7 +158,7 @@ $MCP_CMD auto-index check --auto-reindex --max-files 10 &> /dev/null || true
158
158
 
159
159
  # Exit successfully (don't block Git operations)
160
160
  exit 0
161
- '''
161
+ """
162
162
  return script
163
163
 
164
164
  def _integrate_with_existing_hook(self, hook_file: Path, our_script: str) -> bool:
@@ -275,7 +275,7 @@ class GitChangeDetector:
275
275
  Set of changed file paths
276
276
  """
277
277
  try:
278
- result = subprocess.run(
278
+ result = subprocess.run( # nosec B607
279
279
  ["git", "diff", "--name-only", commit_hash, "HEAD"],
280
280
  cwd=project_root,
281
281
  capture_output=True,
@@ -306,7 +306,7 @@ class GitChangeDetector:
306
306
  Set of changed file paths
307
307
  """
308
308
  try:
309
- result = subprocess.run(
309
+ result = subprocess.run( # nosec B607
310
310
  ["git", "diff", "--name-only", "HEAD~1", "HEAD"],
311
311
  cwd=project_root,
312
312
  capture_output=True,
@@ -5,11 +5,14 @@ import json
5
5
  import os
6
6
  from datetime import UTC, datetime
7
7
  from pathlib import Path
8
+ from typing import Any
8
9
 
9
10
  from loguru import logger
10
11
  from packaging import version
11
12
 
12
13
  from .. import __version__
14
+ from ..analysis.collectors.base import MetricCollector
15
+ from ..analysis.metrics import ChunkMetrics
13
16
  from ..config.defaults import ALLOWED_DOTFILES, DEFAULT_IGNORE_PATTERNS
14
17
  from ..config.settings import ProjectConfig
15
18
  from ..parsers.registry import get_parser_registry
@@ -20,6 +23,19 @@ from .directory_index import DirectoryIndex
20
23
  from .exceptions import ParsingError
21
24
  from .models import CodeChunk, IndexStats
22
25
 
26
# Extension to language mapping for metric collection.
# Looked up by file suffix (lowercased) when tagging chunks for metric
# collectors; suffixes not listed here fall back to "unknown" at the
# lookup site.
EXTENSION_TO_LANGUAGE = {
    ".py": "python",
    ".js": "javascript",
    ".ts": "typescript",
    ".jsx": "javascript",
    ".tsx": "typescript",
    ".java": "java",
    ".rs": "rust",
    ".php": "php",
    ".rb": "ruby",
}
38
+
23
39
 
24
40
  class SemanticIndexer:
25
41
  """Semantic indexer for parsing and indexing code files."""
@@ -33,6 +49,7 @@ class SemanticIndexer:
33
49
  max_workers: int | None = None,
34
50
  batch_size: int = 10,
35
51
  debug: bool = False,
52
+ collectors: list[MetricCollector] | None = None,
36
53
  ) -> None:
37
54
  """Initialize semantic indexer.
38
55
 
@@ -44,6 +61,7 @@ class SemanticIndexer:
44
61
  max_workers: Maximum number of worker threads for parallel processing
45
62
  batch_size: Number of files to process in each batch
46
63
  debug: Enable debug output for hierarchy building
64
+ collectors: Metric collectors to run during indexing (defaults to all complexity collectors)
47
65
  """
48
66
  self.database = database
49
67
  self.project_root = project_root
@@ -63,6 +81,11 @@ class SemanticIndexer:
63
81
  self._ignore_patterns = set(DEFAULT_IGNORE_PATTERNS)
64
82
  self.debug = debug
65
83
 
84
+ # Initialize metric collectors
85
+ self.collectors = (
86
+ collectors if collectors is not None else self._default_collectors()
87
+ )
88
+
66
89
  # Safely get event loop for max_workers
67
90
  try:
68
91
  loop = asyncio.get_event_loop()
@@ -110,6 +133,144 @@ class SemanticIndexer:
110
133
  # Load existing directory index
111
134
  self.directory_index.load()
112
135
 
136
def _default_collectors(self) -> list[MetricCollector]:
    """Return default set of metric collectors.

    Returns:
        List of all complexity collectors (cognitive, cyclomatic, nesting, parameters, methods)
    """
    # Imported lazily so the analysis package is only loaded when metric
    # collection is actually enabled.
    from ..analysis.collectors.complexity import (
        CognitiveComplexityCollector,
        CyclomaticComplexityCollector,
        MethodCountCollector,
        NestingDepthCollector,
        ParameterCountCollector,
    )

    default_collector_types = (
        CognitiveComplexityCollector,
        CyclomaticComplexityCollector,
        NestingDepthCollector,
        ParameterCountCollector,
        MethodCountCollector,
    )
    return [collector_cls() for collector_cls in default_collector_types]
157
+
158
def _collect_metrics(
    self, chunk: CodeChunk, source_code: bytes, language: str
) -> ChunkMetrics | None:
    """Collect metrics for a code chunk.

    This is a simplified version that estimates metrics from chunk content
    without full TreeSitter traversal. Future implementation will use
    TreeSitter node traversal for accurate metric collection.

    Args:
        chunk: The parsed code chunk
        source_code: Raw source code bytes (not read by the current
            heuristics; kept for the future TreeSitter implementation)
        language: Programming language identifier (currently unused)

    Returns:
        ChunkMetrics for the chunk, or None if no metrics collected
    """
    # TODO: Implement full TreeSitter traversal in Phase 2.
    # Until then, every metric is estimated from the chunk's raw text.
    body = chunk.content
    param_count = len(chunk.parameters) if chunk.parameters else 0

    return ChunkMetrics(
        cognitive_complexity=self._estimate_cognitive_complexity(body),
        cyclomatic_complexity=self._estimate_cyclomatic_complexity(body),
        max_nesting_depth=self._estimate_nesting_depth(body),
        parameter_count=param_count,
        lines_of_code=chunk.line_count,
    )
195
+
196
+ def _estimate_cognitive_complexity(self, content: str) -> int:
197
+ """Estimate cognitive complexity from content (simplified heuristic).
198
+
199
+ Args:
200
+ content: Code content
201
+
202
+ Returns:
203
+ Estimated cognitive complexity score
204
+ """
205
+ # Simple heuristic: count control flow keywords
206
+ keywords = [
207
+ "if",
208
+ "elif",
209
+ "else",
210
+ "for",
211
+ "while",
212
+ "try",
213
+ "except",
214
+ "case",
215
+ "when",
216
+ ]
217
+ complexity = 0
218
+ for keyword in keywords:
219
+ complexity += content.count(f" {keyword} ")
220
+ complexity += content.count(f"\t{keyword} ")
221
+ complexity += content.count(f"\n{keyword} ")
222
+ return complexity
223
+
224
+ def _estimate_cyclomatic_complexity(self, content: str) -> int:
225
+ """Estimate cyclomatic complexity from content (simplified heuristic).
226
+
227
+ Args:
228
+ content: Code content
229
+
230
+ Returns:
231
+ Estimated cyclomatic complexity score (minimum 1)
232
+ """
233
+ # Start with baseline of 1
234
+ complexity = 1
235
+
236
+ # Count decision points
237
+ keywords = [
238
+ "if",
239
+ "elif",
240
+ "for",
241
+ "while",
242
+ "case",
243
+ "when",
244
+ "&&",
245
+ "||",
246
+ "and",
247
+ "or",
248
+ ]
249
+ for keyword in keywords:
250
+ complexity += content.count(keyword)
251
+
252
+ return complexity
253
+
254
+ def _estimate_nesting_depth(self, content: str) -> int:
255
+ """Estimate maximum nesting depth from indentation (simplified heuristic).
256
+
257
+ Args:
258
+ content: Code content
259
+
260
+ Returns:
261
+ Estimated maximum nesting depth
262
+ """
263
+ max_depth = 0
264
+ for line in content.split("\n"):
265
+ # Count leading whitespace (4 spaces or 1 tab = 1 level)
266
+ leading = len(line) - len(line.lstrip())
267
+ if "\t" in line[:leading]:
268
+ depth = line[:leading].count("\t")
269
+ else:
270
+ depth = leading // 4
271
+ max_depth = max(max_depth, depth)
272
+ return max_depth
273
+
113
274
  async def index_project(
114
275
  self,
115
276
  force_reindex: bool = False,
@@ -379,8 +540,34 @@ class SemanticIndexer:
379
540
  f"After hierarchy build: {methods_with_parents}/{len([c for c in chunks_with_hierarchy if c.chunk_type in ('method', 'function')])} methods have parents"
380
541
  )
381
542
 
382
- # Add chunks to database
383
- await self.database.add_chunks(chunks_with_hierarchy)
543
+ # Collect metrics for chunks (if collectors are enabled)
544
+ chunk_metrics: dict[str, Any] | None = None
545
+ if self.collectors:
546
+ try:
547
+ # Read source code
548
+ source_code = file_path.read_bytes()
549
+
550
+ # Detect language from file extension
551
+ language = EXTENSION_TO_LANGUAGE.get(
552
+ file_path.suffix.lower(), "unknown"
553
+ )
554
+
555
+ # Collect metrics for each chunk
556
+ chunk_metrics = {}
557
+ for chunk in chunks_with_hierarchy:
558
+ metrics = self._collect_metrics(chunk, source_code, language)
559
+ if metrics:
560
+ chunk_metrics[chunk.chunk_id] = metrics.to_metadata()
561
+
562
+ logger.debug(
563
+ f"Collected metrics for {len(chunk_metrics)} chunks from {file_path}"
564
+ )
565
+ except Exception as e:
566
+ logger.warning(f"Failed to collect metrics for {file_path}: {e}")
567
+ chunk_metrics = None
568
+
569
+ # Add chunks to database with metrics
570
+ await self.database.add_chunks(chunks_with_hierarchy, metrics=chunk_metrics)
384
571
 
385
572
  # Update metadata after successful indexing
386
573
  metadata = self._load_index_metadata()
@@ -839,8 +1026,38 @@ class SemanticIndexer:
839
1026
  # Build hierarchical relationships
840
1027
  chunks_with_hierarchy = self._build_chunk_hierarchy(chunks)
841
1028
 
842
- # Add chunks to database
843
- await self.database.add_chunks(chunks_with_hierarchy)
1029
+ # Collect metrics for chunks (if collectors are enabled)
1030
+ chunk_metrics: dict[str, Any] | None = None
1031
+ if self.collectors:
1032
+ try:
1033
+ # Read source code
1034
+ source_code = file_path.read_bytes()
1035
+
1036
+ # Detect language from file extension
1037
+ language = EXTENSION_TO_LANGUAGE.get(
1038
+ file_path.suffix.lower(), "unknown"
1039
+ )
1040
+
1041
+ # Collect metrics for each chunk
1042
+ chunk_metrics = {}
1043
+ for chunk in chunks_with_hierarchy:
1044
+ metrics = self._collect_metrics(
1045
+ chunk, source_code, language
1046
+ )
1047
+ if metrics:
1048
+ chunk_metrics[chunk.chunk_id] = (
1049
+ metrics.to_metadata()
1050
+ )
1051
+ except Exception as e:
1052
+ logger.warning(
1053
+ f"Failed to collect metrics for {file_path}: {e}"
1054
+ )
1055
+ chunk_metrics = None
1056
+
1057
+ # Add chunks to database with metrics
1058
+ await self.database.add_chunks(
1059
+ chunks_with_hierarchy, metrics=chunk_metrics
1060
+ )
844
1061
  chunks_added = len(chunks)
845
1062
  logger.debug(f"Indexed {chunks_added} chunks from {file_path}")
846
1063