mcp-vector-search 0.12.6 → 1.0.3 (py3-none-any.whl)
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- mcp_vector_search/__init__.py +2 -2
- mcp_vector_search/analysis/__init__.py +64 -0
- mcp_vector_search/analysis/collectors/__init__.py +39 -0
- mcp_vector_search/analysis/collectors/base.py +164 -0
- mcp_vector_search/analysis/collectors/complexity.py +743 -0
- mcp_vector_search/analysis/metrics.py +341 -0
- mcp_vector_search/analysis/reporters/__init__.py +5 -0
- mcp_vector_search/analysis/reporters/console.py +222 -0
- mcp_vector_search/cli/commands/analyze.py +408 -0
- mcp_vector_search/cli/commands/chat.py +1262 -0
- mcp_vector_search/cli/commands/index.py +21 -3
- mcp_vector_search/cli/commands/init.py +13 -0
- mcp_vector_search/cli/commands/install.py +597 -335
- mcp_vector_search/cli/commands/install_old.py +8 -4
- mcp_vector_search/cli/commands/mcp.py +78 -6
- mcp_vector_search/cli/commands/reset.py +68 -26
- mcp_vector_search/cli/commands/search.py +30 -7
- mcp_vector_search/cli/commands/setup.py +1133 -0
- mcp_vector_search/cli/commands/status.py +37 -2
- mcp_vector_search/cli/commands/uninstall.py +276 -357
- mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
- mcp_vector_search/cli/commands/visualize/cli.py +276 -0
- mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
- mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
- mcp_vector_search/cli/commands/visualize/graph_builder.py +714 -0
- mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
- mcp_vector_search/cli/commands/visualize/server.py +311 -0
- mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
- mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
- mcp_vector_search/cli/commands/visualize/templates/base.py +180 -0
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +2507 -0
- mcp_vector_search/cli/commands/visualize/templates/styles.py +1313 -0
- mcp_vector_search/cli/commands/visualize.py.original +2536 -0
- mcp_vector_search/cli/didyoumean.py +22 -2
- mcp_vector_search/cli/main.py +115 -159
- mcp_vector_search/cli/output.py +24 -8
- mcp_vector_search/config/__init__.py +4 -0
- mcp_vector_search/config/default_thresholds.yaml +52 -0
- mcp_vector_search/config/settings.py +12 -0
- mcp_vector_search/config/thresholds.py +185 -0
- mcp_vector_search/core/auto_indexer.py +3 -3
- mcp_vector_search/core/boilerplate.py +186 -0
- mcp_vector_search/core/config_utils.py +394 -0
- mcp_vector_search/core/database.py +369 -94
- mcp_vector_search/core/exceptions.py +11 -0
- mcp_vector_search/core/git_hooks.py +4 -4
- mcp_vector_search/core/indexer.py +221 -4
- mcp_vector_search/core/llm_client.py +751 -0
- mcp_vector_search/core/models.py +3 -0
- mcp_vector_search/core/project.py +17 -0
- mcp_vector_search/core/scheduler.py +11 -11
- mcp_vector_search/core/search.py +179 -29
- mcp_vector_search/mcp/server.py +24 -5
- mcp_vector_search/utils/__init__.py +2 -0
- mcp_vector_search/utils/gitignore_updater.py +212 -0
- mcp_vector_search/utils/monorepo.py +66 -4
- mcp_vector_search/utils/timing.py +10 -6
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/METADATA +182 -52
- mcp_vector_search-1.0.3.dist-info/RECORD +97 -0
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/WHEEL +1 -1
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/entry_points.txt +1 -0
- mcp_vector_search/cli/commands/visualize.py +0 -1467
- mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
- {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/core/git_hooks.py

@@ -126,7 +126,7 @@ class GitHookManager:
         python_path = sys.executable
         project_root = str(self.project_root)
 
-        script = f
+        script = f"""#!/bin/bash
 # MCP Vector Search Hook - {hook_type}
 # Auto-generated - do not edit manually
 
@@ -158,7 +158,7 @@ $MCP_CMD auto-index check --auto-reindex --max-files 10 &> /dev/null || true
 
 # Exit successfully (don't block Git operations)
 exit 0
-
+"""
         return script
 
     def _integrate_with_existing_hook(self, hook_file: Path, our_script: str) -> bool:
@@ -275,7 +275,7 @@ class GitChangeDetector:
             Set of changed file paths
         """
         try:
-            result = subprocess.run(
+            result = subprocess.run(  # nosec B607
                 ["git", "diff", "--name-only", commit_hash, "HEAD"],
                 cwd=project_root,
                 capture_output=True,
@@ -306,7 +306,7 @@ class GitChangeDetector:
             Set of changed file paths
         """
        try:
-            result = subprocess.run(
+            result = subprocess.run(  # nosec B607
                 ["git", "diff", "--name-only", "HEAD~1", "HEAD"],
                 cwd=project_root,
                 capture_output=True,
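Note: the only change in the two `GitChangeDetector` hunks above is the `# nosec B607` marker. Bandit (the Python security linter) raises B607, "start_process_with_partial_path", whenever `subprocess` is invoked with a bare executable name like `"git"` that is resolved through `PATH`; the inline marker suppresses exactly that one check on that line. A minimal illustration, not taken from the package (the `git rev-parse` command is just an example):

    import subprocess

    # Bandit flags B607 here because "git" is a partial path resolved via
    # PATH. The trailing "# nosec B607" suppresses only that one finding.
    result = subprocess.run(  # nosec B607
        ["git", "rev-parse", "HEAD"],
        capture_output=True,
        text=True,
        check=False,
    )
    print(result.stdout.strip())

Because the call uses the list form rather than `shell=True`, it is not exposed to shell injection, which is the usual justification for suppressing B607 on lines like these.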
mcp_vector_search/core/indexer.py

@@ -5,11 +5,14 @@ import json
 import os
 from datetime import UTC, datetime
 from pathlib import Path
+from typing import Any
 
 from loguru import logger
 from packaging import version
 
 from .. import __version__
+from ..analysis.collectors.base import MetricCollector
+from ..analysis.metrics import ChunkMetrics
 from ..config.defaults import ALLOWED_DOTFILES, DEFAULT_IGNORE_PATTERNS
 from ..config.settings import ProjectConfig
 from ..parsers.registry import get_parser_registry
@@ -20,6 +23,19 @@ from .directory_index import DirectoryIndex
 from .exceptions import ParsingError
 from .models import CodeChunk, IndexStats
 
+# Extension to language mapping for metric collection
+EXTENSION_TO_LANGUAGE = {
+    ".py": "python",
+    ".js": "javascript",
+    ".ts": "typescript",
+    ".jsx": "javascript",
+    ".tsx": "typescript",
+    ".java": "java",
+    ".rs": "rust",
+    ".php": "php",
+    ".rb": "ruby",
+}
+
 
 class SemanticIndexer:
     """Semantic indexer for parsing and indexing code files."""
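Note: `EXTENSION_TO_LANGUAGE` is keyed on lowercased file suffixes, and the call sites later in this diff fall back to `"unknown"` for anything unmapped. A small usage sketch mirroring those call sites (the sample paths are invented):

    from pathlib import Path

    for name in ("src/indexer.py", "src/lib.rs", "ui/App.TSX", "README.md"):
        suffix = Path(name).suffix.lower()  # ".TSX" -> ".tsx"
        language = EXTENSION_TO_LANGUAGE.get(suffix, "unknown")
        print(f"{name}: {language}")  # README.md maps to "unknown"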
@@ -33,6 +49,7 @@ class SemanticIndexer:
         max_workers: int | None = None,
         batch_size: int = 10,
         debug: bool = False,
+        collectors: list[MetricCollector] | None = None,
     ) -> None:
         """Initialize semantic indexer.
 
@@ -44,6 +61,7 @@ class SemanticIndexer:
             max_workers: Maximum number of worker threads for parallel processing
             batch_size: Number of files to process in each batch
             debug: Enable debug output for hierarchy building
+            collectors: Metric collectors to run during indexing (defaults to all complexity collectors)
         """
         self.database = database
         self.project_root = project_root
@@ -63,6 +81,11 @@ class SemanticIndexer:
         self._ignore_patterns = set(DEFAULT_IGNORE_PATTERNS)
         self.debug = debug
 
+        # Initialize metric collectors
+        self.collectors = (
+            collectors if collectors is not None else self._default_collectors()
+        )
+
         # Safely get event loop for max_workers
         try:
             loop = asyncio.get_event_loop()
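Note: since `collectors=None` means "use all five default collectors", callers can pass an explicit list to narrow collection, or an empty list to disable it entirely (the indexing paths later in this diff guard on `if self.collectors:`). A hedged construction sketch; it assumes `database` and `project_root` are the keyword parameters suggested by the attribute assignments above, and the database setup itself is outside this diff:

    from pathlib import Path

    from mcp_vector_search.analysis.collectors.complexity import (
        CognitiveComplexityCollector,
        NestingDepthCollector,
    )
    from mcp_vector_search.core.indexer import SemanticIndexer

    def build_indexer(database, root: Path) -> SemanticIndexer:
        # Run only two of the five default collectors during indexing.
        # Passing collectors=[] instead would skip metric collection.
        return SemanticIndexer(
            database=database,
            project_root=root,
            collectors=[CognitiveComplexityCollector(), NestingDepthCollector()],
        )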
@@ -110,6 +133,144 @@ class SemanticIndexer:
         # Load existing directory index
         self.directory_index.load()
 
+    def _default_collectors(self) -> list[MetricCollector]:
+        """Return default set of metric collectors.
+
+        Returns:
+            List of all complexity collectors (cognitive, cyclomatic, nesting, parameters, methods)
+        """
+        from ..analysis.collectors.complexity import (
+            CognitiveComplexityCollector,
+            CyclomaticComplexityCollector,
+            MethodCountCollector,
+            NestingDepthCollector,
+            ParameterCountCollector,
+        )
+
+        return [
+            CognitiveComplexityCollector(),
+            CyclomaticComplexityCollector(),
+            NestingDepthCollector(),
+            ParameterCountCollector(),
+            MethodCountCollector(),
+        ]
+
+    def _collect_metrics(
+        self, chunk: CodeChunk, source_code: bytes, language: str
+    ) -> ChunkMetrics | None:
+        """Collect metrics for a code chunk.
+
+        This is a simplified version that estimates metrics from chunk content
+        without full TreeSitter traversal. Future implementation will use
+        TreeSitter node traversal for accurate metric collection.
+
+        Args:
+            chunk: The parsed code chunk
+            source_code: Raw source code bytes
+            language: Programming language identifier
+
+        Returns:
+            ChunkMetrics for the chunk, or None if no metrics collected
+        """
+        # For now, create basic metrics from chunk content
+        # TODO: Implement full TreeSitter traversal in Phase 2
+        lines_of_code = chunk.line_count
+
+        # Estimate complexity from simple heuristics
+        content = chunk.content
+        cognitive_complexity = self._estimate_cognitive_complexity(content)
+        cyclomatic_complexity = self._estimate_cyclomatic_complexity(content)
+        max_nesting_depth = self._estimate_nesting_depth(content)
+        parameter_count = len(chunk.parameters) if chunk.parameters else 0
+
+        metrics = ChunkMetrics(
+            cognitive_complexity=cognitive_complexity,
+            cyclomatic_complexity=cyclomatic_complexity,
+            max_nesting_depth=max_nesting_depth,
+            parameter_count=parameter_count,
+            lines_of_code=lines_of_code,
+        )
+
+        return metrics
+
+    def _estimate_cognitive_complexity(self, content: str) -> int:
+        """Estimate cognitive complexity from content (simplified heuristic).
+
+        Args:
+            content: Code content
+
+        Returns:
+            Estimated cognitive complexity score
+        """
+        # Simple heuristic: count control flow keywords
+        keywords = [
+            "if",
+            "elif",
+            "else",
+            "for",
+            "while",
+            "try",
+            "except",
+            "case",
+            "when",
+        ]
+        complexity = 0
+        for keyword in keywords:
+            complexity += content.count(f" {keyword} ")
+            complexity += content.count(f"\t{keyword} ")
+            complexity += content.count(f"\n{keyword} ")
+        return complexity
+
+    def _estimate_cyclomatic_complexity(self, content: str) -> int:
+        """Estimate cyclomatic complexity from content (simplified heuristic).
+
+        Args:
+            content: Code content
+
+        Returns:
+            Estimated cyclomatic complexity score (minimum 1)
+        """
+        # Start with baseline of 1
+        complexity = 1
+
+        # Count decision points
+        keywords = [
+            "if",
+            "elif",
+            "for",
+            "while",
+            "case",
+            "when",
+            "&&",
+            "||",
+            "and",
+            "or",
+        ]
+        for keyword in keywords:
+            complexity += content.count(keyword)
+
+        return complexity
+
+    def _estimate_nesting_depth(self, content: str) -> int:
+        """Estimate maximum nesting depth from indentation (simplified heuristic).
+
+        Args:
+            content: Code content
+
+        Returns:
+            Estimated maximum nesting depth
+        """
+        max_depth = 0
+        for line in content.split("\n"):
+            # Count leading whitespace (4 spaces or 1 tab = 1 level)
+            leading = len(line) - len(line.lstrip())
+            if "\t" in line[:leading]:
+                depth = line[:leading].count("\t")
+            else:
+                depth = leading // 4
+            max_depth = max(max_depth, depth)
+        return max_depth
+
     async def index_project(
         self,
         force_reindex: bool = False,
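Note: all three estimators operate on raw text, and `_estimate_cyclomatic_complexity` in particular uses bare `content.count(keyword)`, which matches substrings inside identifiers ("if" inside `verify`, "or" inside `for`). A standalone copy of that heuristic makes the over-counting concrete (the sample function is invented):

    KEYWORDS = ["if", "elif", "for", "while", "case", "when", "&&", "||", "and", "or"]

    def estimate_cyclomatic(content: str) -> int:
        complexity = 1  # baseline execution path
        for keyword in KEYWORDS:
            complexity += content.count(keyword)  # substring match, not token match
        return complexity

    sample = """
    def verify(items):
        for item in items:
            if item.ok and item.fresh:
                yield item
    """

    # 1 (baseline) + 2 ("if", one hidden in `verify`) + 1 ("for")
    # + 1 ("and") + 1 ("or" hidden in `for`) = 6, although the snippet
    # has only three real decision points (for, if, and).
    print(estimate_cyclomatic(sample))

The docstrings above acknowledge this: these are stopgap heuristics, with token-accurate TreeSitter traversal deferred to a later phase.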
@@ -379,8 +540,34 @@ class SemanticIndexer:
                 f"After hierarchy build: {methods_with_parents}/{len([c for c in chunks_with_hierarchy if c.chunk_type in ('method', 'function')])} methods have parents"
             )
 
-        #
-
+        # Collect metrics for chunks (if collectors are enabled)
+        chunk_metrics: dict[str, Any] | None = None
+        if self.collectors:
+            try:
+                # Read source code
+                source_code = file_path.read_bytes()
+
+                # Detect language from file extension
+                language = EXTENSION_TO_LANGUAGE.get(
+                    file_path.suffix.lower(), "unknown"
+                )
+
+                # Collect metrics for each chunk
+                chunk_metrics = {}
+                for chunk in chunks_with_hierarchy:
+                    metrics = self._collect_metrics(chunk, source_code, language)
+                    if metrics:
+                        chunk_metrics[chunk.chunk_id] = metrics.to_metadata()
+
+                logger.debug(
+                    f"Collected metrics for {len(chunk_metrics)} chunks from {file_path}"
+                )
+            except Exception as e:
+                logger.warning(f"Failed to collect metrics for {file_path}: {e}")
+                chunk_metrics = None
+
+        # Add chunks to database with metrics
+        await self.database.add_chunks(chunks_with_hierarchy, metrics=chunk_metrics)
 
         # Update metadata after successful indexing
         metadata = self._load_index_metadata()
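Note: what reaches the database is a plain dict keyed by chunk ID, with values produced by `ChunkMetrics.to_metadata()`. That method lives in `analysis/metrics.py` and is not shown in this diff, so the exact shape below is an assumption; the field names are taken from the `ChunkMetrics(...)` construction above, and the chunk ID and values are invented:

    # Hypothetical shape of the `metrics` argument to `database.add_chunks`.
    chunk_metrics = {
        "src/example.py::Example.process": {
            "cognitive_complexity": 7,
            "cyclomatic_complexity": 9,
            "max_nesting_depth": 3,
            "parameter_count": 2,
            "lines_of_code": 48,
        },
    }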
@@ -839,8 +1026,38 @@ class SemanticIndexer:
             # Build hierarchical relationships
             chunks_with_hierarchy = self._build_chunk_hierarchy(chunks)
 
-            #
-
+            # Collect metrics for chunks (if collectors are enabled)
+            chunk_metrics: dict[str, Any] | None = None
+            if self.collectors:
+                try:
+                    # Read source code
+                    source_code = file_path.read_bytes()
+
+                    # Detect language from file extension
+                    language = EXTENSION_TO_LANGUAGE.get(
+                        file_path.suffix.lower(), "unknown"
+                    )
+
+                    # Collect metrics for each chunk
+                    chunk_metrics = {}
+                    for chunk in chunks_with_hierarchy:
+                        metrics = self._collect_metrics(
+                            chunk, source_code, language
+                        )
+                        if metrics:
+                            chunk_metrics[chunk.chunk_id] = (
+                                metrics.to_metadata()
+                            )
+                except Exception as e:
+                    logger.warning(
+                        f"Failed to collect metrics for {file_path}: {e}"
+                    )
+                    chunk_metrics = None
+
+            # Add chunks to database with metrics
+            await self.database.add_chunks(
+                chunks_with_hierarchy, metrics=chunk_metrics
+            )
             chunks_added = len(chunks)
             logger.debug(f"Indexed {chunks_added} chunks from {file_path}")
 