mcp-vector-search 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mcp-vector-search might be problematic. Click here for more details.
- mcp_vector_search/__init__.py +9 -0
- mcp_vector_search/cli/__init__.py +1 -0
- mcp_vector_search/cli/commands/__init__.py +1 -0
- mcp_vector_search/cli/commands/config.py +303 -0
- mcp_vector_search/cli/commands/index.py +304 -0
- mcp_vector_search/cli/commands/init.py +212 -0
- mcp_vector_search/cli/commands/search.py +395 -0
- mcp_vector_search/cli/commands/status.py +340 -0
- mcp_vector_search/cli/commands/watch.py +288 -0
- mcp_vector_search/cli/main.py +117 -0
- mcp_vector_search/cli/output.py +242 -0
- mcp_vector_search/config/__init__.py +1 -0
- mcp_vector_search/config/defaults.py +175 -0
- mcp_vector_search/config/settings.py +108 -0
- mcp_vector_search/core/__init__.py +1 -0
- mcp_vector_search/core/database.py +431 -0
- mcp_vector_search/core/embeddings.py +250 -0
- mcp_vector_search/core/exceptions.py +66 -0
- mcp_vector_search/core/indexer.py +310 -0
- mcp_vector_search/core/models.py +174 -0
- mcp_vector_search/core/project.py +304 -0
- mcp_vector_search/core/search.py +324 -0
- mcp_vector_search/core/watcher.py +320 -0
- mcp_vector_search/mcp/__init__.py +1 -0
- mcp_vector_search/parsers/__init__.py +1 -0
- mcp_vector_search/parsers/base.py +180 -0
- mcp_vector_search/parsers/javascript.py +238 -0
- mcp_vector_search/parsers/python.py +407 -0
- mcp_vector_search/parsers/registry.py +187 -0
- mcp_vector_search/py.typed +1 -0
- mcp_vector_search-0.0.3.dist-info/METADATA +333 -0
- mcp_vector_search-0.0.3.dist-info/RECORD +35 -0
- mcp_vector_search-0.0.3.dist-info/WHEEL +4 -0
- mcp_vector_search-0.0.3.dist-info/entry_points.txt +2 -0
- mcp_vector_search-0.0.3.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Main CLI application for MCP Vector Search."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
from loguru import logger
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.traceback import install
|
|
10
|
+
|
|
11
|
+
from .. import __version__
|
|
12
|
+
from .commands.config import config_app
|
|
13
|
+
from .commands.index import index_app
|
|
14
|
+
from .commands.init import init_app
|
|
15
|
+
from .commands.search import search_app
|
|
16
|
+
from .commands.status import status_app
|
|
17
|
+
from .commands.watch import app as watch_app
|
|
18
|
+
from .output import setup_logging
|
|
19
|
+
|
|
20
|
+
# Route uncaught exceptions through rich's traceback renderer, including locals.
install(show_locals=True)

# Console shared by the commands defined in this module.
console = Console()

# Root Typer application; command groups are mounted onto it below.
app = typer.Typer(
    name="mcp-vector-search",
    help="CLI-first semantic code search with MCP integration",
    add_completion=False,
    rich_markup_mode="rich",
)

# Mount each command group; registration order is kept exactly as listed.
for _sub_app, _sub_name, _sub_help in (
    (init_app, "init", "Initialize project for semantic search"),
    (index_app, "index", "Index codebase for semantic search"),
    (search_app, "search", "Search code semantically"),
    (status_app, "status", "Show project status and statistics"),
    (config_app, "config", "Manage project configuration"),
    (watch_app, "watch", "Watch for file changes and update index"),
):
    app.add_typer(_sub_app, name=_sub_name, help=_sub_help)
del _sub_app, _sub_name, _sub_help
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _version_callback(value: bool) -> None:
    """Print the package version and exit when ``--version`` is given.

    Registered as an eager option callback so it runs while options are
    parsed, before the group checks that a subcommand was supplied. Without
    it, ``mcp-vector-search --version`` on its own stops with
    "Missing command." and never reaches the body of ``main``.
    """
    if value:
        console.print(f"mcp-vector-search version {__version__}")
        raise typer.Exit()


@app.callback()
def main(
    ctx: typer.Context,
    version: bool = typer.Option(
        False,
        "--version",
        "-v",
        help="Show version and exit",
        callback=_version_callback,
        is_eager=True,
    ),
    verbose: bool = typer.Option(
        False, "--verbose", help="Enable verbose logging"
    ),
    quiet: bool = typer.Option(
        False, "--quiet", help="Suppress non-error output"
    ),
    project_root: Optional[Path] = typer.Option(
        None,
        "--project-root",
        "-p",
        help="Project root directory (auto-detected if not specified)",
        exists=True,
        file_okay=False,
        dir_okay=True,
        readable=True,
    ),
) -> None:
    """MCP Vector Search - CLI-first semantic code search with MCP integration.

    A modern, lightweight tool for semantic code search using ChromaDB and Tree-sitter.
    Designed for local development with optional MCP server integration.
    """
    # NOTE: the docstring above is rendered by Typer as the CLI help text, so
    # parameter documentation lives in these comments instead:
    #   ctx          - Typer context; ctx.obj is populated with the global options.
    #   version      - handled eagerly by _version_callback.
    #   verbose      - selects DEBUG logging; takes precedence over quiet.
    #   quiet        - selects WARNING logging.
    #   project_root - optional existing, readable directory.

    # Still honoured when main() is called directly rather than via the CLI,
    # where the eager callback is not involved.
    if version:
        console.print(f"mcp-vector-search version {__version__}")
        raise typer.Exit()

    # Setup logging
    log_level = "DEBUG" if verbose else "WARNING" if quiet else "INFO"
    setup_logging(log_level)

    # Store global options in context so subcommands can read them
    ctx.ensure_object(dict)
    ctx.obj["verbose"] = verbose
    ctx.obj["quiet"] = quiet
    ctx.obj["project_root"] = project_root

    if verbose:
        logger.info(f"MCP Vector Search v{__version__}")
        if project_root:
            logger.info(f"Using project root: {project_root}")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@app.command()
def version() -> None:
    """Show version information."""
    # The docstring doubles as the command's help text, so it stays terse.
    # Each entry is printed as its own console line, in this order.
    banner_lines = (
        f"[bold blue]mcp-vector-search[/bold blue] version [green]{__version__}[/green]",
        "\n[dim]CLI-first semantic code search with MCP integration[/dim]",
        "[dim]Built with ChromaDB, Tree-sitter, and modern Python[/dim]",
    )
    for banner_line in banner_lines:
        console.print(banner_line)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@app.command()
def doctor() -> None:
    """Check system dependencies and configuration."""
    # Imported here, as in the original, rather than at module import time.
    from .commands.status import check_dependencies

    console.print("[bold blue]MCP Vector Search - System Check[/bold blue]\n")

    # check_dependencies() returns a truthy value when everything is available.
    if check_dependencies():
        console.print("\n[green]✓ All dependencies are available[/green]")
        return

    console.print("\n[red]✗ Some dependencies are missing[/red]")
    console.print("Run [code]pip install mcp-vector-search[/code] to install missing dependencies")
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
if __name__ == "__main__":
|
|
117
|
+
app()
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""Rich formatting and display utilities for CLI."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
6
|
+
|
|
7
|
+
from loguru import logger
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.panel import Panel
|
|
10
|
+
from rich.progress import (
|
|
11
|
+
BarColumn,
|
|
12
|
+
MofNCompleteColumn,
|
|
13
|
+
Progress,
|
|
14
|
+
SpinnerColumn,
|
|
15
|
+
TextColumn,
|
|
16
|
+
TimeElapsedColumn,
|
|
17
|
+
)
|
|
18
|
+
from rich.syntax import Syntax
|
|
19
|
+
from rich.table import Table
|
|
20
|
+
from rich.text import Text
|
|
21
|
+
|
|
22
|
+
from ..core.models import ProjectInfo, SearchResult
|
|
23
|
+
|
|
24
|
+
# Global console instance
|
|
25
|
+
console = Console()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def setup_logging(level: str = "INFO") -> None:
    """Replace loguru's handlers with a single colourised stderr sink.

    Args:
        level: Minimum level passed to the sink (DEBUG, INFO, WARNING, ERROR).
    """
    # time | padded level | module:function:line - message
    log_format = (
        "<green>{time:HH:mm:ss}</green> | "
        "<level>{level: <8}</level> | "
        "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
        "<level>{message}</level>"
    )

    # Drop every previously registered handler so output is not duplicated.
    logger.remove()
    logger.add(sys.stderr, level=level, format=log_format, colorize=True)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def print_success(message: str) -> None:
    """Write *message* to the shared console behind a green check mark."""
    marker = "[green]✓[/green]"
    console.print(f"{marker} {message}")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def print_error(message: str) -> None:
    """Write *message* to the shared console behind a red cross."""
    marker = "[red]✗[/red]"
    console.print(f"{marker} {message}")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def print_warning(message: str) -> None:
    """Write *message* to the shared console behind a yellow warning sign."""
    marker = "[yellow]⚠[/yellow]"
    console.print(f"{marker} {message}")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def print_info(message: str) -> None:
    """Write *message* to the shared console behind a blue info sign."""
    marker = "[blue]ℹ[/blue]"
    console.print(f"{marker} {message}")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def create_progress() -> Progress:
    """Build a Progress display bound to the shared console.

    Returns:
        A Progress with spinner, description, bar, M-of-N and elapsed-time
        columns, in that order.
    """
    columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        TimeElapsedColumn(),
    )
    return Progress(*columns, console=console)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def print_project_info(project_info: ProjectInfo) -> None:
    """Render the fields of *project_info* as a two-column table.

    Args:
        project_info: Object providing name, root_path, config_path,
            index_path, is_initialized, languages and file_count.
    """
    if project_info.languages:
        languages_cell = ", ".join(project_info.languages)
    else:
        languages_cell = "None detected"

    rows = (
        ("Name", project_info.name),
        ("Root Path", str(project_info.root_path)),
        ("Config Path", str(project_info.config_path)),
        ("Index Path", str(project_info.index_path)),
        ("Initialized", "✓" if project_info.is_initialized else "✗"),
        ("Languages", languages_cell),
        ("Indexable Files", str(project_info.file_count)),
    )

    table = Table(title="Project Information", show_header=False)
    table.add_column("Property", style="cyan", no_wrap=True)
    table.add_column("Value", style="white")
    for label, cell in rows:
        table.add_row(label, cell)

    console.print(table)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def print_search_results(
    results: List[SearchResult],
    query: str,
    show_content: bool = True,
    max_content_lines: int = 10,
) -> None:
    """Print search results in a formatted display.

    Args:
        results: Results to show, numbered from 1 in the given order.
        query: The user's query, echoed in the heading.
        show_content: When true, print each result's content in a
            syntax-highlighted panel.
        max_content_lines: Content longer than this many lines is cut off
            and followed by a "..." line.
    """
    # Local import: only this function needs it, and rich is already a
    # dependency of this module.
    from rich.markup import escape

    # Query text, file names and symbol names are data, not markup. Unescaped,
    # a file such as "[slug].tsx" or a query containing "[...]" would be
    # consumed as a Rich tag (or raise a markup error) instead of being shown.
    safe_query = escape(query)

    if not results:
        print_warning(f"No results found for query: '{safe_query}'")
        return

    console.print(f"\n[bold blue]Search Results for:[/bold blue] [green]'{safe_query}'[/green]")
    console.print(f"[dim]Found {len(results)} results[/dim]\n")

    for i, result in enumerate(results, 1):
        # Create result header
        header = f"[bold]{i}. {escape(result.file_path.name)}[/bold]"
        if result.function_name:
            header += f" → [cyan]{escape(result.function_name)}()[/cyan]"
        if result.class_name:
            header += f" in [yellow]{escape(result.class_name)}[/yellow]"

        # Add location and similarity
        location = f"[dim]{escape(str(result.location))}[/dim]"
        similarity = f"[green]{result.similarity_score:.2%}[/green]"

        console.print(f"{header}")
        console.print(f" {location} | Similarity: {similarity}")

        # Show code content if requested
        if show_content and result.content:
            content_lines = result.content.splitlines()
            if len(content_lines) > max_content_lines:
                content_lines = content_lines[:max_content_lines]
                content_lines.append("...")

            content = "\n".join(content_lines)

            # Syntax objects are rendered verbatim, so the code itself needs
            # no markup escaping.
            syntax = Syntax(
                content,
                result.language,
                theme="monokai",
                line_numbers=True,
                start_line=result.start_line,
                word_wrap=True,
            )

            console.print(Panel(syntax, border_style="dim"))

        console.print()  # Empty line between results
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def print_index_stats(stats: Dict[str, Any]) -> None:
    """Render indexing statistics as a two-column table.

    Args:
        stats: Mapping read for the keys total_indexable_files, indexed_files,
            total_chunks (each defaulting to 0), languages (a mapping of
            language to count) and file_extensions (an iterable of strings).
    """
    table = Table(title="Index Statistics", show_header=False)
    table.add_column("Metric", style="cyan", no_wrap=True)
    table.add_column("Value", style="white")

    # Counters are always shown; a missing key is displayed as 0.
    counters = (
        ("Total Files", "total_indexable_files"),
        ("Indexed Files", "indexed_files"),
        ("Total Chunks", "total_chunks"),
    )
    for label, stat_key in counters:
        table.add_row(label, str(stats.get(stat_key, 0)))

    # Optional rows appear only when there is something to report.
    per_language = stats.get("languages", {})
    if per_language:
        summary = ", ".join(f"{lang}: {count}" for lang, count in per_language.items())
        table.add_row("Languages", summary)

    suffixes = stats.get("file_extensions", [])
    if suffixes:
        table.add_row("Extensions", ", ".join(suffixes))

    console.print(table)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def print_config(config_dict: Dict[str, Any]) -> None:
    """Print configuration in a formatted table.

    Args:
        config_dict: Mapping of setting name to value. Names are shown with
            underscores replaced by spaces and title-cased; values are shown
            via ``str()``.
    """
    table = Table(title="Configuration", show_header=False)
    table.add_column("Setting", style="cyan", no_wrap=True)
    table.add_column("Value", style="white")

    for key, value in config_dict.items():
        # Lists, dicts, paths and scalars were all rendered with str() by
        # three separate but identical branches; one call covers them all.
        table.add_row(key.replace("_", " ").title(), str(value))

    console.print(table)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def confirm_action(message: str, default: bool = False) -> bool:
    """Ask for user confirmation.

    Args:
        message: Prompt text; may contain Rich markup.
        default: Value returned when the user just presses Enter.

    Returns:
        True for "y", "yes", "true" or "1" (case-insensitive), *default* for
        an empty answer, otherwise False.
    """
    # Local import: rich is already a dependency of this module.
    from rich.markup import escape

    default_str = "Y/n" if default else "y/N"
    # console.input() renders markup, and "[y/N]" matches Rich's tag pattern,
    # so the unescaped hint was silently removed from the prompt. escape()
    # only touches text that would parse as a tag, so "[Y/n]" is unchanged.
    hint = escape(f"[{default_str}]")
    response = console.input(f"{message} {hint}: ").strip().lower()

    if not response:
        return default

    return response in ("y", "yes", "true", "1")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def print_banner() -> None:
    """Show the application title and tagline inside a blue-bordered panel."""
    # NOTE(review): assumes the original banner lines carried no leading
    # indentation inside the triple-quoted literal - confirm against the
    # packaged source.
    title_line = "[bold blue]MCP Vector Search[/bold blue]"
    tagline = "[dim]CLI-first semantic code search with MCP integration[/dim]"
    console.print(Panel(f"{title_line}\n{tagline}", border_style="blue"))
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def format_file_path(file_path: Path, project_root: Optional[Path] = None) -> str:
    """Return *file_path* as display text, relative to *project_root* if possible.

    Args:
        file_path: Path to format.
        project_root: Optional base directory.

    Returns:
        The path relative to *project_root* when one is given and *file_path*
        lies beneath it; otherwise *file_path* unchanged, as a string.
    """
    if not project_root:
        return str(file_path)

    try:
        shortened = file_path.relative_to(project_root)
    except ValueError:
        # Not under project_root: fall back to the path as given.
        return str(file_path)
    return str(shortened)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def print_dependency_status(name: str, available: bool, version: Optional[str] = None) -> None:
    """Print one line reporting whether a dependency is available.

    Args:
        name: Display name of the dependency.
        available: Whether the dependency is available.
        version: Optional version, appended in parentheses when available.
    """
    if not available:
        console.print(f"[red]✗[/red] {name} - Not available")
        return

    suffix = f" ({version})" if version else ""
    console.print(f"[green]✓[/green] {name}{suffix}")
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def print_json(data: Any, title: Optional[str] = None) -> None:
    """Print *data* as indented, syntax-highlighted JSON.

    Args:
        data: Value to serialise; objects json cannot encode are passed
            through ``str``.
        title: When given, the output is wrapped in a blue panel with this title.
    """
    import json

    highlighted = Syntax(json.dumps(data, indent=2, default=str), "json", theme="monokai")

    if not title:
        console.print(highlighted)
        return

    console.print(Panel(highlighted, title=title, border_style="blue"))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Configuration management for MCP Vector Search."""
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""Default configurations for MCP Vector Search."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Dict, List
|
|
5
|
+
|
|
6
|
+
# Default file extensions to index (prioritize supported languages)
|
|
7
|
+
DEFAULT_FILE_EXTENSIONS = [
|
|
8
|
+
".py", # Python (fully supported)
|
|
9
|
+
".js", # JavaScript (fully supported)
|
|
10
|
+
".ts", # TypeScript (fully supported)
|
|
11
|
+
".jsx", # React JSX (fully supported)
|
|
12
|
+
".tsx", # React TSX (fully supported)
|
|
13
|
+
".mjs", # ES6 modules (fully supported)
|
|
14
|
+
".java", # Java (fallback parsing)
|
|
15
|
+
".cpp", # C++ (fallback parsing)
|
|
16
|
+
".c", # C (fallback parsing)
|
|
17
|
+
".h", # C/C++ headers (fallback parsing)
|
|
18
|
+
".hpp", # C++ headers (fallback parsing)
|
|
19
|
+
".cs", # C# (fallback parsing)
|
|
20
|
+
".go", # Go (fallback parsing)
|
|
21
|
+
".rs", # Rust (fallback parsing)
|
|
22
|
+
".php", # PHP (fallback parsing)
|
|
23
|
+
".rb", # Ruby (fallback parsing)
|
|
24
|
+
".swift", # Swift (fallback parsing)
|
|
25
|
+
".kt", # Kotlin (fallback parsing)
|
|
26
|
+
".scala", # Scala (fallback parsing)
|
|
27
|
+
".sh", # Shell scripts (fallback parsing)
|
|
28
|
+
".bash", # Bash scripts (fallback parsing)
|
|
29
|
+
".zsh", # Zsh scripts (fallback parsing)
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
# Language mappings for parsers
|
|
33
|
+
LANGUAGE_MAPPINGS: Dict[str, str] = {
|
|
34
|
+
".py": "python",
|
|
35
|
+
".pyw": "python",
|
|
36
|
+
".js": "javascript",
|
|
37
|
+
".jsx": "javascript",
|
|
38
|
+
".mjs": "javascript",
|
|
39
|
+
".ts": "typescript",
|
|
40
|
+
".tsx": "typescript",
|
|
41
|
+
".java": "java",
|
|
42
|
+
".cpp": "cpp",
|
|
43
|
+
".c": "c",
|
|
44
|
+
".h": "c",
|
|
45
|
+
".hpp": "cpp",
|
|
46
|
+
".cs": "c_sharp",
|
|
47
|
+
".go": "go",
|
|
48
|
+
".rs": "rust",
|
|
49
|
+
".php": "php",
|
|
50
|
+
".rb": "ruby",
|
|
51
|
+
".swift": "swift",
|
|
52
|
+
".kt": "kotlin",
|
|
53
|
+
".scala": "scala",
|
|
54
|
+
".sh": "bash",
|
|
55
|
+
".bash": "bash",
|
|
56
|
+
".zsh": "bash",
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
# Default embedding models by use case
|
|
60
|
+
DEFAULT_EMBEDDING_MODELS = {
|
|
61
|
+
"code": "microsoft/codebert-base",
|
|
62
|
+
"multilingual": "sentence-transformers/all-MiniLM-L6-v2",
|
|
63
|
+
"fast": "sentence-transformers/all-MiniLM-L12-v2",
|
|
64
|
+
"precise": "microsoft/unixcoder-base",
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
# Default similarity thresholds by language
|
|
68
|
+
DEFAULT_SIMILARITY_THRESHOLDS = {
|
|
69
|
+
"python": 0.75,
|
|
70
|
+
"javascript": 0.70,
|
|
71
|
+
"typescript": 0.70,
|
|
72
|
+
"java": 0.75,
|
|
73
|
+
"cpp": 0.70,
|
|
74
|
+
"c": 0.70,
|
|
75
|
+
"go": 0.75,
|
|
76
|
+
"rust": 0.75,
|
|
77
|
+
"default": 0.70,
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
# Default chunk sizes by language (in tokens)
|
|
81
|
+
DEFAULT_CHUNK_SIZES = {
|
|
82
|
+
"python": 512,
|
|
83
|
+
"javascript": 384,
|
|
84
|
+
"typescript": 384,
|
|
85
|
+
"java": 512,
|
|
86
|
+
"cpp": 384,
|
|
87
|
+
"c": 384,
|
|
88
|
+
"go": 512,
|
|
89
|
+
"rust": 512,
|
|
90
|
+
"default": 512,
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
# Directories to ignore during indexing
|
|
94
|
+
DEFAULT_IGNORE_PATTERNS = [
|
|
95
|
+
".git",
|
|
96
|
+
".svn",
|
|
97
|
+
".hg",
|
|
98
|
+
"__pycache__",
|
|
99
|
+
".pytest_cache",
|
|
100
|
+
"node_modules",
|
|
101
|
+
".venv",
|
|
102
|
+
"venv",
|
|
103
|
+
".env",
|
|
104
|
+
"build",
|
|
105
|
+
"dist",
|
|
106
|
+
"target",
|
|
107
|
+
".idea",
|
|
108
|
+
".vscode",
|
|
109
|
+
"*.egg-info",
|
|
110
|
+
".DS_Store",
|
|
111
|
+
"Thumbs.db",
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
# File patterns to ignore
|
|
115
|
+
DEFAULT_IGNORE_FILES = [
|
|
116
|
+
"*.pyc",
|
|
117
|
+
"*.pyo",
|
|
118
|
+
"*.pyd",
|
|
119
|
+
"*.so",
|
|
120
|
+
"*.dll",
|
|
121
|
+
"*.dylib",
|
|
122
|
+
"*.exe",
|
|
123
|
+
"*.bin",
|
|
124
|
+
"*.obj",
|
|
125
|
+
"*.o",
|
|
126
|
+
"*.a",
|
|
127
|
+
"*.lib",
|
|
128
|
+
"*.jar",
|
|
129
|
+
"*.war",
|
|
130
|
+
"*.ear",
|
|
131
|
+
"*.zip",
|
|
132
|
+
"*.tar",
|
|
133
|
+
"*.gz",
|
|
134
|
+
"*.bz2",
|
|
135
|
+
"*.xz",
|
|
136
|
+
"*.7z",
|
|
137
|
+
"*.rar",
|
|
138
|
+
"*.iso",
|
|
139
|
+
"*.dmg",
|
|
140
|
+
"*.img",
|
|
141
|
+
"*.log",
|
|
142
|
+
"*.tmp",
|
|
143
|
+
"*.temp",
|
|
144
|
+
"*.cache",
|
|
145
|
+
"*.lock",
|
|
146
|
+
]
|
|
147
|
+
|
|
148
|
+
def get_default_config_path(project_root: Path) -> Path:
    """Return ``<project_root>/.mcp-vector-search/config.json``."""
    return project_root.joinpath(".mcp-vector-search", "config.json")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def get_default_index_path(project_root: Path) -> Path:
    """Return ``<project_root>/.mcp-vector-search``, the index directory."""
    return project_root.joinpath(".mcp-vector-search")
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def get_default_cache_path(project_root: Path) -> Path:
    """Return ``<project_root>/.mcp-vector-search/cache``."""
    return project_root.joinpath(".mcp-vector-search", "cache")
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def get_language_from_extension(extension: str) -> str:
    """Map a file extension to a language name.

    Args:
        extension: Extension as keyed in LANGUAGE_MAPPINGS (with leading
            dot); matched case-insensitively.

    Returns:
        The mapped language name, or "text" when the extension is unknown.
    """
    try:
        return LANGUAGE_MAPPINGS[extension.lower()]
    except KeyError:
        return "text"
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def get_similarity_threshold(language: str) -> float:
    """Return the similarity threshold for *language*.

    The lookup is case-insensitive; languages without an entry use the
    "default" threshold.
    """
    fallback = DEFAULT_SIMILARITY_THRESHOLDS["default"]
    lookup_key = language.lower()
    return DEFAULT_SIMILARITY_THRESHOLDS.get(lookup_key, fallback)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def get_chunk_size(language: str) -> int:
    """Return the chunk size for *language*.

    The lookup is case-insensitive; languages without an entry use the
    "default" size.
    """
    fallback = DEFAULT_CHUNK_SIZES["default"]
    lookup_key = language.lower()
    return DEFAULT_CHUNK_SIZES.get(lookup_key, fallback)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Pydantic configuration schemas for MCP Vector Search."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
from pydantic import Field, validator
|
|
7
|
+
from pydantic_settings import BaseSettings
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ProjectConfig(BaseSettings):
    """Type-safe project configuration with validation.

    Values may also be supplied through environment variables carrying the
    ``MCP_VECTOR_SEARCH_`` prefix (matched case-insensitively).
    """

    project_root: Path = Field(..., description="Project root directory")
    # The default must already be a Path: defaults are not validated, so the
    # previous str default left this attribute as a plain str whenever the
    # caller did not supply a value.
    # NOTE(review): the default stays relative, because validate_paths below
    # only runs on supplied values - confirm whether that is intended.
    index_path: Path = Field(
        default=Path(".mcp-vector-search"), description="Index storage path"
    )
    file_extensions: List[str] = Field(
        default=[".py", ".js", ".ts", ".jsx", ".tsx"],
        description="File extensions to index",
    )
    embedding_model: str = Field(
        default="microsoft/codebert-base", description="Embedding model name"
    )
    similarity_threshold: float = Field(
        default=0.75, ge=0.0, le=1.0, description="Similarity threshold"
    )
    max_chunk_size: int = Field(
        default=512, gt=0, description="Maximum chunk size in tokens"
    )
    languages: List[str] = Field(default=[], description="Detected programming languages")
    watch_files: bool = Field(
        default=False, description="Enable file watching for incremental updates"
    )
    cache_embeddings: bool = Field(
        default=True, description="Enable embedding caching"
    )
    max_cache_size: int = Field(
        default=1000, gt=0, description="Maximum number of cached embeddings"
    )

    @validator("project_root", "index_path")
    def validate_paths(cls, v: Path) -> Path:
        """Ensure paths are absolute and normalized."""
        return v.resolve()

    @validator("file_extensions")
    def validate_extensions(cls, v: List[str]) -> List[str]:
        """Ensure extensions start with dot."""
        return [ext if ext.startswith(".") else f".{ext}" for ext in v]

    class Config:
        env_prefix = "MCP_VECTOR_SEARCH_"
        case_sensitive = False
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class DatabaseConfig(BaseSettings):
    """Settings for the vector database.

    Values may also be supplied through environment variables carrying the
    ``MCP_VECTOR_SEARCH_DB_`` prefix (matched case-insensitively).
    """

    persist_directory: Optional[Path] = Field(default=None, description="ChromaDB persistence directory")
    collection_name: str = Field(default="code_search", description="ChromaDB collection name")
    batch_size: int = Field(default=32, gt=0, description="Batch size for embedding operations")
    enable_telemetry: bool = Field(default=False, description="Enable ChromaDB telemetry")

    @validator("persist_directory")
    def validate_persist_directory(cls, v: Optional[Path]) -> Optional[Path]:
        """Resolve the persistence directory to an absolute path when one is set."""
        if not v:
            return None
        return v.resolve()

    class Config:
        env_prefix = "MCP_VECTOR_SEARCH_DB_"
        case_sensitive = False
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class SearchConfig(BaseSettings):
    """Settings for search behaviour.

    Values may also be supplied through environment variables carrying the
    ``MCP_VECTOR_SEARCH_SEARCH_`` prefix (matched case-insensitively).
    """

    default_limit: int = Field(default=10, gt=0, description="Default number of search results")
    max_limit: int = Field(default=100, gt=0, description="Maximum number of search results")
    enable_reranking: bool = Field(default=True, description="Enable result reranking")
    context_lines: int = Field(default=3, ge=0, description="Number of context lines to include")

    @validator("max_limit")
    def validate_max_limit(cls, v: int, values: dict) -> int:
        """Reject a max_limit smaller than default_limit; equal values are allowed."""
        # default_limit is absent from `values` when it did not validate, in
        # which case 10 is used as the floor.
        floor = values.get("default_limit", 10)
        if v >= floor:
            return v
        raise ValueError("max_limit must be greater than or equal to default_limit")

    class Config:
        env_prefix = "MCP_VECTOR_SEARCH_SEARCH_"
        case_sensitive = False
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core functionality for MCP Vector Search."""
|