mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.12__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (49):
  1. mcp_vector_search/__init__.py +3 -2
  2. mcp_vector_search/cli/commands/auto_index.py +397 -0
  3. mcp_vector_search/cli/commands/config.py +88 -40
  4. mcp_vector_search/cli/commands/index.py +198 -52
  5. mcp_vector_search/cli/commands/init.py +471 -58
  6. mcp_vector_search/cli/commands/install.py +284 -0
  7. mcp_vector_search/cli/commands/mcp.py +495 -0
  8. mcp_vector_search/cli/commands/search.py +241 -87
  9. mcp_vector_search/cli/commands/status.py +184 -58
  10. mcp_vector_search/cli/commands/watch.py +34 -35
  11. mcp_vector_search/cli/didyoumean.py +184 -0
  12. mcp_vector_search/cli/export.py +320 -0
  13. mcp_vector_search/cli/history.py +292 -0
  14. mcp_vector_search/cli/interactive.py +342 -0
  15. mcp_vector_search/cli/main.py +175 -27
  16. mcp_vector_search/cli/output.py +63 -45
  17. mcp_vector_search/config/defaults.py +50 -36
  18. mcp_vector_search/config/settings.py +49 -35
  19. mcp_vector_search/core/auto_indexer.py +298 -0
  20. mcp_vector_search/core/connection_pool.py +322 -0
  21. mcp_vector_search/core/database.py +335 -25
  22. mcp_vector_search/core/embeddings.py +73 -29
  23. mcp_vector_search/core/exceptions.py +19 -2
  24. mcp_vector_search/core/factory.py +310 -0
  25. mcp_vector_search/core/git_hooks.py +345 -0
  26. mcp_vector_search/core/indexer.py +237 -73
  27. mcp_vector_search/core/models.py +21 -19
  28. mcp_vector_search/core/project.py +73 -58
  29. mcp_vector_search/core/scheduler.py +330 -0
  30. mcp_vector_search/core/search.py +574 -86
  31. mcp_vector_search/core/watcher.py +48 -46
  32. mcp_vector_search/mcp/__init__.py +4 -0
  33. mcp_vector_search/mcp/__main__.py +25 -0
  34. mcp_vector_search/mcp/server.py +701 -0
  35. mcp_vector_search/parsers/base.py +30 -31
  36. mcp_vector_search/parsers/javascript.py +74 -48
  37. mcp_vector_search/parsers/python.py +57 -49
  38. mcp_vector_search/parsers/registry.py +47 -32
  39. mcp_vector_search/parsers/text.py +179 -0
  40. mcp_vector_search/utils/__init__.py +40 -0
  41. mcp_vector_search/utils/gitignore.py +229 -0
  42. mcp_vector_search/utils/timing.py +334 -0
  43. mcp_vector_search/utils/version.py +47 -0
  44. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/METADATA +173 -7
  45. mcp_vector_search-0.4.12.dist-info/RECORD +54 -0
  46. mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
  47. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/WHEEL +0 -0
  48. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/entry_points.txt +0 -0
  49. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.12.dist-info}/licenses/LICENSE +0 -0
@@ -1,21 +1,35 @@
1
1
  """Main CLI application for MCP Vector Search."""
2
2
 
3
3
  from pathlib import Path
4
- from typing import Optional
5
4
 
6
5
  import typer
7
6
  from loguru import logger
8
7
  from rich.console import Console
9
8
  from rich.traceback import install
10
9
 
11
- from .. import __version__
10
+ from .. import __build__, __version__
11
+ from .commands.auto_index import auto_index_app
12
12
  from .commands.config import config_app
13
13
  from .commands.index import index_app
14
- from .commands.init import init_app
15
- from .commands.search import search_app
14
+ from .commands.init import (
15
+ init_app,
16
+ main as init_main,
17
+ check_initialization as init_check,
18
+ init_mcp_integration,
19
+ list_embedding_models,
20
+ )
21
+ from .commands.install import install_app
22
+ from .commands.mcp import mcp_app
23
+ from .commands.search import (
24
+ search_app,
25
+ search_context_cmd,
26
+ search_main,
27
+ search_similar_cmd,
28
+ )
16
29
  from .commands.status import status_app
17
30
  from .commands.watch import app as watch_app
18
- from .output import setup_logging
31
+ from .didyoumean import create_enhanced_typer, add_common_suggestions
32
+ from .output import print_error, setup_logging
19
33
 
20
34
  # Install rich traceback handler
21
35
  install(show_locals=True)
@@ -23,21 +37,132 @@ install(show_locals=True)
23
37
  # Create console for rich output
24
38
  console = Console()
25
39
 
26
- # Create main Typer app
27
- app = typer.Typer(
40
+ # Create main Typer app with "did you mean" functionality
41
+ app = create_enhanced_typer(
28
42
  name="mcp-vector-search",
29
43
  help="CLI-first semantic code search with MCP integration",
30
44
  add_completion=False,
31
45
  rich_markup_mode="rich",
32
46
  )
33
47
 
34
- # Add subcommands
35
- app.add_typer(init_app, name="init", help="Initialize project for semantic search")
48
+ # Add install command directly (not as subcommand app)
49
+ from .commands.install import main as install_main, demo as install_demo
50
+ from .commands.status import main as status_main
51
+ app.command("install", help="🚀 Install mcp-vector-search in projects")(install_main)
52
+ app.command("demo", help="🎬 Run installation demo with sample project")(install_demo)
53
+ app.command("status", help="📊 Show project status and statistics")(status_main)
54
+ # Register init as a direct command
55
+ app.command("init", help="🔧 Initialize project for semantic search")(init_main)
56
+ # Add init subcommands as separate commands
57
+ app.command("init-check", help="Check if project is initialized")(init_check)
58
+ app.command("init-mcp", help="Install/fix Claude Code MCP integration")(init_mcp_integration)
59
+ app.command("init-models", help="List available embedding models")(list_embedding_models)
36
60
  app.add_typer(index_app, name="index", help="Index codebase for semantic search")
37
- app.add_typer(search_app, name="search", help="Search code semantically")
38
- app.add_typer(status_app, name="status", help="Show project status and statistics")
39
61
  app.add_typer(config_app, name="config", help="Manage project configuration")
40
62
  app.add_typer(watch_app, name="watch", help="Watch for file changes and update index")
63
+ app.add_typer(auto_index_app, name="auto-index", help="Manage automatic indexing")
64
+ app.add_typer(mcp_app, name="mcp", help="Manage Claude Code MCP integration")
65
+
66
+ # Add search command - simplified syntax as default
67
+ app.command("search", help="Search code semantically")(search_main)
68
+
69
+ # Keep old nested structure for backward compatibility
70
+ app.add_typer(search_app, name="search-legacy", help="Legacy search commands", hidden=True)
71
+ app.add_typer(status_app, name="status-legacy", help="Legacy status commands", hidden=True)
72
+ app.command("find", help="Search code semantically (alias for search)")(search_main)
73
+ app.command("search-similar", help="Find code similar to a specific file or function")(
74
+ search_similar_cmd
75
+ )
76
+ app.command("search-context", help="Search for code based on contextual description")(
77
+ search_context_cmd
78
+ )
79
+
80
+
81
+ # Add interactive search command
82
+ @app.command("interactive")
83
+ def interactive_search(
84
+ ctx: typer.Context,
85
+ project_root: Path | None = typer.Option(
86
+ None, "--project-root", "-p", help="Project root directory"
87
+ ),
88
+ ) -> None:
89
+ """Start an interactive search session with filtering and refinement."""
90
+ import asyncio
91
+
92
+ from .interactive import start_interactive_search
93
+
94
+ root = project_root or ctx.obj.get("project_root") or Path.cwd()
95
+
96
+ try:
97
+ asyncio.run(start_interactive_search(root))
98
+ except KeyboardInterrupt:
99
+ console.print("\n[yellow]Interactive search cancelled[/yellow]")
100
+ except Exception as e:
101
+ print_error(f"Interactive search failed: {e}")
102
+ raise typer.Exit(1)
103
+
104
+
105
+ # Add history management commands
106
+ @app.command("history")
107
+ def show_history(
108
+ ctx: typer.Context,
109
+ limit: int = typer.Option(20, "--limit", "-l", help="Number of entries to show"),
110
+ project_root: Path | None = typer.Option(
111
+ None, "--project-root", "-p", help="Project root directory"
112
+ ),
113
+ ) -> None:
114
+ """Show search history."""
115
+ from .history import show_search_history
116
+
117
+ root = project_root or ctx.obj.get("project_root") or Path.cwd()
118
+ show_search_history(root, limit)
119
+
120
+
121
+ @app.command("favorites")
122
+ def show_favorites_cmd(
123
+ ctx: typer.Context,
124
+ project_root: Path | None = typer.Option(
125
+ None, "--project-root", "-p", help="Project root directory"
126
+ ),
127
+ ) -> None:
128
+ """Show favorite queries."""
129
+ from .history import show_favorites
130
+
131
+ root = project_root or ctx.obj.get("project_root") or Path.cwd()
132
+ show_favorites(root)
133
+
134
+
135
+ @app.command("add-favorite")
136
+ def add_favorite(
137
+ ctx: typer.Context,
138
+ query: str = typer.Argument(..., help="Query to add to favorites"),
139
+ description: str | None = typer.Option(None, "--desc", help="Optional description"),
140
+ project_root: Path | None = typer.Option(
141
+ None, "--project-root", "-p", help="Project root directory"
142
+ ),
143
+ ) -> None:
144
+ """Add a query to favorites."""
145
+ from .history import SearchHistory
146
+
147
+ root = project_root or ctx.obj.get("project_root") or Path.cwd()
148
+ history_manager = SearchHistory(root)
149
+ history_manager.add_favorite(query, description)
150
+
151
+
152
+ @app.command("remove-favorite")
153
+ def remove_favorite(
154
+ ctx: typer.Context,
155
+ query: str = typer.Argument(..., help="Query to remove from favorites"),
156
+ project_root: Path | None = typer.Option(
157
+ None, "--project-root", "-p", help="Project root directory"
158
+ ),
159
+ ) -> None:
160
+ """Remove a query from favorites."""
161
+ from .history import SearchHistory
162
+
163
+ root = project_root or ctx.obj.get("project_root") or Path.cwd()
164
+ history_manager = SearchHistory(root)
165
+ history_manager.remove_favorite(query)
41
166
 
42
167
 
43
168
  @app.callback()
@@ -46,13 +171,9 @@ def main(
46
171
  version: bool = typer.Option(
47
172
  False, "--version", "-v", help="Show version and exit"
48
173
  ),
49
- verbose: bool = typer.Option(
50
- False, "--verbose", help="Enable verbose logging"
51
- ),
52
- quiet: bool = typer.Option(
53
- False, "--quiet", help="Suppress non-error output"
54
- ),
55
- project_root: Optional[Path] = typer.Option(
174
+ verbose: bool = typer.Option(False, "--verbose", help="Enable verbose logging"),
175
+ quiet: bool = typer.Option(False, "--quiet", help="Suppress non-error output"),
176
+ project_root: Path | None = typer.Option(
56
177
  None,
57
178
  "--project-root",
58
179
  "-p",
@@ -64,16 +185,16 @@ def main(
64
185
  ),
65
186
  ) -> None:
66
187
  """MCP Vector Search - CLI-first semantic code search with MCP integration.
67
-
188
+
68
189
  A modern, lightweight tool for semantic code search using ChromaDB and Tree-sitter.
69
190
  Designed for local development with optional MCP server integration.
70
191
  """
71
192
  if version:
72
- console.print(f"mcp-vector-search version {__version__}")
193
+ console.print(f"mcp-vector-search version {__version__} (build {__build__})")
73
194
  raise typer.Exit()
74
195
 
75
196
  # Setup logging
76
- log_level = "DEBUG" if verbose else "WARNING" if quiet else "INFO"
197
+ log_level = "DEBUG" if verbose else "ERROR" if quiet else "WARNING"
77
198
  setup_logging(log_level)
78
199
 
79
200
  # Store global options in context
@@ -83,7 +204,7 @@ def main(
83
204
  ctx.obj["project_root"] = project_root
84
205
 
85
206
  if verbose:
86
- logger.info(f"MCP Vector Search v{__version__}")
207
+ logger.info(f"MCP Vector Search v{__version__} (build {__build__})")
87
208
  if project_root:
88
209
  logger.info(f"Using project root: {project_root}")
89
210
 
@@ -91,26 +212,53 @@ def main(
91
212
  @app.command()
92
213
  def version() -> None:
93
214
  """Show version information."""
94
- console.print(f"[bold blue]mcp-vector-search[/bold blue] version [green]{__version__}[/green]")
215
+ console.print(
216
+ f"[bold blue]mcp-vector-search[/bold blue] version [green]{__version__}[/green] [dim](build {__build__})[/dim]"
217
+ )
95
218
  console.print("\n[dim]CLI-first semantic code search with MCP integration[/dim]")
96
219
  console.print("[dim]Built with ChromaDB, Tree-sitter, and modern Python[/dim]")
97
220
 
98
221
 
222
+ def handle_command_error(ctx, param, value):
223
+ """Handle command errors with suggestions."""
224
+ if ctx.resilient_parsing:
225
+ return
226
+
227
+ # This will be called when a command is not found
228
+ import click
229
+ try:
230
+ return value
231
+ except click.UsageError as e:
232
+ if "No such command" in str(e):
233
+ # Extract the command name from the error
234
+ import re
235
+ match = re.search(r"No such command '([^']+)'", str(e))
236
+ if match:
237
+ command_name = match.group(1)
238
+ add_common_suggestions(ctx, command_name)
239
+ raise
240
+
241
+
99
242
  @app.command()
100
243
  def doctor() -> None:
101
244
  """Check system dependencies and configuration."""
102
245
  from .commands.status import check_dependencies
103
-
246
+
104
247
  console.print("[bold blue]MCP Vector Search - System Check[/bold blue]\n")
105
-
248
+
106
249
  # Check dependencies
107
250
  deps_ok = check_dependencies()
108
-
251
+
109
252
  if deps_ok:
110
253
  console.print("\n[green]✓ All dependencies are available[/green]")
111
254
  else:
112
255
  console.print("\n[red]✗ Some dependencies are missing[/red]")
113
- console.print("Run [code]pip install mcp-vector-search[/code] to install missing dependencies")
256
+ console.print(
257
+ "Run [code]pip install mcp-vector-search[/code] to install missing dependencies"
258
+ )
259
+
260
+
261
+
114
262
 
115
263
 
116
264
  if __name__ == "__main__":
@@ -2,7 +2,7 @@
2
2
 
3
3
  import sys
4
4
  from pathlib import Path
5
- from typing import Any, Dict, List, Optional
5
+ from typing import Any
6
6
 
7
7
  from loguru import logger
8
8
  from rich.console import Console
@@ -17,7 +17,6 @@ from rich.progress import (
17
17
  )
18
18
  from rich.syntax import Syntax
19
19
  from rich.table import Table
20
- from rich.text import Text
21
20
 
22
21
  from ..core.models import ProjectInfo, SearchResult
23
22
 
@@ -25,22 +24,32 @@ from ..core.models import ProjectInfo, SearchResult
25
24
  console = Console()
26
25
 
27
26
 
28
- def setup_logging(level: str = "INFO") -> None:
27
+ def setup_logging(level: str = "WARNING") -> None:
29
28
  """Setup structured logging with rich formatting.
30
-
29
+
31
30
  Args:
32
31
  level: Log level (DEBUG, INFO, WARNING, ERROR)
33
32
  """
34
- # Remove default handler
33
+ # Remove all existing handlers
35
34
  logger.remove()
36
-
37
- # Add console handler with rich formatting
38
- logger.add(
39
- sys.stderr,
40
- level=level,
41
- format="<green>{time:HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
42
- colorize=True,
43
- )
35
+
36
+ # Only add console handler if level is DEBUG or INFO
37
+ # For WARNING and ERROR, we want minimal output
38
+ if level in ["DEBUG", "INFO"]:
39
+ logger.add(
40
+ sys.stderr,
41
+ level=level,
42
+ format="<green>{time:HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
43
+ colorize=True,
44
+ )
45
+ else:
46
+ # For WARNING and ERROR, use minimal format and only show WARNING+ messages
47
+ logger.add(
48
+ sys.stderr,
49
+ level=level,
50
+ format="<level>{level}</level>: <level>{message}</level>",
51
+ colorize=True,
52
+ )
44
53
 
45
54
 
46
55
  def print_success(message: str) -> None:
@@ -80,20 +89,25 @@ def print_project_info(project_info: ProjectInfo) -> None:
80
89
  table = Table(title="Project Information", show_header=False)
81
90
  table.add_column("Property", style="cyan", no_wrap=True)
82
91
  table.add_column("Value", style="white")
83
-
92
+
84
93
  table.add_row("Name", project_info.name)
85
94
  table.add_row("Root Path", str(project_info.root_path))
86
95
  table.add_row("Config Path", str(project_info.config_path))
87
96
  table.add_row("Index Path", str(project_info.index_path))
88
97
  table.add_row("Initialized", "✓" if project_info.is_initialized else "✗")
89
- table.add_row("Languages", ", ".join(project_info.languages) if project_info.languages else "None detected")
98
+ table.add_row(
99
+ "Languages",
100
+ ", ".join(project_info.languages)
101
+ if project_info.languages
102
+ else "None detected",
103
+ )
90
104
  table.add_row("Indexable Files", str(project_info.file_count))
91
-
105
+
92
106
  console.print(table)
93
107
 
94
108
 
95
109
  def print_search_results(
96
- results: List[SearchResult],
110
+ results: list[SearchResult],
97
111
  query: str,
98
112
  show_content: bool = True,
99
113
  max_content_lines: int = 10,
@@ -102,10 +116,12 @@ def print_search_results(
102
116
  if not results:
103
117
  print_warning(f"No results found for query: '{query}'")
104
118
  return
105
-
106
- console.print(f"\n[bold blue]Search Results for:[/bold blue] [green]'{query}'[/green]")
119
+
120
+ console.print(
121
+ f"\n[bold blue]Search Results for:[/bold blue] [green]'{query}'[/green]"
122
+ )
107
123
  console.print(f"[dim]Found {len(results)} results[/dim]\n")
108
-
124
+
109
125
  for i, result in enumerate(results, 1):
110
126
  # Create result header
111
127
  header = f"[bold]{i}. {result.file_path.name}[/bold]"
@@ -113,23 +129,23 @@ def print_search_results(
113
129
  header += f" → [cyan]{result.function_name}()[/cyan]"
114
130
  if result.class_name:
115
131
  header += f" in [yellow]{result.class_name}[/yellow]"
116
-
132
+
117
133
  # Add location and similarity
118
134
  location = f"[dim]{result.location}[/dim]"
119
135
  similarity = f"[green]{result.similarity_score:.2%}[/green]"
120
-
136
+
121
137
  console.print(f"{header}")
122
138
  console.print(f" {location} | Similarity: {similarity}")
123
-
139
+
124
140
  # Show code content if requested
125
141
  if show_content and result.content:
126
142
  content_lines = result.content.splitlines()
127
143
  if len(content_lines) > max_content_lines:
128
144
  content_lines = content_lines[:max_content_lines]
129
145
  content_lines.append("...")
130
-
146
+
131
147
  content = "\n".join(content_lines)
132
-
148
+
133
149
  # Create syntax-highlighted code block
134
150
  syntax = Syntax(
135
151
  content,
@@ -139,52 +155,52 @@ def print_search_results(
139
155
  start_line=result.start_line,
140
156
  word_wrap=True,
141
157
  )
142
-
158
+
143
159
  console.print(Panel(syntax, border_style="dim"))
144
-
160
+
145
161
  console.print() # Empty line between results
146
162
 
147
163
 
148
- def print_index_stats(stats: Dict[str, Any]) -> None:
164
+ def print_index_stats(stats: dict[str, Any]) -> None:
149
165
  """Print indexing statistics."""
150
166
  table = Table(title="Index Statistics", show_header=False)
151
167
  table.add_column("Metric", style="cyan", no_wrap=True)
152
168
  table.add_column("Value", style="white")
153
-
169
+
154
170
  table.add_row("Total Files", str(stats.get("total_indexable_files", 0)))
155
171
  table.add_row("Indexed Files", str(stats.get("indexed_files", 0)))
156
172
  table.add_row("Total Chunks", str(stats.get("total_chunks", 0)))
157
-
173
+
158
174
  # Language distribution
159
175
  languages = stats.get("languages", {})
160
176
  if languages:
161
177
  lang_str = ", ".join(f"{lang}: {count}" for lang, count in languages.items())
162
178
  table.add_row("Languages", lang_str)
163
-
179
+
164
180
  # File extensions
165
181
  extensions = stats.get("file_extensions", [])
166
182
  if extensions:
167
183
  table.add_row("Extensions", ", ".join(extensions))
168
-
184
+
169
185
  console.print(table)
170
186
 
171
187
 
172
- def print_config(config_dict: Dict[str, Any]) -> None:
188
+ def print_config(config_dict: dict[str, Any]) -> None:
173
189
  """Print configuration in a formatted table."""
174
190
  table = Table(title="Configuration", show_header=False)
175
191
  table.add_column("Setting", style="cyan", no_wrap=True)
176
192
  table.add_column("Value", style="white")
177
-
193
+
178
194
  for key, value in config_dict.items():
179
- if isinstance(value, (list, dict)):
195
+ if isinstance(value, list | dict):
180
196
  value_str = str(value)
181
197
  elif isinstance(value, Path):
182
198
  value_str = str(value)
183
199
  else:
184
200
  value_str = str(value)
185
-
201
+
186
202
  table.add_row(key.replace("_", " ").title(), value_str)
187
-
203
+
188
204
  console.print(table)
189
205
 
190
206
 
@@ -192,10 +208,10 @@ def confirm_action(message: str, default: bool = False) -> bool:
192
208
  """Ask for user confirmation."""
193
209
  default_str = "Y/n" if default else "y/N"
194
210
  response = console.input(f"{message} [{default_str}]: ").strip().lower()
195
-
211
+
196
212
  if not response:
197
213
  return default
198
-
214
+
199
215
  return response in ("y", "yes", "true", "1")
200
216
 
201
217
 
@@ -208,7 +224,7 @@ def print_banner() -> None:
208
224
  console.print(Panel(banner.strip(), border_style="blue"))
209
225
 
210
226
 
211
- def format_file_path(file_path: Path, project_root: Optional[Path] = None) -> str:
227
+ def format_file_path(file_path: Path, project_root: Path | None = None) -> str:
212
228
  """Format file path for display (relative to project root if possible)."""
213
229
  if project_root:
214
230
  try:
@@ -216,11 +232,13 @@ def format_file_path(file_path: Path, project_root: Optional[Path] = None) -> st
216
232
  return str(relative_path)
217
233
  except ValueError:
218
234
  pass
219
-
235
+
220
236
  return str(file_path)
221
237
 
222
238
 
223
- def print_dependency_status(name: str, available: bool, version: Optional[str] = None) -> None:
239
+ def print_dependency_status(
240
+ name: str, available: bool, version: str | None = None
241
+ ) -> None:
224
242
  """Print dependency status."""
225
243
  if available:
226
244
  version_str = f" ({version})" if version else ""
@@ -229,13 +247,13 @@ def print_dependency_status(name: str, available: bool, version: Optional[str] =
229
247
  console.print(f"[red]✗[/red] {name} - Not available")
230
248
 
231
249
 
232
- def print_json(data: Any, title: Optional[str] = None) -> None:
250
+ def print_json(data: Any, title: str | None = None) -> None:
233
251
  """Print data as formatted JSON."""
234
252
  import json
235
-
253
+
236
254
  json_str = json.dumps(data, indent=2, default=str)
237
255
  syntax = Syntax(json_str, "json", theme="monokai")
238
-
256
+
239
257
  if title:
240
258
  console.print(Panel(syntax, title=title, border_style="blue"))
241
259
  else:
@@ -1,36 +1,38 @@
1
1
  """Default configurations for MCP Vector Search."""
2
2
 
3
3
  from pathlib import Path
4
- from typing import Dict, List
5
4
 
6
5
  # Default file extensions to index (prioritize supported languages)
7
6
  DEFAULT_FILE_EXTENSIONS = [
8
- ".py", # Python (fully supported)
9
- ".js", # JavaScript (fully supported)
10
- ".ts", # TypeScript (fully supported)
11
- ".jsx", # React JSX (fully supported)
12
- ".tsx", # React TSX (fully supported)
13
- ".mjs", # ES6 modules (fully supported)
14
- ".java", # Java (fallback parsing)
15
- ".cpp", # C++ (fallback parsing)
16
- ".c", # C (fallback parsing)
17
- ".h", # C/C++ headers (fallback parsing)
18
- ".hpp", # C++ headers (fallback parsing)
19
- ".cs", # C# (fallback parsing)
20
- ".go", # Go (fallback parsing)
21
- ".rs", # Rust (fallback parsing)
22
- ".php", # PHP (fallback parsing)
23
- ".rb", # Ruby (fallback parsing)
24
- ".swift", # Swift (fallback parsing)
25
- ".kt", # Kotlin (fallback parsing)
26
- ".scala", # Scala (fallback parsing)
27
- ".sh", # Shell scripts (fallback parsing)
28
- ".bash", # Bash scripts (fallback parsing)
29
- ".zsh", # Zsh scripts (fallback parsing)
7
+ ".py", # Python (fully supported)
8
+ ".js", # JavaScript (fully supported)
9
+ ".ts", # TypeScript (fully supported)
10
+ ".jsx", # React JSX (fully supported)
11
+ ".tsx", # React TSX (fully supported)
12
+ ".mjs", # ES6 modules (fully supported)
13
+ ".java", # Java (fallback parsing)
14
+ ".cpp", # C++ (fallback parsing)
15
+ ".c", # C (fallback parsing)
16
+ ".h", # C/C++ headers (fallback parsing)
17
+ ".hpp", # C++ headers (fallback parsing)
18
+ ".cs", # C# (fallback parsing)
19
+ ".go", # Go (fallback parsing)
20
+ ".rs", # Rust (fallback parsing)
21
+ ".php", # PHP (fallback parsing)
22
+ ".rb", # Ruby (fallback parsing)
23
+ ".swift", # Swift (fallback parsing)
24
+ ".kt", # Kotlin (fallback parsing)
25
+ ".scala", # Scala (fallback parsing)
26
+ ".sh", # Shell scripts (fallback parsing)
27
+ ".bash", # Bash scripts (fallback parsing)
28
+ ".zsh", # Zsh scripts (fallback parsing)
29
+ ".json", # JSON configuration files
30
+ ".md", # Markdown documentation
31
+ ".txt", # Plain text files
30
32
  ]
31
33
 
32
34
  # Language mappings for parsers
33
- LANGUAGE_MAPPINGS: Dict[str, str] = {
35
+ LANGUAGE_MAPPINGS: dict[str, str] = {
34
36
  ".py": "python",
35
37
  ".pyw": "python",
36
38
  ".js": "javascript",
@@ -54,27 +56,33 @@ LANGUAGE_MAPPINGS: Dict[str, str] = {
54
56
  ".sh": "bash",
55
57
  ".bash": "bash",
56
58
  ".zsh": "bash",
59
+ ".json": "json",
60
+ ".md": "markdown",
61
+ ".txt": "text",
57
62
  }
58
63
 
59
64
  # Default embedding models by use case
60
65
  DEFAULT_EMBEDDING_MODELS = {
61
- "code": "microsoft/codebert-base",
66
+ "code": "sentence-transformers/all-MiniLM-L6-v2", # Changed from microsoft/codebert-base which doesn't exist
62
67
  "multilingual": "sentence-transformers/all-MiniLM-L6-v2",
63
68
  "fast": "sentence-transformers/all-MiniLM-L12-v2",
64
- "precise": "microsoft/unixcoder-base",
69
+ "precise": "sentence-transformers/all-mpnet-base-v2", # Changed from microsoft/unixcoder-base
65
70
  }
66
71
 
67
72
  # Default similarity thresholds by language
68
73
  DEFAULT_SIMILARITY_THRESHOLDS = {
69
- "python": 0.75,
70
- "javascript": 0.70,
71
- "typescript": 0.70,
72
- "java": 0.75,
73
- "cpp": 0.70,
74
- "c": 0.70,
75
- "go": 0.75,
76
- "rust": 0.75,
77
- "default": 0.70,
74
+ "python": 0.3,
75
+ "javascript": 0.3,
76
+ "typescript": 0.3,
77
+ "java": 0.3,
78
+ "cpp": 0.3,
79
+ "c": 0.3,
80
+ "go": 0.3,
81
+ "rust": 0.3,
82
+ "json": 0.4, # JSON files may have more structural similarity
83
+ "markdown": 0.3, # Markdown documentation
84
+ "text": 0.3, # Plain text files
85
+ "default": 0.3,
78
86
  }
79
87
 
80
88
  # Default chunk sizes by language (in tokens)
@@ -87,6 +95,9 @@ DEFAULT_CHUNK_SIZES = {
87
95
  "c": 384,
88
96
  "go": 512,
89
97
  "rust": 512,
98
+ "json": 256, # JSON files are often smaller and more structured
99
+ "markdown": 512, # Markdown documentation can be chunked normally
100
+ "text": 384, # Plain text files with paragraph-based chunking
90
101
  "default": 512,
91
102
  }
92
103
 
@@ -145,6 +156,7 @@ DEFAULT_IGNORE_FILES = [
145
156
  "*.lock",
146
157
  ]
147
158
 
159
+
148
160
  def get_default_config_path(project_root: Path) -> Path:
149
161
  """Get the default configuration file path for a project."""
150
162
  return project_root / ".mcp-vector-search" / "config.json"
@@ -167,7 +179,9 @@ def get_language_from_extension(extension: str) -> str:
167
179
 
168
180
  def get_similarity_threshold(language: str) -> float:
169
181
  """Get the default similarity threshold for a language."""
170
- return DEFAULT_SIMILARITY_THRESHOLDS.get(language.lower(), DEFAULT_SIMILARITY_THRESHOLDS["default"])
182
+ return DEFAULT_SIMILARITY_THRESHOLDS.get(
183
+ language.lower(), DEFAULT_SIMILARITY_THRESHOLDS["default"]
184
+ )
171
185
 
172
186
 
173
187
  def get_chunk_size(language: str) -> int: