mcp-vector-search 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (35) hide show
  1. mcp_vector_search/__init__.py +9 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/config.py +303 -0
  5. mcp_vector_search/cli/commands/index.py +304 -0
  6. mcp_vector_search/cli/commands/init.py +212 -0
  7. mcp_vector_search/cli/commands/search.py +395 -0
  8. mcp_vector_search/cli/commands/status.py +340 -0
  9. mcp_vector_search/cli/commands/watch.py +288 -0
  10. mcp_vector_search/cli/main.py +117 -0
  11. mcp_vector_search/cli/output.py +242 -0
  12. mcp_vector_search/config/__init__.py +1 -0
  13. mcp_vector_search/config/defaults.py +175 -0
  14. mcp_vector_search/config/settings.py +108 -0
  15. mcp_vector_search/core/__init__.py +1 -0
  16. mcp_vector_search/core/database.py +431 -0
  17. mcp_vector_search/core/embeddings.py +250 -0
  18. mcp_vector_search/core/exceptions.py +66 -0
  19. mcp_vector_search/core/indexer.py +310 -0
  20. mcp_vector_search/core/models.py +174 -0
  21. mcp_vector_search/core/project.py +304 -0
  22. mcp_vector_search/core/search.py +324 -0
  23. mcp_vector_search/core/watcher.py +320 -0
  24. mcp_vector_search/mcp/__init__.py +1 -0
  25. mcp_vector_search/parsers/__init__.py +1 -0
  26. mcp_vector_search/parsers/base.py +180 -0
  27. mcp_vector_search/parsers/javascript.py +238 -0
  28. mcp_vector_search/parsers/python.py +407 -0
  29. mcp_vector_search/parsers/registry.py +187 -0
  30. mcp_vector_search/py.typed +1 -0
  31. mcp_vector_search-0.0.3.dist-info/METADATA +333 -0
  32. mcp_vector_search-0.0.3.dist-info/RECORD +35 -0
  33. mcp_vector_search-0.0.3.dist-info/WHEEL +4 -0
  34. mcp_vector_search-0.0.3.dist-info/entry_points.txt +2 -0
  35. mcp_vector_search-0.0.3.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,9 @@
1
+ """MCP Vector Search - CLI-first semantic code search with MCP integration."""
2
+
3
+ __version__ = "0.0.3"
4
+ __author__ = "Robert Matsuoka"
5
+ __email__ = "bobmatnyc@gmail.com"
6
+
7
+ from .core.exceptions import MCPVectorSearchError
8
+
9
+ __all__ = ["MCPVectorSearchError", "__version__"]
@@ -0,0 +1 @@
1
+ """CLI module for MCP Vector Search."""
@@ -0,0 +1 @@
1
+ """CLI commands for MCP Vector Search."""
@@ -0,0 +1,303 @@
1
+ """Config command for MCP Vector Search CLI."""
2
+
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ import typer
7
+ from loguru import logger
8
+
9
+ from ...core.exceptions import ConfigurationError, ProjectNotFoundError
10
+ from ...core.project import ProjectManager
11
+ from ..output import (
12
+ console,
13
+ print_config,
14
+ print_error,
15
+ print_info,
16
+ print_json,
17
+ print_success,
18
+ )
19
+
20
+ # Create config subcommand app
21
+ config_app = typer.Typer(help="Manage project configuration")
22
+
23
+
24
@config_app.command()
def show(
    ctx: typer.Context,
    json_output: bool = typer.Option(False, "--json", help="Output configuration in JSON format"),
) -> None:
    """Show current project configuration."""
    try:
        # Fall back to the current directory when the context carries no root.
        root = ctx.obj.get("project_root") or Path.cwd()
        manager = ProjectManager(root)

        if not manager.is_initialized():
            raise ProjectNotFoundError(
                f"Project not initialized at {root}. Run 'mcp-vector-search init' first."
            )

        settings = manager.load_config().dict()

        # Rich rendering is the default; --json switches to the JSON printer.
        if not json_output:
            console.print("[bold blue]Project Configuration[/bold blue]\n")
            print_config(settings)
        else:
            print_json(settings, title="Project Configuration")

    except ProjectNotFoundError as exc:
        # Expected user error: show the message without logging it.
        print_error(str(exc))
        raise typer.Exit(1)
    except Exception as exc:
        failure = f"Failed to show configuration: {exc}"
        logger.error(failure)
        print_error(failure)
        raise typer.Exit(1)
59
+
60
+
61
@config_app.command()
def set(
    ctx: typer.Context,
    key: str = typer.Argument(..., help="Configuration key to set"),
    value: str = typer.Argument(..., help="Configuration value"),
) -> None:
    """Set a configuration value.

    Examples:
        mcp-vector-search config set similarity_threshold 0.8
        mcp-vector-search config set embedding_model microsoft/unixcoder-base
        mcp-vector-search config set cache_embeddings true
    """
    # NOTE: the function name shadows the ``set`` builtin inside this module;
    # it is kept because it is this block's public name.
    try:
        project_root = ctx.obj.get("project_root") or Path.cwd()
        project_manager = ProjectManager(project_root)

        if not project_manager.is_initialized():
            raise ProjectNotFoundError(
                f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
            )

        config = project_manager.load_config()

        # Parse and validate the raw CLI string (raises ConfigurationError).
        parsed_value = _parse_config_value(key, value)

        if not hasattr(config, key):
            print_error(f"Unknown configuration key: {key}")
            _show_available_keys()
            raise typer.Exit(1)

        # Update and persist the configuration.
        setattr(config, key, parsed_value)
        project_manager.save_config(config)
        print_success(f"Set {key} = {parsed_value}")

    except typer.Exit:
        # typer.Exit derives from RuntimeError, so without this clause the
        # generic handler below would catch our own exit request and print a
        # spurious "Failed to set configuration: 1" message.
        raise
    except (ProjectNotFoundError, ConfigurationError) as e:
        print_error(str(e))
        raise typer.Exit(1)
    except Exception as e:
        logger.error(f"Failed to set configuration: {e}")
        print_error(f"Failed to set configuration: {e}")
        raise typer.Exit(1)
105
+
106
+
107
@config_app.command()
def get(
    ctx: typer.Context,
    key: str = typer.Argument(..., help="Configuration key to get"),
) -> None:
    """Get a specific configuration value."""
    try:
        project_root = ctx.obj.get("project_root") or Path.cwd()
        project_manager = ProjectManager(project_root)

        if not project_manager.is_initialized():
            raise ProjectNotFoundError(
                f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
            )

        config = project_manager.load_config()

        if not hasattr(config, key):
            print_error(f"Unknown configuration key: {key}")
            _show_available_keys()
            raise typer.Exit(1)

        value = getattr(config, key)
        console.print(f"[cyan]{key}[/cyan]: {value}")

    except typer.Exit:
        # typer.Exit derives from RuntimeError; re-raise it untouched so the
        # unknown-key path is not reported a second time as
        # "Failed to get configuration: 1" by the generic handler below.
        raise
    except ProjectNotFoundError as e:
        print_error(str(e))
        raise typer.Exit(1)
    except Exception as e:
        logger.error(f"Failed to get configuration: {e}")
        print_error(f"Failed to get configuration: {e}")
        raise typer.Exit(1)
139
+
140
+
141
@config_app.command()
def reset(
    ctx: typer.Context,
    key: Optional[str] = typer.Argument(None, help="Configuration key to reset (resets all if not specified)"),
    confirm: bool = typer.Option(
        False,
        "--yes",
        "-y",
        help="Skip confirmation prompt",
    ),
) -> None:
    """Reset configuration to defaults."""
    try:
        project_root = ctx.obj.get("project_root") or Path.cwd()
        project_manager = ProjectManager(project_root)

        if not project_manager.is_initialized():
            raise ProjectNotFoundError(
                f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
            )

        if not confirm:
            from ..output import confirm_action

            if key:
                message = f"Reset '{key}' to default value?"
            else:
                message = "Reset all configuration to defaults?"

            if not confirm_action(message, default=False):
                print_info("Reset cancelled")
                # A cancelled reset is a clean exit, not a failure.
                raise typer.Exit(0)

        if key:
            # Reset a single key.
            config = project_manager.load_config()

            if not hasattr(config, key):
                print_error(f"Unknown configuration key: {key}")
                raise typer.Exit(1)

            default_value = _get_default_value(key)
            if default_value == "":
                # _get_default_value falls back to "" for keys it has no
                # default for; writing that into the config would blank out
                # attributes that exist but have no registered default.
                print_error(f"No default value is defined for configuration key: {key}")
                raise typer.Exit(1)

            setattr(config, key, default_value)
            project_manager.save_config(config)
            print_success(f"Reset {key} to default value: {default_value}")
        else:
            # Reset everything by re-initializing the project with defaults.
            from ...config.defaults import DEFAULT_EMBEDDING_MODELS, DEFAULT_FILE_EXTENSIONS

            project_manager.initialize(
                file_extensions=DEFAULT_FILE_EXTENSIONS,
                embedding_model=DEFAULT_EMBEDDING_MODELS["code"],
                similarity_threshold=0.75,
                force=True,
            )
            print_success("Reset all configuration to defaults")

    except typer.Exit:
        # typer.Exit derives from RuntimeError. Without this clause the
        # generic handler below would turn "Reset cancelled" (exit code 0)
        # into "Failed to reset configuration: 0" with exit code 1.
        raise
    except (ProjectNotFoundError, ConfigurationError) as e:
        print_error(str(e))
        raise typer.Exit(1)
    except Exception as e:
        logger.error(f"Failed to reset configuration: {e}")
        print_error(f"Failed to reset configuration: {e}")
        raise typer.Exit(1)
204
+
205
+
206
@config_app.command(name="list-keys")
def list_keys() -> None:
    """List all available configuration keys."""
    # Thin CLI wrapper: the shared helper does the actual printing.
    _show_available_keys()
210
+
211
+
212
+ def _parse_config_value(key: str, value: str):
213
+ """Parse configuration value based on key type."""
214
+ # Boolean values
215
+ if key in ["cache_embeddings", "watch_files"]:
216
+ return value.lower() in ("true", "yes", "1", "on")
217
+
218
+ # Float values
219
+ if key in ["similarity_threshold"]:
220
+ try:
221
+ parsed = float(value)
222
+ if key == "similarity_threshold" and not (0.0 <= parsed <= 1.0):
223
+ raise ValueError("Similarity threshold must be between 0.0 and 1.0")
224
+ return parsed
225
+ except ValueError as e:
226
+ raise ConfigurationError(f"Invalid float value for {key}: {value}") from e
227
+
228
+ # Integer values
229
+ if key in ["max_chunk_size", "max_cache_size"]:
230
+ try:
231
+ parsed = int(value)
232
+ if parsed <= 0:
233
+ raise ValueError("Value must be positive")
234
+ return parsed
235
+ except ValueError as e:
236
+ raise ConfigurationError(f"Invalid integer value for {key}: {value}") from e
237
+
238
+ # List values
239
+ if key in ["file_extensions", "languages"]:
240
+ if value.startswith("[") and value.endswith("]"):
241
+ # JSON-style list
242
+ import json
243
+ try:
244
+ return json.loads(value)
245
+ except json.JSONDecodeError as e:
246
+ raise ConfigurationError(f"Invalid JSON list for {key}: {value}") from e
247
+ else:
248
+ # Comma-separated list
249
+ items = [item.strip() for item in value.split(",")]
250
+ if key == "file_extensions":
251
+ # Ensure extensions start with dot
252
+ items = [ext if ext.startswith(".") else f".{ext}" for ext in items]
253
+ return items
254
+
255
+ # Path values
256
+ if key in ["project_root", "index_path"]:
257
+ return Path(value)
258
+
259
+ # String values (default)
260
+ return value
261
+
262
+
263
def _get_default_value(key: str):
    """Return the built-in default for ``key``, or ``""`` when none is registered."""
    from ...config.defaults import DEFAULT_EMBEDDING_MODELS, DEFAULT_FILE_EXTENSIONS

    # Built fresh on every call, so the mutable ``languages`` default is
    # never shared between callers.
    known_defaults = dict(
        file_extensions=DEFAULT_FILE_EXTENSIONS,
        embedding_model=DEFAULT_EMBEDDING_MODELS["code"],
        similarity_threshold=0.75,
        max_chunk_size=512,
        languages=[],
        watch_files=False,
        cache_embeddings=True,
        max_cache_size=1000,
    )

    try:
        return known_defaults[key]
    except KeyError:
        # Keys without a registered default fall back to an empty string.
        return ""
279
+
280
+
281
def _show_available_keys() -> None:
    """Print every supported configuration key with its type and description."""
    console.print("\n[bold blue]Available Configuration Keys:[/bold blue]")

    # One (name, kind, summary) row per key, in display order.
    rows = (
        ("file_extensions", "list", "List of file extensions to index"),
        ("embedding_model", "string", "Embedding model name"),
        ("similarity_threshold", "float", "Similarity threshold (0.0-1.0)"),
        ("max_chunk_size", "integer", "Maximum chunk size in tokens"),
        ("languages", "list", "Detected programming languages"),
        ("watch_files", "boolean", "Enable file watching"),
        ("cache_embeddings", "boolean", "Enable embedding caching"),
        ("max_cache_size", "integer", "Maximum cache size"),
    )

    for name, kind, summary in rows:
        console.print(f"  [cyan]{name}[/cyan] ({kind}): {summary}")

    console.print("\n[dim]Use 'mcp-vector-search config set <key> <value>' to change values[/dim]")
300
+
301
+
302
+ if __name__ == "__main__":
303
+ config_app()
@@ -0,0 +1,304 @@
1
+ """Index command for MCP Vector Search CLI."""
2
+
3
+ import asyncio
4
+ from pathlib import Path
5
+ from typing import List, Optional
6
+
7
+ import typer
8
+ from loguru import logger
9
+
10
+ from ...config.defaults import get_default_cache_path
11
+ from ...core.database import ChromaVectorDatabase
12
+ from ...core.embeddings import create_embedding_function, BatchEmbeddingProcessor
13
+ from ...core.exceptions import ProjectNotFoundError
14
+ from ...core.indexer import SemanticIndexer
15
+ from ...core.project import ProjectManager
16
+ from ..output import (
17
+ console,
18
+ create_progress,
19
+ print_error,
20
+ print_index_stats,
21
+ print_info,
22
+ print_success,
23
+ )
24
+
25
+ # Create index subcommand app
26
+ index_app = typer.Typer(help="Index codebase for semantic search")
27
+
28
+
29
@index_app.command()
def main(
    ctx: typer.Context,
    watch: bool = typer.Option(
        False,
        "--watch",
        "-w",
        help="Watch for file changes and update index incrementally",
    ),
    incremental: bool = typer.Option(
        True,
        "--incremental/--full",
        help="Use incremental indexing (skip unchanged files)",
    ),
    extensions: Optional[str] = typer.Option(
        None,
        "--extensions",
        "-e",
        help="Override file extensions to index (comma-separated)",
    ),
    force: bool = typer.Option(
        False,
        "--force",
        "-f",
        help="Force reindexing of all files",
    ),
    batch_size: int = typer.Option(
        32,
        "--batch-size",
        "-b",
        help="Batch size for embedding generation",
        min=1,
        max=128,
    ),
) -> None:
    """Index your codebase for semantic search.

    This command parses your code files using Tree-sitter, generates embeddings
    using the configured model, and stores them in ChromaDB for fast semantic search.

    Examples:
        mcp-vector-search index
        mcp-vector-search index --force --extensions .py,.js
        mcp-vector-search index --watch
    """
    try:
        # Fall back to the current directory when the context carries no root.
        root = ctx.obj.get("project_root") or Path.cwd()

        # Build the coroutine first, then drive it to completion synchronously.
        indexing_job = run_indexing(
            project_root=root,
            watch=watch,
            incremental=incremental,
            extensions=extensions,
            force_reindex=force,
            batch_size=batch_size,
            show_progress=True,
        )
        asyncio.run(indexing_job)

    except KeyboardInterrupt:
        # Ctrl+C is a normal way to stop indexing, so exit cleanly.
        print_info("Indexing interrupted by user")
        raise typer.Exit(0)
    except Exception as exc:
        failure = f"Indexing failed: {exc}"
        logger.error(failure)
        print_error(failure)
        raise typer.Exit(1)
95
+
96
+
97
async def run_indexing(
    project_root: Path,
    watch: bool = False,
    incremental: bool = True,
    extensions: Optional[str] = None,
    force_reindex: bool = False,
    batch_size: int = 32,
    show_progress: bool = True,
) -> None:
    """Load the project configuration, wire up the indexer and run it.

    Args:
        project_root: Root directory of the project to index.
        watch: Run watch mode instead of a one-off batch index.
        incremental: Accepted but not read by this function.
        extensions: Optional comma-separated override for the configured
            file extensions; a leading dot is added where missing.
        force_reindex: Forwarded to batch indexing.
        batch_size: Accepted but not read by this function.
        show_progress: Forwarded to the selected indexing mode.

    Raises:
        ProjectNotFoundError: If the project has not been initialized.
    """
    # NOTE(review): ``incremental`` and ``batch_size`` are never used below -
    # confirm whether they should be forwarded to the indexer.
    manager = ProjectManager(project_root)
    if not manager.is_initialized():
        raise ProjectNotFoundError(
            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
        )

    config = manager.load_config()

    # Use the configured extensions unless an override was supplied.
    if extensions:
        stripped = (raw.strip() for raw in extensions.split(","))
        file_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in stripped]
    else:
        file_extensions = config.file_extensions

    print_info(f"Indexing project: {project_root}")
    print_info(f"File extensions: {', '.join(file_extensions)}")
    print_info(f"Embedding model: {config.embedding_model}")

    # The on-disk embedding cache is only set up when enabled in the config.
    cache_dir = None
    if config.cache_embeddings:
        cache_dir = get_default_cache_path(project_root)
    embedding_function, _cache = create_embedding_function(
        model_name=config.embedding_model,
        cache_dir=cache_dir,
        cache_size=config.max_cache_size,
    )

    database = ChromaVectorDatabase(
        persist_directory=config.index_path,
        embedding_function=embedding_function,
    )
    indexer = SemanticIndexer(
        database=database,
        project_root=project_root,
        file_extensions=file_extensions,
    )

    try:
        async with database:
            if not watch:
                await _run_batch_indexing(indexer, force_reindex, show_progress)
            else:
                await _run_watch_mode(indexer, show_progress)
    except Exception as exc:
        # Log here for context, then let the caller decide how to report it.
        logger.error(f"Indexing error: {exc}")
        raise
158
+
159
+
160
async def _run_batch_indexing(
    indexer: SemanticIndexer,
    force_reindex: bool,
    show_progress: bool,
) -> None:
    """Index the whole project once, then print the count and index statistics."""
    if not show_progress:
        total_indexed = await indexer.index_project(
            force_reindex=force_reindex,
            show_progress=show_progress,
        )
    else:
        with create_progress() as bar:
            job = bar.add_task("Indexing files...", total=None)

            # Progress is rendered here, so the indexer's own display is off.
            total_indexed = await indexer.index_project(
                force_reindex=force_reindex,
                show_progress=False,
            )

            # Mark the task as complete once the final count is known.
            bar.update(job, completed=total_indexed, total=total_indexed)

    print_success(f"Indexed {total_indexed} files")

    # Finish with a summary of the index.
    print_index_stats(await indexer.get_indexing_stats())
188
+
189
+
190
async def _run_watch_mode(indexer: SemanticIndexer, show_progress: bool) -> None:
    """Placeholder for watch mode: report that it is unavailable, then raise.

    Raises:
        NotImplementedError: Always; watch mode is not implemented yet.
    """
    print_info("Starting watch mode - press Ctrl+C to stop")

    # TODO: monitor file changes (e.g. with the watchdog library) and call
    # indexer.reindex_file() for each changed file.
    print_error("Watch mode not yet implemented")

    raise NotImplementedError("Watch mode will be implemented in Phase 1B")
200
+
201
+
202
@index_app.command(name="reindex")
def reindex_file(
    ctx: typer.Context,
    file_path: Path = typer.Argument(
        ...,
        help="File to reindex",
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
    ),
) -> None:
    """Reindex a specific file."""
    try:
        # Fall back to the current directory when the context carries no root.
        root = ctx.obj.get("project_root") or Path.cwd()
        reindex_job = _reindex_single_file(root, file_path)
        asyncio.run(reindex_job)
    except Exception as exc:
        failure = f"Reindexing failed: {exc}"
        logger.error(failure)
        print_error(failure)
        raise typer.Exit(1)
224
+
225
+
226
async def _reindex_single_file(project_root: Path, file_path: Path) -> None:
    """Reindex a single file and report the outcome.

    Args:
        project_root: Root directory of the initialized project.
        file_path: File whose index entries should be rebuilt.

    Raises:
        ProjectNotFoundError: If the project has not been initialized.
    """
    # Same initialization guard as run_indexing, so an uninitialized project
    # yields the same actionable message here.
    project_manager = ProjectManager(project_root)
    if not project_manager.is_initialized():
        raise ProjectNotFoundError(
            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
        )

    config = project_manager.load_config()

    # Build the embedding function the same way run_indexing does, including
    # the configured cache size.
    cache_dir = get_default_cache_path(project_root) if config.cache_embeddings else None
    embedding_function, _cache = create_embedding_function(
        model_name=config.embedding_model,
        cache_dir=cache_dir,
        cache_size=config.max_cache_size,
    )

    database = ChromaVectorDatabase(
        persist_directory=config.index_path,
        embedding_function=embedding_function,
    )

    indexer = SemanticIndexer(
        database=database,
        project_root=project_root,
        file_extensions=config.file_extensions,
    )

    async with database:
        success = await indexer.reindex_file(file_path)

        if success:
            print_success(f"Reindexed: {file_path}")
        else:
            print_error(f"Failed to reindex: {file_path}")
256
+
257
+
258
@index_app.command("clean")
def clean_index(
    ctx: typer.Context,
    confirm: bool = typer.Option(
        False,
        "--yes",
        "-y",
        help="Skip confirmation prompt",
    ),
) -> None:
    """Clean the search index (remove all indexed data)."""
    try:
        project_root = ctx.obj.get("project_root") or Path.cwd()

        if not confirm:
            from ..output import confirm_action

            if not confirm_action("This will delete all indexed data. Continue?", default=False):
                print_info("Clean operation cancelled")
                # Declining the prompt is a clean exit, not a failure.
                raise typer.Exit(0)

        asyncio.run(_clean_index(project_root))

    except typer.Exit:
        # typer.Exit derives from RuntimeError. Without this clause the
        # handler below would report a cancelled clean as "Clean failed: 0"
        # and exit with status 1.
        raise
    except Exception as e:
        logger.error(f"Clean failed: {e}")
        print_error(f"Clean failed: {e}")
        raise typer.Exit(1)
284
+
285
+
286
async def _clean_index(project_root: Path) -> None:
    """Remove all indexed data for the project.

    Args:
        project_root: Root directory of the initialized project.

    Raises:
        ProjectNotFoundError: If the project has not been initialized.
    """
    # Same initialization guard as run_indexing, so cleaning an uninitialized
    # project yields the same actionable message.
    project_manager = ProjectManager(project_root)
    if not project_manager.is_initialized():
        raise ProjectNotFoundError(
            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
        )

    config = project_manager.load_config()

    # The database constructor takes an embedding function, so one is built
    # here even though this function only calls reset().
    embedding_function, _ = create_embedding_function(config.embedding_model)
    database = ChromaVectorDatabase(
        persist_directory=config.index_path,
        embedding_function=embedding_function,
    )

    async with database:
        await database.reset()
        print_success("Index cleaned successfully")
301
+
302
+
303
+ if __name__ == "__main__":
304
+ index_app()