mcp-vector-search 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (35) hide show
  1. mcp_vector_search/__init__.py +9 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/config.py +303 -0
  5. mcp_vector_search/cli/commands/index.py +304 -0
  6. mcp_vector_search/cli/commands/init.py +212 -0
  7. mcp_vector_search/cli/commands/search.py +395 -0
  8. mcp_vector_search/cli/commands/status.py +340 -0
  9. mcp_vector_search/cli/commands/watch.py +288 -0
  10. mcp_vector_search/cli/main.py +117 -0
  11. mcp_vector_search/cli/output.py +242 -0
  12. mcp_vector_search/config/__init__.py +1 -0
  13. mcp_vector_search/config/defaults.py +175 -0
  14. mcp_vector_search/config/settings.py +108 -0
  15. mcp_vector_search/core/__init__.py +1 -0
  16. mcp_vector_search/core/database.py +431 -0
  17. mcp_vector_search/core/embeddings.py +250 -0
  18. mcp_vector_search/core/exceptions.py +66 -0
  19. mcp_vector_search/core/indexer.py +310 -0
  20. mcp_vector_search/core/models.py +174 -0
  21. mcp_vector_search/core/project.py +304 -0
  22. mcp_vector_search/core/search.py +324 -0
  23. mcp_vector_search/core/watcher.py +320 -0
  24. mcp_vector_search/mcp/__init__.py +1 -0
  25. mcp_vector_search/parsers/__init__.py +1 -0
  26. mcp_vector_search/parsers/base.py +180 -0
  27. mcp_vector_search/parsers/javascript.py +238 -0
  28. mcp_vector_search/parsers/python.py +407 -0
  29. mcp_vector_search/parsers/registry.py +187 -0
  30. mcp_vector_search/py.typed +1 -0
  31. mcp_vector_search-0.0.3.dist-info/METADATA +333 -0
  32. mcp_vector_search-0.0.3.dist-info/RECORD +35 -0
  33. mcp_vector_search-0.0.3.dist-info/WHEEL +4 -0
  34. mcp_vector_search-0.0.3.dist-info/entry_points.txt +2 -0
  35. mcp_vector_search-0.0.3.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,212 @@
1
+ """Init command for MCP Vector Search CLI."""
2
+
3
+ from pathlib import Path
4
+ from typing import List, Optional
5
+
6
+ import typer
7
+ from loguru import logger
8
+
9
+ from ...config.defaults import DEFAULT_EMBEDDING_MODELS, DEFAULT_FILE_EXTENSIONS
10
+ from ...core.exceptions import ProjectInitializationError
11
+ from ...core.project import ProjectManager
12
+ from ..output import (
13
+ confirm_action,
14
+ console,
15
+ print_error,
16
+ print_info,
17
+ print_project_info,
18
+ print_success,
19
+ )
20
+
21
# Typer sub-application that groups the `init` command and its helpers
# (`check`, `models`) defined in this module.
init_app = typer.Typer(
    help="Initialize project for semantic search",
)
23
+
24
+
25
@init_app.command()
def main(
    ctx: typer.Context,
    config_file: Optional[Path] = typer.Option(
        None,
        "--config",
        "-c",
        help="Configuration file to use",
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
    ),
    extensions: Optional[str] = typer.Option(
        None,
        "--extensions",
        "-e",
        help="Comma-separated list of file extensions to index (e.g., '.py,.js,.ts')",
    ),
    embedding_model: str = typer.Option(
        DEFAULT_EMBEDDING_MODELS["code"],
        "--embedding-model",
        "-m",
        help="Embedding model to use for semantic search",
    ),
    similarity_threshold: float = typer.Option(
        0.75,
        "--similarity-threshold",
        "-s",
        help="Similarity threshold for search results (0.0 to 1.0)",
        min=0.0,
        max=1.0,
    ),
    force: bool = typer.Option(
        False,
        "--force",
        "-f",
        help="Force re-initialization if project is already initialized",
    ),
    auto_index: bool = typer.Option(
        False,
        "--auto-index",
        help="Automatically start indexing after initialization",
    ),
) -> None:
    """Initialize a project for semantic code search.

    This command sets up the necessary configuration and directory structure
    for MCP Vector Search in your project. It will:

    - Create a .mcp-vector-search directory for storing the index and configuration
    - Detect programming languages in your project
    - Set up default configuration based on your project structure
    - Optionally start indexing your codebase

    Examples:
        mcp-vector-search init
        mcp-vector-search init --extensions .py,.js,.ts --auto-index
        mcp-vector-search init --embedding-model microsoft/unixcoder-base --force
    """
    # NOTE(review): `config_file` is accepted on the command line but is not
    # read anywhere in this function.
    try:
        # `ctx.obj` is None when this sub-app is launched on its own (see the
        # `__main__` guard in this module), so guard the lookup before falling
        # back to the current working directory.
        project_root = (ctx.obj or {}).get("project_root") or Path.cwd()

        print_info(f"Initializing project at: {project_root}")

        project_manager = ProjectManager(project_root)

        # Refuse to overwrite an existing setup unless --force was given.
        if project_manager.is_initialized() and not force:
            print_error("Project is already initialized")
            print_info("Use --force to re-initialize or run 'mcp-vector-search status' to see current configuration")
            raise typer.Exit(1)

        # Parse the comma-separated extension list. Blank entries (e.g. from a
        # trailing comma) are dropped so they do not turn into a bare "."
        # extension; every remaining entry is normalized to start with a dot.
        file_extensions = DEFAULT_FILE_EXTENSIONS
        if extensions:
            stripped = (ext.strip() for ext in extensions.split(","))
            parsed = [
                ext if ext.startswith(".") else f".{ext}"
                for ext in stripped
                if ext and ext != "."
            ]
            if parsed:
                file_extensions = parsed
            else:
                print_info("No valid file extensions given; using defaults")

        # Show what will be initialized
        console.print("\n[bold blue]Initialization Settings:[/bold blue]")
        console.print(f" Project Root: {project_root}")
        console.print(f" File Extensions: {', '.join(file_extensions)}")
        console.print(f" Embedding Model: {embedding_model}")
        console.print(f" Similarity Threshold: {similarity_threshold}")

        # --force doubles as "don't ask": the prompt is skipped when it is set.
        if not force and not confirm_action("\nProceed with initialization?", default=True):
            print_info("Initialization cancelled")
            raise typer.Exit(0)

        console.print("\n[bold]Initializing project...[/bold]")

        project_manager.initialize(
            file_extensions=file_extensions,
            embedding_model=embedding_model,
            similarity_threshold=similarity_threshold,
            force=force,
        )

        print_success("Project initialized successfully!")

        # Show project information
        console.print()
        project_info = project_manager.get_project_info()
        print_project_info(project_info)

        # Offer to start indexing
        if auto_index or confirm_action("\nStart indexing your codebase now?", default=True):
            console.print("\n[bold]Starting indexing...[/bold]")

            # Imported here rather than at module top to avoid circular imports.
            import asyncio
            from .index import run_indexing

            # Indexing is best-effort: a failure is reported but does not undo
            # or fail the initialization that already succeeded.
            try:
                asyncio.run(run_indexing(
                    project_root=project_root,
                    force_reindex=False,
                    show_progress=True,
                ))
                print_success("Indexing completed!")
            except Exception as e:
                print_error(f"Indexing failed: {e}")
                print_info("You can run 'mcp-vector-search index' later to index your codebase")
        else:
            print_info("Run 'mcp-vector-search index' to index your codebase")

        # Show next steps
        console.print("\n[bold green]Next Steps:[/bold green]")
        console.print(" 1. Run [code]mcp-vector-search index[/code] to index your codebase (if not done)")
        console.print(" 2. Run [code]mcp-vector-search search 'your query'[/code] to search your code")
        console.print(" 3. Run [code]mcp-vector-search status[/code] to check indexing status")

    except (typer.Exit, typer.Abort):
        # Exit/Abort are control-flow signals that derive from RuntimeError.
        # Without this clause the generic handler below would catch them,
        # print a bogus "Unexpected error" and turn a clean exit code 0
        # (cancelled by the user) into 1.
        raise
    except ProjectInitializationError as e:
        print_error(f"Initialization failed: {e}")
        raise typer.Exit(1)
    except Exception as e:
        logger.error(f"Unexpected error during initialization: {e}")
        print_error(f"Unexpected error: {e}")
        raise typer.Exit(1)
174
+
175
+
176
@init_app.command("check")
def check_initialization(ctx: typer.Context) -> None:
    """Check if the current project is initialized for MCP Vector Search."""
    try:
        # `ctx.obj` is None when this sub-app is launched on its own, so guard
        # the lookup before falling back to the current working directory.
        project_root = (ctx.obj or {}).get("project_root") or Path.cwd()
        project_manager = ProjectManager(project_root)

        if not project_manager.is_initialized():
            print_error(f"Project is not initialized at {project_root}")
            print_info("Run 'mcp-vector-search init' to initialize the project")
            raise typer.Exit(1)

        print_success(f"Project is initialized at {project_root}")

        # Show project info
        project_info = project_manager.get_project_info()
        print_project_info(project_info)

    except (typer.Exit, typer.Abort):
        # Exit/Abort derive from RuntimeError; re-raise them untouched so the
        # generic handler below does not log them and print an empty
        # "Error:" line for what is a deliberate exit.
        raise
    except Exception as e:
        logger.error(f"Error checking initialization: {e}")
        print_error(f"Error: {e}")
        raise typer.Exit(1)
198
+
199
+
200
@init_app.command("models")
def list_embedding_models() -> None:
    """List available embedding models."""
    console.print("[bold blue]Available Embedding Models:[/bold blue]\n")

    # One line per entry of DEFAULT_EMBEDDING_MODELS: title-cased key, then value.
    for category in DEFAULT_EMBEDDING_MODELS:
        label = category.title()
        model = DEFAULT_EMBEDDING_MODELS[category]
        console.print(f"[cyan]{label}:[/cyan] {model}")

    footer = "\n[dim]You can also use any model from Hugging Face that's compatible with sentence-transformers[/dim]"
    console.print(footer)
209
+
210
+
211
# Allow this command module to be executed directly as a script, in which case
# the `init` sub-application is run on its own.
if "__main__" == __name__:
    init_app()
@@ -0,0 +1,395 @@
1
+ """Search command for MCP Vector Search CLI."""
2
+
3
+ import asyncio
4
+ from pathlib import Path
5
+ from typing import List, Optional
6
+
7
+ import typer
8
+ from loguru import logger
9
+
10
+ from ...core.database import ChromaVectorDatabase
11
+ from ...core.embeddings import create_embedding_function
12
+ from ...core.exceptions import ProjectNotFoundError
13
+ from ...core.project import ProjectManager
14
+ from ...core.search import SemanticSearchEngine
15
+ from ..output import (
16
+ console,
17
+ print_error,
18
+ print_info,
19
+ print_search_results,
20
+ print_warning,
21
+ )
22
+
23
# Typer sub-application that groups the search commands defined in this
# module (the default search plus `similar` and `context`).
search_app = typer.Typer(
    help="Search code semantically",
)
25
+
26
+
27
@search_app.command()
def main(
    ctx: typer.Context,
    query: str = typer.Argument(..., help="Search query"),
    limit: int = typer.Option(
        10,
        "--limit",
        "-l",
        help="Maximum number of results to return",
        min=1,
        max=100,
    ),
    files: Optional[str] = typer.Option(
        None,
        "--files",
        "-f",
        help="Filter by file patterns (e.g., '*.py' or 'src/*.js')",
    ),
    language: Optional[str] = typer.Option(
        None,
        "--language",
        help="Filter by programming language",
    ),
    function_name: Optional[str] = typer.Option(
        None,
        "--function",
        help="Filter by function name",
    ),
    class_name: Optional[str] = typer.Option(
        None,
        "--class",
        help="Filter by class name",
    ),
    similarity_threshold: Optional[float] = typer.Option(
        None,
        "--threshold",
        "-t",
        help="Minimum similarity threshold (0.0 to 1.0)",
        min=0.0,
        max=1.0,
    ),
    no_content: bool = typer.Option(
        False,
        "--no-content",
        help="Don't show code content in results",
    ),
    json_output: bool = typer.Option(
        False,
        "--json",
        help="Output results in JSON format",
    ),
) -> None:
    """Search your codebase semantically.

    This command performs semantic search across your indexed codebase,
    finding code that is conceptually similar to your query even if it
    doesn't contain the exact keywords.

    Examples:
        mcp-vector-search search "authentication middleware"
        mcp-vector-search search "database connection" --language python
        mcp-vector-search search "error handling" --files "*.js" --limit 5
        mcp-vector-search search "user validation" --function validate --json
    """
    try:
        # `ctx.obj` is None when this sub-app is launched on its own (see the
        # `__main__` guard in this module), so guard the lookup before falling
        # back to the current working directory.
        project_root = (ctx.obj or {}).get("project_root") or Path.cwd()

        # The CLI flag is negative (--no-content); run_search takes the
        # positive form, hence the inversion.
        asyncio.run(run_search(
            project_root=project_root,
            query=query,
            limit=limit,
            files=files,
            language=language,
            function_name=function_name,
            class_name=class_name,
            similarity_threshold=similarity_threshold,
            show_content=not no_content,
            json_output=json_output,
        ))

    except Exception as e:
        # Any failure is logged, shown to the user, and mapped to exit code 1.
        logger.error(f"Search failed: {e}")
        print_error(f"Search failed: {e}")
        raise typer.Exit(1)
111
+
112
+
113
async def run_search(
    project_root: Path,
    query: str,
    limit: int = 10,
    files: Optional[str] = None,
    language: Optional[str] = None,
    function_name: Optional[str] = None,
    class_name: Optional[str] = None,
    similarity_threshold: Optional[float] = None,
    show_content: bool = True,
    json_output: bool = False,
) -> None:
    """Run a semantic search against the project's index and print the results.

    Args:
        project_root: Root directory of the project to search.
        query: Free-text search query.
        limit: Maximum number of results requested from the search engine.
        files: Optional file pattern, forwarded as the ``file_path`` filter.
        language: Optional value for the ``language`` filter.
        function_name: Optional value for the ``function_name`` filter.
        class_name: Optional value for the ``class_name`` filter.
        similarity_threshold: Explicit threshold; when ``None`` the value from
            the project configuration is used for the engine.
        show_content: Forwarded to the engine as ``include_context`` and to
            the printer as ``show_content``.
        json_output: Print results via ``print_json`` instead of the
            formatted result printer.

    Raises:
        ProjectNotFoundError: If the project has not been initialized.
    """
    # Load project configuration
    project_manager = ProjectManager(project_root)

    if not project_manager.is_initialized():
        raise ProjectNotFoundError(
            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
        )

    config = project_manager.load_config()

    # Setup database and search engine
    embedding_function, _ = create_embedding_function(config.embedding_model)
    database = ChromaVectorDatabase(
        persist_directory=config.index_path,
        embedding_function=embedding_function,
    )

    # Compare against None instead of using `or`: 0.0 is a valid explicit
    # threshold and must not silently fall back to the configured default.
    if similarity_threshold is None:
        engine_threshold = config.similarity_threshold
    else:
        engine_threshold = similarity_threshold

    search_engine = SemanticSearchEngine(
        database=database,
        project_root=project_root,
        similarity_threshold=engine_threshold,
    )

    # Build filters from whichever optional criteria were supplied.
    candidate_filters = (
        ("language", language),
        ("function_name", function_name),
        ("class_name", class_name),
        # Simple file pattern matching (could be enhanced)
        ("file_path", files),
    )
    filters = {key: value for key, value in candidate_filters if value}

    try:
        async with database:
            results = await search_engine.search(
                query=query,
                limit=limit,
                filters=filters or None,
                similarity_threshold=similarity_threshold,
                include_context=show_content,
            )

        if json_output:
            from ..output import print_json
            results_data = [result.to_dict() for result in results]
            print_json(results_data, title="Search Results")
        else:
            print_search_results(
                results=results,
                query=query,
                show_content=show_content,
            )

    except Exception as e:
        # Log here for context, then let the caller decide how to report it.
        logger.error(f"Search execution failed: {e}")
        raise
185
+
186
+
187
@search_app.command("similar")
def search_similar(
    ctx: typer.Context,
    file_path: Path = typer.Argument(
        ...,
        help="Reference file path",
        exists=True,
        file_okay=True,
        dir_okay=False,
        readable=True,
    ),
    function_name: Optional[str] = typer.Option(
        None,
        "--function",
        "-f",
        help="Specific function name to find similar code for",
    ),
    limit: int = typer.Option(
        10,
        "--limit",
        "-l",
        help="Maximum number of results",
        min=1,
        max=100,
    ),
    similarity_threshold: Optional[float] = typer.Option(
        None,
        "--threshold",
        "-t",
        help="Minimum similarity threshold",
        min=0.0,
        max=1.0,
    ),
    json_output: bool = typer.Option(
        False,
        "--json",
        help="Output results in JSON format",
    ),
) -> None:
    """Find code similar to a specific file or function.

    Examples:
        mcp-vector-search search similar src/auth.py
        mcp-vector-search search similar src/utils.py --function validate_email
    """
    try:
        # `ctx.obj` is None when this sub-app is launched on its own, so guard
        # the lookup before falling back to the current working directory.
        project_root = (ctx.obj or {}).get("project_root") or Path.cwd()

        asyncio.run(run_similar_search(
            project_root=project_root,
            file_path=file_path,
            function_name=function_name,
            limit=limit,
            similarity_threshold=similarity_threshold,
            json_output=json_output,
        ))

    except Exception as e:
        # Any failure is logged, shown to the user, and mapped to exit code 1.
        logger.error(f"Similar search failed: {e}")
        print_error(f"Similar search failed: {e}")
        raise typer.Exit(1)
248
+
249
+
250
async def run_similar_search(
    project_root: Path,
    file_path: Path,
    function_name: Optional[str] = None,
    limit: int = 10,
    similarity_threshold: Optional[float] = None,
    json_output: bool = False,
) -> None:
    """Find code similar to a reference file or function and print the results.

    Args:
        project_root: Root directory of the project to search.
        file_path: Reference file passed to the engine's ``search_similar``.
        function_name: Optional function within ``file_path`` to focus on.
        limit: Maximum number of results requested from the search engine.
        similarity_threshold: Explicit threshold; when ``None`` the value from
            the project configuration is used for the engine.
        json_output: Print results via ``print_json`` instead of the
            formatted result printer.

    Raises:
        ProjectNotFoundError: If the project has not been initialized.
    """
    project_manager = ProjectManager(project_root)

    # Same up-front check as run_search, so an uninitialized project yields an
    # actionable message instead of whatever load_config() would raise.
    if not project_manager.is_initialized():
        raise ProjectNotFoundError(
            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
        )

    config = project_manager.load_config()

    embedding_function, _ = create_embedding_function(config.embedding_model)
    database = ChromaVectorDatabase(
        persist_directory=config.index_path,
        embedding_function=embedding_function,
    )

    # Compare against None instead of using `or`: 0.0 is a valid explicit
    # threshold and must not silently fall back to the configured default.
    if similarity_threshold is None:
        engine_threshold = config.similarity_threshold
    else:
        engine_threshold = similarity_threshold

    search_engine = SemanticSearchEngine(
        database=database,
        project_root=project_root,
        similarity_threshold=engine_threshold,
    )

    async with database:
        results = await search_engine.search_similar(
            file_path=file_path,
            function_name=function_name,
            limit=limit,
            similarity_threshold=similarity_threshold,
        )

    if json_output:
        from ..output import print_json
        results_data = [result.to_dict() for result in results]
        print_json(results_data, title="Similar Code Results")
    else:
        # Human-readable description of what the results are similar to.
        query_desc = f"{file_path}"
        if function_name:
            query_desc += f" → {function_name}()"

        print_search_results(
            results=results,
            query=f"Similar to: {query_desc}",
            show_content=True,
        )
296
+
297
+
298
@search_app.command("context")
def search_context(
    ctx: typer.Context,
    description: str = typer.Argument(..., help="Context description"),
    focus: Optional[str] = typer.Option(
        None,
        "--focus",
        help="Comma-separated focus areas (e.g., 'security,authentication')",
    ),
    limit: int = typer.Option(
        10,
        "--limit",
        "-l",
        help="Maximum number of results",
        min=1,
        max=100,
    ),
    json_output: bool = typer.Option(
        False,
        "--json",
        help="Output results in JSON format",
    ),
) -> None:
    """Search for code based on contextual description.

    Examples:
        mcp-vector-search search context "implement rate limiting"
        mcp-vector-search search context "user authentication" --focus security,middleware
    """
    try:
        # `ctx.obj` is None when this sub-app is launched on its own, so guard
        # the lookup before falling back to the current working directory.
        project_root = (ctx.obj or {}).get("project_root") or Path.cwd()

        # Split the comma-separated list, dropping blank entries (e.g. from a
        # trailing comma) so empty strings are never forwarded as focus areas.
        focus_areas = None
        if focus:
            stripped = (area.strip() for area in focus.split(","))
            focus_areas = [area for area in stripped if area] or None

        asyncio.run(run_context_search(
            project_root=project_root,
            description=description,
            focus_areas=focus_areas,
            limit=limit,
            json_output=json_output,
        ))

    except Exception as e:
        # Any failure is logged, shown to the user, and mapped to exit code 1.
        logger.error(f"Context search failed: {e}")
        print_error(f"Context search failed: {e}")
        raise typer.Exit(1)
346
+
347
+
348
async def run_context_search(
    project_root: Path,
    description: str,
    focus_areas: Optional[List[str]] = None,
    limit: int = 10,
    json_output: bool = False,
) -> None:
    """Run a context-description search and print the results.

    Args:
        project_root: Root directory of the project to search.
        description: Context description passed to the engine's
            ``search_by_context``.
        focus_areas: Optional list of focus areas forwarded to the engine and
            echoed in the printed query description.
        limit: Maximum number of results requested from the search engine.
        json_output: Print results via ``print_json`` instead of the
            formatted result printer.

    Raises:
        ProjectNotFoundError: If the project has not been initialized.
    """
    project_manager = ProjectManager(project_root)

    # Same up-front check as run_search, so an uninitialized project yields an
    # actionable message instead of whatever load_config() would raise.
    if not project_manager.is_initialized():
        raise ProjectNotFoundError(
            f"Project not initialized at {project_root}. Run 'mcp-vector-search init' first."
        )

    config = project_manager.load_config()

    embedding_function, _ = create_embedding_function(config.embedding_model)
    database = ChromaVectorDatabase(
        persist_directory=config.index_path,
        embedding_function=embedding_function,
    )

    search_engine = SemanticSearchEngine(
        database=database,
        project_root=project_root,
        similarity_threshold=config.similarity_threshold,
    )

    async with database:
        results = await search_engine.search_by_context(
            context_description=description,
            focus_areas=focus_areas,
            limit=limit,
        )

    if json_output:
        from ..output import print_json
        results_data = [result.to_dict() for result in results]
        print_json(results_data, title="Context Search Results")
    else:
        # Human-readable description: the text plus any focus areas.
        query_desc = description
        if focus_areas:
            query_desc += f" (focus: {', '.join(focus_areas)})"

        print_search_results(
            results=results,
            query=query_desc,
            show_content=True,
        )
392
+
393
+
394
# Allow this command module to be executed directly as a script, in which case
# the search sub-application is run on its own.
if "__main__" == __name__:
    search_app()