mcp-vector-search 0.15.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic. Click here for more details.

Files changed (86)
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/chat.py +534 -0
  6. mcp_vector_search/cli/commands/config.py +393 -0
  7. mcp_vector_search/cli/commands/demo.py +358 -0
  8. mcp_vector_search/cli/commands/index.py +762 -0
  9. mcp_vector_search/cli/commands/init.py +658 -0
  10. mcp_vector_search/cli/commands/install.py +869 -0
  11. mcp_vector_search/cli/commands/install_old.py +700 -0
  12. mcp_vector_search/cli/commands/mcp.py +1254 -0
  13. mcp_vector_search/cli/commands/reset.py +393 -0
  14. mcp_vector_search/cli/commands/search.py +796 -0
  15. mcp_vector_search/cli/commands/setup.py +1133 -0
  16. mcp_vector_search/cli/commands/status.py +584 -0
  17. mcp_vector_search/cli/commands/uninstall.py +404 -0
  18. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  19. mcp_vector_search/cli/commands/visualize/cli.py +265 -0
  20. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  21. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  22. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  23. mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
  24. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  25. mcp_vector_search/cli/commands/visualize/server.py +201 -0
  26. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  27. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  28. mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
  29. mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
  30. mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
  31. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  32. mcp_vector_search/cli/commands/watch.py +287 -0
  33. mcp_vector_search/cli/didyoumean.py +520 -0
  34. mcp_vector_search/cli/export.py +320 -0
  35. mcp_vector_search/cli/history.py +295 -0
  36. mcp_vector_search/cli/interactive.py +342 -0
  37. mcp_vector_search/cli/main.py +484 -0
  38. mcp_vector_search/cli/output.py +414 -0
  39. mcp_vector_search/cli/suggestions.py +375 -0
  40. mcp_vector_search/config/__init__.py +1 -0
  41. mcp_vector_search/config/constants.py +24 -0
  42. mcp_vector_search/config/defaults.py +200 -0
  43. mcp_vector_search/config/settings.py +146 -0
  44. mcp_vector_search/core/__init__.py +1 -0
  45. mcp_vector_search/core/auto_indexer.py +298 -0
  46. mcp_vector_search/core/config_utils.py +394 -0
  47. mcp_vector_search/core/connection_pool.py +360 -0
  48. mcp_vector_search/core/database.py +1237 -0
  49. mcp_vector_search/core/directory_index.py +318 -0
  50. mcp_vector_search/core/embeddings.py +294 -0
  51. mcp_vector_search/core/exceptions.py +89 -0
  52. mcp_vector_search/core/factory.py +318 -0
  53. mcp_vector_search/core/git_hooks.py +345 -0
  54. mcp_vector_search/core/indexer.py +1002 -0
  55. mcp_vector_search/core/llm_client.py +453 -0
  56. mcp_vector_search/core/models.py +294 -0
  57. mcp_vector_search/core/project.py +350 -0
  58. mcp_vector_search/core/scheduler.py +330 -0
  59. mcp_vector_search/core/search.py +952 -0
  60. mcp_vector_search/core/watcher.py +322 -0
  61. mcp_vector_search/mcp/__init__.py +5 -0
  62. mcp_vector_search/mcp/__main__.py +25 -0
  63. mcp_vector_search/mcp/server.py +752 -0
  64. mcp_vector_search/parsers/__init__.py +8 -0
  65. mcp_vector_search/parsers/base.py +296 -0
  66. mcp_vector_search/parsers/dart.py +605 -0
  67. mcp_vector_search/parsers/html.py +413 -0
  68. mcp_vector_search/parsers/javascript.py +643 -0
  69. mcp_vector_search/parsers/php.py +694 -0
  70. mcp_vector_search/parsers/python.py +502 -0
  71. mcp_vector_search/parsers/registry.py +223 -0
  72. mcp_vector_search/parsers/ruby.py +678 -0
  73. mcp_vector_search/parsers/text.py +186 -0
  74. mcp_vector_search/parsers/utils.py +265 -0
  75. mcp_vector_search/py.typed +1 -0
  76. mcp_vector_search/utils/__init__.py +42 -0
  77. mcp_vector_search/utils/gitignore.py +250 -0
  78. mcp_vector_search/utils/gitignore_updater.py +212 -0
  79. mcp_vector_search/utils/monorepo.py +339 -0
  80. mcp_vector_search/utils/timing.py +338 -0
  81. mcp_vector_search/utils/version.py +47 -0
  82. mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
  83. mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
  84. mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
  85. mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
  86. mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,453 @@
1
+ """LLM client for intelligent code search using OpenAI or OpenRouter API."""
2
+
3
+ import os
4
+ import re
5
+ from typing import Any, Literal
6
+
7
+ import httpx
8
+ from loguru import logger
9
+
10
+ from .exceptions import SearchError
11
+
12
+ # Type alias for provider
13
+ LLMProvider = Literal["openai", "openrouter"]
14
+
15
+
16
+ class LLMClient:
17
+ """Client for LLM-powered intelligent search orchestration.
18
+
19
+ Supports both OpenAI and OpenRouter APIs:
20
+ 1. Generate multiple targeted search queries from natural language
21
+ 2. Analyze search results and select most relevant ones
22
+ 3. Provide contextual explanations for results
23
+
24
+ Provider Selection Priority:
25
+ 1. Explicit provider parameter
26
+ 2. Preferred provider from config
27
+ 3. Auto-detect: OpenAI if available, otherwise OpenRouter
28
+ """
29
+
30
+ # Default models for each provider (comparable performance/cost)
31
+ DEFAULT_MODELS = {
32
+ "openai": "gpt-4o-mini", # Fast, cheap, comparable to claude-3-haiku
33
+ "openrouter": "anthropic/claude-3-haiku",
34
+ }
35
+
36
+ # API endpoints
37
+ API_ENDPOINTS = {
38
+ "openai": "https://api.openai.com/v1/chat/completions",
39
+ "openrouter": "https://openrouter.ai/api/v1/chat/completions",
40
+ }
41
+
42
+ TIMEOUT_SECONDS = 30.0
43
+
44
+ def __init__(
45
+ self,
46
+ api_key: str | None = None,
47
+ model: str | None = None,
48
+ timeout: float = TIMEOUT_SECONDS,
49
+ provider: LLMProvider | None = None,
50
+ openai_api_key: str | None = None,
51
+ openrouter_api_key: str | None = None,
52
+ ) -> None:
53
+ """Initialize LLM client.
54
+
55
+ Args:
56
+ api_key: API key (deprecated, use provider-specific keys)
57
+ model: Model to use (defaults based on provider)
58
+ timeout: Request timeout in seconds
59
+ provider: Explicit provider ('openai' or 'openrouter')
60
+ openai_api_key: OpenAI API key (or use OPENAI_API_KEY env var)
61
+ openrouter_api_key: OpenRouter API key (or use OPENROUTER_API_KEY env var)
62
+
63
+ Raises:
64
+ ValueError: If no API key is found for any provider
65
+ """
66
+ # Get API keys from environment or parameters
67
+ self.openai_key = openai_api_key or os.environ.get("OPENAI_API_KEY")
68
+ self.openrouter_key = openrouter_api_key or os.environ.get("OPENROUTER_API_KEY")
69
+
70
+ # Support deprecated api_key parameter (assume OpenRouter for backward compatibility)
71
+ if api_key and not self.openrouter_key:
72
+ self.openrouter_key = api_key
73
+
74
+ # Determine which provider to use
75
+ if provider:
76
+ # Explicit provider specified
77
+ self.provider: LLMProvider = provider
78
+ if provider == "openai" and not self.openai_key:
79
+ raise ValueError(
80
+ "OpenAI provider specified but OPENAI_API_KEY not found. "
81
+ "Please set OPENAI_API_KEY environment variable."
82
+ )
83
+ elif provider == "openrouter" and not self.openrouter_key:
84
+ raise ValueError(
85
+ "OpenRouter provider specified but OPENROUTER_API_KEY not found. "
86
+ "Please set OPENROUTER_API_KEY environment variable."
87
+ )
88
+ else:
89
+ # Auto-detect provider (prefer OpenAI if both are available)
90
+ if self.openai_key:
91
+ self.provider = "openai"
92
+ elif self.openrouter_key:
93
+ self.provider = "openrouter"
94
+ else:
95
+ raise ValueError(
96
+ "No API key found. Please set OPENAI_API_KEY or OPENROUTER_API_KEY "
97
+ "environment variable, or pass openai_api_key or openrouter_api_key parameter."
98
+ )
99
+
100
+ # Set API key and endpoint based on provider
101
+ if self.provider == "openai":
102
+ self.api_key = self.openai_key
103
+ self.api_endpoint = self.API_ENDPOINTS["openai"]
104
+ self.model = model or os.environ.get(
105
+ "OPENAI_MODEL", self.DEFAULT_MODELS["openai"]
106
+ )
107
+ else:
108
+ self.api_key = self.openrouter_key
109
+ self.api_endpoint = self.API_ENDPOINTS["openrouter"]
110
+ self.model = model or os.environ.get(
111
+ "OPENROUTER_MODEL", self.DEFAULT_MODELS["openrouter"]
112
+ )
113
+
114
+ self.timeout = timeout
115
+
116
+ logger.debug(
117
+ f"Initialized LLM client with provider: {self.provider}, model: {self.model}"
118
+ )
119
+
120
+ async def generate_search_queries(
121
+ self, natural_language_query: str, limit: int = 3
122
+ ) -> list[str]:
123
+ """Generate targeted search queries from natural language.
124
+
125
+ Args:
126
+ natural_language_query: User's natural language query
127
+ limit: Maximum number of search queries to generate
128
+
129
+ Returns:
130
+ List of targeted search queries
131
+
132
+ Raises:
133
+ SearchError: If API call fails
134
+ """
135
+ system_prompt = """You are a code search expert. Your task is to convert natural language questions about code into targeted search queries.
136
+
137
+ Given a natural language query, generate {limit} specific search queries that will help find the relevant code.
138
+
139
+ Rules:
140
+ 1. Each query should target a different aspect of the question
141
+ 2. Use technical terms and identifiers when possible
142
+ 3. Keep queries concise (3-7 words each)
143
+ 4. Focus on code patterns, function names, class names, or concepts
144
+ 5. Return ONLY the search queries, one per line, no explanations
145
+
146
+ Example:
147
+ Input: "where is the similarity_threshold parameter set?"
148
+ Output:
149
+ similarity_threshold default value
150
+ similarity_threshold configuration
151
+ SemanticSearchEngine init threshold"""
152
+
153
+ user_prompt = f"""Natural language query: {natural_language_query}
154
+
155
+ Generate {limit} targeted search queries:"""
156
+
157
+ try:
158
+ messages = [
159
+ {"role": "system", "content": system_prompt.format(limit=limit)},
160
+ {"role": "user", "content": user_prompt},
161
+ ]
162
+
163
+ response = await self._chat_completion(messages)
164
+
165
+ # Parse queries from response
166
+ content = (
167
+ response.get("choices", [{}])[0].get("message", {}).get("content", "")
168
+ )
169
+ queries = [q.strip() for q in content.strip().split("\n") if q.strip()]
170
+
171
+ logger.debug(
172
+ f"Generated {len(queries)} search queries from: '{natural_language_query}'"
173
+ )
174
+
175
+ return queries[:limit]
176
+
177
+ except Exception as e:
178
+ logger.error(f"Failed to generate search queries: {e}")
179
+ raise SearchError(f"LLM query generation failed: {e}") from e
180
+
181
+ async def analyze_and_rank_results(
182
+ self,
183
+ original_query: str,
184
+ search_results: dict[str, list[Any]],
185
+ top_n: int = 5,
186
+ ) -> list[dict[str, Any]]:
187
+ """Analyze search results and select the most relevant ones.
188
+
189
+ Args:
190
+ original_query: Original natural language query
191
+ search_results: Dictionary mapping search queries to their results
192
+ top_n: Number of top results to return
193
+
194
+ Returns:
195
+ List of ranked results with explanations
196
+
197
+ Raises:
198
+ SearchError: If API call fails
199
+ """
200
+ # Format results for LLM analysis
201
+ results_summary = self._format_results_for_analysis(search_results)
202
+
203
+ system_prompt = """You are a code search expert. Your task is to analyze search results and identify the most relevant ones for answering a user's question.
204
+
205
+ Given:
206
+ 1. A natural language query
207
+ 2. Multiple search results from different queries
208
+
209
+ Select the top {top_n} most relevant results that best answer the user's question.
210
+
211
+ For each selected result, provide:
212
+ 1. Result identifier (e.g., "Query 1, Result 2")
213
+ 2. Relevance level: "High", "Medium", or "Low"
214
+ 3. Brief explanation (1-2 sentences) of why this result is relevant
215
+
216
+ Format your response as:
217
+ RESULT: [identifier]
218
+ RELEVANCE: [level]
219
+ EXPLANATION: [why this matches]
220
+
221
+ ---
222
+
223
+ Only include the top {top_n} results."""
224
+
225
+ user_prompt = f"""Original Question: {original_query}
226
+
227
+ Search Results:
228
+ {results_summary}
229
+
230
+ Select the top {top_n} most relevant results:"""
231
+
232
+ try:
233
+ messages = [
234
+ {"role": "system", "content": system_prompt.format(top_n=top_n)},
235
+ {"role": "user", "content": user_prompt},
236
+ ]
237
+
238
+ response = await self._chat_completion(messages)
239
+
240
+ # Parse LLM response
241
+ content = (
242
+ response.get("choices", [{}])[0].get("message", {}).get("content", "")
243
+ )
244
+
245
+ ranked_results = self._parse_ranking_response(
246
+ content, search_results, top_n
247
+ )
248
+
249
+ logger.debug(f"Ranked {len(ranked_results)} results from LLM analysis")
250
+
251
+ return ranked_results
252
+
253
+ except Exception as e:
254
+ logger.error(f"Failed to analyze results: {e}")
255
+ raise SearchError(f"LLM analysis failed: {e}") from e
256
+
257
+ async def _chat_completion(self, messages: list[dict[str, str]]) -> dict[str, Any]:
258
+ """Make chat completion request to OpenAI or OpenRouter API.
259
+
260
+ Args:
261
+ messages: List of message dictionaries with role and content
262
+
263
+ Returns:
264
+ API response dictionary
265
+
266
+ Raises:
267
+ SearchError: If API request fails
268
+ """
269
+ # Build headers based on provider
270
+ headers = {
271
+ "Authorization": f"Bearer {self.api_key}",
272
+ "Content-Type": "application/json",
273
+ }
274
+
275
+ # OpenRouter-specific headers
276
+ if self.provider == "openrouter":
277
+ headers["HTTP-Referer"] = "https://github.com/bobmatnyc/mcp-vector-search"
278
+ headers["X-Title"] = "MCP Vector Search"
279
+
280
+ payload = {
281
+ "model": self.model,
282
+ "messages": messages,
283
+ }
284
+
285
+ provider_name = self.provider.capitalize()
286
+
287
+ try:
288
+ async with httpx.AsyncClient(timeout=self.timeout) as client:
289
+ response = await client.post(
290
+ self.api_endpoint,
291
+ headers=headers,
292
+ json=payload,
293
+ )
294
+
295
+ response.raise_for_status()
296
+ return response.json()
297
+
298
+ except httpx.TimeoutException as e:
299
+ logger.error(f"{provider_name} API timeout after {self.timeout}s")
300
+ raise SearchError(
301
+ f"LLM request timed out after {self.timeout} seconds. "
302
+ "Try a simpler query or check your network connection."
303
+ ) from e
304
+
305
+ except httpx.HTTPStatusError as e:
306
+ status_code = e.response.status_code
307
+ error_msg = f"{provider_name} API error (HTTP {status_code})"
308
+
309
+ if status_code == 401:
310
+ env_var = (
311
+ "OPENAI_API_KEY"
312
+ if self.provider == "openai"
313
+ else "OPENROUTER_API_KEY"
314
+ )
315
+ error_msg = f"Invalid {provider_name} API key. Please check {env_var} environment variable."
316
+ elif status_code == 429:
317
+ error_msg = f"{provider_name} API rate limit exceeded. Please wait and try again."
318
+ elif status_code >= 500:
319
+ error_msg = f"{provider_name} API server error. Please try again later."
320
+
321
+ logger.error(error_msg)
322
+ raise SearchError(error_msg) from e
323
+
324
+ except Exception as e:
325
+ logger.error(f"{provider_name} API request failed: {e}")
326
+ raise SearchError(f"LLM request failed: {e}") from e
327
+
328
+ def _format_results_for_analysis(self, search_results: dict[str, list[Any]]) -> str:
329
+ """Format search results for LLM analysis.
330
+
331
+ Args:
332
+ search_results: Dictionary mapping search queries to their results
333
+
334
+ Returns:
335
+ Formatted string representation of results
336
+ """
337
+ formatted = []
338
+
339
+ for i, (query, results) in enumerate(search_results.items(), 1):
340
+ formatted.append(f"\n=== Query {i}: {query} ===")
341
+
342
+ if not results:
343
+ formatted.append(" No results found.")
344
+ continue
345
+
346
+ for j, result in enumerate(results[:5], 1): # Top 5 per query
347
+ # Extract key information from SearchResult
348
+ file_path = str(result.file_path)
349
+ similarity = result.similarity_score
350
+ content_preview = result.content[:150].replace("\n", " ")
351
+
352
+ formatted.append(
353
+ f"\n Result {j}:\n"
354
+ f" File: {file_path}\n"
355
+ f" Similarity: {similarity:.3f}\n"
356
+ f" Preview: {content_preview}..."
357
+ )
358
+
359
+ if result.function_name:
360
+ formatted.append(f" Function: {result.function_name}")
361
+ if result.class_name:
362
+ formatted.append(f" Class: {result.class_name}")
363
+
364
+ return "\n".join(formatted)
365
+
366
+ def _parse_ranking_response(
367
+ self,
368
+ llm_response: str,
369
+ search_results: dict[str, list[Any]],
370
+ top_n: int,
371
+ ) -> list[dict[str, Any]]:
372
+ """Parse LLM ranking response into structured results.
373
+
374
+ Args:
375
+ llm_response: Raw LLM response text
376
+ search_results: Original search results dictionary
377
+ top_n: Maximum number of results to return
378
+
379
+ Returns:
380
+ List of ranked results with metadata
381
+ """
382
+ ranked = []
383
+ current_result = {}
384
+
385
+ for line in llm_response.split("\n"):
386
+ line = line.strip()
387
+
388
+ if line.startswith("RESULT:"):
389
+ if current_result:
390
+ ranked.append(current_result)
391
+ current_result = {"identifier": line.replace("RESULT:", "").strip()}
392
+
393
+ elif line.startswith("RELEVANCE:"):
394
+ current_result["relevance"] = line.replace("RELEVANCE:", "").strip()
395
+
396
+ elif line.startswith("EXPLANATION:"):
397
+ current_result["explanation"] = line.replace("EXPLANATION:", "").strip()
398
+
399
+ # Add last result
400
+ if current_result:
401
+ ranked.append(current_result)
402
+
403
+ # Map identifiers back to actual SearchResult objects
404
+ enriched_results = []
405
+
406
+ for item in ranked[:top_n]:
407
+ identifier = item.get("identifier", "")
408
+
409
+ # Parse identifier (e.g., "Query 1, Result 2" or "Query 1, Result 2 (filename.py)")
410
+ try:
411
+ parts = identifier.split(",")
412
+ query_part = parts[0].replace("Query", "").strip()
413
+ result_part = parts[1].replace("Result", "").strip()
414
+
415
+ # Handle case where LLM includes filename in parentheses: "5 (config.py)"
416
+ # Extract just the number
417
+ query_match = re.match(r"(\d+)", query_part)
418
+ result_match = re.match(r"(\d+)", result_part)
419
+
420
+ if not query_match or not result_match:
421
+ logger.warning(
422
+ f"Could not extract numbers from identifier '{identifier}'"
423
+ )
424
+ continue
425
+
426
+ query_idx = int(query_match.group(1)) - 1
427
+ result_idx = int(result_match.group(1)) - 1
428
+
429
+ # Get corresponding query and result
430
+ queries = list(search_results.keys())
431
+ if query_idx < len(queries):
432
+ query = queries[query_idx]
433
+ results = search_results[query]
434
+
435
+ if result_idx < len(results):
436
+ actual_result = results[result_idx]
437
+
438
+ enriched_results.append(
439
+ {
440
+ "result": actual_result,
441
+ "query": query,
442
+ "relevance": item.get("relevance", "Medium"),
443
+ "explanation": item.get(
444
+ "explanation", "Relevant to query"
445
+ ),
446
+ }
447
+ )
448
+
449
+ except (ValueError, IndexError) as e:
450
+ logger.warning(f"Failed to parse result identifier '{identifier}': {e}")
451
+ continue
452
+
453
+ return enriched_results