mcp-vector-search 0.12.6__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (65)
  1. mcp_vector_search/__init__.py +2 -2
  2. mcp_vector_search/analysis/__init__.py +64 -0
  3. mcp_vector_search/analysis/collectors/__init__.py +39 -0
  4. mcp_vector_search/analysis/collectors/base.py +164 -0
  5. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  6. mcp_vector_search/analysis/metrics.py +341 -0
  7. mcp_vector_search/analysis/reporters/__init__.py +5 -0
  8. mcp_vector_search/analysis/reporters/console.py +222 -0
  9. mcp_vector_search/cli/commands/analyze.py +408 -0
  10. mcp_vector_search/cli/commands/chat.py +1262 -0
  11. mcp_vector_search/cli/commands/index.py +21 -3
  12. mcp_vector_search/cli/commands/init.py +13 -0
  13. mcp_vector_search/cli/commands/install.py +597 -335
  14. mcp_vector_search/cli/commands/install_old.py +8 -4
  15. mcp_vector_search/cli/commands/mcp.py +78 -6
  16. mcp_vector_search/cli/commands/reset.py +68 -26
  17. mcp_vector_search/cli/commands/search.py +30 -7
  18. mcp_vector_search/cli/commands/setup.py +1133 -0
  19. mcp_vector_search/cli/commands/status.py +37 -2
  20. mcp_vector_search/cli/commands/uninstall.py +276 -357
  21. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  22. mcp_vector_search/cli/commands/visualize/cli.py +276 -0
  23. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  24. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  25. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  26. mcp_vector_search/cli/commands/visualize/graph_builder.py +714 -0
  27. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  28. mcp_vector_search/cli/commands/visualize/server.py +311 -0
  29. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  30. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  31. mcp_vector_search/cli/commands/visualize/templates/base.py +180 -0
  32. mcp_vector_search/cli/commands/visualize/templates/scripts.py +2507 -0
  33. mcp_vector_search/cli/commands/visualize/templates/styles.py +1313 -0
  34. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  35. mcp_vector_search/cli/didyoumean.py +22 -2
  36. mcp_vector_search/cli/main.py +115 -159
  37. mcp_vector_search/cli/output.py +24 -8
  38. mcp_vector_search/config/__init__.py +4 -0
  39. mcp_vector_search/config/default_thresholds.yaml +52 -0
  40. mcp_vector_search/config/settings.py +12 -0
  41. mcp_vector_search/config/thresholds.py +185 -0
  42. mcp_vector_search/core/auto_indexer.py +3 -3
  43. mcp_vector_search/core/boilerplate.py +186 -0
  44. mcp_vector_search/core/config_utils.py +394 -0
  45. mcp_vector_search/core/database.py +369 -94
  46. mcp_vector_search/core/exceptions.py +11 -0
  47. mcp_vector_search/core/git_hooks.py +4 -4
  48. mcp_vector_search/core/indexer.py +221 -4
  49. mcp_vector_search/core/llm_client.py +751 -0
  50. mcp_vector_search/core/models.py +3 -0
  51. mcp_vector_search/core/project.py +17 -0
  52. mcp_vector_search/core/scheduler.py +11 -11
  53. mcp_vector_search/core/search.py +179 -29
  54. mcp_vector_search/mcp/server.py +24 -5
  55. mcp_vector_search/utils/__init__.py +2 -0
  56. mcp_vector_search/utils/gitignore_updater.py +212 -0
  57. mcp_vector_search/utils/monorepo.py +66 -4
  58. mcp_vector_search/utils/timing.py +10 -6
  59. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/METADATA +182 -52
  60. mcp_vector_search-1.0.3.dist-info/RECORD +97 -0
  61. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/WHEEL +1 -1
  62. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/entry_points.txt +1 -0
  63. mcp_vector_search/cli/commands/visualize.py +0 -1467
  64. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  65. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.0.3.dist-info}/licenses/LICENSE +0 -0
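The largest single addition in this release is the new LLM client module, mcp_vector_search/core/llm_client.py (+751 lines), diffed in full below. As orientation before the diff, here is a minimal usage sketch based only on the public surface that module defines; the wiring is illustrative, and it assumes an API key is present in the environment:

import asyncio

from mcp_vector_search.core.llm_client import LLMClient


async def main() -> None:
    # Auto-detects the provider: OpenAI if OPENAI_API_KEY is set,
    # otherwise OpenRouter via OPENROUTER_API_KEY (ValueError if neither).
    client = LLMClient(think=False)

    # Expand a natural-language question into targeted search queries.
    queries = await client.generate_search_queries(
        "where is the similarity_threshold parameter set?", limit=3
    )
    print(queries)


asyncio.run(main())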
mcp_vector_search/core/llm_client.py (new file)
@@ -0,0 +1,751 @@
+ """LLM client for intelligent code search using OpenAI or OpenRouter API."""
+
+ import json
+ import os
+ import re
+ from collections.abc import AsyncIterator
+ from typing import Any, Literal
+
+ import httpx
+ from loguru import logger
+
+ from .exceptions import SearchError
+
+ # Type alias for provider
+ LLMProvider = Literal["openai", "openrouter"]
+
+ # Type alias for intent
+ IntentType = Literal["find", "answer"]
+
+
+ class LLMClient:
+     """Client for LLM-powered intelligent search orchestration.
+
+     Supports both OpenAI and OpenRouter APIs:
+     1. Generate multiple targeted search queries from natural language
+     2. Analyze search results and select most relevant ones
+     3. Provide contextual explanations for results
+
+     Provider Selection Priority:
+     1. Explicit provider parameter
+     2. Preferred provider from config
+     3. Auto-detect: OpenAI if available, otherwise OpenRouter
+     """
+
+     # Default models for each provider (comparable performance/cost)
+     DEFAULT_MODELS = {
+         "openai": "gpt-4o-mini",  # Fast, cheap, comparable to claude-3-haiku
+         "openrouter": "anthropic/claude-3-haiku",
+     }
+
+     # Advanced "thinking" models for complex queries (--think flag)
+     THINKING_MODELS = {
+         "openai": "gpt-4o",  # More capable, better reasoning
+         "openrouter": "anthropic/claude-sonnet-4",  # Claude Sonnet 4 for deep analysis
+     }
+
+     # API endpoints
+     API_ENDPOINTS = {
+         "openai": "https://api.openai.com/v1/chat/completions",
+         "openrouter": "https://openrouter.ai/api/v1/chat/completions",
+     }
+
+     TIMEOUT_SECONDS = 30.0
+
+     def __init__(
+         self,
+         api_key: str | None = None,
+         model: str | None = None,
+         timeout: float = TIMEOUT_SECONDS,
+         provider: LLMProvider | None = None,
+         openai_api_key: str | None = None,
+         openrouter_api_key: str | None = None,
+         think: bool = False,
+     ) -> None:
+         """Initialize LLM client.
+
+         Args:
+             api_key: API key (deprecated, use provider-specific keys)
+             model: Model to use (defaults based on provider)
+             timeout: Request timeout in seconds
+             provider: Explicit provider ('openai' or 'openrouter')
+             openai_api_key: OpenAI API key (or use OPENAI_API_KEY env var)
+             openrouter_api_key: OpenRouter API key (or use OPENROUTER_API_KEY env var)
+             think: Use advanced "thinking" model for complex queries
+
+         Raises:
+             ValueError: If no API key is found for any provider
+         """
+         self.think = think
+         # Get API keys from environment or parameters
+         self.openai_key = openai_api_key or os.environ.get("OPENAI_API_KEY")
+         self.openrouter_key = openrouter_api_key or os.environ.get("OPENROUTER_API_KEY")
+
+         # Support deprecated api_key parameter (assume OpenRouter for backward compatibility)
+         if api_key and not self.openrouter_key:
+             self.openrouter_key = api_key
+
+         # Determine which provider to use
+         if provider:
+             # Explicit provider specified
+             self.provider: LLMProvider = provider
+             if provider == "openai" and not self.openai_key:
+                 raise ValueError(
+                     "OpenAI provider specified but OPENAI_API_KEY not found. "
+                     "Please set OPENAI_API_KEY environment variable."
+                 )
+             elif provider == "openrouter" and not self.openrouter_key:
+                 raise ValueError(
+                     "OpenRouter provider specified but OPENROUTER_API_KEY not found. "
+                     "Please set OPENROUTER_API_KEY environment variable."
+                 )
+         else:
+             # Auto-detect provider (prefer OpenAI if both are available)
+             if self.openai_key:
+                 self.provider = "openai"
+             elif self.openrouter_key:
+                 self.provider = "openrouter"
+             else:
+                 raise ValueError(
+                     "No API key found. Please set OPENAI_API_KEY or OPENROUTER_API_KEY "
+                     "environment variable, or pass openai_api_key or openrouter_api_key parameter."
+                 )
+
+         # Set API key and endpoint based on provider
+         # Select model: explicit > env var > thinking model > default model
+         if self.provider == "openai":
+             self.api_key = self.openai_key
+             self.api_endpoint = self.API_ENDPOINTS["openai"]
+             default_model = (
+                 self.THINKING_MODELS["openai"]
+                 if think
+                 else self.DEFAULT_MODELS["openai"]
+             )
+             self.model = model or os.environ.get("OPENAI_MODEL", default_model)
+         else:
+             self.api_key = self.openrouter_key
+             self.api_endpoint = self.API_ENDPOINTS["openrouter"]
+             default_model = (
+                 self.THINKING_MODELS["openrouter"]
+                 if think
+                 else self.DEFAULT_MODELS["openrouter"]
+             )
+             self.model = model or os.environ.get("OPENROUTER_MODEL", default_model)
+
+         self.timeout = timeout
+
+         logger.debug(
+             f"Initialized LLM client with provider: {self.provider}, model: {self.model}"
+         )
+
+     async def generate_search_queries(
+         self, natural_language_query: str, limit: int = 3
+     ) -> list[str]:
+         """Generate targeted search queries from natural language.
+
+         Args:
+             natural_language_query: User's natural language query
+             limit: Maximum number of search queries to generate
+
+         Returns:
+             List of targeted search queries
+
+         Raises:
+             SearchError: If API call fails
+         """
+         system_prompt = """You are a code search expert. Your task is to convert natural language questions about code into targeted search queries.
+
+ Given a natural language query, generate {limit} specific search queries that will help find the relevant code.
+
+ Rules:
+ 1. Each query should target a different aspect of the question
+ 2. Use technical terms and identifiers when possible
+ 3. Keep queries concise (3-7 words each)
+ 4. Focus on code patterns, function names, class names, or concepts
+ 5. Return ONLY the search queries, one per line, no explanations
+
+ Example:
+ Input: "where is the similarity_threshold parameter set?"
+ Output:
+ similarity_threshold default value
+ similarity_threshold configuration
+ SemanticSearchEngine init threshold"""
+
+         user_prompt = f"""Natural language query: {natural_language_query}
+
+ Generate {limit} targeted search queries:"""
+
+         try:
+             messages = [
+                 {"role": "system", "content": system_prompt.format(limit=limit)},
+                 {"role": "user", "content": user_prompt},
+             ]
+
+             response = await self._chat_completion(messages)
+
+             # Parse queries from response
+             content = (
+                 response.get("choices", [{}])[0].get("message", {}).get("content", "")
+             )
+             queries = [q.strip() for q in content.strip().split("\n") if q.strip()]
+
+             logger.debug(
+                 f"Generated {len(queries)} search queries from: '{natural_language_query}'"
+             )
+
+             return queries[:limit]
+
+         except Exception as e:
+             logger.error(f"Failed to generate search queries: {e}")
+             raise SearchError(f"LLM query generation failed: {e}") from e
+
+     async def analyze_and_rank_results(
+         self,
+         original_query: str,
+         search_results: dict[str, list[Any]],
+         top_n: int = 5,
+     ) -> list[dict[str, Any]]:
+         """Analyze search results and select the most relevant ones.
+
+         Args:
+             original_query: Original natural language query
+             search_results: Dictionary mapping search queries to their results
+             top_n: Number of top results to return
+
+         Returns:
+             List of ranked results with explanations
+
+         Raises:
+             SearchError: If API call fails
+         """
+         # Format results for LLM analysis
+         results_summary = self._format_results_for_analysis(search_results)
+
+         system_prompt = """You are a code search expert. Your task is to analyze search results and identify the most relevant ones for answering a user's question.
+
+ Given:
+ 1. A natural language query
+ 2. Multiple search results from different queries
+
+ Select the top {top_n} most relevant results that best answer the user's question.
+
+ For each selected result, provide:
+ 1. Result identifier (e.g., "Query 1, Result 2")
+ 2. Relevance level: "High", "Medium", or "Low"
+ 3. Brief explanation (1-2 sentences) of why this result is relevant
+
+ Format your response as:
+ RESULT: [identifier]
+ RELEVANCE: [level]
+ EXPLANATION: [why this matches]
+
+ ---
+
+ Only include the top {top_n} results."""
+
+         user_prompt = f"""Original Question: {original_query}
+
+ Search Results:
+ {results_summary}
+
+ Select the top {top_n} most relevant results:"""
+
+         try:
+             messages = [
+                 {"role": "system", "content": system_prompt.format(top_n=top_n)},
+                 {"role": "user", "content": user_prompt},
+             ]
+
+             response = await self._chat_completion(messages)
+
+             # Parse LLM response
+             content = (
+                 response.get("choices", [{}])[0].get("message", {}).get("content", "")
+             )
+
+             ranked_results = self._parse_ranking_response(
+                 content, search_results, top_n
+             )
+
+             logger.debug(f"Ranked {len(ranked_results)} results from LLM analysis")
+
+             return ranked_results
+
+         except Exception as e:
+             logger.error(f"Failed to analyze results: {e}")
+             raise SearchError(f"LLM analysis failed: {e}") from e
+
+     async def _chat_completion(self, messages: list[dict[str, str]]) -> dict[str, Any]:
+         """Make chat completion request to OpenAI or OpenRouter API.
+
+         Args:
+             messages: List of message dictionaries with role and content
+
+         Returns:
+             API response dictionary
+
+         Raises:
+             SearchError: If API request fails
+         """
+         # Build headers based on provider
+         headers = {
+             "Authorization": f"Bearer {self.api_key}",
+             "Content-Type": "application/json",
+         }
+
+         # OpenRouter-specific headers
+         if self.provider == "openrouter":
+             headers["HTTP-Referer"] = "https://github.com/bobmatnyc/mcp-vector-search"
+             headers["X-Title"] = "MCP Vector Search"
+
+         payload = {
+             "model": self.model,
+             "messages": messages,
+         }
+
+         provider_name = self.provider.capitalize()
+
+         try:
+             async with httpx.AsyncClient(timeout=self.timeout) as client:
+                 response = await client.post(
+                     self.api_endpoint,
+                     headers=headers,
+                     json=payload,
+                 )
+
+                 response.raise_for_status()
+                 return response.json()
+
+         except httpx.TimeoutException as e:
+             logger.error(f"{provider_name} API timeout after {self.timeout}s")
+             raise SearchError(
+                 f"LLM request timed out after {self.timeout} seconds. "
+                 "Try a simpler query or check your network connection."
+             ) from e
+
+         except httpx.HTTPStatusError as e:
+             status_code = e.response.status_code
+             error_msg = f"{provider_name} API error (HTTP {status_code})"
+
+             if status_code == 401:
+                 env_var = (
+                     "OPENAI_API_KEY"
+                     if self.provider == "openai"
+                     else "OPENROUTER_API_KEY"
+                 )
+                 error_msg = f"Invalid {provider_name} API key. Please check {env_var} environment variable."
+             elif status_code == 429:
+                 error_msg = f"{provider_name} API rate limit exceeded. Please wait and try again."
+             elif status_code >= 500:
+                 error_msg = f"{provider_name} API server error. Please try again later."
+
+             logger.error(error_msg)
+             raise SearchError(error_msg) from e
+
+         except Exception as e:
+             logger.error(f"{provider_name} API request failed: {e}")
+             raise SearchError(f"LLM request failed: {e}") from e
+
+     def _format_results_for_analysis(self, search_results: dict[str, list[Any]]) -> str:
+         """Format search results for LLM analysis.
+
+         Args:
+             search_results: Dictionary mapping search queries to their results
+
+         Returns:
+             Formatted string representation of results
+         """
+         formatted = []
+
+         for i, (query, results) in enumerate(search_results.items(), 1):
+             formatted.append(f"\n=== Query {i}: {query} ===")
+
+             if not results:
+                 formatted.append("  No results found.")
+                 continue
+
+             for j, result in enumerate(results[:5], 1):  # Top 5 per query
+                 # Extract key information from SearchResult
+                 file_path = str(result.file_path)
+                 similarity = result.similarity_score
+                 content_preview = result.content[:150].replace("\n", " ")
+
+                 formatted.append(
+                     f"\n  Result {j}:\n"
+                     f"    File: {file_path}\n"
+                     f"    Similarity: {similarity:.3f}\n"
+                     f"    Preview: {content_preview}..."
+                 )
+
+                 if result.function_name:
+                     formatted.append(f"    Function: {result.function_name}")
+                 if result.class_name:
+                     formatted.append(f"    Class: {result.class_name}")
+
+         return "\n".join(formatted)
+
+     def _parse_ranking_response(
+         self,
+         llm_response: str,
+         search_results: dict[str, list[Any]],
+         top_n: int,
+     ) -> list[dict[str, Any]]:
+         """Parse LLM ranking response into structured results.
+
+         Args:
+             llm_response: Raw LLM response text
+             search_results: Original search results dictionary
+             top_n: Maximum number of results to return
+
+         Returns:
+             List of ranked results with metadata
+         """
+         ranked = []
+         current_result = {}
+
+         for line in llm_response.split("\n"):
+             line = line.strip()
+
+             if line.startswith("RESULT:"):
+                 if current_result:
+                     ranked.append(current_result)
+                 current_result = {"identifier": line.replace("RESULT:", "").strip()}
+
+             elif line.startswith("RELEVANCE:"):
+                 current_result["relevance"] = line.replace("RELEVANCE:", "").strip()
+
+             elif line.startswith("EXPLANATION:"):
+                 current_result["explanation"] = line.replace("EXPLANATION:", "").strip()
+
+         # Add last result
+         if current_result:
+             ranked.append(current_result)
+
+         # Map identifiers back to actual SearchResult objects
+         enriched_results = []
+
+         for item in ranked[:top_n]:
+             identifier = item.get("identifier", "")
+
+             # Parse identifier (e.g., "Query 1, Result 2" or "Query 1, Result 2 (filename.py)")
+             try:
+                 parts = identifier.split(",")
+                 query_part = parts[0].replace("Query", "").strip()
+                 result_part = parts[1].replace("Result", "").strip()
+
+                 # Handle case where LLM includes filename in parentheses: "5 (config.py)"
+                 # Extract just the number
+                 query_match = re.match(r"(\d+)", query_part)
+                 result_match = re.match(r"(\d+)", result_part)
+
+                 if not query_match or not result_match:
+                     logger.warning(
+                         f"Could not extract numbers from identifier '{identifier}'"
+                     )
+                     continue
+
+                 query_idx = int(query_match.group(1)) - 1
+                 result_idx = int(result_match.group(1)) - 1
+
+                 # Get corresponding query and result
+                 queries = list(search_results.keys())
+                 if query_idx < len(queries):
+                     query = queries[query_idx]
+                     results = search_results[query]
+
+                     if result_idx < len(results):
+                         actual_result = results[result_idx]
+
+                         enriched_results.append(
+                             {
+                                 "result": actual_result,
+                                 "query": query,
+                                 "relevance": item.get("relevance", "Medium"),
+                                 "explanation": item.get(
+                                     "explanation", "Relevant to query"
+                                 ),
+                             }
+                         )
+
+             except (ValueError, IndexError) as e:
+                 logger.warning(f"Failed to parse result identifier '{identifier}': {e}")
+                 continue
+
+         return enriched_results
+
+     async def detect_intent(self, query: str) -> IntentType:
+         """Detect user intent from query.
+
+         Args:
+             query: User's natural language query
+
+         Returns:
+             Intent type: "find" or "answer"
+
+         Raises:
+             SearchError: If API call fails
+         """
+         system_prompt = """You are a code search intent classifier. Classify the user's query into ONE of these categories:
+
+ 1. "find" - User wants to locate/search for something in the codebase
+    Examples: "where is X", "find the function that", "show me the code for", "locate X"
+
+ 2. "answer" - User wants an explanation/answer about the codebase
+    Examples: "what does this do", "how does X work", "explain the architecture", "why is X used"
+
+ Return ONLY the word "find" or "answer" with no other text."""
+
+         user_prompt = f"""Query: {query}
+
+ Intent:"""
+
+         try:
+             messages = [
+                 {"role": "system", "content": system_prompt},
+                 {"role": "user", "content": user_prompt},
+             ]
+
+             response = await self._chat_completion(messages)
+
+             content = (
+                 response.get("choices", [{}])[0].get("message", {}).get("content", "")
+             )
+             intent = content.strip().lower()
+
+             if intent not in ("find", "answer"):
+                 # Default to find if unclear
+                 logger.warning(
+                     f"Unclear intent '{intent}' for query '{query}', defaulting to 'find'"
+                 )
+                 return "find"
+
+             logger.debug(f"Detected intent '{intent}' for query: '{query}'")
+             return intent  # type: ignore
+
+         except Exception as e:
+             logger.error(f"Failed to detect intent: {e}, defaulting to 'find'")
+             return "find"
+
+     async def stream_chat_completion(
+         self, messages: list[dict[str, str]]
+     ) -> AsyncIterator[str]:
+         """Stream chat completion response chunk by chunk.
+
+         Args:
+             messages: List of message dictionaries with role and content
+
+         Yields:
+             Text chunks from the streaming response
+
+         Raises:
+             SearchError: If API request fails
+         """
+         headers = {
+             "Authorization": f"Bearer {self.api_key}",
+             "Content-Type": "application/json",
+         }
+
+         if self.provider == "openrouter":
+             headers["HTTP-Referer"] = "https://github.com/bobmatnyc/mcp-vector-search"
+             headers["X-Title"] = "MCP Vector Search"
+
+         payload = {
+             "model": self.model,
+             "messages": messages,
+             "stream": True,
+         }
+
+         provider_name = self.provider.capitalize()
+
+         try:
+             async with httpx.AsyncClient(timeout=self.timeout) as client:
+                 async with client.stream(
+                     "POST", self.api_endpoint, headers=headers, json=payload
+                 ) as response:
+                     response.raise_for_status()
+
+                     async for line in response.aiter_lines():
+                         line = line.strip()
+
+                         # Skip empty lines and comments
+                         if not line or line.startswith(":"):
+                             continue
+
+                         # Parse SSE format: "data: {json}"
+                         if line.startswith("data: "):
+                             data = line[6:]  # Remove "data: " prefix
+
+                             # Check for end of stream
+                             if data == "[DONE]":
+                                 break
+
+                             try:
+                                 chunk = json.loads(data)
+                                 content = (
+                                     chunk.get("choices", [{}])[0]
+                                     .get("delta", {})
+                                     .get("content")
+                                 )
+
+                                 if content:
+                                     yield content
+
+                             except json.JSONDecodeError as e:
+                                 logger.warning(f"Failed to parse SSE chunk: {e}")
+                                 continue
+
+         except httpx.TimeoutException as e:
+             logger.error(f"{provider_name} API timeout after {self.timeout}s")
+             raise SearchError(
+                 f"LLM request timed out after {self.timeout} seconds. "
+                 "Try a simpler query or check your network connection."
+             ) from e
+
+         except httpx.HTTPStatusError as e:
+             status_code = e.response.status_code
+             error_msg = f"{provider_name} API error (HTTP {status_code})"
+
+             if status_code == 401:
+                 env_var = (
+                     "OPENAI_API_KEY"
+                     if self.provider == "openai"
+                     else "OPENROUTER_API_KEY"
+                 )
+                 error_msg = f"Invalid {provider_name} API key. Please check {env_var} environment variable."
+             elif status_code == 429:
+                 error_msg = f"{provider_name} API rate limit exceeded. Please wait and try again."
+             elif status_code >= 500:
+                 error_msg = f"{provider_name} API server error. Please try again later."
+
+             logger.error(error_msg)
+             raise SearchError(error_msg) from e
+
+         except Exception as e:
+             logger.error(f"{provider_name} streaming request failed: {e}")
+             raise SearchError(f"LLM streaming failed: {e}") from e
+
+     async def generate_answer(
+         self,
+         query: str,
+         context: str,
+         conversation_history: list[dict[str, str]] | None = None,
+     ) -> str:
+         """Generate answer to user question using codebase context.
+
+         Args:
+             query: User's question
+             context: Relevant code context from search results
+             conversation_history: Previous conversation messages (optional)
+
+         Returns:
+             LLM response text
+
+         Raises:
+             SearchError: If API call fails
+         """
+         system_prompt = f"""You are a helpful code assistant analyzing a codebase. Answer the user's questions based on the provided code context.
+
+ Code Context:
+ {context}
+
+ Guidelines:
+ - Be concise but thorough in explanations
+ - Reference specific functions, classes, or files when relevant
+ - Use code examples from the context when helpful
+ - If the context doesn't contain enough information, say so
+ - Use markdown formatting for code snippets"""
+
+         messages = [{"role": "system", "content": system_prompt}]
+
+         # Add conversation history if provided
+         if conversation_history:
+             messages.extend(conversation_history)
+
+         # Add current query
+         messages.append({"role": "user", "content": query})
+
+         try:
+             response = await self._chat_completion(messages)
+             content = (
+                 response.get("choices", [{}])[0].get("message", {}).get("content", "")
+             )
+
+             logger.debug(f"Generated answer for query: '{query}'")
+             return content
+
+         except Exception as e:
+             logger.error(f"Failed to generate answer: {e}")
+             raise SearchError(f"Failed to generate answer: {e}") from e
+
+     async def chat_with_tools(
+         self, messages: list[dict[str, Any]], tools: list[dict[str, Any]]
+     ) -> dict[str, Any]:
+         """Chat completion with tool/function calling support.
+
+         Args:
+             messages: List of message dictionaries
+             tools: List of tool definitions
+
+         Returns:
+             API response with tool calls or final message
+
+         Raises:
+             SearchError: If API request fails
+         """
+         headers = {
+             "Authorization": f"Bearer {self.api_key}",
+             "Content-Type": "application/json",
+         }
+
+         if self.provider == "openrouter":
+             headers["HTTP-Referer"] = "https://github.com/bobmatnyc/mcp-vector-search"
+             headers["X-Title"] = "MCP Vector Search"
+
+         payload = {
+             "model": self.model,
+             "messages": messages,
+             "tools": tools,
+             "tool_choice": "auto",
+         }
+
+         provider_name = self.provider.capitalize()
+
+         try:
+             async with httpx.AsyncClient(timeout=self.timeout) as client:
+                 response = await client.post(
+                     self.api_endpoint,
+                     headers=headers,
+                     json=payload,
+                 )
+
+                 response.raise_for_status()
+                 return response.json()
+
+         except httpx.TimeoutException as e:
+             logger.error(f"{provider_name} API timeout after {self.timeout}s")
+             raise SearchError(
+                 f"LLM request timed out after {self.timeout} seconds."
+             ) from e
+
+         except httpx.HTTPStatusError as e:
+             status_code = e.response.status_code
+             error_msg = f"{provider_name} API error (HTTP {status_code})"
+
+             if status_code == 401:
+                 env_var = (
+                     "OPENAI_API_KEY"
+                     if self.provider == "openai"
+                     else "OPENROUTER_API_KEY"
+                 )
+                 error_msg = f"Invalid {provider_name} API key. Check {env_var}."
+             elif status_code == 429:
+                 error_msg = f"{provider_name} API rate limit exceeded."
+             elif status_code >= 500:
+                 error_msg = f"{provider_name} API server error."
+
+             logger.error(error_msg)
+             raise SearchError(error_msg) from e
+
+         except Exception as e:
+             logger.error(f"{provider_name} API request failed: {e}")
+             raise SearchError(f"LLM request failed: {e}") from e
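
Taken together, the module exposes three surfaces the CLI can build on: query expansion, intent routing, and streamed answers. Below is a hedged end-to-end sketch of how they compose; the wiring is illustrative only and is not taken from the new chat command in cli/commands/chat.py, though every method called is defined in the diff above:

import asyncio

from mcp_vector_search.core.llm_client import LLMClient


async def ask(question: str) -> None:
    client = LLMClient()  # needs OPENAI_API_KEY or OPENROUTER_API_KEY

    # Route the question: "find" locates code, "answer" explains it.
    intent = await client.detect_intent(question)

    if intent == "find":
        queries = await client.generate_search_queries(question, limit=3)
        print("Suggested searches:", queries)
        return

    # "answer" intent: stream the reply chunk by chunk over SSE.
    messages = [{"role": "user", "content": question}]
    async for chunk in client.stream_chat_completion(messages):
        print(chunk, end="", flush=True)
    print()


asyncio.run(ask("how does provider auto-detection work in this package?"))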