iflow-mcp_anton-prosterity-documentation-search-enhanced 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. documentation_search_enhanced/__init__.py +14 -0
  2. documentation_search_enhanced/__main__.py +6 -0
  3. documentation_search_enhanced/config.json +1674 -0
  4. documentation_search_enhanced/config_manager.py +233 -0
  5. documentation_search_enhanced/config_validator.py +79 -0
  6. documentation_search_enhanced/content_enhancer.py +578 -0
  7. documentation_search_enhanced/docker_manager.py +87 -0
  8. documentation_search_enhanced/logger.py +179 -0
  9. documentation_search_enhanced/main.py +2170 -0
  10. documentation_search_enhanced/project_generator.py +260 -0
  11. documentation_search_enhanced/project_scanner.py +85 -0
  12. documentation_search_enhanced/reranker.py +230 -0
  13. documentation_search_enhanced/site_index_builder.py +274 -0
  14. documentation_search_enhanced/site_index_downloader.py +222 -0
  15. documentation_search_enhanced/site_search.py +1325 -0
  16. documentation_search_enhanced/smart_search.py +473 -0
  17. documentation_search_enhanced/snyk_integration.py +657 -0
  18. documentation_search_enhanced/vector_search.py +303 -0
  19. documentation_search_enhanced/version_resolver.py +189 -0
  20. documentation_search_enhanced/vulnerability_scanner.py +545 -0
  21. documentation_search_enhanced/web_scraper.py +117 -0
  22. iflow_mcp_anton_prosterity_documentation_search_enhanced-1.9.0.dist-info/METADATA +195 -0
  23. iflow_mcp_anton_prosterity_documentation_search_enhanced-1.9.0.dist-info/RECORD +26 -0
  24. iflow_mcp_anton_prosterity_documentation_search_enhanced-1.9.0.dist-info/WHEEL +4 -0
  25. iflow_mcp_anton_prosterity_documentation_search_enhanced-1.9.0.dist-info/entry_points.txt +2 -0
  26. iflow_mcp_anton_prosterity_documentation_search_enhanced-1.9.0.dist-info/licenses/LICENSE +21 -0
documentation_search_enhanced/main.py
@@ -0,0 +1,2170 @@
+ import json
+ import os
+ import hashlib
+ import time
+ import logging
+ from contextlib import asynccontextmanager
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import Annotated, Any, Dict, List, Optional
+ import asyncio
+ import anyio
+ import httpx
+ from mcp.server.fastmcp import FastMCP
+ from dotenv import load_dotenv
+ from pydantic import BeforeValidator
+ from .smart_search import smart_search, SearchResult
+ from .web_scraper import scraper
+ from .site_search import (
+     load_preindexed_state,
+     preindex_site,
+     save_preindexed_state,
+     search_site_via_sitemap,
+ )
+ from .site_index_downloader import (
+     ensure_site_index_file,
+     load_site_index_settings_from_env,
+ )
+ from .config_validator import validate_config, Config as AppConfig
+ from .content_enhancer import content_enhancer
+ from .version_resolver import version_resolver
+ import sys
+ import atexit
+ 
+ # Load the environment variables
+ load_dotenv()
+ 
+ logger = logging.getLogger(__name__)
+ 
+ USER_AGENT = "docs-app/1.0"
+ SERPER_URL = "https://google.serper.dev/search"
+ 
+ # Environment variables (removing API key exposure)
+ SERPER_API_KEY = os.getenv("SERPER_API_KEY")
+ 
+ 
+ @asynccontextmanager
+ async def mcp_lifespan(_: FastMCP):
+     async def async_heartbeat() -> None:
+         # Work around environments where asyncio's cross-thread wakeups can be delayed.
+         # The MCP stdio transport uses AnyIO worker threads for stdin/stdout; without
+         # periodic loop wake-ups, those thread completions may not be processed.
+         while True:
+             await anyio.sleep(0.1)
+ 
+     async with anyio.create_task_group() as tg:
+         tg.start_soon(async_heartbeat)
+ 
+         try:
+             global http_client
+             if http_client is None:
+                 http_client = httpx.AsyncClient(timeout=httpx.Timeout(30.0, read=60.0))
+ 
+             settings = load_site_index_settings_from_env(cwd=os.getcwd())
+             try:
+                 result = await ensure_site_index_file(
+                     http_client,
+                     settings=settings,
+                     user_agent=USER_AGENT,
+                 )
+                 status = result.get("status")
+                 if status == "downloaded":
+                     logger.debug(
+                         "Downloaded docs search index: %s (%s)",
+                         result.get("path"),
+                         result.get("url"),
+                     )
+                 elif status == "error":
+                     print(
+                         f"⚠️ Docs search index download failed: {result.get('errors') or result.get('error')}",
+                         file=sys.stderr,
+                     )
+             except Exception as e:
+                 print(f"⚠️ Docs search index download failed: {e}", file=sys.stderr)
+ 
+             if load_preindexed_state(settings.path):
+                 logger.debug("Loaded docs search index: %s", settings.path)
+ 
+             yield {}
+         finally:
+             tg.cancel_scope.cancel()
+             await shutdown_resources()
+ 
+ 
+ # Initialize the MCP server
+ mcp = FastMCP("documentation_search_enhanced", lifespan=mcp_lifespan)
+ 
+ 
+ def _normalize_libraries(value: Any) -> List[str]:
+     if value is None:
+         return []
+     if isinstance(value, str):
+         parts = [part.strip() for part in value.split(",")]
+         return [part for part in parts if part]
+     if isinstance(value, (list, tuple, set)):
+         libraries: List[str] = []
+         for item in value:
+             if item is None:
+                 continue
+             item_str = str(item).strip()
+             if item_str:
+                 libraries.append(item_str)
+         return libraries
+     return [str(value).strip()]
+ 
+ 
+ LibrariesParam = Annotated[List[str], BeforeValidator(_normalize_libraries)]
+ 
+ 
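# --- Illustrative sketch (editorial note, not part of the package) ---
# `LibrariesParam` lets MCP clients pass either a comma-separated string or a
# list; the BeforeValidator coerces both to List[str] before pydantic checks
# the type. A minimal standalone demonstration of the same idea (the `Params`
# model here is hypothetical):
#
#     from pydantic import BaseModel
#
#     class Params(BaseModel):
#         libraries: LibrariesParam
#
#     Params(libraries="fastapi, django").libraries   # -> ["fastapi", "django"]
#     Params(libraries=["fastapi", None]).libraries   # -> ["fastapi"]
# --- end note ---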
+ # Simple in-memory cache with TTL
+ class SimpleCache:
+     def __init__(
+         self,
+         ttl_hours: int = 24,
+         max_entries: int = 1000,
+         persistence_enabled: bool = False,
+         persist_path: Optional[str] = None,
+     ):
+         self.cache: Dict[str, Dict[str, Any]] = {}
+         self.ttl_hours = ttl_hours
+         self.max_entries = max_entries
+         self.persistence_enabled = persistence_enabled
+         self.persist_path = persist_path
+         self._lock = asyncio.Lock()
+ 
+         if self.persistence_enabled and self.persist_path:
+             self._load_from_disk()
+ 
+     def _is_expired(self, timestamp: datetime) -> bool:
+         return datetime.now() - timestamp > timedelta(hours=self.ttl_hours)
+ 
+     async def get(self, key: str) -> Optional[str]:
+         async with self._lock:
+             if key in self.cache:
+                 entry = self.cache[key]
+                 if not self._is_expired(entry["timestamp"]):
+                     return entry["data"]
+                 del self.cache[key]
+             return None
+ 
+     async def set(self, key: str, data: str) -> None:
+         async with self._lock:
+             await self._cleanup_locked()
+ 
+             if len(self.cache) >= self.max_entries:
+                 oldest_key = min(
+                     self.cache.keys(), key=lambda k: self.cache[k]["timestamp"]
+                 )
+                 del self.cache[oldest_key]
+ 
+             self.cache[key] = {"data": data, "timestamp": datetime.now()}
+ 
+             await self._persist_locked()
+ 
+     async def clear_expired(self) -> None:
+         async with self._lock:
+             await self._cleanup_locked()
+             await self._persist_locked()
+ 
+     async def stats(self) -> Dict[str, Any]:
+         async with self._lock:
+             expired_count = sum(
+                 1
+                 for entry in self.cache.values()
+                 if self._is_expired(entry["timestamp"])
+             )
+             return {
+                 "total_entries": len(self.cache),
+                 "expired_entries": expired_count,
+                 "active_entries": len(self.cache) - expired_count,
+                 "max_entries": self.max_entries,
+                 "ttl_hours": self.ttl_hours,
+                 "memory_usage_estimate": f"{len(str(self.cache)) / 1024:.2f} KB",
+             }
+ 
+     async def clear(self) -> int:
+         async with self._lock:
+             removed = len(self.cache)
+             self.cache.clear()
+             await self._persist_locked()
+             return removed
+ 
+     async def _cleanup_locked(self) -> None:
+         expired_keys = [
+             k for k, v in self.cache.items() if self._is_expired(v["timestamp"])
+         ]
+         for key in expired_keys:
+             del self.cache[key]
+ 
+     def _load_from_disk(self) -> None:
+         try:
+             if not os.path.exists(self.persist_path or ""):
+                 return
+             with open(self.persist_path, "r", encoding="utf-8") as fh:
+                 raw = json.load(fh)
+             for key, entry in raw.items():
+                 try:
+                     timestamp = datetime.fromisoformat(entry["timestamp"])
+                     if not self._is_expired(timestamp):
+                         self.cache[key] = {
+                             "data": entry["data"],
+                             "timestamp": timestamp,
+                         }
+                 except Exception:
+                     continue
+         except Exception as exc:
+             print(f"⚠️ Failed to load cache persistence: {exc}", file=sys.stderr)
+ 
+     async def _persist_locked(self) -> None:
+         if not (self.persistence_enabled and self.persist_path):
+             return
+         try:
+             serialisable = {
+                 key: {
+                     "data": value["data"],
+                     "timestamp": value["timestamp"].isoformat(),
+                 }
+                 for key, value in self.cache.items()
+                 if not self._is_expired(value["timestamp"])
+             }
+             tmp_path = f"{self.persist_path}.tmp"
+             with open(tmp_path, "w", encoding="utf-8") as fh:
+                 json.dump(serialisable, fh)
+             os.replace(tmp_path, self.persist_path)
+         except Exception as exc:
+             print(f"⚠️ Failed to persist cache: {exc}", file=sys.stderr)
+ 
+ 
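# --- Illustrative sketch (editorial, not part of the package) ---
# Minimal usage of SimpleCache: entries live for `ttl_hours`, the oldest entry
# is evicted once `max_entries` is reached, and all access is lock-guarded.
async def _cache_demo() -> None:
    demo = SimpleCache(ttl_hours=1, max_entries=2)
    await demo.set("a", "cached page")
    assert await demo.get("a") == "cached page"
    await demo.set("b", "second page")
    await demo.set("c", "third page")  # at capacity: evicts the oldest ("a")
    assert await demo.get("a") is None
# asyncio.run(_cache_demo())
# --- end sketch ---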
+ class TokenBucketRateLimiter:
+     def __init__(self, requests_per_minute: int, burst: int):
+         self.capacity = max(burst, requests_per_minute, 1)
+         self.refill_rate = requests_per_minute / 60 if requests_per_minute > 0 else 0
+         self.tokens: Dict[str, float] = {}
+         self.last_refill: Dict[str, float] = {}
+         self._lock = asyncio.Lock()
+ 
+     async def acquire(self, key: str = "global") -> None:
+         if self.refill_rate == 0:
+             return
+ 
+         while True:
+             async with self._lock:
+                 now = time.monotonic()
+                 tokens = self.tokens.get(key, float(self.capacity))
+                 last = self.last_refill.get(key, now)
+ 
+                 elapsed = now - last
+                 if elapsed > 0:
+                     tokens = min(self.capacity, tokens + elapsed * self.refill_rate)
+ 
+                 if tokens >= 1:
+                     self.tokens[key] = tokens - 1
+                     self.last_refill[key] = now
+                     return
+ 
+                 wait_time = (1 - tokens) / self.refill_rate if self.refill_rate else 0
+                 self.tokens[key] = tokens
+                 self.last_refill[key] = now
+ 
+             await asyncio.sleep(wait_time)
+ 
+ 
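# --- Illustrative sketch (editorial, not part of the package) ---
# Each key (here, a tool name) gets its own bucket. The bucket starts full at
# capacity = max(burst, requests_per_minute, 1) tokens and refills at
# requests_per_minute / 60 tokens per second, so with requests_per_minute=6
# and burst=2 the capacity is 6: the first six acquire() calls pass
# immediately and the seventh sleeps roughly 10 seconds for the next token.
async def _rate_limit_demo() -> None:
    limiter = TokenBucketRateLimiter(requests_per_minute=6, burst=2)
    for _ in range(6):
        await limiter.acquire("demo")  # drains the initially full bucket
    await limiter.acquire("demo")  # blocks ~10s until one token refills
# asyncio.run(_rate_limit_demo())
# --- end sketch ---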
+ def load_config() -> AppConfig:
+     """Load and validate the configuration file.
+ 
+     Priority:
+     1. Looks for `config.json` in the current working directory.
+     2. Falls back to the `config.json` bundled with the package.
+     """
+     config_data = None
+     local_config_path = os.path.join(os.getcwd(), "config.json")
+ 
+     try:
+         # 1. Prioritize local config file
+         if os.path.exists(local_config_path):
+             logger.debug("Found local config.json. Loading...")
+             with open(local_config_path, "r") as f:
+                 config_data = json.load(f)
+         else:
+             # 2. Fallback to packaged config
+             try:
+                 packaged_config_path = Path(__file__).with_name("config.json")
+                 config_data = json.loads(
+                     packaged_config_path.read_text(encoding="utf-8")
+                 )
+             except (FileNotFoundError, json.JSONDecodeError):
+                 # This is a critical failure if the package is broken
+                 print("FATAL: Packaged config.json not found.", file=sys.stderr)
+                 raise
+ 
+     except Exception as e:
+         print(f"FATAL: Could not read config.json. Error: {e}", file=sys.stderr)
+         raise
+ 
+     if not config_data:
+         raise FileNotFoundError("Could not find or load config.json")
+ 
+     try:
+         validated_config = validate_config(config_data)
+         logger.debug("Configuration successfully loaded and validated.")
+         return validated_config
+     except Exception as e:  # Pydantic's ValidationError
+         print(
+             "❌ FATAL: Configuration validation failed. Please check your config.json.",
+             file=sys.stderr,
+         )
+         print(e, file=sys.stderr)
+         raise
+ 
+ 
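# --- Illustrative sketch (editorial, not part of the package) ---
# A local ./config.json takes priority over the packaged one. Based on the
# keys this module reads below, a minimal override might look like this
# (values are examples only; the authoritative schema is config_validator.py):
_EXAMPLE_LOCAL_CONFIG = {
    "server_config": {
        "max_concurrent_requests": 10,
        "features": {"real_time_search": True},
    },
    "docs_urls": {
        "fastapi": {"url": "https://fastapi.tiangolo.com", "auto_approve": True},
    },
    "cache": {"enabled": True, "ttl_hours": 24, "max_entries": 1000},
    "rate_limiting": {"enabled": False},
}
# --- end sketch ---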
+ # Load configuration
+ config_model = load_config()
+ config = config_model.model_dump()  # Use the dict version for existing logic
+ real_time_search_enabled = (
+     config.get("server_config", {}).get("features", {}).get("real_time_search", True)
+ )
+ docs_urls = {}
+ # Handle both old simple URL format and new enhanced format
+ for lib_name, lib_data in config.get("docs_urls", {}).items():
+     if isinstance(lib_data, dict):
+         docs_urls[lib_name] = str(lib_data.get("url") or "").strip()
+     else:
+         docs_urls[lib_name] = str(lib_data or "").strip()
+ 
+ cache_config = config.get("cache", {"enabled": False})
+ cache_persistence_enabled = cache_config.get("persistence_enabled", False)
+ cache_persist_path = cache_config.get("persist_path")
+ if cache_persistence_enabled and not cache_persist_path:
+     cache_persist_path = os.path.join(os.getcwd(), ".docs_cache.json")
+ 
+ # Initialize cache if enabled
+ cache = (
+     SimpleCache(
+         ttl_hours=cache_config.get("ttl_hours", 24),
+         max_entries=cache_config.get("max_entries", 1000),
+         persistence_enabled=cache_persistence_enabled,
+         persist_path=cache_persist_path,
+     )
+     if cache_config.get("enabled", False)
+     else None
+ )
+ 
+ site_index_settings = load_site_index_settings_from_env(cwd=os.getcwd())
+ site_index_path = site_index_settings.path
+ 
+ http_client: Optional[httpx.AsyncClient] = None
+ scrape_semaphore = asyncio.Semaphore(
+     config.get("server_config", {}).get("max_concurrent_requests", 10)
+ )
+ 
+ rate_limit_config = config.get("rate_limiting", {"enabled": False})
+ rate_limiter = (
+     TokenBucketRateLimiter(
+         requests_per_minute=rate_limit_config.get("requests_per_minute", 60),
+         burst=rate_limit_config.get("burst_requests", 10),
+     )
+     if rate_limit_config.get("enabled", False)
+     else None
+ )
+ 
+ 
+ async def enforce_rate_limit(tool_name: str) -> None:
+     if rate_limiter:
+         await rate_limiter.acquire(tool_name)
+ 
+ 
+ async def search_web_with_retry(
+     query: str, max_retries: int = 3, num_results: int = 3
+ ) -> dict:
+     """Search documentation pages, with retries.
+ 
+     Uses Serper when configured; otherwise falls back to on-site docs search
+     (MkDocs/Sphinx indexes when available, otherwise sitemap discovery).
+     """
+     global http_client
+     if http_client is None:
+         http_client = httpx.AsyncClient(timeout=httpx.Timeout(30.0, read=60.0))
+ 
+     if not SERPER_API_KEY:
+         try:
+             return await search_site_via_sitemap(
+                 query,
+                 http_client,
+                 user_agent=USER_AGENT,
+                 num_results=num_results,
+                 allow_network=real_time_search_enabled,
+             )
+         except Exception as e:
+             print(f"Fallback site search failed: {e}", file=sys.stderr)
+             return {"organic": []}
+ 
+     payload = json.dumps({"q": query, "num": num_results})
+     headers = {
+         "X-API-KEY": SERPER_API_KEY,
+         "Content-Type": "application/json",
+         "User-Agent": USER_AGENT,
+     }
+ 
+     for attempt in range(max_retries):
+         try:
+             response = await http_client.post(
+                 SERPER_URL,
+                 headers=headers,
+                 content=payload,
+             )
+             response.raise_for_status()
+             return response.json()
+ 
+         except httpx.TimeoutException:
+             if attempt == max_retries - 1:
+                 print(
+                     f"Timeout after {max_retries} attempts for query: {query}",
+                     file=sys.stderr,
+                 )
+                 break
+             await asyncio.sleep(2**attempt)  # Exponential backoff
+ 
+         except httpx.HTTPStatusError as e:
+             if e.response.status_code == 429:  # Rate limited
+                 if attempt == max_retries - 1:
+                     print(f"Rate limited after {max_retries} attempts", file=sys.stderr)
+                     break
+                 await asyncio.sleep(2 ** (attempt + 2))  # Longer wait for rate limits
+             else:
+                 print(f"HTTP error {e.response.status_code}: {e}", file=sys.stderr)
+                 break
+ 
+         except Exception as e:
+             if attempt == max_retries - 1:
+                 print(
+                     f"Unexpected error after {max_retries} attempts: {e}",
+                     file=sys.stderr,
+                 )
+                 break
+             await asyncio.sleep(2**attempt)
+ 
+     # Serper is optional; fall back to sitemap search if it fails.
+     try:
+         return await search_site_via_sitemap(
+             query,
+             http_client,
+             user_agent=USER_AGENT,
+             num_results=num_results,
+             allow_network=real_time_search_enabled,
+         )
+     except Exception as e:
+         print(f"Fallback site search failed: {e}", file=sys.stderr)
+         return {"organic": []}
+ 
+ 
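# --- Illustrative note (editorial, not part of the package) ---
# Backoff schedule above, worked out for the default max_retries=3: timeouts
# and unexpected errors sleep 2**attempt seconds between attempts, HTTP 429
# sleeps 2**(attempt + 2); the final attempt never sleeps, and any failure
# falls through to the sitemap-based site search.
_BACKOFF_SECONDS = [2**a for a in range(2)]           # [1, 2] after errors
_RATE_LIMIT_BACKOFF = [2 ** (a + 2) for a in range(2)]  # [4, 8] after HTTP 429
# --- end note ---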
+ async def fetch_url_with_cache(url: str, max_retries: int = 3) -> str:
+     """Fetch URL content with caching and a Playwright-based scraper."""
+     cache_key = hashlib.md5(url.encode()).hexdigest()
+ 
+     if cache:
+         cached_content = await cache.get(cache_key)
+         if cached_content:
+             return cached_content
+ 
+     # Use the new Playwright scraper
+     async with scrape_semaphore:
+         content = await scraper.scrape_url(url)
+ 
+     if cache and "Error:" not in content:
+         await cache.set(cache_key, content)
+ 
+     return content
+ 
+ 
+ # Backward compatibility aliases
+ async def search_web(query: str, num_results: int = 3) -> dict:
+     return await search_web_with_retry(query, num_results=num_results)
+ 
+ 
+ async def fetch_url(url: str) -> str:
+     return await fetch_url_with_cache(url)
+ 
+ 
+ # Configure smart search now that the helpers are in place
+ smart_search.configure(docs_urls, search_web)
+ 
+ 
+ async def shutdown_resources() -> None:
+     global http_client
+     if http_client:
+         await http_client.aclose()
+         http_client = None
+     await scraper.close()
+ 
+ 
+ def _cleanup_sync() -> None:
+     try:
+         loop = asyncio.get_running_loop()
+     except RuntimeError:
+         try:
+             asyncio.run(shutdown_resources())
+         except RuntimeError:
+             loop = asyncio.new_event_loop()
+             asyncio.set_event_loop(loop)
+             loop.run_until_complete(shutdown_resources())
+             loop.close()
+     else:
+         loop.create_task(shutdown_resources())
+ 
+ 
+ atexit.register(_cleanup_sync)
+ 
+ 
+ def get_versioned_docs_url(library: str, version: str, lib_config: Dict) -> str:
+     """
+     Build version-specific documentation URL.
+ 
+     Args:
+         library: Library name
+         version: Requested version (e.g., "4.2", "stable", "latest")
+         lib_config: Library configuration from config.json
+ 
+     Returns:
+         Versioned documentation URL
+     """
+     base_url = str(lib_config.get("url") or "")
+ 
+     # If version is "latest", return base URL as-is
+     if version == "latest":
+         return base_url
+ 
+     # Check if library supports version templates
+     template = lib_config.get("version_url_template")
+     if template:
+         return template.format(version=version)
+ 
+     # Handle common patterns by replacing stable/latest in URL
+     versioned_url = base_url.replace("/stable/", f"/{version}/")
+     versioned_url = versioned_url.replace("/latest/", f"/{version}/")
+ 
+     return versioned_url
+ 
+ 
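# --- Illustrative sketch (editorial, not part of the package) ---
# Worked examples of get_versioned_docs_url (the URLs are hypothetical):
# 1) an entry with a template renders it directly:
#    {"version_url_template": "https://docs.example.org/{version}/"}
#    -> get_versioned_docs_url("lib", "4.2", ...) == "https://docs.example.org/4.2/"
# 2) without a template, "/stable/" or "/latest/" in the base URL is swapped:
assert get_versioned_docs_url(
    "lib", "4.2", {"url": "https://docs.example.org/en/stable/"}
) == "https://docs.example.org/en/4.2/"
# --- end sketch ---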
+ @mcp.tool()
+ async def get_docs(
+     query: str,
+     libraries: LibrariesParam,
+     version: str = "latest",
+     auto_detect_version: bool = False,
+ ):
+     """
+     Search documentation for a given query and one or more libraries.
+ 
+     Args:
+         query: The query to search for (e.g., "Chroma DB")
+         libraries: A single library or a list of libraries to search in (e.g., "langchain" or ["fastapi", "django"])
+         version: Library version to search (e.g., "4.2", "stable", "latest"). Default: "latest"
+         auto_detect_version: Automatically detect installed package version. Default: False
+ 
+     Returns:
+         Dictionary with structured summaries and supporting metadata
+     """
+     await enforce_rate_limit("get_docs")
+ 
+     if isinstance(libraries, str):
+         libraries = [lib.strip() for lib in libraries.split(",") if lib.strip()]
+ 
+     config_dict = config_model.model_dump()
+     library_summaries: List[Dict[str, Any]] = []
+     summary_sections: List[str] = []
+ 
+     for library in libraries:
+         # Resolve version (with auto-detection if enabled)
+         resolved_version = await version_resolver.resolve_version(
+             library=library,
+             requested_version=version,
+             auto_detect=auto_detect_version,
+             project_path=".",
+         )
+         lib_entry: Dict[str, Any] = {
+             "library": library,
+             "requested_query": query,
+             "status": "searched",
+             "results": [],
+         }
+ 
+         lib_config = config_dict.get("docs_urls", {}).get(library, {})
+         auto_approve = lib_config.get("auto_approve", True)
+ 
+         if not auto_approve:
+             print(
+                 f"⚠️ Requesting approval to search {library} documentation...",
+                 file=sys.stderr,
+             )
+ 
+         docs_root = docs_urls.get(library)
+         if not docs_root:
+             lib_entry.update(
+                 {
+                     "status": "unsupported",
+                     "message": f"Library '{library}' not supported by this tool",
+                 }
+             )
+             library_summaries.append(lib_entry)
+             summary_sections.append(
+                 f"### {library}\n- Unsupported library; no documentation root configured."
+             )
+             continue
+ 
+         # Get version-specific URL
+         versioned_url = get_versioned_docs_url(library, resolved_version, lib_config)
+ 
+         # Build search query with version context
+         search_query = f"site:{versioned_url} {query}"
+         if resolved_version != "latest" and not lib_config.get("version_url_template"):
+             # Add version to query if URL doesn't support versioning
+             search_query += f" version {resolved_version}"
+ 
+         search_results = await search_web(search_query, num_results=5)
+         organic_results = (search_results.get("organic") or [])[:3]
+ 
+         if not organic_results:
+             lib_entry.update(
+                 {
+                     "status": "no_results",
+                     "message": "No indexed documentation results returned",
+                 }
+             )
+             library_summaries.append(lib_entry)
+             summary_sections.append(f"### {library}\n- No results for query '{query}'.")
+             continue
+ 
+         fetch_tasks = [fetch_url(result["link"]) for result in organic_results]
+         fetched_contents = await asyncio.gather(*fetch_tasks, return_exceptions=True)
+ 
+         library_lines = [f"### {library}"]
+         for result, content in zip(organic_results, fetched_contents):
+             entry: Dict[str, Any] = {
+                 "title": result.get("title") or result.get("link"),
+                 "url": result.get("link"),
+                 "source_snippet": result.get("snippet", ""),
+             }
+ 
+             if isinstance(content, Exception):
+                 error_message = str(content)
+                 entry["status"] = "error"
+                 entry["error"] = error_message
+                 library_lines.append(
+                     f"- {entry['title']}: failed to fetch ({error_message})"
+                 )
+             else:
+                 content_str = str(content)
+                 summary = content_enhancer.generate_summary(content_str, query)
+                 code_snippet_count = len(
+                     content_enhancer.extract_code_snippets(content_str)
+                 )
+ 
+                 entry.update(
+                     {
+                         "status": "ok",
+                         "summary": summary,
+                         "code_snippet_count": code_snippet_count,
+                     }
+                 )
+ 
+                 bullet_summary = summary if summary else "No summary extracted."
+                 library_lines.append(
+                     f"- {entry['title']}: {bullet_summary} (code snippets: {code_snippet_count})"
+                 )
+ 
+             lib_entry["results"].append(entry)
+ 
+         lib_entry["total_results"] = len(lib_entry["results"])
+         library_summaries.append(lib_entry)
+         summary_sections.append("\n".join(library_lines))
+ 
+     if cache:
+         await cache.clear_expired()
+ 
+     return {
+         "query": query,
+         "libraries": library_summaries,
+         "summary_markdown": "\n\n".join(summary_sections),
+     }
+ 
+ 
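# --- Illustrative note (editorial, not part of the package) ---
# Shape of a successful get_docs payload, per the return statement above
# (all values here are hypothetical):
_EXAMPLE_GET_DOCS_RESPONSE = {
    "query": "dependency injection",
    "libraries": [
        {
            "library": "fastapi",
            "requested_query": "dependency injection",
            "status": "searched",
            "results": [
                {
                    "title": "Dependencies - FastAPI",
                    "url": "https://fastapi.tiangolo.com/tutorial/dependencies/",
                    "source_snippet": "...",
                    "status": "ok",
                    "summary": "...",
                    "code_snippet_count": 4,
                }
            ],
            "total_results": 1,
        }
    ],
    "summary_markdown": "### fastapi\n- Dependencies - FastAPI: ... (code snippets: 4)",
}
# --- end note ---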
+ @mcp.tool()
+ async def suggest_libraries(partial_name: str):
+     """
+     Suggest libraries based on partial input for auto-completion.
+ 
+     Args:
+         partial_name: Partial library name to search for (e.g. "lang" -> ["langchain"])
+ 
+     Returns:
+         List of matching library names
+     """
+     if not partial_name:
+         return list(sorted(docs_urls.keys()))
+ 
+     partial_lower = partial_name.lower()
+     suggestions = []
+ 
+     # Exact matches first
+     for lib in docs_urls.keys():
+         if lib.lower() == partial_lower:
+             suggestions.append(lib)
+ 
+     # Starts with matches
+     for lib in docs_urls.keys():
+         if lib.lower().startswith(partial_lower) and lib not in suggestions:
+             suggestions.append(lib)
+ 
+     # Contains matches
+     for lib in docs_urls.keys():
+         if partial_lower in lib.lower() and lib not in suggestions:
+             suggestions.append(lib)
+ 
+     return sorted(suggestions[:10])  # Limit to top 10 suggestions
+ 
+ 
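# --- Illustrative note (editorial, not part of the package) ---
# Matching runs in three tiers (exact, prefix, substring), deduplicating as it
# goes. Note the final sorted() re-orders the surviving names alphabetically,
# so tier priority only decides which names make the [:10] cut. For example,
# if docs_urls contained {"langchain", "langgraph", "golang"}:
#     await suggest_libraries("lang")  # -> ["golang", "langchain", "langgraph"]
# --- end note ---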
+ @mcp.tool()
+ async def health_check():
+     """
+     Check the health and availability of documentation sources.
+ 
+     Returns:
+         Dictionary with health status of each library's documentation site
+     """
+     results = {}
+ 
+     # Test a sample of libraries to avoid overwhelming servers
+     sample_libraries = list(docs_urls.items())[:5]
+ 
+     for library, url in sample_libraries:
+         start_time = time.time()
+         try:
+             async with httpx.AsyncClient() as client:
+                 response = await client.head(
+                     str(url),
+                     timeout=httpx.Timeout(10.0),
+                     headers={"User-Agent": USER_AGENT},
+                     follow_redirects=True,
+                 )
+             response_time = time.time() - start_time
+             results[library] = {
+                 "status": "healthy",
+                 "status_code": response.status_code,
+                 "response_time_ms": round(response_time * 1000, 2),
+                 "url": url,
+             }
+         except httpx.TimeoutException:
+             results[library] = {
+                 "status": "timeout",
+                 "error": "Request timed out",
+                 "url": url,
+             }
+         except Exception as e:
+             results[library] = {"status": "error", "error": str(e), "url": url}
+ 
+     # Add cache stats if caching is enabled
+     if cache:
+         cache_stats = await cache.stats()
+         results["_cache_stats"] = {"enabled": True, **cache_stats}
+     else:
+         results["_cache_stats"] = {"enabled": False}
+ 
+     return results
+ 
+ 
+ @mcp.tool()
+ async def clear_cache():
+     """
+     Clear the documentation cache to force fresh fetches.
+ 
+     Returns:
+         Status message about cache clearing
+     """
+     if cache:
+         entries_cleared = await cache.clear()
+         return f"Cache cleared. Removed {entries_cleared} cached entries."
+     else:
+         return "Caching is not enabled."
+ 
+ 
+ @mcp.tool()
+ async def get_cache_stats():
+     """
+     Get statistics about the current cache usage.
+ 
+     Returns:
+         Dictionary with cache statistics
+     """
+     if not cache:
+         return {"enabled": False, "message": "Caching is not enabled"}
+ 
+     stats = await cache.stats()
+     details = {
+         "enabled": True,
+         **stats,
+     }
+     details["persistence"] = {
+         "enabled": cache.persistence_enabled,
+         "path": cache.persist_path,
+     }
+     return details
+ 
+ 
+ @mcp.tool()
+ async def preindex_docs(
+     libraries: LibrariesParam,
+     include_sitemap: bool = False,
+     persist_path: Optional[str] = None,
+     max_concurrent_sites: int = 3,
+ ):
+     """
+     Pre-download and persist docs site indexes for Serper-free search.
+ 
+     This caches MkDocs/Sphinx search indexes (and optionally sitemaps) to disk so the
+     server can search supported documentation sites without requiring Serper.
+     """
+     await enforce_rate_limit("preindex_docs")
+ 
+     targets = libraries or sorted(docs_urls.keys())
+     if not targets:
+         return {
+             "status": "no_targets",
+             "message": "No libraries configured to preindex",
+         }
+ 
+     global http_client
+     if http_client is None:
+         http_client = httpx.AsyncClient(timeout=httpx.Timeout(30.0, read=60.0))
+ 
+     concurrency = max(1, min(int(max_concurrent_sites), 10))
+     semaphore = asyncio.Semaphore(concurrency)
+ 
+     async def _run_one(library: str) -> Dict[str, Any]:
+         docs_root = docs_urls.get(library)
+         if not docs_root:
+             return {"library": library, "status": "unsupported"}
+ 
+         async with semaphore:
+             summary = await preindex_site(
+                 docs_root,
+                 http_client,
+                 user_agent=USER_AGENT,
+                 include_sitemap=include_sitemap,
+             )
+         summary["library"] = library
+         return summary
+ 
+     results = await asyncio.gather(*[_run_one(lib) for lib in targets])
+ 
+     path = persist_path or site_index_path
+     try:
+         save_preindexed_state(path)
+         persisted: Dict[str, Any] = {"status": "ok", "path": path}
+     except Exception as e:
+         persisted = {"status": "error", "path": path, "error": str(e)}
+ 
+     return {
+         "status": "ok",
+         "persist": persisted,
+         "real_time_search_enabled": real_time_search_enabled,
+         "include_sitemap": include_sitemap,
+         "max_concurrent_sites": concurrency,
+         "total_libraries": len(targets),
+         "results": results,
+     }
+ 
+ 
+ @mcp.tool()
+ async def semantic_search(
+     query: str,
+     libraries: LibrariesParam,
+     context: Optional[str] = None,
+     version: str = "latest",
+     auto_detect_version: bool = False,
+     use_vector_rerank: bool = True,
+ ):
+     """
+     Enhanced semantic search across one or more libraries with AI-powered relevance ranking.
+ 
+     Uses hybrid search combining:
+     - Vector embeddings for semantic similarity (50% weight)
+     - Keyword matching for precise results (30% weight)
+     - Source authority and metadata (20% weight)
+ 
+     Args:
+         query: The search query.
+         libraries: A single library or a list of libraries to search in.
+         context: Optional context about your project or use case.
+         version: Library version to search (e.g., "4.2", "stable", "latest"). Default: "latest"
+         auto_detect_version: Automatically detect installed package version. Default: False
+         use_vector_rerank: Enable vector-based semantic reranking for better relevance. Default: True
+ 
+     Returns:
+         Enhanced search results with AI-powered relevance scores and metadata, ranked across all libraries.
+     """
+     from .reranker import get_reranker
+ 
+     await enforce_rate_limit("semantic_search")
+ 
+     if isinstance(libraries, str):
+         libraries = [lib.strip() for lib in libraries.split(",") if lib.strip()]
+ 
+     search_tasks = [
+         smart_search.semantic_search(query, lib, context) for lib in libraries
+     ]
+ 
+     try:
+         results_by_library = await asyncio.gather(*search_tasks, return_exceptions=True)
+ 
+         all_results: List[SearchResult] = []
+         for res_list in results_by_library:
+             if not isinstance(res_list, Exception):
+                 all_results.extend(res_list)  # type: ignore
+ 
+         # Apply vector-based reranking for better semantic relevance
+         if use_vector_rerank and all_results:
+             try:
+                 reranker = get_reranker()
+                 all_results = await reranker.rerank(
+                     all_results, query, use_semantic=True
+                 )
+             except ImportError:
+                 logger.warning(
+                     "Vector search dependencies not installed. "
+                     "Falling back to basic relevance sorting. "
+                     "Install with: pip install documentation-search-enhanced[vector]"
+                 )
+                 all_results.sort(key=lambda r: r.relevance_score, reverse=True)
+         else:
+             # Fallback to basic relevance score sorting
+             all_results.sort(key=lambda r: r.relevance_score, reverse=True)
+ 
+         return {
+             "query": query,
+             "libraries_searched": libraries,
+             "total_results": len(all_results),
+             "vector_rerank_enabled": use_vector_rerank,
+             "results": [
+                 {
+                     "source_library": result.source_library,
+                     "title": result.title,
+                     "url": result.url,
+                     "snippet": (
+                         result.snippet[:300] + "..."
+                         if len(result.snippet) > 300
+                         else result.snippet
+                     ),
+                     "relevance_score": result.relevance_score,
+                     "content_type": result.content_type,
+                     "difficulty_level": result.difficulty_level,
+                     "estimated_read_time": f"{result.estimated_read_time} min",
+                     "has_code_examples": result.code_snippets_count > 0,
+                 }
+                 for result in all_results[:10]  # Top 10 combined results
+             ],
+         }
+     except Exception as e:
+         return {"error": f"Search failed: {str(e)}", "results": []}
+ 
+ 
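# --- Illustrative sketch (editorial, not part of the package) ---
# The 50/30/20 blend described in the docstring lives in reranker.py; a
# hypothetical scoring function of the same shape would be:
def _hybrid_score(semantic: float, keyword: float, authority: float) -> float:
    """Blend per-signal scores in [0, 1] using the documented weights."""
    return 0.5 * semantic + 0.3 * keyword + 0.2 * authority
# _hybrid_score(0.9, 0.4, 0.7) == 0.71
# --- end sketch ---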
+ @mcp.tool()
+ async def filtered_search(
+     query: str,
+     library: str,
+     content_type: Optional[str] = None,
+     difficulty_level: Optional[str] = None,
+     has_code_examples: Optional[bool] = None,
+     version: str = "latest",
+     auto_detect_version: bool = False,
+ ):
+     """
+     Search with advanced filtering options.
+ 
+     Args:
+         query: The search query
+         library: The library to search in
+         content_type: Filter by content type ("tutorial", "reference", "example", "guide")
+         difficulty_level: Filter by difficulty ("beginner", "intermediate", "advanced")
+         has_code_examples: Filter for content with code examples (true/false)
+         version: Library version to search (e.g., "4.2", "stable", "latest"). Default: "latest"
+         auto_detect_version: Automatically detect installed package version. Default: False
+ 
+     Returns:
+         Filtered search results matching specified criteria
+     """
+     from .smart_search import filtered_search, SearchFilters
+ 
+     await enforce_rate_limit("filtered_search")
+ 
+     filters = SearchFilters(
+         content_type=content_type,
+         difficulty_level=difficulty_level,
+         has_code_examples=has_code_examples,
+     )
+ 
+     try:
+         results = await filtered_search.search_with_filters(query, library, filters)
+ 
+         return {
+             "query": query,
+             "library": library,
+             "filters_applied": {
+                 "content_type": content_type,
+                 "difficulty_level": difficulty_level,
+                 "has_code_examples": has_code_examples,
+             },
+             "total_results": len(results),
+             "results": [
+                 {
+                     "title": result.title,
+                     "url": result.url,
+                     "snippet": (
+                         result.snippet[:200] + "..."
+                         if len(result.snippet) > 200
+                         else result.snippet
+                     ),
+                     "relevance_score": result.relevance_score,
+                     "content_type": result.content_type,
+                     "difficulty_level": result.difficulty_level,
+                     "estimated_read_time": f"{result.estimated_read_time} min",
+                 }
+                 for result in results[:10]
+             ],
+         }
+     except Exception as e:
+         return {"error": f"Filtered search failed: {str(e)}", "results": []}
+ 
+ 
+ @mcp.tool()
+ async def get_learning_path(library: str, experience_level: str = "beginner"):
+     """
+     Get a structured learning path for a library based on experience level.
+ 
+     Args:
+         library: The library to create a learning path for
+         experience_level: Your current level ("beginner", "intermediate", "advanced")
+ 
+     Returns:
+         Structured learning path with progressive topics and resources
+     """
+     # Dynamic learning path generation based on difficulty
+     level_topics = {
+         "beginner": [
+             "Getting Started",
+             "Basic Concepts",
+             "First Examples",
+             "Common Patterns",
+         ],
+         "intermediate": [
+             "Advanced Features",
+             "Best Practices",
+             "Integration",
+             "Testing",
+         ],
+         "advanced": [
+             "Performance Optimization",
+             "Advanced Architecture",
+             "Production Deployment",
+             "Monitoring",
+         ],
+     }
+ 
+     if experience_level not in level_topics:
+         return {"error": f"Experience level {experience_level} not supported"}
+ 
+     learning_steps = []
+     for i, topic in enumerate(level_topics[experience_level]):
+         learning_steps.append(
+             {
+                 "step": i + 1,
+                 "topic": f"{library.title()} - {topic}",
+                 "content_type": "tutorial",
+                 "search_query": f"{library} {topic.lower()}",
+                 "target_library": library,
+                 "estimated_time": "2-4 hours",
+             }
+         )
+ 
+     return {
+         "library": library,
+         "experience_level": experience_level,
+         "total_topics": len(learning_steps),
+         "estimated_total_time": f"{len(learning_steps) * 2}-{len(learning_steps) * 4} hours",
+         "learning_path": learning_steps,
+         "next_level": {
+             "beginner": "intermediate",
+             "intermediate": "advanced",
+             "advanced": "Consider specializing in specific areas or exploring related technologies",
+         }.get(experience_level, ""),
+     }
+ 
+ 
+ # Removed 1000+ lines of hardcoded learning path data
+ @mcp.tool()
+ async def get_code_examples(
+     library: str,
+     topic: str,
+     language: str = "python",
+     version: str = "latest",
+     auto_detect_version: bool = False,
+ ):
+     """
+     Get curated code examples for a specific topic and library.
+ 
+     Args:
+         library: The library to search for examples
+         topic: The specific topic or feature
+         language: Programming language for examples
+         version: Library version to search (e.g., "4.2", "stable", "latest"). Default: "latest"
+         auto_detect_version: Automatically detect installed package version. Default: False
+ 
+     Returns:
+         Curated code examples with explanations
+     """
+ 
+     await enforce_rate_limit("get_code_examples")
+ 
+     # Enhanced query for code-specific search
+     code_query = f"{library} {topic} example code {language}"
+ 
+     try:
+         # Use filtered search to find examples with code
+         from .smart_search import filtered_search, SearchFilters
+ 
+         filters = SearchFilters(content_type="example", has_code_examples=True)
+ 
+         results = await filtered_search.search_with_filters(
+             code_query, library, filters
+         )
+ 
+         if not results:
+             # Fallback to regular search
+             if library not in docs_urls:
+                 return {"error": f"Library {library} not supported"}
+ 
+             query = f"site:{docs_urls[library]} {code_query}"
+             search_results = await search_web(query)
+ 
+             if not search_results.get("organic"):
+                 return {"error": "No code examples found"}
+ 
+             # Process first result for code extraction
+             first_result = search_results["organic"][0]
+             content = await fetch_url(first_result["link"])
+ 
+             # Extract code snippets (simplified)
+             code_blocks = []
+             import re
+ 
+             code_pattern = r"```(?:python|javascript|typescript|js)?\n(.*?)```"
+             matches = re.finditer(code_pattern, content, re.DOTALL)
+ 
+             for i, match in enumerate(matches):
+                 if i >= 3:  # Limit to 3 examples
+                     break
+                 code_blocks.append(
+                     {
+                         "example": i + 1,
+                         "code": match.group(1).strip(),
+                         "language": language,
+                         "source_url": first_result["link"],
+                     }
+                 )
+ 
+             return {
+                 "library": library,
+                 "topic": topic,
+                 "language": language,
+                 "total_examples": len(code_blocks),
+                 "examples": code_blocks,
+             }
+ 
+         else:
+             # Process enhanced results
+             examples = []
+             for i, result in enumerate(results[:3]):
+                 examples.append(
+                     {
+                         "example": i + 1,
+                         "title": result.title,
+                         "description": (
+                             result.snippet[:200] + "..."
+                             if len(result.snippet) > 200
+                             else result.snippet
+                         ),
+                         "url": result.url,
+                         "difficulty": result.difficulty_level,
+                         "estimated_read_time": f"{result.estimated_read_time} min",
+                     }
+                 )
+ 
+             return {
+                 "library": library,
+                 "topic": topic,
+                 "language": language,
+                 "total_examples": len(examples),
+                 "examples": examples,
+             }
+ 
+     except Exception as e:
+         return {"error": f"Failed to get code examples: {str(e)}"}
+ 
+ 
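# --- Illustrative sketch (editorial, not part of the package) ---
# What the code_pattern regex above captures: the body of fenced blocks with
# an optional python/javascript/typescript/js language tag.
import re

_demo_page = "Intro\n```python\nprint('hi')\n```\ntext\n```\nplain block\n```"
_found = [
    m.group(1).strip()
    for m in re.finditer(
        r"```(?:python|javascript|typescript|js)?\n(.*?)```", _demo_page, re.DOTALL
    )
]
# _found == ["print('hi')", "plain block"]
# --- end sketch ---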
+ @mcp.tool()
+ async def get_environment_config():
+     """
+     Get current environment configuration and settings.
+ 
+     Returns:
+         Current environment configuration details
+     """
+     from .config_manager import config_manager
+ 
+     config = config_manager.get_config()
+ 
+     return {
+         "environment": config_manager.environment,
+         "server_config": {
+             "logging_level": config["server_config"]["logging_level"],
+             "max_concurrent_requests": config["server_config"][
+                 "max_concurrent_requests"
+             ],
+             "request_timeout_seconds": config["server_config"][
+                 "request_timeout_seconds"
+             ],
+         },
+         "cache_config": {
+             "enabled": config["cache"]["enabled"],
+             "ttl_hours": config["cache"]["ttl_hours"],
+             "max_entries": config["cache"]["max_entries"],
+         },
+         "rate_limiting": {
+             "enabled": config["rate_limiting"]["enabled"],
+             "requests_per_minute": config["rate_limiting"]["requests_per_minute"],
+         },
+         "features": config["server_config"]["features"],
+         "total_libraries": len(config_manager.get_docs_urls()),
+         "available_libraries": list(config_manager.get_docs_urls().keys())[
+             :10
+         ],  # Show first 10
+     }
+ 
+ 
+ @mcp.tool()
+ async def scan_library_vulnerabilities(library_name: str, ecosystem: str = "PyPI"):
+     """
+     Comprehensive vulnerability scan using OSINT sources (OSV, GitHub Advisories, Safety DB).
+ 
+     Args:
+         library_name: Name of the library to scan (e.g., "fastapi", "react")
+         ecosystem: Package ecosystem ("PyPI", "npm", "Maven", "Go", etc.)
+ 
+     Returns:
+         Detailed security report with vulnerabilities, severity levels, and recommendations
+     """
+     await enforce_rate_limit("scan_library_vulnerabilities")
+ 
+     from .vulnerability_scanner import vulnerability_scanner
+ 
+     try:
+         # Perform comprehensive scan
+         security_report = await vulnerability_scanner.scan_library(
+             library_name, ecosystem
+         )
+ 
+         return {
+             "scan_results": security_report.to_dict(),
+             "summary": {
+                 "library": security_report.library_name,
+                 "ecosystem": security_report.ecosystem,
+                 "security_score": security_report.security_score,
+                 "risk_level": (
+                     "🚨 High Risk"
+                     if security_report.security_score < 50
+                     else (
+                         "⚠️ Medium Risk"
+                         if security_report.security_score < 70
+                         else (
+                             "✅ Low Risk"
+                             if security_report.security_score < 90
+                             else "🛡️ Excellent"
+                         )
+                     )
+                 ),
+                 "critical_vulnerabilities": security_report.critical_count,
+                 "total_vulnerabilities": security_report.total_vulnerabilities,
+                 "primary_recommendation": (
+                     security_report.recommendations[0]
+                     if security_report.recommendations
+                     else "No specific recommendations"
+                 ),
+             },
+             "scan_timestamp": security_report.scan_date,
+             "sources": [
+                 "OSV Database",
+                 "GitHub Security Advisories",
+                 "Safety DB (PyPI only)",
+             ],
+         }
+ 
+     except Exception as e:
+         return {
+             "error": f"Vulnerability scan failed: {str(e)}",
+             "library": library_name,
+             "ecosystem": ecosystem,
+             "scan_timestamp": datetime.now().isoformat(),
+         }
+ 
+ 
+ @mcp.tool()
+ async def get_security_summary(library_name: str, ecosystem: str = "PyPI"):
+     """
+     Get quick security overview for a library without detailed vulnerability list.
+ 
+     Args:
+         library_name: Name of the library
+         ecosystem: Package ecosystem (default: PyPI)
+ 
+     Returns:
+         Concise security summary with score and basic recommendations
+     """
+     await enforce_rate_limit("get_security_summary")
+ 
+     from .vulnerability_scanner import security_integration
+ 
+     try:
+         summary = await security_integration.get_security_summary(
+             library_name, ecosystem
+         )
+ 
+         # Add security badge
+         score = summary.get("security_score", 50)
+         if score >= 90:
+             badge = "🛡️ EXCELLENT"
+         elif score >= 70:
+             badge = "✅ SECURE"
+         elif score >= 50:
+             badge = "⚠️ CAUTION"
+         else:
+             badge = "🚨 HIGH RISK"
+ 
+         return {
+             "library": library_name,
+             "ecosystem": ecosystem,
+             "security_badge": badge,
+             "security_score": score,
+             "status": summary.get("status", "unknown"),
+             "vulnerabilities": {
+                 "total": summary.get("total_vulnerabilities", 0),
+                 "critical": summary.get("critical_vulnerabilities", 0),
+             },
+             "recommendation": summary.get(
+                 "primary_recommendation", "No recommendations available"
+             ),
+             "last_scanned": datetime.now().isoformat(),
+         }
+ 
+     except Exception as e:
+         return {
+             "library": library_name,
+             "ecosystem": ecosystem,
+             "security_badge": "❓ UNKNOWN",
+             "security_score": None,
+             "status": "scan_failed",
+             "error": str(e),
+         }
+ 
+ 
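# --- Illustrative sketch (editorial, not part of the package) ---
# The same score thresholds (>=90, >=70, >=50) are re-implemented in
# scan_library_vulnerabilities, get_security_summary, and
# compare_library_security below; a shared helper of this shape could
# consolidate them (hypothetical, not in the package):
def _score_to_badge(score: float) -> str:
    if score >= 90:
        return "🛡️ EXCELLENT"
    if score >= 70:
        return "✅ SECURE"
    if score >= 50:
        return "⚠️ CAUTION"
    return "🚨 HIGH RISK"
# --- end sketch ---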
1377
+
1378
+ @mcp.tool()
1379
+ async def compare_library_security(libraries: List[str], ecosystem: str = "PyPI"):
1380
+ """
1381
+ Compare security scores across multiple libraries to help with selection.
1382
+
1383
+ Args:
1384
+ libraries: List of library names to compare
1385
+ ecosystem: Package ecosystem for all libraries
1386
+
1387
+ Returns:
1388
+ Security comparison with rankings and recommendations
1389
+ """
1390
+ await enforce_rate_limit("compare_library_security")
1391
+
1392
+ from .vulnerability_scanner import security_integration
1393
+
1394
+ if len(libraries) > 10:
1395
+ return {"error": "Maximum 10 libraries allowed for comparison"}
1396
+
1397
+ results = []
1398
+
1399
+ # Scan all libraries in parallel for faster comparison
1400
+ scan_tasks = [
1401
+ security_integration.get_security_summary(lib, ecosystem) for lib in libraries
1402
+ ]
1403
+
1404
+ try:
1405
+ summaries = await asyncio.gather(*scan_tasks, return_exceptions=True)
1406
+
1407
+ for i, (library, summary_item) in enumerate(zip(libraries, summaries)):
1408
+ if isinstance(summary_item, Exception):
1409
+ results.append(
1410
+ {
1411
+ "library": library,
1412
+ "security_score": 0,
1413
+ "status": "scan_failed",
1414
+ "error": str(summary_item),
1415
+ }
1416
+ )
1417
+ else:
1418
+ summary = summary_item
1419
+ results.append(
1420
+ {
1421
+ "library": library,
1422
+ "security_score": summary.get("security_score", 0), # type: ignore
1423
+ "status": summary.get("status", "unknown"), # type: ignore
1424
+ "vulnerabilities": summary.get("total_vulnerabilities", 0), # type: ignore
1425
+ "critical_vulnerabilities": summary.get(
1426
+ "critical_vulnerabilities", 0
1427
+ ), # type: ignore
1428
+ "recommendation": summary.get("primary_recommendation", ""), # type: ignore
1429
+ }
1430
+ )
1431
+
1432
+ # Sort by security score (highest first)
1433
+ results.sort(key=lambda x: x.get("security_score", 0), reverse=True)
1434
+
1435
+ # Add rankings
1436
+ for i, result in enumerate(results):
1437
+ result["rank"] = i + 1
1438
+ score = result.get("security_score", 0)
1439
+ if score >= 90:
1440
+ result["rating"] = "🛡️ Excellent"
1441
+ elif score >= 70:
1442
+ result["rating"] = "✅ Secure"
1443
+ elif score >= 50:
1444
+ result["rating"] = "⚠️ Caution"
1445
+ else:
1446
+ result["rating"] = "🚨 High Risk"
1447
+
1448
+ # Generate overall recommendation
1449
+ if results:
1450
+ best_lib = results[0]
1451
+
1452
+ if best_lib.get("security_score", 0) >= 80:
1453
+ overall_rec = (
1454
+ f"✅ Recommended: {best_lib['library']} has excellent security"
1455
+ )
1456
+ elif best_lib.get("security_score", 0) >= 60:
1457
+ overall_rec = f"⚠️ Proceed with caution: {best_lib['library']} is the most secure option"
1458
+ else:
1459
+ overall_rec = "🚨 Security concerns: All libraries have significant vulnerabilities"
1460
+ else:
1461
+ overall_rec = "Unable to generate recommendation"
1462
+
1463
+ return {
1464
+ "comparison_results": results,
1465
+ "total_libraries": len(libraries),
1466
+ "scan_timestamp": datetime.now().isoformat(),
1467
+ "overall_recommendation": overall_rec,
1468
+ "ecosystem": ecosystem,
1469
+ }
1470
+
1471
+ except Exception as e:
1472
+ return {
1473
+ "error": f"Security comparison failed: {str(e)}",
1474
+ "libraries": libraries,
1475
+ "ecosystem": ecosystem,
1476
+ }
1477
+
1478
+
1479
+ @mcp.tool()
1480
+ async def suggest_secure_libraries(
1481
+ partial_name: str, include_security_score: bool = True
1482
+ ):
1483
+ """
1484
+ Enhanced library suggestions that include security scores for informed decisions.
1485
+
1486
+ Args:
1487
+ partial_name: Partial library name to search for
1488
+ include_security_score: Whether to include security scores (slower but more informative)
1489
+
1490
+ Returns:
1491
+ Library suggestions with optional security information
1492
+ """
1493
+ await enforce_rate_limit("suggest_secure_libraries")
1494
+
1495
+ # Get basic suggestions first
1496
+ basic_suggestions = await suggest_libraries(partial_name)
1497
+
1498
+ if not include_security_score or not basic_suggestions:
1499
+ return {
1500
+ "suggestions": basic_suggestions,
1501
+ "partial_name": partial_name,
1502
+ "security_info_included": False,
1503
+ }
1504
+
1505
+ # Add security information for top 5 suggestions
1506
+ from .vulnerability_scanner import security_integration
1507
+
1508
+ enhanced_suggestions = []
1509
+ top_suggestions = basic_suggestions[:5] # Limit to avoid too many API calls
1510
+
1511
+ # Get security scores in parallel
1512
+ security_tasks = [
1513
+ security_integration.get_security_summary(lib, "PyPI")
1514
+ for lib in top_suggestions
1515
+ ]
1516
+
1517
+ try:
1518
+ security_results = await asyncio.gather(*security_tasks, return_exceptions=True)
1519
+
1520
+ for lib, sec_res_item in zip(top_suggestions, security_results):
1521
+ suggestion = {"library": lib}
1522
+
1523
+ if isinstance(sec_res_item, Exception):
1524
+ suggestion.update(
1525
+ {
1526
+ "security_score": None,
1527
+ "security_status": "unknown",
1528
+ "security_badge": "❓",
1529
+ }
1530
+ )
1531
+ else:
1532
+ security_result = sec_res_item
1533
+ score = security_result.get("security_score", 50)
1534
+ suggestion.update(
1535
+ {
1536
+ "security_score": score,
1537
+ "security_status": security_result.get("status", "unknown"), # type: ignore
1538
+ "security_badge": (
1539
+ "🛡️"
1540
+ if score >= 90
1541
+ else "✅"
1542
+ if score >= 70
1543
+ else "⚠️"
1544
+ if score >= 50
1545
+ else "🚨"
1546
+ ),
1547
+ "vulnerabilities": security_result.get(
1548
+ "total_vulnerabilities", 0
1549
+ ), # type: ignore
1550
+ }
1551
+ )
1552
+
1553
+ enhanced_suggestions.append(suggestion)
1554
+
1555
+ # Add remaining suggestions without security info
1556
+ for lib in basic_suggestions[5:]:
1557
+ enhanced_suggestions.append(
1558
+ {
1559
+ "library": lib,
1560
+ "security_score": None,
1561
+ "security_status": "not_scanned",
1562
+ "note": "Use scan_library_vulnerabilities for security details",
1563
+ }
1564
+ )
1565
+
1566
+ # Sort by security score for enhanced suggestions
1567
+ enhanced_suggestions.sort(
1568
+ key=lambda x: x.get("security_score") or 0, reverse=True
1569
+ )
1570
+
1571
+ return {
1572
+ "suggestions": enhanced_suggestions,
1573
+ "partial_name": partial_name,
1574
+ "security_info_included": True,
1575
+ "total_suggestions": len(enhanced_suggestions),
1576
+ "note": "Libraries with security scores are sorted by security rating",
1577
+ }
1578
+
1579
+ except Exception as e:
1580
+ return {
1581
+ "suggestions": [{"library": lib} for lib in basic_suggestions],
1582
+ "partial_name": partial_name,
1583
+ "security_info_included": False,
1584
+ "error": f"Security enhancement failed: {str(e)}",
1585
+ }
1586
+
1587
+
1588
+ @mcp.tool()
1589
+ async def scan_project_dependencies(project_path: str = "."):
1590
+ """
1591
+ Scans project dependencies from files like pyproject.toml or requirements.txt for vulnerabilities.
1592
+
1593
+ Args:
1594
+ project_path: The path to the project directory (defaults to current directory).
1595
+
1596
+ Returns:
1597
+ A comprehensive security report of all project dependencies.
1598
+ """
1599
+ from .vulnerability_scanner import vulnerability_scanner
1600
+ from .project_scanner import find_and_parse_dependencies
1601
+
1602
+ parsed_info = find_and_parse_dependencies(project_path)
1603
+
1604
+ if not parsed_info:
1605
+ return {
1606
+ "error": "No dependency file found.",
1607
+ "message": "Supported files are pyproject.toml, requirements.txt, or package.json.",
1608
+ }
1609
+
1610
+ filename, ecosystem, dependencies = parsed_info
1611
+
1612
+ if not dependencies:
1613
+ return {
1614
+ "summary": {
1615
+ "dependency_file": filename,
1616
+ "ecosystem": ecosystem,
1617
+ "total_dependencies": 0,
1618
+ "vulnerable_count": 0,
1619
+ "overall_project_risk": "None",
1620
+ "message": "No dependencies found to scan.",
1621
+ },
1622
+ "vulnerable_packages": [],
1623
+ }
1624
+
1625
+ total_deps = len(dependencies)
1626
+ logger.debug(
1627
+ "Found %s dependencies in %s. Scanning for vulnerabilities...",
1628
+ total_deps,
1629
+ filename,
1630
+ )
1631
+
1632
+ scan_tasks = [
1633
+ vulnerability_scanner.scan_library(name, ecosystem)
1634
+ for name in dependencies.keys()
1635
+ ]
1636
+
1637
+ results = await asyncio.gather(*scan_tasks, return_exceptions=True)
1638
+
1639
+ vulnerable_deps = []
1640
+ for i, report_item in enumerate(results):
1641
+ dep_name = list(dependencies.keys())[i]
1642
+ if isinstance(report_item, Exception):
1643
+ # Could log this error
1644
+ continue
1645
+ else:
1646
+ report = report_item
1647
+ if report.vulnerabilities: # type: ignore
1648
+ vulnerable_deps.append(
1649
+ {
1650
+ "library": dep_name,
1651
+ "version": dependencies[dep_name],
1652
+ "vulnerability_count": report.total_vulnerabilities, # type: ignore
1653
+ "security_score": report.security_score,
1654
+ "summary": (
1655
+ report.recommendations[0]
1656
+ if report.recommendations
1657
+ else "Update to the latest version."
1658
+ ),
1659
+ "details": [
1660
+ vuln.to_dict() for vuln in report.vulnerabilities[:3]
1661
+ ], # Top 3
1662
+ }
1663
+ )
1664
+
1665
+ vulnerable_deps.sort(key=lambda x: x["security_score"])
1666
+
1667
+ return {
1668
+ "summary": {
1669
+ "dependency_file": filename,
1670
+ "ecosystem": ecosystem,
1671
+ "total_dependencies": total_deps,
1672
+ "vulnerable_count": len(vulnerable_deps),
1673
+ "overall_project_risk": (
1674
+ "High"
1675
+ if any(d["security_score"] < 50 for d in vulnerable_deps)
1676
+ else (
1677
+ "Medium"
1678
+ if any(d["security_score"] < 70 for d in vulnerable_deps)
1679
+ else "Low"
1680
+ )
1681
+ ),
1682
+ },
1683
+ "vulnerable_packages": vulnerable_deps,
1684
+ }
1685
+
1686
+
1687
+ @mcp.tool()
1688
+ async def generate_project_starter(project_name: str, template: str):
1689
+ """
1690
+ Generates a starter project from a template (e.g., 'fastapi', 'react-vite').
1691
+
1692
+ Args:
1693
+ project_name: The name for the new project directory.
1694
+ template: The project template to use.
1695
+
1696
+ Returns:
1697
+ A summary of the created project structure.
1698
+ """
1699
+ from .project_generator import generate_project
1700
+
1701
+ try:
1702
+ result = generate_project(project_name, template)
1703
+
1704
+ # Provide a more user-friendly summary
1705
+ summary = f"✅ Successfully created '{result['project_name']}' using the '{result['template_used']}' template.\n"
1706
+ summary += f"Location: {result['project_path']}\n\n"
1707
+ summary += "Next steps:\n"
1708
+
1709
+ if template == "fastapi":
1710
+ summary += f"1. cd {result['project_name']}\n"
1711
+ summary += "2. uv pip sync\n"
1712
+ summary += "3. uv run uvicorn main:app --reload\n"
1713
+ elif template == "react-vite":
1714
+ summary += f"1. cd {result['project_name']}\n"
1715
+ summary += "2. npm install\n"
1716
+ summary += "3. npm run dev\n"
1717
+
1718
+ result["user_summary"] = summary
1719
+ return result
1720
+
1721
+ except (ValueError, FileExistsError) as e:
1722
+ return {"error": str(e)}
1723
+
1724
+
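Because each tool is a plain coroutine, it can be exercised directly in tests without going through the MCP transport. A minimal sketch, assuming the module is importable and the 'fastapi' template exists in project_generator:

    import asyncio

    async def demo() -> None:
        # Direct call, bypassing the MCP stdio transport; handy for unit tests.
        result = await generate_project_starter("my-api", "fastapi")
        if "error" in result:
            print("generation failed:", result["error"])
        else:
            print(result["user_summary"])

    asyncio.run(demo())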
1725
+ @mcp.tool()
1726
+ async def manage_dev_environment(service: str, project_path: str = "."):
1727
+ """
1728
+ Manages local development environments using Docker Compose.
1729
+
1730
+ Args:
1731
+ service: The service to set up (e.g., 'postgres', 'redis').
1732
+ project_path: The path to the project directory.
1733
+
1734
+ Returns:
1735
+ A confirmation message with the next steps.
1736
+ """
1737
+ from .docker_manager import create_docker_compose, TEMPLATES
1738
+
1739
+ try:
1740
+ if service not in TEMPLATES:
1741
+ return {
1742
+ "error": f"Service '{service}' not supported.",
1743
+ "available_services": list(TEMPLATES.keys()),
1744
+ }
1745
+
1746
+ compose_file = create_docker_compose(service, project_path)
1747
+
1748
+ return {
1749
+ "status": "success",
1750
+ "message": f"✅ Successfully created 'docker-compose.yml' for '{service}' in '{project_path}'.",
1751
+ "next_steps": [
1752
+ f"1. Review the generated file: {compose_file}",
1753
+ "2. Run the service: docker-compose up -d",
1754
+ "3. To stop the service: docker-compose down",
1755
+ ],
1756
+ "service_details": TEMPLATES[service]["services"],
1757
+ }
1758
+
1759
+ except (ValueError, FileExistsError) as e:
1760
+ return {"error": str(e)}
1761
+ except Exception as e:
1762
+ return {"error": f"An unexpected error occurred: {str(e)}"}
1763
+
1764
+
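`TEMPLATES` is defined in docker_manager, which this diff does not show; from its usage here (membership test, `list(TEMPLATES.keys())`, and `TEMPLATES[service]["services"]`) it is presumably a mapping from service name to a Compose-like definition. A hypothetical sketch of that shape, with all field values assumed:

    # Hypothetical shape inferred from usage; the real docker_manager may differ.
    TEMPLATES = {
        "postgres": {
            "services": {
                "postgres": {
                    "image": "postgres:16",
                    "ports": ["5432:5432"],
                    "environment": {"POSTGRES_PASSWORD": "postgres"},
                }
            }
        },
        "redis": {
            "services": {
                "redis": {"image": "redis:7", "ports": ["6379:6379"]}
            }
        },
    }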
1765
+ @mcp.tool()
1766
+ async def get_current_config():
1767
+ """
1768
+ Returns the current, active configuration of the MCP server.
1769
+ This allows users to view the default config and use it as a template for local overrides.
1770
+ """
1771
+ try:
1772
+ # The `config` global is a dictionary created from the Pydantic model
1773
+ # at startup, so it represents the active configuration.
1774
+ return config
1775
+ except Exception as e:
1776
+ return {"error": f"Could not retrieve configuration: {str(e)}"}
1777
+
1778
+
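One practical use of this tool is to snapshot the active configuration as the starting point for a local override. A small sketch; the config.local.json filename is illustrative, not a path the server is known to read by convention:

    import asyncio
    import json
    from pathlib import Path

    async def demo() -> None:
        cfg = await get_current_config()
        # Dump the active config as an editable template.
        Path("config.local.json").write_text(json.dumps(cfg, indent=2))

    asyncio.run(demo())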
1779
+ @mcp.tool()
1780
+ async def snyk_scan_library(
1781
+ library_name: str, version: str = "latest", ecosystem: str = "pypi"
1782
+ ):
1783
+ """
1784
+ Scan a library using Snyk for comprehensive security analysis.
1785
+
1786
+ Args:
1787
+ library_name: Name of the library to scan
1788
+ version: Version of the library (default: "latest")
1789
+ ecosystem: Package ecosystem ("pypi", "npm", "maven", etc.)
1790
+
1791
+ Returns:
1792
+ Detailed security report from Snyk including vulnerabilities, licenses, and remediation advice
1793
+ """
1794
+ from .snyk_integration import snyk_integration
1795
+
1796
+ try:
1797
+ # Test connection first
1798
+ connection_test = await snyk_integration.test_connection()
1799
+ if connection_test["status"] != "connected":
1800
+ return {
1801
+ "error": "Snyk integration not configured",
1802
+ "details": connection_test.get("error", "Unknown error"),
1803
+ "setup_instructions": [
1804
+ "1. Sign up for Snyk account at https://snyk.io",
1805
+ "2. Get API token from your Snyk account settings",
1806
+ "3. Set SNYK_API_KEY environment variable",
1807
+ "4. Optionally set SNYK_ORG_ID for organization-specific scans",
1808
+ ],
1809
+ }
1810
+
1811
+ # Perform the scan
1812
+ package_info = await snyk_integration.scan_package(
1813
+ library_name, version, ecosystem
1814
+ )
1815
+
1816
+ return {
1817
+ "library": library_name,
1818
+ "version": version,
1819
+ "ecosystem": ecosystem,
1820
+ "scan_timestamp": datetime.now().isoformat(),
1821
+ "vulnerability_summary": {
1822
+ "total": len(package_info.vulnerabilities),
1823
+ "critical": package_info.severity_counts.get("critical", 0),
1824
+ "high": package_info.severity_counts.get("high", 0),
1825
+ "medium": package_info.severity_counts.get("medium", 0),
1826
+ "low": package_info.severity_counts.get("low", 0),
1827
+ },
1828
+ "vulnerabilities": [
1829
+ {
1830
+ "id": vuln.id,
1831
+ "title": vuln.title,
1832
+ "severity": vuln.severity.value,
1833
+ "cvss_score": vuln.cvss_score,
1834
+ "cve": vuln.cve,
1835
+ "is_patchable": vuln.is_patchable,
1836
+ "upgrade_path": vuln.upgrade_path[:3] if vuln.upgrade_path else [],
1837
+ "snyk_url": f"https://snyk.io/vuln/{vuln.id}",
1838
+ }
1839
+ for vuln in package_info.vulnerabilities[:10] # Limit to top 10
1840
+ ],
1841
+ "license_info": [
1842
+ {
1843
+ "name": license.name,
1844
+ "type": license.type,
1845
+ "spdx_id": license.spdx_id,
1846
+ "is_deprecated": license.is_deprecated,
1847
+ }
1848
+ for license in package_info.licenses
1849
+ ],
1850
+ "recommendations": [
1851
+ "🔍 Review all critical and high severity vulnerabilities",
1852
+ "📦 Update to latest secure version if available",
1853
+ "⚖️ Ensure license compliance with your organization's policies",
1854
+ "🔄 Set up continuous monitoring for this package",
1855
+ ],
1856
+ }
1857
+
1858
+ except Exception as e:
1859
+ return {
1860
+ "error": f"Snyk scan failed: {str(e)}",
1861
+ "library": library_name,
1862
+ "version": version,
1863
+ }
1864
+
1865
+
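`severity_counts` arrives precomputed on the `package_info` object from snyk_integration, which this diff does not show. If it ever had to be rebuilt from the vulnerability list, a Counter over each finding's severity value would suffice; a sketch, assuming `severity` is an enum whose `.value` is the lowercase name used above:

    from collections import Counter
    from typing import Any

    def count_severities(vulnerabilities: list[Any]) -> dict[str, int]:
        # Tally findings by severity string ("critical", "high", "medium", "low").
        return dict(Counter(v.severity.value for v in vulnerabilities))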
1866
+ @mcp.tool()
1867
+ async def snyk_scan_project(project_path: str = "."):
1868
+ """
1869
+ Scan entire project dependencies using Snyk.
1870
+
1871
+ Args:
1872
+ project_path: Path to the project directory (default: current directory)
1873
+
1874
+ Returns:
1875
+ Comprehensive security report for all project dependencies
1876
+ """
1877
+ from .snyk_integration import snyk_integration
1878
+ from .project_scanner import find_and_parse_dependencies
1879
+
1880
+ try:
1881
+ # Find project dependencies
1882
+ dep_result = find_and_parse_dependencies(project_path)
1883
+ if not dep_result:
1884
+ return {
1885
+ "error": "No supported dependency files found",
1886
+ "supported_files": [
1887
+ "pyproject.toml",
1888
+ "requirements.txt",
1889
+ "package.json",
1890
+ ],
1891
+ "project_path": project_path,
1892
+ }
1893
+
1894
+ filename, ecosystem, dependencies = dep_result
1895
+ manifest_path = os.path.join(project_path, filename)
1896
+
1897
+ # Test Snyk connection
1898
+ connection_test = await snyk_integration.test_connection()
1899
+ if connection_test["status"] != "connected":
1900
+ return {
1901
+ "error": "Snyk integration not configured",
1902
+ "details": connection_test.get("error", "Unknown error"),
1903
+ }
1904
+
1905
+ # Scan the project manifest
1906
+ scan_result = await snyk_integration.scan_project_manifest(
1907
+ manifest_path, ecosystem
1908
+ )
1909
+
1910
+ if "error" in scan_result:
1911
+ return scan_result
1912
+
1913
+ # Enhance with additional analysis
1914
+ high_priority_vulns = [
1915
+ vuln
1916
+ for vuln in scan_result["vulnerabilities"]
1917
+ if vuln.get("severity") in ["critical", "high"]
1918
+ ]
1919
+
1920
+ return {
1921
+ "project_path": project_path,
1922
+ "manifest_file": filename,
1923
+ "ecosystem": ecosystem,
1924
+ "scan_timestamp": scan_result["scan_timestamp"],
1925
+ "summary": {
1926
+ **scan_result["summary"],
1927
+ "high_priority_vulnerabilities": len(high_priority_vulns),
1928
+ "security_score": max(
1929
+ 0,
1930
+ 100
1931
+ - (
1932
+ len(
1933
+ [
1934
+ v
1935
+ for v in scan_result["vulnerabilities"]
1936
+ if v.get("severity") == "critical"
1937
+ ]
1938
+ )
1939
+ * 25
1940
+ + len(
1941
+ [
1942
+ v
1943
+ for v in scan_result["vulnerabilities"]
1944
+ if v.get("severity") == "high"
1945
+ ]
1946
+ )
1947
+ * 15
1948
+ + len(
1949
+ [
1950
+ v
1951
+ for v in scan_result["vulnerabilities"]
1952
+ if v.get("severity") == "medium"
1953
+ ]
1954
+ )
1955
+ * 5
1956
+ + len(
1957
+ [
1958
+ v
1959
+ for v in scan_result["vulnerabilities"]
1960
+ if v.get("severity") == "low"
1961
+ ]
1962
+ )
1963
+ * 1
1964
+ ),
1965
+ ),
1966
+ },
1967
+ "high_priority_vulnerabilities": high_priority_vulns[:10],
1968
+ "license_issues": scan_result["license_issues"],
1969
+ "remediation_summary": {
1970
+ "patches_available": len(
1971
+ [v for v in scan_result["vulnerabilities"] if v.get("is_patchable")]
1972
+ ),
1973
+ "upgrades_available": len(
1974
+ [v for v in scan_result["vulnerabilities"] if v.get("upgrade_path")]
1975
+ ),
1976
+ "total_fixable": len(
1977
+ [
1978
+ v
1979
+ for v in scan_result["vulnerabilities"]
1980
+ if v.get("is_patchable") or v.get("upgrade_path")
1981
+ ]
1982
+ ),
1983
+ },
1984
+ "next_steps": [
1985
+ "🚨 Address all critical vulnerabilities immediately",
1986
+ "📦 Update packages with available security patches",
1987
+ "🔍 Review medium and low priority issues",
1988
+ "⚖️ Check license compliance for flagged packages",
1989
+ "🔄 Set up continuous monitoring with Snyk",
1990
+ ],
1991
+ }
1992
+
1993
+ except Exception as e:
1994
+ return {"error": f"Project scan failed: {str(e)}", "project_path": project_path}
1995
+
1996
+
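The security score above starts from 100 and subtracts a fixed penalty per finding (critical 25, high 15, medium 5, low 1), clamped at zero. Worked example: one critical, two high, and three medium findings give 100 - (25 + 2*15 + 3*5) = 30. The same arithmetic as a standalone helper:

    SEVERITY_WEIGHTS = {"critical": 25, "high": 15, "medium": 5, "low": 1}

    def security_score(severities: list[str]) -> int:
        # 100 minus the summed per-finding penalties, floored at 0.
        penalty = sum(SEVERITY_WEIGHTS.get(s, 0) for s in severities)
        return max(0, 100 - penalty)

    assert security_score(["critical", "high", "high", "medium", "medium", "medium"]) == 30
    assert security_score(["critical"] * 5) == 0  # clamped at zero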
1997
+ @mcp.tool()
1998
+ async def snyk_license_check(project_path: str = ".", policy: str = "permissive"):
1999
+ """
2000
+ Check license compliance for project dependencies using Snyk.
2001
+
2002
+ Args:
2003
+ project_path: Path to the project directory
2004
+ policy: License policy to apply ("permissive", "copyleft-limited", "strict")
2005
+
2006
+ Returns:
2007
+ License compliance report with risk assessment
2008
+ """
2009
+ from .snyk_integration import snyk_integration
2010
+ from .project_scanner import find_and_parse_dependencies
2011
+
2012
+ try:
2013
+ # Find project dependencies
2014
+ dep_result = find_and_parse_dependencies(project_path)
2015
+ if not dep_result:
2016
+ return {"error": "No supported dependency files found"}
2017
+
2018
+ filename, ecosystem, dependencies = dep_result
2019
+
2020
+ # Dependencies as (name, version) tuples for the license compliance check
2021
+ packages = list(dependencies.items())
2022
+
2023
+ # Get license compliance report
2024
+ compliance_report = await snyk_integration.get_license_compliance(
2025
+ packages, ecosystem
2026
+ )
2027
+
2028
+ # Apply policy-specific analysis
2029
+ policy_rules = {
2030
+ "permissive": {
2031
+ "allowed": {"MIT", "Apache-2.0", "BSD-2-Clause", "BSD-3-Clause", "ISC"},
2032
+ "restricted": {
2033
+ "GPL-2.0",
2034
+ "GPL-3.0",
2035
+ "LGPL-2.1",
2036
+ "LGPL-3.0",
2037
+ "AGPL-3.0",
2038
+ },
2039
+ "name": "Permissive Policy",
2040
+ },
2041
+ "copyleft-limited": {
2042
+ "allowed": {
2043
+ "MIT",
2044
+ "Apache-2.0",
2045
+ "BSD-2-Clause",
2046
+ "BSD-3-Clause",
2047
+ "ISC",
2048
+ "LGPL-2.1",
2049
+ "LGPL-3.0",
2050
+ },
2051
+ "restricted": {"GPL-2.0", "GPL-3.0", "AGPL-3.0"},
2052
+ "name": "Limited Copyleft Policy",
2053
+ },
2054
+ "strict": {
2055
+ "allowed": {"MIT", "Apache-2.0", "BSD-2-Clause", "BSD-3-Clause"},
2056
+ "restricted": {
2057
+ "GPL-2.0",
2058
+ "GPL-3.0",
2059
+ "LGPL-2.1",
2060
+ "LGPL-3.0",
2061
+ "AGPL-3.0",
2062
+ },
2063
+ "name": "Strict Policy",
2064
+ },
2065
+ }
2066
+
2067
+ selected_policy = policy_rules.get(policy, policy_rules["permissive"])
2068
+
2069
+ # Risk assessment
2070
+ risk_assessment = {
2071
+ "policy_applied": selected_policy["name"],
2072
+ "overall_compliance": (
2073
+ "compliant"
2074
+ if compliance_report["non_compliant_packages"] == 0
2075
+ else "non-compliant"
2076
+ ),
2077
+ "risk_level": (
2078
+ "low"
2079
+ if compliance_report["non_compliant_packages"] == 0
2080
+ else (
2081
+ "high"
2082
+ if compliance_report["non_compliant_packages"] > 5
2083
+ else "medium"
2084
+ )
2085
+ ),
2086
+ "action_required": compliance_report["non_compliant_packages"] > 0,
2087
+ }
2088
+
2089
+ return {
2090
+ "project_path": project_path,
2091
+ "policy": selected_policy["name"],
2092
+ "scan_timestamp": datetime.now().isoformat(),
2093
+ "compliance_summary": compliance_report,
2094
+ "risk_assessment": risk_assessment,
2095
+ "recommendations": [
2096
+ "📋 Review all non-compliant packages",
2097
+ "🔄 Find alternative packages with compatible licenses",
2098
+ "⚖️ Consult legal team for high-risk licenses",
2099
+ "📝 Document license decisions for audit trail",
2100
+ ],
2101
+ }
2102
+
2103
+ except Exception as e:
2104
+ return {
2105
+ "error": f"License check failed: {str(e)}",
2106
+ "project_path": project_path,
2107
+ }
2108
+
2109
+
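The policy tables reduce license checking to set membership on SPDX identifiers. A compact sketch of that classification step; the triage labels are illustrative, and the real compliance verdicts come from snyk_integration:

    def classify_license(spdx_id: str, policy: dict) -> str:
        # Triage one SPDX id against a policy table like those defined above.
        if spdx_id in policy["allowed"]:
            return "allowed"
        if spdx_id in policy["restricted"]:
            return "restricted"
        return "needs-review"  # unknown licenses get flagged for manual review

    permissive = {
        "allowed": {"MIT", "Apache-2.0", "BSD-2-Clause", "BSD-3-Clause", "ISC"},
        "restricted": {"GPL-2.0", "GPL-3.0", "LGPL-2.1", "LGPL-3.0", "AGPL-3.0"},
    }
    assert classify_license("MIT", permissive) == "allowed"
    assert classify_license("GPL-3.0", permissive) == "restricted"
    assert classify_license("MPL-2.0", permissive) == "needs-review"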
2110
+ @mcp.tool()
2111
+ async def snyk_monitor_project(project_path: str = "."):
2112
+ """
2113
+ Set up continuous monitoring for a project with Snyk.
2114
+
2115
+ Args:
2116
+ project_path: Path to the project directory
2117
+
2118
+ Returns:
2119
+ Status of monitoring setup and project details
2120
+ """
2121
+ from .snyk_integration import snyk_integration
2122
+
2123
+ try:
2124
+ # Test connection and get organization info
2125
+ connection_test = await snyk_integration.test_connection()
2126
+ if connection_test["status"] != "connected":
2127
+ return {
2128
+ "error": "Snyk integration not configured",
2129
+ "details": connection_test.get("error", "Unknown error"),
2130
+ "setup_required": [
2131
+ "Set SNYK_API_KEY environment variable",
2132
+ "Set SNYK_ORG_ID environment variable",
2133
+ "Ensure you have organization admin privileges",
2134
+ ],
2135
+ }
2136
+
2137
+ # Set up monitoring
2138
+ monitor_result = await snyk_integration.monitor_project(project_path)
2139
+
2140
+ if "error" in monitor_result:
2141
+ return monitor_result
2142
+
2143
+ return {
2144
+ "status": "success",
2145
+ "monitoring_enabled": True,
2146
+ "project_details": monitor_result,
2147
+ "organization": connection_test.get("organizations", []),
2148
+ "next_steps": [
2149
+ "🔔 Configure alert preferences in Snyk dashboard",
2150
+ "📊 Review security reports regularly",
2151
+ "🔄 Enable automatic PRs for security updates",
2152
+ "📈 Set up integration with CI/CD pipeline",
2153
+ ],
2154
+ "dashboard_url": "https://app.snyk.io/org/your-org/projects",
2155
+ }
2156
+
2157
+ except Exception as e:
2158
+ return {
2159
+ "error": f"Monitoring setup failed: {str(e)}",
2160
+ "project_path": project_path,
2161
+ }
2162
+
2163
+
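All four Snyk tools fail fast when credentials are missing, so it can help to verify the environment before wiring them into automation. A small sketch using the same variable names the tools reference (SNYK_API_KEY, SNYK_ORG_ID):

    import os

    def snyk_env_ready(require_org: bool = False) -> bool:
        # Mirrors the environment the Snyk tools above expect.
        if not os.getenv("SNYK_API_KEY"):
            return False
        if require_org and not os.getenv("SNYK_ORG_ID"):
            return False
        return True

    if not snyk_env_ready(require_org=True):
        print("Set SNYK_API_KEY and SNYK_ORG_ID before calling snyk_monitor_project.")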
2164
+ def main():
2165
+ """Main entry point for the MCP server."""
2166
+ mcp.run(transport="stdio")
2167
+
2168
+
2169
+ if __name__ == "__main__":
2170
+ main()