iflow_mcp_anton_prosterity_documentation_search_enhanced-1.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. documentation_search_enhanced/__init__.py +14 -0
  2. documentation_search_enhanced/__main__.py +6 -0
  3. documentation_search_enhanced/config.json +1674 -0
  4. documentation_search_enhanced/config_manager.py +233 -0
  5. documentation_search_enhanced/config_validator.py +79 -0
  6. documentation_search_enhanced/content_enhancer.py +578 -0
  7. documentation_search_enhanced/docker_manager.py +87 -0
  8. documentation_search_enhanced/logger.py +179 -0
  9. documentation_search_enhanced/main.py +2170 -0
  10. documentation_search_enhanced/project_generator.py +260 -0
  11. documentation_search_enhanced/project_scanner.py +85 -0
  12. documentation_search_enhanced/reranker.py +230 -0
  13. documentation_search_enhanced/site_index_builder.py +274 -0
  14. documentation_search_enhanced/site_index_downloader.py +222 -0
  15. documentation_search_enhanced/site_search.py +1325 -0
  16. documentation_search_enhanced/smart_search.py +473 -0
  17. documentation_search_enhanced/snyk_integration.py +657 -0
  18. documentation_search_enhanced/vector_search.py +303 -0
  19. documentation_search_enhanced/version_resolver.py +189 -0
  20. documentation_search_enhanced/vulnerability_scanner.py +545 -0
  21. documentation_search_enhanced/web_scraper.py +117 -0
  22. iflow_mcp_anton_prosterity_documentation_search_enhanced-1.9.0.dist-info/METADATA +195 -0
  23. iflow_mcp_anton_prosterity_documentation_search_enhanced-1.9.0.dist-info/RECORD +26 -0
  24. iflow_mcp_anton_prosterity_documentation_search_enhanced-1.9.0.dist-info/WHEEL +4 -0
  25. iflow_mcp_anton_prosterity_documentation_search_enhanced-1.9.0.dist-info/entry_points.txt +2 -0
  26. iflow_mcp_anton_prosterity_documentation_search_enhanced-1.9.0.dist-info/licenses/LICENSE +21 -0
documentation_search_enhanced/project_generator.py
@@ -0,0 +1,260 @@
+ #!/usr/bin/env python3
+ """
+ Generates boilerplate code and file structures for new projects.
+ """
+
+ import os
+ from typing import Dict, List, TypedDict, NotRequired
+
+ # --- Project Templates ---
+
+ TEMPLATES: Dict[str, Dict[str, str]] = {
+     "fastapi": {
+         "main.py": """
+ from fastapi import FastAPI
+
+ app = FastAPI(
+     title="My FastAPI Project",
+     description="A new project generated by MCP.",
+     version="0.1.0",
+ )
+
+ @app.get("/")
+ async def read_root():
+     return {"message": "Hello, World!"}
+
+ @app.get("/items/{item_id}")
+ async def read_item(item_id: int, q: str | None = None):
+     return {"item_id": item_id, "q": q}
+ """,
+         "pyproject.toml": """
+ [project]
+ name = "PROJECT_NAME_PLACEHOLDER"
+ version = "0.1.0"
+ description = "A new FastAPI project."
+ authors = [{ name = "Your Name", email = "you@example.com" }]
+ requires-python = ">=3.12"
+ dependencies = [
+     "fastapi",
+     "uvicorn[standard]",
+ ]
+
+ [project.optional-dependencies]
+ dev = ["pytest"]
+ """,
+         "README.md": """
+ # PROJECT_NAME_PLACEHOLDER
+
+ A new FastAPI project generated by Documentation Search Enhanced MCP.
+
+ ## To run:
+ 1. `uv pip sync`
+ 2. `uv run uvicorn main:app --reload`
+
+ ## To test:
+ `uv run pytest`
+ """,
+         ".gitignore": """
+ __pycache__/
+ *.pyc
+ .env
+ .venv/
+ dist/
+ build/
+ *.egg-info
+ """,
+         "tests/test_main.py": """
+ from fastapi.testclient import TestClient
+ from main import app
+
+ client = TestClient(app)
+
+ def test_read_root():
+     response = client.get("/")
+     assert response.status_code == 200
+     assert response.json() == {"message": "Hello, World!"}
+ """,
+     },
+     "react-vite": {
+         "index.html": """
+ <!doctype html>
+ <html lang="en">
+   <head>
+     <meta charset="UTF-8" />
+     <link rel="icon" type="image/svg+xml" href="/vite.svg" />
+     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+     <title>PROJECT_NAME_PLACEHOLDER</title>
+   </head>
+   <body>
+     <div id="root"></div>
+     <script type="module" src="/src/main.jsx"></script>
+   </body>
+ </html>
+ """,
+         "package.json": """
+ {
+   "name": "PROJECT_NAME_PLACEHOLDER",
+   "private": true,
+   "version": "0.0.0",
+   "type": "module",
+   "scripts": {
+     "dev": "vite",
+     "build": "vite build",
+     "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0",
+     "preview": "vite preview"
+   },
+   "dependencies": {
+     "react": "^18.2.0",
+     "react-dom": "^18.2.0"
+   },
+   "devDependencies": {
+     "@types/react": "^18.2.15",
+     "@types/react-dom": "^18.2.7",
+     "@vitejs/plugin-react": "^4.0.3",
+     "eslint": "^8.45.0",
+     "eslint-plugin-react": "^7.32.2",
+     "eslint-plugin-react-hooks": "^4.6.0",
+     "eslint-plugin-react-refresh": "^0.4.3",
+     "vite": "^4.4.5"
+   }
+ }
+ """,
+         "vite.config.js": """
+ import { defineConfig } from 'vite'
+ import react from '@vitejs/plugin-react'
+
+ // https://vitejs.dev/config/
+ export default defineConfig({
+   plugins: [react()],
+ })
+ """,
+         ".gitignore": """
+ # Logs
+ logs
+ *.log
+
+ # Runtime data
+ pids
+ *.pid
+ *.seed
+ *.pid.lock
+
+ # Dependency directories
+ node_modules/
+ dist/
+
+ # IDE files
+ .idea/
+ .vscode/
+
+ # Environment variables
+ .env
+ .env.local
+ """,
+         "src/App.jsx": """
+ import './App.css'
+
+ function App() {
+   return (
+     <>
+       <h1>PROJECT_NAME_PLACEHOLDER</h1>
+       <p className="read-the-docs">
+         React + Vite project generated by MCP.
+       </p>
+     </>
+   )
+ }
+
+ export default App
+ """,
+         "src/main.jsx": """
+ import React from 'react'
+ import ReactDOM from 'react-dom/client'
+ import App from './App.jsx'
+ import './index.css'
+
+ ReactDOM.createRoot(document.getElementById('root')).render(
+   <React.StrictMode>
+     <App />
+   </React.StrictMode>,
+ )
+ """,
+         "src/index.css": """
+ :root {
+   font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif;
+ }
+ """,
+         "src/App.css": """
+ #root {
+   max-width: 1280px;
+   margin: 0 auto;
+   padding: 2rem;
+   text-align: center;
+ }
+ """,
+     },
+ }
+
+
+ class ProjectCreationSummary(TypedDict):
+     project_name: str
+     template_used: str
+     project_path: str
+     directories_created: List[str]
+     files_created: List[str]
+     user_summary: NotRequired[str]
+
+
+ def generate_project(
+     project_name: str, template_name: str, base_path: str = "."
+ ) -> ProjectCreationSummary:
+     """
+     Generates a new project from a template.
+
+     Args:
+         project_name: The name of the new project (will be created as a directory).
+         template_name: The name of the template to use (e.g., 'fastapi').
+         base_path: The path where the project directory will be created.
+
+     Returns:
+         A dictionary summarizing the created files and directories.
+     """
+     if template_name not in TEMPLATES:
+         raise ValueError(
+             f"Template '{template_name}' not found. Available templates: {list(TEMPLATES.keys())}"
+         )
+
+     project_path = os.path.join(base_path, project_name)
+     if os.path.exists(project_path):
+         raise FileExistsError(f"Directory '{project_path}' already exists.")
+
+     os.makedirs(project_path)
+
+     template = TEMPLATES[template_name]
+     created_files = []
+     created_dirs = {project_path}
+
+     for file_path, content in template.items():
+         # Handle nested directories
+         full_path = os.path.join(project_path, file_path)
+         dir_name = os.path.dirname(full_path)
+
+         if not os.path.exists(dir_name):
+             os.makedirs(dir_name)
+             created_dirs.add(dir_name)
+
+         # Replace project name placeholder
+         formatted_content = content.replace("PROJECT_NAME_PLACEHOLDER", project_name)
+
+         with open(full_path, "w", encoding="utf-8") as f:
+             f.write(formatted_content.strip())
+
+         created_files.append(full_path)
+
+     return {
+         "project_name": project_name,
+         "template_used": template_name,
+         "project_path": project_path,
+         "directories_created": sorted(list(created_dirs)),
+         "files_created": sorted(created_files),
+     }
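
generate_project is the module's public entry point: it validates the template name, refuses to overwrite an existing target directory, substitutes PROJECT_NAME_PLACEHOLDER in every template file, and returns a ProjectCreationSummary. A minimal caller sketch (the project name, base path, and error handling here are illustrative, not part of the package):

from documentation_search_enhanced.project_generator import generate_project

try:
    # Hypothetical arguments for illustration only.
    summary = generate_project("my_api", "fastapi", base_path="/tmp")
except (ValueError, FileExistsError) as err:
    # ValueError: unknown template name; FileExistsError: target directory exists.
    print(f"Generation failed: {err}")
else:
    print(f"Created {len(summary['files_created'])} files in {summary['project_path']}")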
documentation_search_enhanced/project_scanner.py
@@ -0,0 +1,85 @@
+ #!/usr/bin/env python3
+ """
+ Scans project directories to find and parse dependency files.
+ """
+
+ import os
+ import json
+ import sys
+ from typing import Dict, Tuple, Optional
+ import re
+ import tomllib
+
+
+ def _parse_requirement(req: str) -> Tuple[str, str]:
+     """Parses a requirement string (e.g., 'fastapi==0.1.0' or 'django>=3.2')."""
+     match = re.match(r"([a-zA-Z0-9\-_]+)\s*([~<>=!]=?)\s*([0-9\.\*a-zA-Z]+)", req)
+     if match:
+         name, specifier, version = match.groups()
+         return name.strip(), f"{specifier}{version}"
+     return req.strip(), "latest"
+
+
+ def parse_pyproject_toml(content: str) -> Dict[str, str]:
+     """Parses dependencies from pyproject.toml content."""
+     data = tomllib.loads(content)
+     dependencies = data.get("project", {}).get("dependencies", [])
+
+     parsed_deps = {}
+     for req in dependencies:
+         name, version = _parse_requirement(req)
+         parsed_deps[name] = version
+
+     return parsed_deps
+
+
+ def parse_requirements_txt(content: str) -> Dict[str, str]:
+     """Parses dependencies from requirements.txt content."""
+     lines = content.splitlines()
+     parsed_deps = {}
+     for line in lines:
+         line = line.strip()
+         if line and not line.startswith("#"):
+             name, version = _parse_requirement(line)
+             parsed_deps[name] = version
+     return parsed_deps
+
+
+ def parse_package_json(content: str) -> Dict[str, str]:
+     """Parses dependencies from package.json content."""
+     data = json.loads(content)
+     deps = data.get("dependencies", {})
+     dev_deps = data.get("devDependencies", {})
+     deps.update(dev_deps)
+     return deps
+
+
+ def find_and_parse_dependencies(
+     directory: str,
+ ) -> Optional[Tuple[str, str, Dict[str, str]]]:
+     """
+     Finds and parses the most relevant dependency file in a directory.
+
+     Returns:
+         A tuple of (filename, ecosystem, dependencies_dict) or None.
+     """
+     supported_files = {
+         "pyproject.toml": ("PyPI", parse_pyproject_toml),
+         "requirements.txt": ("PyPI", parse_requirements_txt),
+         "package.json": ("npm", parse_package_json),
+     }
+
+     for filename, (ecosystem, parser_func) in supported_files.items():
+         file_path = os.path.join(directory, filename)
+         if os.path.exists(file_path):
+             try:
+                 with open(file_path, "r", encoding="utf-8") as f:
+                     content = f.read()
+                 dependencies = parser_func(content)
+                 return filename, ecosystem, dependencies
+             except Exception as e:
+                 print(f"⚠️ Error parsing {filename}: {e}", file=sys.stderr)
+                 # Continue to the next file type if parsing fails
+                 continue
+
+     return None
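
_parse_requirement keeps the version specifier attached to its operator and falls back to "latest" for unpinned names; parse_requirements_txt skips blank lines and comments. A small check of that behavior (the sample requirement strings are illustrative, not from the package):

from documentation_search_enhanced.project_scanner import (
    _parse_requirement,
    parse_requirements_txt,
)

# Operator and version stay together; bare names map to "latest".
assert _parse_requirement("fastapi==0.110.0") == ("fastapi", "==0.110.0")
assert _parse_requirement("django>=3.2") == ("django", ">=3.2")
assert _parse_requirement("httpx") == ("httpx", "latest")

# Blank lines and comments are ignored.
deps = parse_requirements_txt("# pinned\nfastapi==0.110.0\n\nhttpx\n")
assert deps == {"fastapi": "==0.110.0", "httpx": "latest"}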
documentation_search_enhanced/reranker.py
@@ -0,0 +1,230 @@
+ """Search result reranking using hybrid scoring (vector + keyword + metadata)."""
+
+ import logging
+ import re
+ from typing import List, Optional
+
+ from .vector_search import get_vector_engine
+ from .smart_search import SearchResult
+
+ logger = logging.getLogger(__name__)
+
+
+ class SearchReranker:
+     """
+     Rerank search results using a hybrid scoring approach:
+     - Semantic similarity (vector embeddings): 50% weight
+     - Keyword matching relevance: 30% weight
+     - Source authority/freshness: 20% weight
+     """
+
+     def __init__(
+         self,
+         semantic_weight: float = 0.5,
+         keyword_weight: float = 0.3,
+         metadata_weight: float = 0.2,
+     ):
+         """
+         Initialize the reranker with configurable weights.
+
+         Args:
+             semantic_weight: Weight for vector similarity score (0-1)
+             keyword_weight: Weight for keyword matching score (0-1)
+             metadata_weight: Weight for metadata scoring (0-1)
+         """
+         self.semantic_weight = semantic_weight
+         self.keyword_weight = keyword_weight
+         self.metadata_weight = metadata_weight
+
+         # Ensure weights sum to 1.0
+         total = semantic_weight + keyword_weight + metadata_weight
+         if abs(total - 1.0) > 0.01:
+             logger.warning(f"Reranker weights sum to {total}, normalizing to 1.0")
+             self.semantic_weight /= total
+             self.keyword_weight /= total
+             self.metadata_weight /= total
+
+         self.vector_engine = get_vector_engine()
+
+     async def rerank(
+         self,
+         results: List[SearchResult],
+         query: str,
+         use_semantic: bool = True,
+     ) -> List[SearchResult]:
+         """
+         Rerank search results using hybrid scoring.
+
+         Args:
+             results: List of search results to rerank
+             query: Original search query
+             use_semantic: Whether to use semantic scoring (can be disabled for speed)
+
+         Returns:
+             Reranked list of search results
+         """
+         if not results:
+             return results
+
+         logger.debug(f"Reranking {len(results)} results for query: {query[:50]}...")
+
+         # Calculate scores for each result
+         scored_results = []
+         for result in results:
+             score = 0.0
+
+             # 1. Semantic similarity score (if enabled)
+             if use_semantic:
+                 semantic_score = await self._calculate_semantic_score(
+                     query, result.snippet + " " + result.title
+                 )
+                 score += semantic_score * self.semantic_weight
+             else:
+                 # If semantic disabled, redistribute weight to keyword matching
+                 score += result.relevance_score * (
+                     self.semantic_weight + self.keyword_weight
+                 )
+
+             # 2. Keyword matching score (use existing relevance_score)
+             if not use_semantic:
+                 # Already included above
+                 pass
+             else:
+                 score += result.relevance_score * self.keyword_weight
+
+             # 3. Metadata scoring (authority, content quality indicators)
+             metadata_score = self._calculate_metadata_score(result)
+             score += metadata_score * self.metadata_weight
+
+             # Store the hybrid score
+             result.relevance_score = score
+             scored_results.append(result)
+
+         # Sort by hybrid score
+         scored_results.sort(key=lambda r: r.relevance_score, reverse=True)
+
+         logger.debug(
+             f"Reranked results. Top score: {scored_results[0].relevance_score:.3f}"
+         )
+         return scored_results
+
+     async def _calculate_semantic_score(self, query: str, document: str) -> float:
+         """
+         Calculate semantic similarity between query and document.
+
+         Args:
+             query: Search query
+             document: Document text (title + snippet)
+
+         Returns:
+             Similarity score between 0 and 1
+         """
+         try:
+             # Generate embeddings
+             query_embedding = self.vector_engine.embed_documents([query])
+             doc_embedding = self.vector_engine.embed_documents([document])
+
+             # Calculate cosine similarity
+             import numpy as np
+
+             query_norm = query_embedding / np.linalg.norm(query_embedding)
+             doc_norm = doc_embedding / np.linalg.norm(doc_embedding)
+             similarity = np.dot(query_norm[0], doc_norm[0])
+
+             # Convert to 0-1 range (cosine similarity is -1 to 1)
+             score = (similarity + 1) / 2
+             return float(score)
+
+         except Exception as e:
+             logger.warning(f"Error calculating semantic score: {e}")
+             return 0.5  # Neutral score on error
+
+     def _calculate_metadata_score(self, result: SearchResult) -> float:
+         """
+         Calculate metadata-based score considering:
+         - Source authority (official docs > blogs > forums)
+         - Content type (tutorials/guides > reference > examples)
+         - Code examples presence
+         - Estimated quality indicators
+
+         Args:
+             result: Search result to score
+
+         Returns:
+             Metadata score between 0 and 1
+         """
+         score = 0.5  # Base score
+
+         # Source authority scoring
+         url_lower = result.url.lower()
+         if any(
+             domain in url_lower
+             for domain in [
+                 "docs.python.org",
+                 "fastapi.tiangolo.com",
+                 "reactjs.org",
+                 "docs.djangoproject.com",
+             ]
+         ):
+             score += 0.3  # Official documentation
+         elif any(
+             domain in url_lower
+             for domain in ["github.com", "readthedocs.io", "readthedocs.org"]
+         ):
+             score += 0.2  # Authoritative sources
+         elif any(
+             domain in url_lower
+             for domain in ["stackoverflow.com", "medium.com", "dev.to"]
+         ):
+             score += 0.1  # Community sources
+
+         # Content type scoring
+         content_type_scores = {
+             "tutorial": 0.2,
+             "guide": 0.2,
+             "reference": 0.15,
+             "example": 0.1,
+         }
+         score += content_type_scores.get(result.content_type.lower(), 0)
+
+         # Code examples boost
+         if result.code_snippets_count > 0:
+             score += 0.1
+
+         # URL structure quality (indicates well-organized docs)
+         if self._has_good_url_structure(result.url):
+             score += 0.05
+
+         # Normalize to 0-1 range
+         return min(1.0, max(0.0, score))
+
+     def _has_good_url_structure(self, url: str) -> bool:
+         """
+         Check if URL has good structure (versioned, organized).
+
+         Args:
+             url: URL to check
+
+         Returns:
+             True if URL has good structure
+         """
+         # Check for version in URL
+         has_version = bool(re.search(r"/v?\d+\.\d+/|/stable/|/latest/", url))
+
+         # Check for organized path structure
+         path_depth = len([p for p in url.split("/") if p]) - 2  # Exclude domain
+         has_good_depth = 2 <= path_depth <= 6
+
+         return has_version or has_good_depth
+
+
+ # Global instance
+ _reranker: Optional[SearchReranker] = None
+
+
+ def get_reranker() -> SearchReranker:
+     """Get or create the global reranker instance."""
+     global _reranker
+     if _reranker is None:
+         _reranker = SearchReranker()
+     return _reranker
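
The hybrid score is additive: with use_semantic=False the keyword relevance_score absorbs both the semantic and keyword weights (0.5 + 0.3 by default), and metadata still contributes its 20%. A worked example of that arithmetic, mirroring rerank() and _calculate_metadata_score() (the input scores below are illustrative, not produced by the package):

# Keyword-only path (use_semantic=False) for a single result.
semantic_w, keyword_w, metadata_w = 0.5, 0.3, 0.2
keyword_relevance = 0.9                # result.relevance_score from the keyword stage
metadata_score = 0.5 + 0.3 + 0.1       # base + official-docs domain + code-snippets boost
hybrid = keyword_relevance * (semantic_w + keyword_w) + metadata_score * metadata_w
assert abs(hybrid - 0.9) < 1e-9        # 0.72 keyword/semantic + 0.18 metadata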