morph-websearch-mcp 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,73 @@
1
+ Metadata-Version: 2.4
2
+ Name: morph-websearch-mcp
3
+ Version: 0.1.0
4
+ Summary: Web search MCP server with AI-powered content extraction
5
+ License-Expression: MIT
6
+ Requires-Python: >=3.13
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: mcp>=1.0.0
9
+ Requires-Dist: crawl4ai>=0.4.0
10
+ Requires-Dist: requests>=2.31.0
11
+ Requires-Dist: openai>=1.0.0
12
+ Requires-Dist: beautifulsoup4>=4.12.0
13
+
14
+ # websearch-mcp
15
+
16
+ MCP server providing web search, web fetch, and AI-powered web extraction — all results compacted for minimal context usage.
17
+
18
+ ## Tools
19
+
20
+ | Tool | Input | Output |
21
+ |------|-------|--------|
22
+ | `websearch` | `{query, num_results?}` | `[{title, url, snippet, content}]` |
23
+ | `webfetch` | `{url}` | `{url, content}` |
24
+ | `webextract` | `{query}` | `{answer, sources: [{title, url}]}` |
25
+
26
+ All page content is compacted via morph before returning, stripping irrelevant boilerplate.
27
+
28
+ ## Install
29
+
30
+ ```bash
31
+ pip install morph-websearch-mcp
32
+ # or
33
+ uv add morph-websearch-mcp
34
+ ```
35
+
36
+ ## Setup
37
+
38
+ Set your morph API key:
39
+
40
+ ```bash
41
+ export MORPH_API_KEY="sk-..."
42
+ ```
43
+
44
+ ## MCP Client Config
45
+
46
+ ```json
47
+ {
48
+ "mcpServers": {
49
+ "websearch-mcp": {
50
+ "command": "websearch-mcp",
51
+ "env": {
52
+ "MORPH_API_KEY": "sk-..."
53
+ }
54
+ }
55
+ }
56
+ }
57
+ ```
58
+
59
+ If installing from source:
60
+
61
+ ```json
62
+ {
63
+ "mcpServers": {
64
+ "websearch-mcp": {
65
+ "command": "uv",
66
+ "args": ["run", "--directory", "/path/to/websearch-mcp", "main.py"],
67
+ "env": {
68
+ "MORPH_API_KEY": "sk-..."
69
+ }
70
+ }
71
+ }
72
+ }
73
+ ```
@@ -0,0 +1,60 @@
1
+ # websearch-mcp
2
+
3
+ MCP server providing web search, web fetch, and AI-powered web extraction — all results compacted for minimal context usage.
4
+
5
+ ## Tools
6
+
7
+ | Tool | Input | Output |
8
+ |------|-------|--------|
9
+ | `websearch` | `{query, num_results?}` | `[{title, url, snippet, content}]` |
10
+ | `webfetch` | `{url}` | `{url, content}` |
11
+ | `webextract` | `{query}` | `{answer, sources: [{title, url}]}` |
12
+
13
+ All page content is compacted through the Morph compaction API before it is returned, stripping irrelevant boilerplate.
14
+
15
+ ## Install
16
+
17
+ ```bash
18
+ pip install morph-websearch-mcp
19
+ # or
20
+ uv add morph-websearch-mcp
21
+ ```
22
+
23
+ ## Setup
24
+
25
+ Set your Morph API key (the server reads it from the `MORPH_API_KEY` environment variable):
26
+
27
+ ```bash
28
+ export MORPH_API_KEY="sk-..."
29
+ ```
30
+
31
+ ## MCP Client Config
32
+
33
+ ```json
34
+ {
35
+ "mcpServers": {
36
+ "websearch-mcp": {
37
+ "command": "websearch-mcp",
38
+ "env": {
39
+ "MORPH_API_KEY": "sk-..."
40
+ }
41
+ }
42
+ }
43
+ }
44
+ ```
45
+
46
+ If installing from source:
47
+
48
+ ```json
49
+ {
50
+ "mcpServers": {
51
+ "websearch-mcp": {
52
+ "command": "uv",
53
+ "args": ["run", "--directory", "/path/to/websearch-mcp", "main.py"],
54
+ "env": {
55
+ "MORPH_API_KEY": "sk-..."
56
+ }
57
+ }
58
+ }
59
+ }
60
+ ```
@@ -0,0 +1,320 @@
1
import asyncio
import json
import logging
import os
import urllib.parse

import requests
from bs4 import BeautifulSoup
from crawl4ai import AsyncWebCrawler
from mcp.server import Server
from mcp.server.stdio import stdio_server
from mcp.types import Tool, TextContent
from openai import AsyncOpenAI
13
+
14
# Morph credentials and endpoints. The key is read once at import time; an
# empty string means it was not set in the environment.
MORPH_API_KEY = os.environ.get("MORPH_API_KEY", "")
COMPACT_URL = "https://api.morphllm.com/v1/compact"
OPENAI_BASE = "https://api.morphllm.com/v1"
MODEL = "morph-dsv4flash"

from importlib.metadata import PackageNotFoundError, version as pkg_version

try:
    # The distribution is published as "morph-websearch-mcp" (see
    # pyproject.toml); looking up "websearch-mcp" never matches an installed
    # distribution, so the fallback below was always used.
    __version__ = pkg_version("morph-websearch-mcp")
except PackageNotFoundError:
    # Running from a source checkout that has not been installed.
    __version__ = "0.1.0"
25
+
26
# Single MCP server instance for this module: the list_tools/call_tool
# handlers register on it via decorators and serve() runs it over stdio.
server = Server("websearch-mcp", version=__version__)
27
# Lazily created crawler shared by every tool call; closed again in serve().
_crawler = None
# Serialises first-time start-up so concurrent tool calls cannot receive a
# crawler whose __aenter__ has not finished, or start two crawlers.
_crawler_lock = asyncio.Lock()


async def get_crawler():
    """Return the shared ``AsyncWebCrawler``, starting it on first use.

    The crawler is published to the module global only after ``__aenter__``
    has completed. If start-up raises, nothing is cached and the next call
    retries instead of handing out a crawler that never started.

    Returns:
        The started, module-wide ``AsyncWebCrawler`` instance.
    """
    global _crawler
    async with _crawler_lock:
        if _crawler is None:
            crawler = AsyncWebCrawler()
            await crawler.__aenter__()
            _crawler = crawler
    return _crawler
36
+
37
+
38
def get_markdown(result):
    """Return the markdown of a crawl result as a plain string.

    ``result.markdown`` is accepted in three shapes: a ``str`` (returned
    as-is), an object exposing ``raw_markdown`` (that attribute is
    returned), or any other object (stringified).

    Args:
        result: Object with an optional ``markdown`` attribute.

    Returns:
        The markdown text, or ``""`` when no markdown is available.
    """
    md = getattr(result, "markdown", None)
    # A missing value must not be stringified: str(None) is the truthy text
    # "None", which callers would then treat as real page content.
    if md is None:
        return ""
    if isinstance(md, str):
        return md
    if hasattr(md, "raw_markdown"):
        return md.raw_markdown or ""
    return str(md)
45
+
46
+
47
async def compact_text(text, query, ratio=0.5):
    """Compact ``text`` through the Morph compaction endpoint.

    Compaction is best-effort: on any failure (transport error, non-200
    status, unexpected response body) the original ``text`` is returned
    unchanged. Failures are logged rather than silently discarded, so a
    missing or invalid API key is visible to the operator.

    Args:
        text: Content to compact. Falsy input short-circuits to ``""``.
        query: Sent as the ``query`` field of the request.
        ratio: Sent as the ``compression_ratio`` field of the request.

    Returns:
        The compacted text, or the original ``text`` if compaction failed.
    """
    if not text:
        return ""

    log = logging.getLogger(__name__)
    payload = {
        "input": text,
        "query": query,
        "compression_ratio": ratio,
        "preserve_recent": 0,
        "include_markers": False,
    }
    try:
        # requests is blocking; run it in a worker thread so the event loop
        # keeps serving other work while the request is in flight.
        response = await asyncio.to_thread(
            requests.post,
            COMPACT_URL,
            headers={"Authorization": f"Bearer {MORPH_API_KEY}"},
            json=payload,
            timeout=60,
        )
        if response.status_code == 200:
            output = response.json()["output"]
            if isinstance(output, str):
                return output
            log.warning("Compaction response had a non-string 'output'; using original text")
        else:
            log.warning(
                "Compaction request returned HTTP %s; using original text",
                response.status_code,
            )
    except Exception as exc:
        # Deliberately broad: whatever goes wrong, fall back to the input.
        log.warning("Compaction failed (%s); using original text", exc)
    return text
70
+
71
+
72
+ def _resolve_ddg_url(href):
73
+ if not href:
74
+ return ""
75
+ if href.startswith("//duckduckgo.com/l/") or "uddg=" in href:
76
+ parsed = urllib.parse.urlparse(href, scheme="https")
77
+ params = urllib.parse.parse_qs(parsed.query)
78
+ encoded = params.get("uddg", [""])[0]
79
+ if encoded:
80
+ return urllib.parse.unquote(encoded)
81
+ if href.startswith("//"):
82
+ return "https:" + href
83
+ return href
84
+
85
+
86
def _parse_ddg_results(html, limit):
    """Extract up to ``limit`` organic results from DuckDuckGo's HTML page."""
    soup = BeautifulSoup(html, "html.parser")
    hits = []
    for item in soup.select(".result"):
        if len(hits) >= limit:
            break
        if "result--ad" in item.get("class", []):
            continue  # sponsored entry
        link_el = item.select_one("a.result__a")
        if not link_el:
            continue
        url = _resolve_ddg_url(link_el.get("href", ""))
        if not url.startswith(("http://", "https://")):
            continue
        snippet_el = item.select_one("a.result__snippet")
        hits.append(
            {
                "title": link_el.get_text(strip=True),
                "url": url,
                "snippet": snippet_el.get_text(strip=True) if snippet_el else "",
                "content": "",
            }
        )
    return hits


async def websearch_impl(query, num_results=5):
    """Search DuckDuckGo and attach compacted page content to each hit.

    Args:
        query: The search query.
        num_results: Maximum number of results. Values that cannot be read
            as an integer fall back to 5; zero or negative values yield an
            empty list.

    Returns:
        A list of ``{"title", "url", "snippet", "content"}`` dicts.
        ``content`` stays ``""`` for pages that could not be crawled.
    """
    try:
        limit = int(num_results)
    except (TypeError, ValueError):
        limit = 5
    if limit <= 0:
        return []

    crawler = await get_crawler()
    ddg_url = f"https://html.duckduckgo.com/html/?q={urllib.parse.quote(query)}"
    result = await crawler.arun(ddg_url)
    parsed = _parse_ddg_results(getattr(result, "html", "") or "", limit)
    if not parsed:
        return parsed

    try:
        crawl_results = await crawler.arun_many([r["url"] for r in parsed])
        pages = [(cr.url, get_markdown(cr)) for cr in crawl_results if cr and cr.url]
        # Compact all pages concurrently; each call is an independent HTTP
        # request, so there is no reason to wait for them one at a time.
        compacted = await asyncio.gather(
            *(compact_text(md, query, 0.3) for _, md in pages)
        )
        url_to_content = {url: text for (url, _), text in zip(pages, compacted)}
        for r in parsed:
            r["content"] = url_to_content.get(r["url"], "")
    except Exception as exc:
        # Page content is an enrichment: keep the titles and snippets even
        # if crawling fails, but leave a trace of why content is missing.
        logging.getLogger(__name__).warning(
            "Fetching result pages failed (%s); returning snippets only", exc
        )

    return parsed
130
+
131
+
132
async def webfetch_impl(url):
    """Fetch ``url`` and return its content as compacted markdown.

    Only ``http://`` and ``https://`` URLs are fetched, matching the filter
    ``websearch_impl`` applies to search hits. The URL can come from a
    model-generated tool call, so other schemes are rejected before the
    crawler sees them.
    NOTE(review): crawl4ai is documented to also accept ``file://`` and
    ``raw:`` inputs, which is what this guard blocks; confirm for the
    pinned version.

    Args:
        url: The URL to fetch.

    Returns:
        ``{"url", "content"}``. When the URL is rejected, ``content`` is
        ``""`` and an ``"error"`` key explains why.
    """
    if not isinstance(url, str) or not url.lower().startswith(("http://", "https://")):
        return {
            "url": url,
            "content": "",
            "error": "Only http:// and https:// URLs can be fetched",
        }

    crawler = await get_crawler()
    result = await crawler.arun(url)
    md = get_markdown(result)
    # The URL doubles as the compaction query for a direct fetch.
    content = await compact_text(md, url, 0.5) if md else ""
    return {"url": url, "content": content}
138
+
139
+
140
def _record_source(sources, seen_urls, title, url):
    """Append ``{title, url}`` to ``sources`` unless ``url`` was already recorded."""
    if url and url not in seen_urls:
        seen_urls.add(url)
        sources.append({"title": title, "url": url})


async def _run_research_tool(name, raw_arguments, sources, seen_urls):
    """Execute one model-requested tool call and return its JSON-encoded result.

    Never raises for tool-level problems: malformed arguments, unknown tool
    names and tool failures are reported back as ``{"error": ...}`` so the
    model can react and every tool call receives a reply.
    """
    try:
        args = json.loads(raw_arguments or "{}")
    except json.JSONDecodeError as exc:
        return json.dumps({"error": f"Invalid JSON arguments: {exc}"})
    if not isinstance(args, dict):
        return json.dumps({"error": "Tool arguments must be a JSON object"})

    try:
        if name == "websearch":
            results = await websearch_impl(
                args.get("query", ""), args.get("num_results", 5)
            )
            for r in results:
                if r.get("title") and r.get("url"):
                    _record_source(sources, seen_urls, r["title"], r["url"])
            return json.dumps(results)
        if name == "webfetch":
            result = await webfetch_impl(args.get("url", ""))
            if result.get("url"):
                # A fetched page has no title of its own; use the URL.
                _record_source(sources, seen_urls, result["url"], result["url"])
            return json.dumps(result)
    except Exception as exc:
        logging.getLogger(__name__).warning("Tool %s failed: %s", name, exc)
        return json.dumps({"error": f"{name} failed: {exc}"})

    return json.dumps({"error": f"Unknown tool: {name}"})


async def webextract_impl(query, max_steps=5):
    """Answer ``query`` by letting the model drive web search and fetch calls.

    The model is given ``websearch`` and ``webfetch`` as function tools and
    is queried up to ``max_steps`` times. As soon as it replies without tool
    calls, that reply is the answer. If the step budget runs out, one final
    request without tools asks it to answer from what it has gathered.

    Args:
        query: The research question.
        max_steps: Maximum number of tool-using model turns.

    Returns:
        ``{"answer": str | None, "sources": [{"title", "url"}, ...]}`` with
        each URL listed once, in first-seen order.
    """
    client = AsyncOpenAI(api_key=MORPH_API_KEY, base_url=OPENAI_BASE)
    tools = [
        {
            "type": "function",
            "function": {
                "name": "websearch",
                "description": "Search the web for information using DuckDuckGo",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {"type": "string", "description": "The search query"},
                        "num_results": {"type": "integer", "default": 5},
                    },
                    "required": ["query"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "webfetch",
                "description": "Fetch and extract content from a URL",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {"type": "string", "description": "The URL to fetch"}
                    },
                    "required": ["url"],
                },
            },
        },
    ]

    messages = [
        {
            "role": "system",
            "content": "You are a web research agent. Your job is to answer user queries by searching the web and fetching web pages. You have access to two functions: websearch(query, num_results) and webfetch(url). Use them to find the answer. When you have enough information, output your final answer in a clear format with links to sources.",
        },
        {"role": "user", "content": query},
    ]

    sources = []
    seen_urls = set()

    for _ in range(max_steps):
        response = await client.chat.completions.create(
            model=MODEL,
            messages=messages,
            tools=tools,
        )
        msg = response.choices[0].message

        if not msg.tool_calls:
            return {"answer": msg.content, "sources": sources}

        # Echo the assistant's tool calls back into the transcript so the
        # tool replies appended below have matching call ids.
        messages.append(
            {
                "role": "assistant",
                "tool_calls": [
                    {
                        "id": tc.id,
                        "type": "function",
                        "function": {
                            "name": tc.function.name,
                            "arguments": tc.function.arguments,
                        },
                    }
                    for tc in msg.tool_calls
                ],
            }
        )

        for tc in msg.tool_calls:
            content = await _run_research_tool(
                tc.function.name, tc.function.arguments, sources, seen_urls
            )
            messages.append(
                {"role": "tool", "tool_call_id": tc.id, "content": content}
            )

    final_response = await client.chat.completions.create(
        model=MODEL,
        messages=messages
        + [
            {
                "role": "user",
                "content": "You have reached the maximum number of research steps. Provide your final answer now based on what you found, with links to sources.",
            }
        ],
    )
    return {"answer": final_response.choices[0].message.content, "sources": sources}
238
+
239
+
240
@server.list_tools()
async def list_tools():
    """Advertise the three tools this server exposes, with their input schemas."""
    search_schema = {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "The search query"},
            "num_results": {
                "type": "integer",
                "default": 5,
                "description": "Number of results to return",
            },
        },
        "required": ["query"],
    }
    fetch_schema = {
        "type": "object",
        "properties": {
            "url": {"type": "string", "description": "The URL to fetch"}
        },
        "required": ["url"],
    }
    extract_schema = {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "The research query to answer",
            }
        },
        "required": ["query"],
    }

    search_tool = Tool(
        name="websearch",
        description="Search the web using DuckDuckGo and return results with full page content",
        inputSchema=search_schema,
    )
    fetch_tool = Tool(
        name="webfetch",
        description="Fetch and extract content from a URL",
        inputSchema=fetch_schema,
    )
    extract_tool = Tool(
        name="webextract",
        description="Agentic web research — searches and fetches pages to answer a query",
        inputSchema=extract_schema,
    )
    return [search_tool, fetch_tool, extract_tool]
285
+
286
+
287
@server.call_tool()
async def call_tool(name, arguments):
    """Route an MCP tool invocation to its implementation.

    Args:
        name: Tool name requested by the client.
        arguments: Mapping of tool arguments.

    Returns:
        A single-element list holding the result as indented JSON text, or
        the literal text "Unknown tool" for an unrecognised name.
    """
    if name == "websearch":
        search_query = arguments.get("query", "")
        limit = arguments.get("num_results", 5)
        payload = await websearch_impl(search_query, limit)
    elif name == "webfetch":
        payload = await webfetch_impl(arguments.get("url", ""))
    elif name == "webextract":
        payload = await webextract_impl(arguments.get("query", ""))
    else:
        return [TextContent(type="text", text="Unknown tool")]

    body = json.dumps(payload, indent=2)
    return [TextContent(type="text", text=body)]
301
+
302
+
303
async def serve():
    """Run the MCP server over stdio, then shut down the shared crawler.

    The crawler is closed whether the server exits normally or with an
    exception. The module global is cleared before closing so no stale,
    already-closed crawler is left behind for a later ``get_crawler()``.
    """
    global _crawler
    try:
        async with stdio_server() as (read_stream, write_stream):
            await server.run(
                read_stream, write_stream, server.create_initialization_options()
            )
    finally:
        crawler, _crawler = _crawler, None
        if crawler is not None:
            await crawler.__aexit__(None, None, None)
313
+
314
+
315
def main():
    """Synchronous entry point: run ``serve()`` on a fresh event loop."""
    server_coro = serve()
    asyncio.run(server_coro)


# Allow running the module directly as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,73 @@
1
+ Metadata-Version: 2.4
2
+ Name: morph-websearch-mcp
3
+ Version: 0.1.0
4
+ Summary: Web search MCP server with AI-powered content extraction
5
+ License-Expression: MIT
6
+ Requires-Python: >=3.13
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: mcp>=1.0.0
9
+ Requires-Dist: crawl4ai>=0.4.0
10
+ Requires-Dist: requests>=2.31.0
11
+ Requires-Dist: openai>=1.0.0
12
+ Requires-Dist: beautifulsoup4>=4.12.0
13
+
14
+ # websearch-mcp
15
+
16
+ MCP server providing web search, web fetch, and AI-powered web extraction — all results compacted for minimal context usage.
17
+
18
+ ## Tools
19
+
20
+ | Tool | Input | Output |
21
+ |------|-------|--------|
22
+ | `websearch` | `{query, num_results?}` | `[{title, url, snippet, content}]` |
23
+ | `webfetch` | `{url}` | `{url, content}` |
24
+ | `webextract` | `{query}` | `{answer, sources: [{title, url}]}` |
25
+
26
+ All page content is compacted via morph before returning, stripping irrelevant boilerplate.
27
+
28
+ ## Install
29
+
30
+ ```bash
31
+ pip install morph-websearch-mcp
32
+ # or
33
+ uv add morph-websearch-mcp
34
+ ```
35
+
36
+ ## Setup
37
+
38
+ Set your morph API key:
39
+
40
+ ```bash
41
+ export MORPH_API_KEY="sk-..."
42
+ ```
43
+
44
+ ## MCP Client Config
45
+
46
+ ```json
47
+ {
48
+ "mcpServers": {
49
+ "websearch-mcp": {
50
+ "command": "websearch-mcp",
51
+ "env": {
52
+ "MORPH_API_KEY": "sk-..."
53
+ }
54
+ }
55
+ }
56
+ }
57
+ ```
58
+
59
+ If installing from source:
60
+
61
+ ```json
62
+ {
63
+ "mcpServers": {
64
+ "websearch-mcp": {
65
+ "command": "uv",
66
+ "args": ["run", "--directory", "/path/to/websearch-mcp", "main.py"],
67
+ "env": {
68
+ "MORPH_API_KEY": "sk-..."
69
+ }
70
+ }
71
+ }
72
+ }
73
+ ```
@@ -0,0 +1,9 @@
1
+ README.md
2
+ main.py
3
+ pyproject.toml
4
+ morph_websearch_mcp.egg-info/PKG-INFO
5
+ morph_websearch_mcp.egg-info/SOURCES.txt
6
+ morph_websearch_mcp.egg-info/dependency_links.txt
7
+ morph_websearch_mcp.egg-info/entry_points.txt
8
+ morph_websearch_mcp.egg-info/requires.txt
9
+ morph_websearch_mcp.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ websearch-mcp = main:main
@@ -0,0 +1,5 @@
1
+ mcp>=1.0.0
2
+ crawl4ai>=0.4.0
3
+ requests>=2.31.0
4
+ openai>=1.0.0
5
+ beautifulsoup4>=4.12.0
@@ -0,0 +1,17 @@
1
+ [project]
2
+ name = "morph-websearch-mcp"
3
+ version = "0.1.0"
4
+ description = "Web search MCP server with AI-powered content extraction"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ license = "MIT"
8
+ dependencies = [
9
+ "mcp>=1.0.0",
10
+ "crawl4ai>=0.4.0",
11
+ "requests>=2.31.0",
12
+ "openai>=1.0.0",
13
+ "beautifulsoup4>=4.12.0",
14
+ ]
15
+
16
+ [project.scripts]
17
+ websearch-mcp = "main:main"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+