ddgs-mcp-server 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddgs_mcp_server/server.py +119 -4
- {ddgs_mcp_server-0.4.1.dist-info → ddgs_mcp_server-0.5.1.dist-info}/METADATA +50 -3
- ddgs_mcp_server-0.5.1.dist-info/RECORD +8 -0
- ddgs_mcp_server-0.4.1.dist-info/RECORD +0 -8
- {ddgs_mcp_server-0.4.1.dist-info → ddgs_mcp_server-0.5.1.dist-info}/WHEEL +0 -0
- {ddgs_mcp_server-0.4.1.dist-info → ddgs_mcp_server-0.5.1.dist-info}/entry_points.txt +0 -0
- {ddgs_mcp_server-0.4.1.dist-info → ddgs_mcp_server-0.5.1.dist-info}/licenses/LICENSE +0 -0
ddgs_mcp_server/server.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
|
-
|
|
4
|
+
import asyncio
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
import trafilatura
|
|
5
9
|
from mcp.server import Server
|
|
6
10
|
import mcp.types as types
|
|
7
11
|
from ddgs import DDGS
|
|
@@ -13,12 +17,94 @@ logger = logging.getLogger("ddgs-mcp")
|
|
|
13
17
|
# MCP Server
|
|
14
18
|
server = Server("ddgs-mcp-server")
|
|
15
19
|
|
|
20
|
+
# --- Content Extraction Utilities ---
|
|
21
|
+
|
|
22
|
+
async def fetch_page_content(
    url: str,
    timeout: int = 10,
    max_length: int = 50000
) -> Optional[str]:
    """
    Fetch a URL and extract its main text content using trafilatura.

    This is best-effort: every failure is logged as a warning and reported
    to the caller as ``None`` rather than raised.

    Args:
        url: The URL to fetch content from
        timeout: Request timeout in seconds
        max_length: Maximum characters to return (negative values are
            treated as 0)

    Returns:
        Extracted text content truncated to ``max_length`` characters, or
        None when the request fails, the response status is not 200, or
        trafilatura extracts nothing
    """
    # Browser-like headers; sent with every request made by this helper.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.5",
    }
    try:
        async with httpx.AsyncClient(
            timeout=timeout,
            follow_redirects=True,
            verify=True
        ) as client:
            response = await client.get(url, headers=request_headers)

        # The body is fully read by client.get(), so the client can be
        # released before the (comparatively slow) extraction step.
        if response.status_code != 200:
            logger.warning(
                "Unexpected status %s fetching %s", response.status_code, url
            )
            return None

        # trafilatura.extract is synchronous and CPU-bound; running it in a
        # worker thread keeps the event loop free for the other fetches.
        extracted = await asyncio.to_thread(
            trafilatura.extract,
            response.text,
            include_links=False,
            include_images=False,
            include_comments=False,
            favor_precision=True,
        )
        if extracted:
            # Clamp so a negative limit cannot slice from the end of the text.
            return extracted[:max(max_length, 0)]
    except httpx.TimeoutException:
        logger.warning("Timeout fetching %s", url)
    except httpx.HTTPError as e:
        logger.warning("HTTP error fetching %s: %s", url, e)
    except Exception as e:
        # Deliberate catch-all: one bad page must not break the whole search.
        logger.warning("Failed to fetch %s: %s", url, e)
    return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
async def enrich_results_with_content(
    results: list,
    max_concurrent: int = 5,
    max_length: int = 50000
) -> list:
    """
    Fetch full content for all search results concurrently.

    Each result is shallow-copied before enrichment, so the dictionaries in
    the input list are not modified.

    Args:
        results: List of search result dictionaries
        max_concurrent: Maximum concurrent requests (values below 1 are
            treated as 1)
        max_length: Maximum content length per page

    Returns:
        New list, in input order, of result copies. Results with a truthy
        'href' gain a 'full_content' field holding either the extracted
        text or a failure placeholder; results without one are returned
        as plain copies.
    """
    # Semaphore(0) would block every task forever and a negative value
    # raises ValueError, so always allow at least one request in flight.
    semaphore = asyncio.Semaphore(max(1, max_concurrent))

    async def fetch_with_semaphore(result: dict) -> dict:
        async with semaphore:
            url = result.get("href")
            if url:
                content = await fetch_page_content(url, max_length=max_length)
                result["full_content"] = content if content else "[Content extraction failed or blocked]"
            return result

    tasks = [fetch_with_semaphore(r.copy()) for r in results]
    return await asyncio.gather(*tasks)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# --- MCP Tool Definitions ---
|
|
101
|
+
|
|
16
102
|
@server.list_tools()
|
|
17
103
|
async def list_tools() -> list[types.Tool]:
|
|
18
104
|
return [
|
|
19
105
|
types.Tool(
|
|
20
106
|
name="search_text",
|
|
21
|
-
description="Perform a metasearch using various backends (DuckDuckGo, Google, Bing, etc.). Use this to find APIs, libraries, developer tools, and general information.",
|
|
107
|
+
description="Perform a metasearch using various backends (DuckDuckGo, Google, Bing, etc.). Use this to find APIs, libraries, developer tools, and general information. Optionally fetch full page content for complete context.",
|
|
22
108
|
inputSchema={
|
|
23
109
|
"type": "object",
|
|
24
110
|
"properties": {
|
|
@@ -32,7 +118,17 @@ async def list_tools() -> list[types.Tool]:
|
|
|
32
118
|
"region": {"type": "string", "default": "us-en", "description": "e.g., us-en, uk-en"},
|
|
33
119
|
"safesearch": {"type": "string", "enum": ["on", "moderate", "off"], "default": "moderate"},
|
|
34
120
|
"timelimit": {"type": "string", "enum": ["d", "w", "m", "y"], "default": None},
|
|
35
|
-
"max_results": {"type": "integer", "default": 10}
|
|
121
|
+
"max_results": {"type": "integer", "default": 10},
|
|
122
|
+
"fetch_full_content": {
|
|
123
|
+
"type": "boolean",
|
|
124
|
+
"default": False,
|
|
125
|
+
"description": "If true, fetches and returns the full text content of each result page. This provides complete context but adds latency."
|
|
126
|
+
},
|
|
127
|
+
"max_content_length": {
|
|
128
|
+
"type": "integer",
|
|
129
|
+
"default": 50000,
|
|
130
|
+
"description": "Maximum characters of content to fetch per page (only used if fetch_full_content is true)."
|
|
131
|
+
}
|
|
36
132
|
},
|
|
37
133
|
"required": ["query"]
|
|
38
134
|
}
|
|
@@ -54,6 +150,7 @@ async def list_tools() -> list[types.Tool]:
|
|
|
54
150
|
)
|
|
55
151
|
]
|
|
56
152
|
|
|
153
|
+
|
|
57
154
|
@server.call_tool()
|
|
58
155
|
async def call_tool(name: str, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
|
|
59
156
|
logger.info(f"Calling tool: {name} with args: {arguments}")
|
|
@@ -68,6 +165,10 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent | type
|
|
|
68
165
|
timelimit = arguments.get("timelimit")
|
|
69
166
|
max_results = arguments.get("max_results", 10)
|
|
70
167
|
|
|
168
|
+
# New parameters for full content extraction
|
|
169
|
+
fetch_full_content = arguments.get("fetch_full_content", False)
|
|
170
|
+
max_content_length = arguments.get("max_content_length", 50000)
|
|
171
|
+
|
|
71
172
|
try:
|
|
72
173
|
with DDGS() as ddgs:
|
|
73
174
|
results = []
|
|
@@ -80,6 +181,19 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent | type
|
|
|
80
181
|
max_results=max_results,
|
|
81
182
|
backend=backend
|
|
82
183
|
)
|
|
184
|
+
|
|
185
|
+
# Convert generator to list for manipulation
|
|
186
|
+
results = list(results) if results else []
|
|
187
|
+
|
|
188
|
+
# Enrich with full content if requested
|
|
189
|
+
if fetch_full_content and results:
|
|
190
|
+
logger.info(f"Fetching full content for {len(results)} results...")
|
|
191
|
+
results = await enrich_results_with_content(
|
|
192
|
+
results,
|
|
193
|
+
max_length=max_content_length
|
|
194
|
+
)
|
|
195
|
+
logger.info("Full content extraction complete")
|
|
196
|
+
|
|
83
197
|
elif name == "search_news":
|
|
84
198
|
results = ddgs.news(
|
|
85
199
|
query=query,
|
|
@@ -88,8 +202,9 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent | type
|
|
|
88
202
|
timelimit=timelimit,
|
|
89
203
|
max_results=max_results
|
|
90
204
|
)
|
|
205
|
+
results = list(results) if results else []
|
|
91
206
|
|
|
92
|
-
return [types.TextContent(type="text", text=json.dumps(results, indent=2))]
|
|
207
|
+
return [types.TextContent(type="text", text=json.dumps(results, indent=2, ensure_ascii=False))]
|
|
93
208
|
|
|
94
209
|
except Exception as e:
|
|
95
210
|
logger.error(f"Error executing {name}: {e}")
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ddgs-mcp-server
|
|
3
|
-
Version: 0.4.1
|
|
4
|
-
Summary: DuckDuckGo Search MCP Server
|
|
3
|
+
Version: 0.5.1
|
|
4
|
+
Summary: DuckDuckGo Search MCP Server with full page content extraction
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Requires-Python: >=3.10
|
|
7
7
|
Requires-Dist: ddgs>=9.10.0
|
|
8
|
+
Requires-Dist: httpx>=0.27.0
|
|
8
9
|
Requires-Dist: mcp>=1.0.0
|
|
10
|
+
Requires-Dist: trafilatura>=2.0.0
|
|
9
11
|
Description-Content-Type: text/markdown
|
|
10
12
|
|
|
11
13
|
# DDGS MCP Server
|
|
@@ -14,9 +16,54 @@ A Model Context Protocol (MCP) server that provides DuckDuckGo Search capabiliti
|
|
|
14
16
|
|
|
15
17
|
## Features
|
|
16
18
|
|
|
17
|
-
- **search_text**:
|
|
19
|
+
- **search_text**: Advanced metasearch using `bing`, `brave`, `duckduckgo`, `google`, `mojeek`, `yahoo`, `yandex`, `wikipedia`.
|
|
20
|
+
- **Full Content Extraction**: Optionally fetch complete page content (not just snippets) for comprehensive context.
|
|
18
21
|
- **search_news**: Find latest updates, releases, and tech news.
|
|
19
22
|
|
|
23
|
+
## Full Content Extraction
|
|
24
|
+
|
|
25
|
+
For coding agents that need complete context from search results, enable full page content fetching:
|
|
26
|
+
|
|
27
|
+
### Usage
|
|
28
|
+
|
|
29
|
+
```json
|
|
30
|
+
{
|
|
31
|
+
"query": "python async programming tutorial",
|
|
32
|
+
"fetch_full_content": true,
|
|
33
|
+
"max_content_length": 50000,
|
|
34
|
+
"max_results": 5
|
|
35
|
+
}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Parameters
|
|
39
|
+
|
|
40
|
+
| Parameter | Type | Default | Description |
|
|
41
|
+
|-----------|------|---------|-------------|
|
|
42
|
+
| `fetch_full_content` | boolean | `false` | Enable full page content extraction |
|
|
43
|
+
| `max_content_length` | integer | `50000` | Maximum characters per page (when `fetch_full_content` is true) |
|
|
44
|
+
|
|
45
|
+
### Response Structure
|
|
46
|
+
|
|
47
|
+
When `fetch_full_content` is enabled, each result includes a `full_content` field:
|
|
48
|
+
|
|
49
|
+
```json
|
|
50
|
+
[
|
|
51
|
+
{
|
|
52
|
+
"title": "Python Async Programming Guide",
|
|
53
|
+
"href": "https://example.com/python-async",
|
|
54
|
+
"body": "Brief snippet from search results...",
|
|
55
|
+
"full_content": "Complete extracted article text with all paragraphs, code examples, and detailed explanations..."
|
|
56
|
+
}
|
|
57
|
+
]
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Performance Notes
|
|
61
|
+
|
|
62
|
+
- Content extraction adds ~1-3 seconds latency per page
|
|
63
|
+
- Up to 5 pages are fetched concurrently to minimize total time
|
|
64
|
+
- Failed fetches return `[Content extraction failed or blocked]` without breaking the search
|
|
65
|
+
- Uses [Trafilatura](https://trafilatura.readthedocs.io/) for high-quality text extraction
|
|
66
|
+
|
|
20
67
|
|
|
21
68
|
## Installation & Usage
|
|
22
69
|
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
ddgs_mcp_server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
ddgs_mcp_server/main.py,sha256=hqJl7UoGQoL9a-2hX24srZYFGdatheJfgkn5wz5Od70,492
|
|
3
|
+
ddgs_mcp_server/server.py,sha256=1I7mG-Dw1UV_xQtj-Ow_3ttHAr3HF3z6SSfR-Mi0ts8,8269
|
|
4
|
+
ddgs_mcp_server-0.5.1.dist-info/METADATA,sha256=FqvHalHQ737L415fvpyWXauJEiO7Li7AQbBNTur4lQc,3757
|
|
5
|
+
ddgs_mcp_server-0.5.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
6
|
+
ddgs_mcp_server-0.5.1.dist-info/entry_points.txt,sha256=8YvtzhkNDMvAy2CdIx8VppBFjiBSJ56JtLX-v8SUHGc,62
|
|
7
|
+
ddgs_mcp_server-0.5.1.dist-info/licenses/LICENSE,sha256=vLPKcNOa4dGBRPq4I_mIBKyVSbIlzrOdinbwXFeKb88,1091
|
|
8
|
+
ddgs_mcp_server-0.5.1.dist-info/RECORD,,
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
ddgs_mcp_server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
ddgs_mcp_server/main.py,sha256=hqJl7UoGQoL9a-2hX24srZYFGdatheJfgkn5wz5Od70,492
|
|
3
|
-
ddgs_mcp_server/server.py,sha256=IEsDiPsw2ciIVnmoOBYnOuVd_fCXmOXfNapQTBpO9wc,3919
|
|
4
|
-
ddgs_mcp_server-0.4.1.dist-info/METADATA,sha256=bylRyAQpkCOFQilQt-rMLuP1j4eOxLMZjQsZMyVqyyE,2227
|
|
5
|
-
ddgs_mcp_server-0.4.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
6
|
-
ddgs_mcp_server-0.4.1.dist-info/entry_points.txt,sha256=8YvtzhkNDMvAy2CdIx8VppBFjiBSJ56JtLX-v8SUHGc,62
|
|
7
|
-
ddgs_mcp_server-0.4.1.dist-info/licenses/LICENSE,sha256=vLPKcNOa4dGBRPq4I_mIBKyVSbIlzrOdinbwXFeKb88,1091
|
|
8
|
-
ddgs_mcp_server-0.4.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|