ddgs-mcp-server 0.4.1__tar.gz → 0.5.0__tar.gz
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- ddgs_mcp_server-0.4.1/README.md → ddgs_mcp_server-0.5.0/PKG-INFO +58 -1
- ddgs_mcp_server-0.4.1/PKG-INFO → ddgs_mcp_server-0.5.0/README.md +46 -11
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/pyproject.toml +5 -3
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/requirements.txt +2 -0
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/server.py +115 -2
- ddgs_mcp_server-0.5.0/src/ddgs_mcp_server/server.py +211 -0
- ddgs_mcp_server-0.4.1/src/ddgs_mcp_server/server.py +0 -96
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/.env.example +0 -0
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/.gitattributes +0 -0
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/.gitignore +0 -0
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/Dockerfile +0 -0
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/LICENSE +0 -0
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/docker-compose.yml +0 -0
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/main.py +0 -0
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/src/ddgs_mcp_server/__init__.py +0 -0
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/src/ddgs_mcp_server/main.py +0 -0
- {ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/start_api.sh +0 -0
ddgs_mcp_server-0.4.1/README.md → ddgs_mcp_server-0.5.0/PKG-INFO

@@ -1,12 +1,69 @@
+Metadata-Version: 2.4
+Name: ddgs-mcp-server
+Version: 0.5.0
+Summary: DuckDuckGo Search MCP Server with full page content extraction
+License-File: LICENSE
+Requires-Python: >=3.10
+Requires-Dist: ddgs>=9.10.0
+Requires-Dist: httpx>=0.27.0
+Requires-Dist: mcp>=1.0.0
+Requires-Dist: trafilatura>=2.0.0
+Description-Content-Type: text/markdown
+
 # DDGS MCP Server
 
 A Model Context Protocol (MCP) server that provides DuckDuckGo Search capabilities to AI agents.
 
 ## Features
 
-- **search_text**:
+- **search_text**: Advanced metasearch using `bing`, `brave`, `duckduckgo`, `google`, `mojeek`, `yahoo`, `yandex`, `wikipedia`.
+- **Full Content Extraction**: Optionally fetch complete page content (not just snippets) for comprehensive context.
 - **search_news**: Find latest updates, releases, and tech news.
 
+## Full Content Extraction
+
+For coding agents that need complete context from search results, enable full page content fetching:
+
+### Usage
+
+```json
+{
+  "query": "python async programming tutorial",
+  "fetch_full_content": true,
+  "max_content_length": 50000,
+  "max_results": 5
+}
+```
+
+### Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `fetch_full_content` | boolean | `false` | Enable full page content extraction |
+| `max_content_length` | integer | `50000` | Maximum characters per page (when `fetch_full_content` is true) |
+
+### Response Structure
+
+When `fetch_full_content` is enabled, each result includes a `full_content` field:
+
+```json
+[
+  {
+    "title": "Python Async Programming Guide",
+    "href": "https://example.com/python-async",
+    "body": "Brief snippet from search results...",
+    "full_content": "Complete extracted article text with all paragraphs, code examples, and detailed explanations..."
+  }
+]
+```
+
+### Performance Notes
+
+- Content extraction adds ~1-3 seconds latency per page
+- Up to 5 pages are fetched concurrently to minimize total time
+- Failed fetches return `[Content extraction failed or blocked]` without breaking the search
+- Uses [Trafilatura](https://trafilatura.readthedocs.io/) for high-quality text extraction
+
 
 ## Installation & Usage
 
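The usage JSON above is the argument payload an MCP client sends to the `search_text` tool. A minimal sketch of driving that call with the official `mcp` Python client over stdio follows; the console-script name `ddgs-mcp-server` is an assumption (this diff does not show the `[project.scripts]` entry), as is the local install:

```python
# Minimal sketch: call search_text with full content extraction over MCP stdio.
# Assumes the server is installed locally and exposes a console script named
# `ddgs-mcp-server` (hypothetical -- check your [project.scripts] entry).
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

params = StdioServerParameters(command="ddgs-mcp-server")

async def main() -> None:
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool(
                "search_text",
                {
                    "query": "python async programming tutorial",
                    "fetch_full_content": True,
                    "max_content_length": 50000,
                    "max_results": 5,
                },
            )
            # The server returns one TextContent item whose text is a JSON array.
            print(result.content[0].text)

asyncio.run(main())
```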
ddgs_mcp_server-0.4.1/PKG-INFO → ddgs_mcp_server-0.5.0/README.md

@@ -1,22 +1,57 @@
-Metadata-Version: 2.4
-Name: ddgs-mcp-server
-Version: 0.4.1
-Summary: DuckDuckGo Search MCP Server
-License-File: LICENSE
-Requires-Python: >=3.10
-Requires-Dist: ddgs>=9.10.0
-Requires-Dist: mcp>=1.0.0
-Description-Content-Type: text/markdown
-
 # DDGS MCP Server
 
 A Model Context Protocol (MCP) server that provides DuckDuckGo Search capabilities to AI agents.
 
 ## Features
 
-- **search_text**:
+- **search_text**: Advanced metasearch using `bing`, `brave`, `duckduckgo`, `google`, `mojeek`, `yahoo`, `yandex`, `wikipedia`.
+- **Full Content Extraction**: Optionally fetch complete page content (not just snippets) for comprehensive context.
 - **search_news**: Find latest updates, releases, and tech news.
 
+## Full Content Extraction
+
+For coding agents that need complete context from search results, enable full page content fetching:
+
+### Usage
+
+```json
+{
+  "query": "python async programming tutorial",
+  "fetch_full_content": true,
+  "max_content_length": 50000,
+  "max_results": 5
+}
+```
+
+### Parameters
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `fetch_full_content` | boolean | `false` | Enable full page content extraction |
+| `max_content_length` | integer | `50000` | Maximum characters per page (when `fetch_full_content` is true) |
+
+### Response Structure
+
+When `fetch_full_content` is enabled, each result includes a `full_content` field:
+
+```json
+[
+  {
+    "title": "Python Async Programming Guide",
+    "href": "https://example.com/python-async",
+    "body": "Brief snippet from search results...",
+    "full_content": "Complete extracted article text with all paragraphs, code examples, and detailed explanations..."
+  }
+]
+```
+
+### Performance Notes
+
+- Content extraction adds ~1-3 seconds latency per page
+- Up to 5 pages are fetched concurrently to minimize total time
+- Failed fetches return `[Content extraction failed or blocked]` without breaking the search
+- Uses [Trafilatura](https://trafilatura.readthedocs.io/) for high-quality text extraction
+
 
 ## Installation & Usage
 
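Since failed page fetches surface as the sentinel string documented above rather than as errors, a client that wants only successfully extracted pages must filter for it. A small illustrative sketch (the raw JSON here is a stand-in for a real tool response):

```python
import json

FAILED = "[Content extraction failed or blocked]"

# `raw` stands in for the JSON text returned by the search_text tool.
raw = '[{"title": "t", "href": "https://example.com", "body": "b", "full_content": "text"}]'
results = json.loads(raw)

# Keep only results whose pages were actually extracted.
usable = [r for r in results if r.get("full_content") and r["full_content"] != FAILED]
for r in usable:
    print(r["title"], "->", len(r["full_content"]), "chars of context")
```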
{ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/pyproject.toml

@@ -4,13 +4,15 @@ build-backend = "hatchling.build"
 
 [project]
 name = "ddgs-mcp-server"
-version = "0.4.1"
-description = "DuckDuckGo Search MCP Server"
+version = "0.5.0"
+description = "DuckDuckGo Search MCP Server with full page content extraction"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "mcp>=1.0.0",
-    "ddgs>=9.10.0"
+    "ddgs>=9.10.0",
+    "trafilatura>=2.0.0",
+    "httpx>=0.27.0"
 ]
 
 [project.scripts]
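The two new dependencies divide the work: `httpx` downloads the page and `trafilatura` reduces it to main-body text. A standalone sketch of that pipeline, reusing the extraction flags the server passes (the URL is illustrative):

```python
import httpx
import trafilatura

# Download the raw HTML (example URL is illustrative).
html = httpx.get("https://example.com", follow_redirects=True, timeout=10).text

# Reduce it to main-body text, mirroring the server's extraction settings.
text = trafilatura.extract(
    html,
    include_links=False,
    include_images=False,
    include_comments=False,
    favor_precision=True,
)
print(text[:500] if text else "[no extractable content]")
```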
{ddgs_mcp_server-0.4.1 → ddgs_mcp_server-0.5.0}/server.py

@@ -5,6 +5,8 @@ import logging
 import uuid
 from typing import Optional, Literal
 
+import httpx
+import trafilatura
 import uvicorn
 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
@@ -27,6 +29,86 @@ app = FastAPI(title="DDGS MCP Server")
 # MCP Server
 server = Server("ddgs-mcp-server")
 
+# --- Content Extraction Utilities ---
+
+async def fetch_page_content(
+    url: str,
+    timeout: int = 10,
+    max_length: int = 50000
+) -> Optional[str]:
+    """
+    Fetch and extract main text content from a URL using trafilatura.
+
+    Args:
+        url: The URL to fetch content from
+        timeout: Request timeout in seconds
+        max_length: Maximum characters to return
+
+    Returns:
+        Extracted text content or None on failure
+    """
+    try:
+        async with httpx.AsyncClient(
+            timeout=timeout,
+            follow_redirects=True,
+            verify=True
+        ) as client:
+            response = await client.get(url, headers={
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.5",
+            })
+            if response.status_code == 200:
+                downloaded = response.text
+                # Extract main content using trafilatura
+                extracted = trafilatura.extract(
+                    downloaded,
+                    include_links=False,
+                    include_images=False,
+                    include_comments=False,
+                    favor_precision=True
+                )
+                if extracted:
+                    return extracted[:max_length]
+    except httpx.TimeoutException:
+        logger.warning(f"Timeout fetching {url}")
+    except httpx.HTTPError as e:
+        logger.warning(f"HTTP error fetching {url}: {e}")
+    except Exception as e:
+        logger.warning(f"Failed to fetch {url}: {e}")
+    return None
+
+
+async def enrich_results_with_content(
+    results: list,
+    max_concurrent: int = 5,
+    max_length: int = 50000
+) -> list:
+    """
+    Fetch full content for all search results concurrently.
+
+    Args:
+        results: List of search result dictionaries
+        max_concurrent: Maximum concurrent requests
+        max_length: Maximum content length per page
+
+    Returns:
+        Results list with 'full_content' field added
+    """
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def fetch_with_semaphore(result: dict) -> dict:
+        async with semaphore:
+            url = result.get("href")
+            if url:
+                content = await fetch_page_content(url, max_length=max_length)
+                result["full_content"] = content if content else "[Content extraction failed or blocked]"
+        return result
+
+    tasks = [fetch_with_semaphore(r.copy()) for r in results]
+    return await asyncio.gather(*tasks)
+
+
 # --- DDGS Wrappers ---
 
 @server.list_tools()
@@ -34,7 +116,7 @@ async def list_tools() -> list[types.Tool]:
     return [
         types.Tool(
             name="search_text",
-            description="Perform a text search using DuckDuckGo. Use this for general web queries.",
+            description="Perform a text search using DuckDuckGo. Use this for general web queries. Optionally fetch full page content for complete context.",
             inputSchema={
                 "type": "object",
                 "properties": {
@@ -42,7 +124,17 @@ async def list_tools() -> list[types.Tool]:
                     "region": {"type": "string", "default": "us-en", "description": "e.g., us-en, uk-en"},
                     "safesearch": {"type": "string", "enum": ["on", "moderate", "off"], "default": "moderate"},
                     "timelimit": {"type": "string", "enum": ["d", "w", "m", "y"], "default": None},
-                    "max_results": {"type": "integer", "default": 10}
+                    "max_results": {"type": "integer", "default": 10},
+                    "fetch_full_content": {
+                        "type": "boolean",
+                        "default": False,
+                        "description": "If true, fetches and returns the full text content of each result page. This provides complete context but adds latency."
+                    },
+                    "max_content_length": {
+                        "type": "integer",
+                        "default": 50000,
+                        "description": "Maximum characters of content to fetch per page (only used if fetch_full_content is true)."
+                    }
                 },
                 "required": ["query"]
             }
@@ -116,22 +208,43 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
     timelimit = arguments.get("timelimit")
     max_results = arguments.get("max_results", 10)
 
+    # New parameters for full content extraction
+    fetch_full_content = arguments.get("fetch_full_content", False)
+    max_content_length = arguments.get("max_content_length", 50000)
+
     try:
         # Using context manager for DDGS
         with DDGS() as ddgs:
             results = []
             if name == "search_text":
                 results = ddgs.text(query=query, region=region, safesearch=safesearch, timelimit=timelimit, max_results=max_results)
+
+                # Convert generator to list for manipulation
+                results = list(results) if results else []
+
+                # Enrich with full content if requested
+                if fetch_full_content and results:
+                    logger.info(f"Fetching full content for {len(results)} results...")
+                    results = await enrich_results_with_content(
+                        results,
+                        max_length=max_content_length
+                    )
+                    logger.info("Full content extraction complete")
+
             elif name == "search_images":
                 results = ddgs.images(query=query, region=region, safesearch=safesearch, timelimit=timelimit, max_results=max_results)
+                results = list(results) if results else []
             elif name == "search_videos":
                 results = ddgs.videos(query=query, region=region, safesearch=safesearch, timelimit=timelimit, max_results=max_results)
+                results = list(results) if results else []
            elif name == "search_news":
                 results = ddgs.news(query=query, region=region, safesearch=safesearch, timelimit=timelimit, max_results=max_results)
+                results = list(results) if results else []
             elif name == "search_books":
                 # Check for books method availability or fallback
                 if hasattr(ddgs, 'books'):
                     results = ddgs.books(query=query, max_results=max_results)
+                    results = list(results) if results else []
                 else:
                     return [types.TextContent(type="text", text="Error: 'books' search backend not available in this version of python-ddgs.")]
             else:
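Because the enrichment helper is plain asyncio code, it can be exercised outside the MCP server. A quick sketch under the assumption that the package is installed and the module is importable as `ddgs_mcp_server.server`; the example URLs stand in for real search hits:

```python
import asyncio

# Assumed import path -- adjust if the module is installed differently.
from ddgs_mcp_server.server import enrich_results_with_content

fake_results = [
    {"title": "Example A", "href": "https://example.com/a", "body": "snippet"},
    {"title": "Example B", "href": "https://example.com/b", "body": "snippet"},
]

# At most 5 pages are fetched at once (asyncio.Semaphore); failures become
# the "[Content extraction failed or blocked]" sentinel string.
enriched = asyncio.run(enrich_results_with_content(fake_results, max_length=10000))
for r in enriched:
    print(r["title"], "->", r["full_content"][:80])
```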
ddgs_mcp_server-0.5.0/src/ddgs_mcp_server/server.py (new file)

@@ -0,0 +1,211 @@
+
+import json
+import logging
+import asyncio
+from typing import Optional
+
+import httpx
+import trafilatura
+from mcp.server import Server
+import mcp.types as types
+from ddgs import DDGS
+
+# Logging Configuration
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("ddgs-mcp")
+
+# MCP Server
+server = Server("ddgs-mcp-server")
+
+# --- Content Extraction Utilities ---
+
+async def fetch_page_content(
+    url: str,
+    timeout: int = 10,
+    max_length: int = 50000
+) -> Optional[str]:
+    """
+    Fetch and extract main text content from a URL using trafilatura.
+
+    Args:
+        url: The URL to fetch content from
+        timeout: Request timeout in seconds
+        max_length: Maximum characters to return
+
+    Returns:
+        Extracted text content or None on failure
+    """
+    try:
+        async with httpx.AsyncClient(
+            timeout=timeout,
+            follow_redirects=True,
+            verify=True
+        ) as client:
+            response = await client.get(url, headers={
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.5",
+            })
+            if response.status_code == 200:
+                downloaded = response.text
+                # Extract main content using trafilatura
+                extracted = trafilatura.extract(
+                    downloaded,
+                    include_links=False,
+                    include_images=False,
+                    include_comments=False,
+                    favor_precision=True
+                )
+                if extracted:
+                    return extracted[:max_length]
+    except httpx.TimeoutException:
+        logger.warning(f"Timeout fetching {url}")
+    except httpx.HTTPError as e:
+        logger.warning(f"HTTP error fetching {url}: {e}")
+    except Exception as e:
+        logger.warning(f"Failed to fetch {url}: {e}")
+    return None
+
+
+async def enrich_results_with_content(
+    results: list,
+    max_concurrent: int = 5,
+    max_length: int = 50000
+) -> list:
+    """
+    Fetch full content for all search results concurrently.
+
+    Args:
+        results: List of search result dictionaries
+        max_concurrent: Maximum concurrent requests
+        max_length: Maximum content length per page
+
+    Returns:
+        Results list with 'full_content' field added
+    """
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def fetch_with_semaphore(result: dict) -> dict:
+        async with semaphore:
+            url = result.get("href")
+            if url:
+                content = await fetch_page_content(url, max_length=max_length)
+                result["full_content"] = content if content else "[Content extraction failed or blocked]"
+        return result
+
+    tasks = [fetch_with_semaphore(r.copy()) for r in results]
+    return await asyncio.gather(*tasks)
+
+
+# --- MCP Tool Definitions ---
+
+@server.list_tools()
+async def list_tools() -> list[types.Tool]:
+    return [
+        types.Tool(
+            name="search_text",
+            description="Perform a metasearch using various backends (DuckDuckGo, Google, Bing, etc.). Use this to find APIs, libraries, developer tools, and general information. Optionally fetch full page content for complete context.",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "query": {"type": "string", "description": "Search query"},
+                    "backend": {
+                        "type": "string",
+                        "enum": ["auto", "html", "lite", "bing", "brave", "duckduckgo", "google", "grokipedia", "mojeek", "yandex", "yahoo", "wikipedia"],
+                        "default": "auto",
+                        "description": "Search engine backend to use."
+                    },
+                    "region": {"type": "string", "default": "us-en", "description": "e.g., us-en, uk-en"},
+                    "safesearch": {"type": "string", "enum": ["on", "moderate", "off"], "default": "moderate"},
+                    "timelimit": {"type": "string", "enum": ["d", "w", "m", "y"], "default": None},
+                    "max_results": {"type": "integer", "default": 10},
+                    "fetch_full_content": {
+                        "type": "boolean",
+                        "default": False,
+                        "description": "If true, fetches and returns the full text content of each result page. This provides complete context but adds latency."
+                    },
+                    "max_content_length": {
+                        "type": "integer",
+                        "default": 50000,
+                        "description": "Maximum characters of content to fetch per page (only used if fetch_full_content is true)."
+                    }
+                },
+                "required": ["query"]
+            }
+        ),
+        types.Tool(
+            name="search_news",
+            description="Perform a news search to find the latest updates, releases, or security alerts.",
+            inputSchema={
+                "type": "object",
+                "properties": {
+                    "query": {"type": "string"},
+                    "region": {"type": "string", "default": "us-en"},
+                    "safesearch": {"type": "string", "default": "moderate"},
+                    "timelimit": {"type": "string", "default": None},
+                    "max_results": {"type": "integer", "default": 10}
+                },
+                "required": ["query"]
+            }
+        )
+    ]
+
+
+@server.call_tool()
+async def call_tool(name: str, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
+    logger.info(f"Calling tool: {name} with args: {arguments}")
+
+    if name not in ["search_text", "search_news"]:
+        raise ValueError(f"Unknown tool: {name}")
+
+    query = arguments.get("query")
+    backend = arguments.get("backend", "auto")
+    region = arguments.get("region", "us-en")
+    safesearch = arguments.get("safesearch", "moderate")
+    timelimit = arguments.get("timelimit")
+    max_results = arguments.get("max_results", 10)
+
+    # New parameters for full content extraction
+    fetch_full_content = arguments.get("fetch_full_content", False)
+    max_content_length = arguments.get("max_content_length", 50000)
+
+    try:
+        with DDGS() as ddgs:
+            results = []
+            if name == "search_text":
+                results = ddgs.text(
+                    query=query,
+                    region=region,
+                    safesearch=safesearch,
+                    timelimit=timelimit,
+                    max_results=max_results,
+                    backend=backend
+                )
+
+                # Convert generator to list for manipulation
+                results = list(results) if results else []
+
+                # Enrich with full content if requested
+                if fetch_full_content and results:
+                    logger.info(f"Fetching full content for {len(results)} results...")
+                    results = await enrich_results_with_content(
+                        results,
+                        max_length=max_content_length
+                    )
+                    logger.info("Full content extraction complete")
+
+            elif name == "search_news":
+                results = ddgs.news(
+                    query=query,
+                    region=region,
+                    safesearch=safesearch,
+                    timelimit=timelimit,
+                    max_results=max_results
+                )
+                results = list(results) if results else []
+
+            return [types.TextContent(type="text", text=json.dumps(results, indent=2))]
+
+    except Exception as e:
+        logger.error(f"Error executing {name}: {e}")
+        return [types.TextContent(type="text", text=f"Error performing search: {str(e)}")]
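The `backend` argument the new module forwards is an ordinary keyword of `ddgs.DDGS.text`, so the metasearch behavior can be checked without MCP in the loop. A short sketch using the same parameters and defaults the tool schema declares:

```python
from ddgs import DDGS

# Same call the search_text tool issues, minus the MCP plumbing.
with DDGS() as ddgs:
    results = ddgs.text(
        query="model context protocol",
        region="us-en",
        safesearch="moderate",
        backend="auto",   # or "google", "bing", "brave", ...
        max_results=5,
    )
    for r in results:
        print(r["title"], r["href"])
```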
ddgs_mcp_server-0.4.1/src/ddgs_mcp_server/server.py (deleted)

@@ -1,96 +0,0 @@
-
-import json
-import logging
-from typing import Optional, Literal
-from mcp.server import Server
-import mcp.types as types
-from ddgs import DDGS
-
-# Logging Configuration
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("ddgs-mcp")
-
-# MCP Server
-server = Server("ddgs-mcp-server")
-
-@server.list_tools()
-async def list_tools() -> list[types.Tool]:
-    return [
-        types.Tool(
-            name="search_text",
-            description="Perform a metasearch using various backends (DuckDuckGo, Google, Bing, etc.). Use this to find APIs, libraries, developer tools, and general information.",
-            inputSchema={
-                "type": "object",
-                "properties": {
-                    "query": {"type": "string", "description": "Search query"},
-                    "backend": {
-                        "type": "string",
-                        "enum": ["auto", "html", "lite", "bing", "brave", "duckduckgo", "google", "grokipedia", "mojeek", "yandex", "yahoo", "wikipedia"],
-                        "default": "auto",
-                        "description": "Search engine backend to use."
-                    },
-                    "region": {"type": "string", "default": "us-en", "description": "e.g., us-en, uk-en"},
-                    "safesearch": {"type": "string", "enum": ["on", "moderate", "off"], "default": "moderate"},
-                    "timelimit": {"type": "string", "enum": ["d", "w", "m", "y"], "default": None},
-                    "max_results": {"type": "integer", "default": 10}
-                },
-                "required": ["query"]
-            }
-        ),
-        types.Tool(
-            name="search_news",
-            description="Perform a news search to find the latest updates, releases, or security alerts.",
-            inputSchema={
-                "type": "object",
-                "properties": {
-                    "query": {"type": "string"},
-                    "region": {"type": "string", "default": "us-en"},
-                    "safesearch": {"type": "string", "default": "moderate"},
-                    "timelimit": {"type": "string", "default": None},
-                    "max_results": {"type": "integer", "default": 10}
-                },
-                "required": ["query"]
-            }
-        )
-    ]
-
-@server.call_tool()
-async def call_tool(name: str, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
-    logger.info(f"Calling tool: {name} with args: {arguments}")
-
-    if name not in ["search_text", "search_news"]:
-        raise ValueError(f"Unknown tool: {name}")
-
-    query = arguments.get("query")
-    backend = arguments.get("backend", "auto")
-    region = arguments.get("region", "us-en")
-    safesearch = arguments.get("safesearch", "moderate")
-    timelimit = arguments.get("timelimit")
-    max_results = arguments.get("max_results", 10)
-
-    try:
-        with DDGS() as ddgs:
-            results = []
-            if name == "search_text":
-                results = ddgs.text(
-                    query=query,
-                    region=region,
-                    safesearch=safesearch,
-                    timelimit=timelimit,
-                    max_results=max_results,
-                    backend=backend
-                )
-            elif name == "search_news":
-                results = ddgs.news(
-                    query=query,
-                    region=region,
-                    safesearch=safesearch,
-                    timelimit=timelimit,
-                    max_results=max_results
-                )
-
-            return [types.TextContent(type="text", text=json.dumps(results, indent=2))]
-
-    except Exception as e:
-        logger.error(f"Error executing {name}: {e}")
-        return [types.TextContent(type="text", text=f"Error performing search: {str(e)}")]