ddgs-mcp-server 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddgs_mcp_server/server.py CHANGED
@@ -1,7 +1,11 @@
1
1
 
2
2
  import json
3
3
  import logging
4
- from typing import Optional, Literal
4
+ import asyncio
5
+ from typing import Optional
6
+
7
+ import httpx
8
+ import trafilatura
5
9
  from mcp.server import Server
6
10
  import mcp.types as types
7
11
  from ddgs import DDGS
@@ -13,12 +17,94 @@ logger = logging.getLogger("ddgs-mcp")
13
17
  # MCP Server
14
18
  server = Server("ddgs-mcp-server")
15
19
 
20
+ # --- Content Extraction Utilities ---
21
+
22
async def fetch_page_content(
    url: str,
    timeout: int = 10,
    max_length: int = 50000
) -> Optional[str]:
    """
    Fetch and extract main text content from a URL using trafilatura.

    Args:
        url: The URL to fetch content from
        timeout: Request timeout in seconds
        max_length: Maximum number of characters to return

    Returns:
        Extracted text content (truncated to max_length), or None on any
        failure (timeout, HTTP error, non-200 status, empty extraction).
    """
    try:
        async with httpx.AsyncClient(
            timeout=timeout,
            follow_redirects=True,
            verify=True  # keep TLS verification on: we fetch arbitrary URLs
        ) as client:
            # Browser-like headers reduce the chance of being blocked by sites.
            response = await client.get(url, headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                "Accept-Language": "en-US,en;q=0.5",
            })
            if response.status_code == 200:
                # trafilatura.extract is CPU-bound; run it off the event loop
                # so concurrent fetches are not serialized behind HTML parsing.
                extracted = await asyncio.to_thread(
                    trafilatura.extract,
                    response.text,
                    include_links=False,
                    include_images=False,
                    include_comments=False,
                    favor_precision=True,
                )
                if extracted:
                    return extracted[:max_length]
            else:
                # Fix: non-200 responses were previously dropped silently,
                # unlike every other failure path in this function.
                logger.warning(f"HTTP {response.status_code} fetching {url}")
    except httpx.TimeoutException:
        logger.warning(f"Timeout fetching {url}")
    except httpx.HTTPError as e:
        logger.warning(f"HTTP error fetching {url}: {e}")
    except Exception as e:
        logger.warning(f"Failed to fetch {url}: {e}")
    return None
68
+
69
+
70
async def enrich_results_with_content(
    results: list,
    max_concurrent: int = 5,
    max_length: int = 50000
) -> list:
    """
    Fetch full page content for all search results concurrently.

    Args:
        results: List of search result dictionaries
        max_concurrent: Maximum number of simultaneous fetches
        max_length: Maximum content length per page

    Returns:
        A new list of result dicts (input dicts are not mutated) with a
        'full_content' field added to every entry.
    """
    # Bound concurrency so we do not hammer remote hosts.
    semaphore = asyncio.Semaphore(max_concurrent)

    async def fetch_with_semaphore(result: dict) -> dict:
        async with semaphore:
            url = result.get("href")
            if url:
                content = await fetch_page_content(url, max_length=max_length)
                result["full_content"] = content if content else "[Content extraction failed or blocked]"
            else:
                # Fix: results without an 'href' previously came back without
                # the documented 'full_content' field at all.
                result["full_content"] = "[No URL available]"
            return result

    # Copy each result so the caller's dicts are left untouched.
    tasks = [fetch_with_semaphore(r.copy()) for r in results]
    return await asyncio.gather(*tasks)
98
+
99
+
100
+ # --- MCP Tool Definitions ---
101
+
16
102
  @server.list_tools()
17
103
  async def list_tools() -> list[types.Tool]:
18
104
  return [
19
105
  types.Tool(
20
106
  name="search_text",
21
- description="Perform a metasearch using various backends (DuckDuckGo, Google, Bing, etc.). Use this to find APIs, libraries, developer tools, and general information.",
107
+ description="Perform a metasearch using various backends (DuckDuckGo, Google, Bing, etc.). Use this to find APIs, libraries, developer tools, and general information. Optionally fetch full page content for complete context.",
22
108
  inputSchema={
23
109
  "type": "object",
24
110
  "properties": {
@@ -32,7 +118,17 @@ async def list_tools() -> list[types.Tool]:
32
118
  "region": {"type": "string", "default": "us-en", "description": "e.g., us-en, uk-en"},
33
119
  "safesearch": {"type": "string", "enum": ["on", "moderate", "off"], "default": "moderate"},
34
120
  "timelimit": {"type": "string", "enum": ["d", "w", "m", "y"], "default": None},
35
- "max_results": {"type": "integer", "default": 10}
121
+ "max_results": {"type": "integer", "default": 10},
122
+ "fetch_full_content": {
123
+ "type": "boolean",
124
+ "default": False,
125
+ "description": "If true, fetches and returns the full text content of each result page. This provides complete context but adds latency."
126
+ },
127
+ "max_content_length": {
128
+ "type": "integer",
129
+ "default": 50000,
130
+ "description": "Maximum characters of content to fetch per page (only used if fetch_full_content is true)."
131
+ }
36
132
  },
37
133
  "required": ["query"]
38
134
  }
@@ -54,6 +150,7 @@ async def list_tools() -> list[types.Tool]:
54
150
  )
55
151
  ]
56
152
 
153
+
57
154
  @server.call_tool()
58
155
  async def call_tool(name: str, arguments: dict) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
59
156
  logger.info(f"Calling tool: {name} with args: {arguments}")
@@ -68,6 +165,10 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent | type
68
165
  timelimit = arguments.get("timelimit")
69
166
  max_results = arguments.get("max_results", 10)
70
167
 
168
+ # New parameters for full content extraction
169
+ fetch_full_content = arguments.get("fetch_full_content", False)
170
+ max_content_length = arguments.get("max_content_length", 50000)
171
+
71
172
  try:
72
173
  with DDGS() as ddgs:
73
174
  results = []
@@ -80,6 +181,19 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent | type
80
181
  max_results=max_results,
81
182
  backend=backend
82
183
  )
184
+
185
+ # Convert generator to list for manipulation
186
+ results = list(results) if results else []
187
+
188
+ # Enrich with full content if requested
189
+ if fetch_full_content and results:
190
+ logger.info(f"Fetching full content for {len(results)} results...")
191
+ results = await enrich_results_with_content(
192
+ results,
193
+ max_length=max_content_length
194
+ )
195
+ logger.info("Full content extraction complete")
196
+
83
197
  elif name == "search_news":
84
198
  results = ddgs.news(
85
199
  query=query,
@@ -88,6 +202,7 @@ async def call_tool(name: str, arguments: dict) -> list[types.TextContent | type
88
202
  timelimit=timelimit,
89
203
  max_results=max_results
90
204
  )
205
+ results = list(results) if results else []
91
206
 
92
207
  return [types.TextContent(type="text", text=json.dumps(results, indent=2))]
93
208
 
@@ -1,11 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddgs-mcp-server
3
- Version: 0.4.1
4
- Summary: DuckDuckGo Search MCP Server
3
+ Version: 0.5.0
4
+ Summary: DuckDuckGo Search MCP Server with full page content extraction
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.10
7
7
  Requires-Dist: ddgs>=9.10.0
8
+ Requires-Dist: httpx>=0.27.0
8
9
  Requires-Dist: mcp>=1.0.0
10
+ Requires-Dist: trafilatura>=2.0.0
9
11
  Description-Content-Type: text/markdown
10
12
 
11
13
  # DDGS MCP Server
@@ -14,9 +16,54 @@ A Model Context Protocol (MCP) server that provides DuckDuckGo Search capabiliti
14
16
 
15
17
  ## Features
16
18
 
17
- - **search_text**: advanced metasearch using `bing`, `brave`, `duckduckgo`, `google`, `mojeek`, `yahoo`, `yandex`, `wikipedia`.
19
+ - **search_text**: Advanced metasearch using `bing`, `brave`, `duckduckgo`, `google`, `mojeek`, `yahoo`, `yandex`, `wikipedia`.
20
+ - **Full Content Extraction**: Optionally fetch complete page content (not just snippets) for comprehensive context.
18
21
  - **search_news**: Find latest updates, releases, and tech news.
19
22
 
23
+ ## Full Content Extraction
24
+
25
+ For coding agents that need complete context from search results, enable full page content fetching:
26
+
27
+ ### Usage
28
+
29
+ ```json
30
+ {
31
+ "query": "python async programming tutorial",
32
+ "fetch_full_content": true,
33
+ "max_content_length": 50000,
34
+ "max_results": 5
35
+ }
36
+ ```
37
+
38
+ ### Parameters
39
+
40
+ | Parameter | Type | Default | Description |
41
+ |-----------|------|---------|-------------|
42
+ | `fetch_full_content` | boolean | `false` | Enable full page content extraction |
43
+ | `max_content_length` | integer | `50000` | Maximum characters per page (when `fetch_full_content` is true) |
44
+
45
+ ### Response Structure
46
+
47
+ When `fetch_full_content` is enabled, each result includes a `full_content` field:
48
+
49
+ ```json
50
+ [
51
+ {
52
+ "title": "Python Async Programming Guide",
53
+ "href": "https://example.com/python-async",
54
+ "body": "Brief snippet from search results...",
55
+ "full_content": "Complete extracted article text with all paragraphs, code examples, and detailed explanations..."
56
+ }
57
+ ]
58
+ ```
59
+
60
+ ### Performance Notes
61
+
62
+ - Content extraction adds ~1-3 seconds latency per page
63
+ - Up to 5 pages are fetched concurrently to minimize total time
64
+ - Failed fetches return `[Content extraction failed or blocked]` without breaking the search
65
+ - Uses [Trafilatura](https://trafilatura.readthedocs.io/) for high-quality text extraction
66
+
20
67
 
21
68
  ## Installation & Usage
22
69
 
@@ -0,0 +1,8 @@
1
+ ddgs_mcp_server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ ddgs_mcp_server/main.py,sha256=hqJl7UoGQoL9a-2hX24srZYFGdatheJfgkn5wz5Od70,492
3
+ ddgs_mcp_server/server.py,sha256=jFfmXEFCoSQh3W0-DmlVAcZCcFu5GEKWRLo0cdZsSU8,8249
4
+ ddgs_mcp_server-0.5.0.dist-info/METADATA,sha256=ZHDmduec9GXJ-ufpGuG1NhAYrnIS0GYGwkV8AMvDiNQ,3757
5
+ ddgs_mcp_server-0.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
+ ddgs_mcp_server-0.5.0.dist-info/entry_points.txt,sha256=8YvtzhkNDMvAy2CdIx8VppBFjiBSJ56JtLX-v8SUHGc,62
7
+ ddgs_mcp_server-0.5.0.dist-info/licenses/LICENSE,sha256=vLPKcNOa4dGBRPq4I_mIBKyVSbIlzrOdinbwXFeKb88,1091
8
+ ddgs_mcp_server-0.5.0.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- ddgs_mcp_server/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- ddgs_mcp_server/main.py,sha256=hqJl7UoGQoL9a-2hX24srZYFGdatheJfgkn5wz5Od70,492
3
- ddgs_mcp_server/server.py,sha256=IEsDiPsw2ciIVnmoOBYnOuVd_fCXmOXfNapQTBpO9wc,3919
4
- ddgs_mcp_server-0.4.1.dist-info/METADATA,sha256=bylRyAQpkCOFQilQt-rMLuP1j4eOxLMZjQsZMyVqyyE,2227
5
- ddgs_mcp_server-0.4.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
- ddgs_mcp_server-0.4.1.dist-info/entry_points.txt,sha256=8YvtzhkNDMvAy2CdIx8VppBFjiBSJ56JtLX-v8SUHGc,62
7
- ddgs_mcp_server-0.4.1.dist-info/licenses/LICENSE,sha256=vLPKcNOa4dGBRPq4I_mIBKyVSbIlzrOdinbwXFeKb88,1091
8
- ddgs_mcp_server-0.4.1.dist-info/RECORD,,