iflow-mcp_howe829-websearch-mcp-server 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- config.py +20 -0
- http_client.py +66 -0
- iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/METADATA +113 -0
- iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/RECORD +21 -0
- iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/WHEEL +4 -0
- iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/entry_points.txt +2 -0
- iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/licenses/LICENSE +21 -0
- providers/__init__.py +0 -0
- providers/baidu.py +35 -0
- providers/base.py +21 -0
- providers/bing.py +41 -0
- providers/enums.py +23 -0
- providers/factory.py +24 -0
- providers/github.py +22 -0
- providers/google.py +30 -0
- providers/wechat.py +17 -0
- server.py +211 -0
- xagents/__init__.py +0 -0
- xagents/agent.py +59 -0
- xagents/query_optimizer.py +72 -0
- xagents/result_formatter.py +53 -0
config.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Settings(BaseSettings):
    """Application configuration loaded from environment variables / a .env file."""

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")

    # Base URLs for the Bing and Google providers (e.g. "https://www.bing.com");
    # configurable so regional mirrors or proxies can be used.
    bing_search_base_url: str
    google_search_base_url: str
    # Default country/region code for searches (e.g. "us").
    cc: str
    # Default result language (e.g. "en").
    language: str
    # Browser fingerprint that curl_cffi should impersonate (e.g. "edge").
    impersonate: str
    # Bind address and port for the HTTP-based server transports.
    host: str
    port: int
    # Transport mode: "stdio", "sse" or "streamable-http".
    server_mode: str
    # OpenAI-compatible LLM endpoint used by the agent pipeline.
    llm_base_url: str
    llm_api_key: str
    llm_model_name: str


# Module-level singleton imported throughout the project.
settings = Settings()
|
http_client.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import sys
|
|
3
|
+
import nodriver as uc
|
|
4
|
+
from curl_cffi import requests, AsyncSession
|
|
5
|
+
from markitdown import MarkItDown
|
|
6
|
+
from io import BytesIO
|
|
7
|
+
from requests import Request
|
|
8
|
+
from typing import Optional
|
|
9
|
+
from config import settings
|
|
10
|
+
|
|
11
|
+
# Shared HTML-to-Markdown converter; plugins disabled for deterministic output.
md = MarkItDown(enable_plugins=False)


class AsyncHttpClient:
    """HTTP client with two fetch strategies.

    Fast path: curl_cffi with TLS-fingerprint impersonation — a synchronous
    Session on Windows, an AsyncSession elsewhere. Slow path: a real browser
    driven by nodriver, for sites that block plain HTTP clients.
    """

    def __init__(self):
        if self.is_windows:
            # NOTE(review): Windows uses the sync curl_cffi Session — presumably
            # because AsyncSession's event-loop integration is unreliable there;
            # confirm against curl_cffi documentation.
            self._session = requests.Session(impersonate=settings.impersonate)
        else:
            self._session = AsyncSession(impersonate=settings.impersonate)

    @property
    def is_windows(self):
        # True on any Windows platform (sys.platform is "win32" there).
        return sys.platform.startswith("win")

    async def _get_by_curl_async(self, url: str, params: Optional[dict]) -> str:
        """GET via the curl_cffi AsyncSession; returns the response body text."""
        # Rebind the session's curl event loop to the currently running loop so
        # the long-lived session works regardless of which loop is active.
        self._session.acurl.loop = asyncio.get_running_loop()
        response = await self._session.get(url=url, params=params)
        return response.text

    def _get_by_curl(self, url: str, params: Optional[dict]) -> str:
        """Synchronous GET via curl_cffi (Windows path); returns body text."""
        response = self._session.get(url=url, params=params)
        return response.text

    async def _get_by_browser(self, url: str, params: Optional[dict]) -> str:
        """Fetch a page by driving a real browser (nodriver).

        On any failure the error is returned as text rather than raised, so
        callers always receive a string.
        """
        driver = await uc.start()
        try:
            # requests' Request is used only to encode params into the URL.
            req = Request("GET", url, params=params).prepare()
            url = req.url if req.url is not None else url
            tab = await driver.get(url)
            # Fixed wait to give client-side rendering time to finish.
            await asyncio.sleep(5)
            text = await tab.get_content()
            return text
        except Exception as e:
            return f"Error when using browser: {str(e)}"
        finally:
            driver.stop()

    async def get(
        self, url: str, params: Optional[dict] = None, use_browser: bool = False
    ) -> str:
        """Fetch a URL, dispatching to the browser or platform-specific curl path."""
        if use_browser:
            return await self._get_by_browser(url, params)
        if self.is_windows:
            return self._get_by_curl(url, params)
        return await self._get_by_curl_async(url, params)

    async def get_markdown(
        self, url: str, params: Optional[dict] = None, use_browser: bool = False
    ) -> str:
        """Fetch a URL and convert the response HTML to Markdown text."""
        text = await self.get(url=url, params=params, use_browser=use_browser)
        buffer = BytesIO(text.encode("utf-8"))
        result = md.convert(buffer)
        return result.text_content


# Module-level singleton shared by the providers and the MCP server.
aio_client = AsyncHttpClient()
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: iflow-mcp_howe829-websearch-mcp-server
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Add your description here
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: curl-cffi>=0.11.1
|
|
8
|
+
Requires-Dist: fastmcp>=2.5.1
|
|
9
|
+
Requires-Dist: loguru>=0.7.3
|
|
10
|
+
Requires-Dist: markitdown[all]>=0.1.1
|
|
11
|
+
Requires-Dist: nodriver>=0.46.1
|
|
12
|
+
Requires-Dist: openai-agents[litellm]>=0.0.17
|
|
13
|
+
Requires-Dist: pydantic-settings>=2.9.1
|
|
14
|
+
Requires-Dist: pydantic>=2.11.5
|
|
15
|
+
Requires-Dist: pytest-asyncio>=1.0.0
|
|
16
|
+
Requires-Dist: pytest>=8.3.5
|
|
17
|
+
Requires-Dist: ruff>=0.11.12
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# WebSearch MCP Server
|
|
21
|
+
|
|
22
|
+
A simple web search server that supports free search and converts URL content to Markdown. [中文](README-zh.md)
|
|
23
|
+
|
|
24
|
+
## Features
|
|
25
|
+
|
|
26
|
+
- **Web Search:** Perform web searches using different providers. Supported providers: Bing, Baidu, Google(with browser_use).
|
|
27
|
+
- **Markdown Conversion:** Convert the content of a URL into Markdown format.
|
|
28
|
+
- **Wechat Official Account Articles:** Search and retrieve WeChat official account article content.
|
|
29
|
+
- **GitHub Search:** Easily search for anything on GitHub — repositories, users, issues, and more.
|
|
30
|
+
|
|
31
|
+
## Getting Started
|
|
32
|
+
|
|
33
|
+
### Prerequisites
|
|
34
|
+
|
|
35
|
+
- Python 3.11 or higher
|
|
36
|
+
- uv for dependency management (or pip)
|
|
37
|
+
|
|
38
|
+
### Installation
|
|
39
|
+
|
|
40
|
+
1. Clone the repository:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
git clone https://github.com/Howe829/websearch-mcp-server.git
|
|
44
|
+
cd websearch-mcp-server
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
2. Install the dependencies using uv (Recommended):
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
uv venv && uv sync
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Or, if you prefer using pip:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install -r requirements.txt
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Configuration
|
|
60
|
+
|
|
61
|
+
1. Create a `.env` file based on the `.env.example` file:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
cp .env.example .env
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
2. Modify the `.env` file with your desired settings:
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
BING_SEARCH_BASE_URL="https://www.bing.com"
|
|
71
|
+
LANGUAGE="en"
|
|
72
|
+
CC="us"
|
|
73
|
+
IMPERSONATE="edge"
|
|
74
|
+
HOST=127.0.0.1
|
|
75
|
+
PORT=8002
|
|
76
|
+
SERVER_MODE=streamable-http
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Usage
|
|
80
|
+
|
|
81
|
+
Run the server using uv:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
uv run python server.py
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Or run the server using python:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
source .venv/bin/activate
|
|
91
|
+
python server.py
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Contribution
|
|
95
|
+
Contributions are welcome!
|
|
96
|
+
|
|
97
|
+
This project uses pytest for unit tests
|
|
98
|
+
```bash
|
|
99
|
+
uv pip install pytest
|
|
100
|
+
uv pip install pytest-asyncio
|
|
101
|
+
uv run pytest
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
And use ruff for code style formatting
|
|
105
|
+
```bash
|
|
106
|
+
uv pip install ruff
|
|
107
|
+
ruff format .
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Use the MCP Inspector to debug this server
|
|
111
|
+
```bash
|
|
112
|
+
uv run fastmcp dev server.py
|
|
113
|
+
```
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
config.py,sha256=YJ-ghBsucZW3X4jAUhhklVAAleNt7Sp0K4_jnxdbqXk,431
|
|
2
|
+
http_client.py,sha256=S5n21NcVI27C4nMMgssHFEB3LR8vMGaUqtPAYMkjJvI,2214
|
|
3
|
+
providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
providers/baidu.py,sha256=1Ed0FANU-B4UBMwRZLHs6gbP1s-Wcq2NPkvlygwwMLg,1286
|
|
5
|
+
providers/base.py,sha256=2KrFcQQyemQzxHnyzFyX4Wq4ZXvKkjZMz2L-FcmOalg,652
|
|
6
|
+
providers/bing.py,sha256=UcW5rGClRVwdLUnKgR5ai72hCvtUztFH23HhDdCr1Fs,1468
|
|
7
|
+
providers/enums.py,sha256=ASog8l8ruMDLxBYRb_I2HOQ-kypbKGXKtf6nAcS2Ee0,475
|
|
8
|
+
providers/factory.py,sha256=VLMHjTO6IsSa9nllxcL43ejCsu1rTR4F6xLjVnxGK68,991
|
|
9
|
+
providers/github.py,sha256=PIZcXVvOir9Z57xKOFsDNGNSPE7efpRkl_tQKPb8hrY,539
|
|
10
|
+
providers/google.py,sha256=woJaA-C6MZU8osn5KywNGooSKGduKaZJJmVNxZxTYsM,942
|
|
11
|
+
providers/wechat.py,sha256=n7v5KHiQmrhHtKc4cI7YGNMd4GDp3MFy7XhdTc0PvCM,371
|
|
12
|
+
server.py,sha256=W8UcidG39nqAIzPO_cpaNeohTQ4WGLD5Nki0pbWvLTA,5549
|
|
13
|
+
xagents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
xagents/agent.py,sha256=kKXNywezkhqOUeojP8vEkSk9f1EKtUG8pa3DRmLV6uA,2333
|
|
15
|
+
xagents/query_optimizer.py,sha256=Z8Sz4Lpkyfnl0PoVv6tuXv3eGYBXPFZiZg-rmHu1Vvc,2677
|
|
16
|
+
xagents/result_formatter.py,sha256=le9Nw4WLayUoWZqqvAYjgb1J2BEvXqtCvkEgdYqC018,2195
|
|
17
|
+
iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/METADATA,sha256=_HzWBiojgoItnep9Wmy3AAqv8tzXFjp-44Gqnph1bqU,2458
|
|
18
|
+
iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
19
|
+
iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/entry_points.txt,sha256=iILKixFfn48v6KB6N8J_cw2DaW4eU6fdzHqhg5XYLDc,53
|
|
20
|
+
iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/licenses/LICENSE,sha256=sWqgeExPhDaW2FhUEKSl7rsxIfMzSgo8Nj8i0ilWnJs,1072
|
|
21
|
+
iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) [2025] [Howard Chan]
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
providers/__init__.py
ADDED
|
File without changes
|
providers/baidu.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from providers.base import BaseWebSearchProvider
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BaiduSearch(BaseWebSearchProvider):
    """Web-search provider backed by Baidu's public result page."""

    def _get_url(self):
        # Baidu's standard search endpoint.
        return "https://www.baidu.com/s"

    def _get_params(self, query: str, **kwargs) -> dict:
        # "wd" is Baidu's query parameter; no other options are supported.
        return {
            "wd": query,
        }

    @property
    def description(self):
        return """
Strengths:
- Strong understanding of Chinese language and entity recognition
- Better coverage of local Chinese content (government, regional news, company sites)
- Excellent performance with brand names and trending topics
- Fast, responsive search experience

Weaknesses:
- Weak support for English-language queries
- Limited support for structured queries (e.g., Boolean logic, site:, filetype:)
- Poor international or technical content retrieval compared to Bing or Google

Query Strategy:
- For Chinese: Use natural language phrasing with brand or location keywords (e.g., “今日金价 上海黄金交易所”)
- Avoid complex English queries; translate and rephrase into Chinese where possible
- Emphasize relevance over syntax — focus on intent expression
"""


baidu_search = BaiduSearch()
|
providers/base.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from abc import abstractmethod
|
|
2
|
+
from pydantic import BaseModel
|
|
3
|
+
from config import settings
|
|
4
|
+
from http_client import aio_client
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BaseWebSearchProvider(BaseModel):
    """Common contract for all web-search providers.

    Subclasses supply the endpoint URL and the query-string parameters; the
    shared `search` flow fetches the result page and renders it as Markdown.
    """

    @abstractmethod
    def _get_url(self) -> str:
        """Return the provider's search endpoint URL."""
        raise NotImplementedError

    @abstractmethod
    def _get_params(self, query: str, **kwargs) -> dict:
        """Build the provider-specific query-string parameters."""
        raise NotImplementedError

    async def search(self, query: str, use_browser: bool = False, **kwargs) -> str:
        """Run a search and return the result page converted to Markdown."""
        endpoint = self._get_url()
        query_params = self._get_params(query=query, **kwargs)
        return await aio_client.get_markdown(
            url=endpoint, params=query_params, use_browser=use_browser
        )
|
providers/bing.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from providers.base import BaseWebSearchProvider
|
|
2
|
+
from config import settings
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BingSearch(BaseWebSearchProvider):
    """Bing web-search provider.

    See https://www.bing.com/account/general for the locale options Bing
    accepts (the "cc" and "setlang" parameters below).
    """

    def _get_url(self):
        # The base URL is configurable so a mirror or proxy can be substituted.
        return f"{settings.bing_search_base_url}/search"

    def _get_params(self, query: str, **kwargs) -> dict:
        # Caller-supplied locale overrides win; otherwise fall back to config.
        region = kwargs.get("cc") or settings.cc
        language = kwargs.get("lang") or settings.language
        return {
            "q": query,
            "cc": region,
            "setlang": language,
        }

    @property
    def description(self):
        return """
Strengths:
- Excellent support for English queries
- Handles structured search operators well (site:, filetype:, before:, after:, etc.)
- Good at answering natural language questions (especially in English)
- Strong performance for technical, coding, and international finance topics

Weaknesses:
- Less effective at retrieving localized Chinese content
- Sometimes inferior Chinese-language result quality compared to Baidu

Query Strategy:
- For English: Use precise keywords + operators (e.g., `gold price today site:kitco.com`)
- For Chinese: Keep queries concise but add intent-enhancing modifiers (e.g., “实时”, “走势图”)
- Prefer Bing for technical, financial, or global information needs
"""


bing_search = BingSearch()
|
providers/enums.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class WebSearchProvidersEnum(str, Enum):
    """Identifiers of the supported web-search providers (factory keys)."""

    BAIDU = "baidu"
    BING = "bing"
    WECHAT = "wechat"
    GITHUB = "github"
    GOOGLE = "google"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class GithubSearchTypesEnum(str, Enum):
    """Values accepted by GitHub's search "type" query parameter."""

    REPO = "repositories"
    CODE = "code"
    ISSUE = "issues"
    PR = "pullrequests"
    DISCUSS = "discussions"
    USER = "users"
    COMMIT = "commits"
    PACKAGE = "registrypackages"
    WIKI = "wikis"
    TOPIC = "topics"
    MARKETPLACE = "marketplace"
|
providers/factory.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from providers.base import BaseWebSearchProvider
|
|
2
|
+
from providers.enums import WebSearchProvidersEnum
|
|
3
|
+
from providers.bing import bing_search
|
|
4
|
+
from providers.wechat import wechat_search
|
|
5
|
+
from providers.github import github_search
|
|
6
|
+
from providers.baidu import baidu_search
|
|
7
|
+
from providers.google import google_search
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class WebSearchProviderFactory:
    """Resolves a provider name to its singleton search-provider instance."""

    def get_provider(self, provider_name: str) -> BaseWebSearchProvider:
        """Return the provider registered under *provider_name*.

        Raises:
            ValueError: if the name matches no known provider.
        """
        registry = {
            WebSearchProvidersEnum.BAIDU.value: baidu_search,
            WebSearchProvidersEnum.BING.value: bing_search,
            WebSearchProvidersEnum.WECHAT.value: wechat_search,
            WebSearchProvidersEnum.GITHUB.value: github_search,
            WebSearchProvidersEnum.GOOGLE.value: google_search,
        }
        provider = registry.get(provider_name)
        if provider is None:
            raise ValueError(f"Unsupported provider: [{provider_name}]")
        return provider
|
providers/github.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from providers.base import BaseWebSearchProvider
|
|
2
|
+
from providers.enums import GithubSearchTypesEnum
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class GithubSearch(BaseWebSearchProvider):
    """Site-wide GitHub search provider (https://www.github.com)."""

    def _get_url(self):
        return "https://www.github.com/search"

    def _get_params(self, query: str, **kwargs) -> dict:
        # Defaults: repository search, first page of results.
        search_type = kwargs.get("type", GithubSearchTypesEnum.REPO.value)
        page = kwargs.get("page", 1)
        return {
            "q": query,
            "type": search_type,
            "p": page,
        }


github_search = GithubSearch()
|
providers/google.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from providers.base import BaseWebSearchProvider
|
|
2
|
+
from config import settings
|
|
3
|
+
from http_client import aio_client
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GoogleSearch(BaseWebSearchProvider):
    """Google web-search provider.

    Google enforces a strict anti-bot policy and rejects plain HTTP clients,
    so searches here are always executed through the real-browser path.
    """

    def _get_url(self):
        # Base URL is configurable so a mirror or proxy can be used.
        return f"{settings.google_search_base_url}/search"

    def _get_params(self, query: str, **kwargs) -> dict:
        """Build Google query parameters, falling back to configured defaults."""
        cc = kwargs.get("cc") or settings.cc
        lang = kwargs.get("lang") or settings.language
        params = {
            "q": query,
            "lr": f"lang_{lang}",  # restrict results to this language
            "cr": f"country{cc.upper()}",  # restrict results to this country
        }
        return params

    async def search(self, query: str, **kwargs) -> str:
        """Search Google and return the result page as Markdown.

        Always uses a browser: Google's strict anti-bot policy blocks plain
        HTTP clients. Any caller-supplied ``use_browser`` flag is ignored.
        """
        # Drop a caller-supplied flag so it cannot collide with the forced value.
        kwargs.pop("use_browser", None)
        # Delegate to the shared base flow instead of duplicating it here.
        return await super().search(query=query, use_browser=True, **kwargs)


google_search = GoogleSearch()
|
providers/wechat.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from providers.base import BaseWebSearchProvider
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class WechatSearch(BaseWebSearchProvider):
    """WeChat official-account article search via Sogou (https://weixin.sogou.com)."""

    def _get_url(self):
        return "https://weixin.sogou.com/weixin"

    def _get_params(self, query: str, **kwargs) -> dict:
        # type=2 appears to select article results on Sogou — confirm against
        # Sogou's WeChat search documentation.
        return {"query": query, "type": 2}


wechat_search = WechatSearch()
|
server.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import asyncio
|
|
3
|
+
from fastmcp import FastMCP
|
|
4
|
+
from urllib.parse import urljoin
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
from providers.factory import WebSearchProviderFactory
|
|
8
|
+
from providers.enums import WebSearchProvidersEnum, GithubSearchTypesEnum
|
|
9
|
+
from http_client import aio_client
|
|
10
|
+
from config import settings
|
|
11
|
+
from xagents.agent import agent_search, SearchResult
|
|
12
|
+
|
|
13
|
+
# MCP server instance; the @server.tool-decorated functions below register on it.
server = FastMCP("WebSearch MCP Server")

# Shared factory used by every tool to resolve a provider by name.
provider_factory = WebSearchProviderFactory()
|
|
16
|
+
|
|
17
|
+
@server.tool(name="AgentSearch")
async def agent_search_tool(
    query: str,
) -> SearchResult:
    """
    Perform an agentic search: the query is optimized per engine, executed
    concurrently across providers, and the raw pages are formatted into
    structured results.

    Args:
        query: The search query.

    Returns:
        instance of SearchResult
    """
    return await agent_search.search(query)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@server.tool(name="BingSearch")
async def bing_search(
    query: str,
    cc: str = "us",
    lang: str = "en",
    use_browser: bool = False,
) -> str:
    """
    Perform a bing search.

    Args:
        query: The search query.
        cc: Country/Region code for example: us, cn, jp, etc.
        lang: Language such as en, zh-CN, ja, etc
        use_browser: Whether to use a browser to query, you must try use_browser=False first

    Returns:
        Search result in markdown syntax.
    """
    provider = provider_factory.get_provider(
        provider_name=WebSearchProvidersEnum.BING.value
    )
    return await provider.search(query=query, use_browser=use_browser, cc=cc, lang=lang)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@server.tool(name="BaiduSearch")
async def baidu_search(
    query: str,
    use_browser: bool = False,
) -> str:
    """
    Perform a baidu search.

    Args:
        query: The search query.
        use_browser: Whether to use a browser to query, you must try use_browser=False first

    Returns:
        Search result in markdown syntax.
    """
    provider = provider_factory.get_provider(
        provider_name=WebSearchProvidersEnum.BAIDU.value
    )
    return await provider.search(query=query, use_browser=use_browser)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@server.tool(name="GoogleSearch")
async def websearch(
    query: str,
    cc: str = "us",
    lang: str = "en",
) -> str:
    """
    Perform a google search.

    Args:
        query: The search query.
        cc: Country/Region code for example: us, cn, jp, etc.
        lang: Language such as en, zh-CN, ja, etc

    Returns:
        Search result in markdown syntax.
    """
    provider = provider_factory.get_provider(
        provider_name=WebSearchProvidersEnum.GOOGLE.value
    )
    return await provider.search(query=query, cc=cc, lang=lang)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@server.tool(name="OpenUrl")
async def open_url(url: str) -> str:
    """
    Open a URL and retrieve its content.

    Args:
        url: The URL to be opened.

    Returns:
        Web content in markdown syntax.
    """
    markdown = await aio_client.get_markdown(url)
    return markdown
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@server.tool(name="OpenWechatArticleLink")
async def open_wechat_article_link(link: str) -> dict:
    """
    Open a wechat article link and retrieve its content.
    Remember you have to do the WechatSearch first before you open the link, otherwise it will fail

    Args:
        link: The link to be opened. Generally starts with '/link'

    Returns:
        Web content in markdown syntax.
    """
    # Resolve the (usually relative) result link against the Sogou host.
    url = urljoin("https://weixin.sogou.com", link)
    result = await aio_client.get(url)
    # Sogou's interstitial page assembles the real article URL in JavaScript
    # via repeated `url += '...'` statements; stitch the fragments together.
    parts = re.findall(r"url\s*\+=\s*'([^']+)'", result)
    full_url = "".join(parts)
    if not full_url.startswith("https"):
        # No usable redirect was found — likely an anti-bot page was served.
        return {"error": f"bad request with link [{link}]"}
    text = await aio_client.get_markdown(full_url)
    return {"url": full_url, "content": text}
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
@server.tool(name="WechatSearch")
async def wechat_search(query: str) -> str:
    """
    Search WeChat Articles
    Args:
        query: search query.

    Returns:
        Search result in markdown syntax.
    """
    provider = provider_factory.get_provider(
        provider_name=WebSearchProvidersEnum.WECHAT.value
    )
    return await provider.search(query=query)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@server.tool(name="GithubSearch")
async def github_search(
    query: str,
    type: Literal[
        GithubSearchTypesEnum.REPO,
        GithubSearchTypesEnum.CODE,
        GithubSearchTypesEnum.ISSUE,
        GithubSearchTypesEnum.PR,
        GithubSearchTypesEnum.DISCUSS,
        GithubSearchTypesEnum.USER,
        GithubSearchTypesEnum.COMMIT,
        GithubSearchTypesEnum.PACKAGE,
        GithubSearchTypesEnum.WIKI,
        GithubSearchTypesEnum.TOPIC,
        GithubSearchTypesEnum.MARKETPLACE,
    ],
    page: int = 1,
) -> str:
    """
    Search All Github
    Args:
        query: search query.
        type: search type, one of: repositories, code, issues, pullrequests, discussions, users, commits, registrypackages, wikis, topics, marketplace.
        page: pagination param default is 1

    Returns:
        Search result in markdown syntax.
    """
    engine = provider_factory.get_provider(
        provider_name=WebSearchProvidersEnum.GITHUB.value
    )
    result = await engine.search(query=query, type=type, page=page)
    return result
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
async def main():
    """Start the MCP server using the transport configured in settings."""
    mode = settings.server_mode
    if mode == "stdio":
        await server.run_async(transport=mode)
    elif mode in ("sse", "streamable-http"):
        # HTTP-based transports additionally need a bind address and port.
        await server.run_async(transport=mode, host=settings.host, port=settings.port)
    else:
        raise ValueError(f"Unsupported server mode [{mode}]")


if __name__ == "__main__":
    asyncio.run(main())
|
xagents/__init__.py
ADDED
|
File without changes
|
xagents/agent.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from agents import Runner
|
|
2
|
+
from pydantic import BaseModel
|
|
3
|
+
from asyncio import TaskGroup
|
|
4
|
+
from loguru import logger
|
|
5
|
+
|
|
6
|
+
from xagents.query_optimizer import query_opt_agent, WebSearchPlan, WebSearchItem
|
|
7
|
+
from xagents.result_formatter import result_format_agent, SearchResult
|
|
8
|
+
from providers.factory import WebSearchProviderFactory
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RawSearchResult(BaseModel):
    """Unprocessed Markdown output of one search engine run."""

    # Name of the engine that produced this content (e.g. "bing").
    engine_name: str
    # The raw search-result page rendered as Markdown.
    content: str
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class AgentSearch:
    """Agentic search pipeline: optimize → fan-out search → format → merge."""

    async def _optimize_query(self, query: str) -> WebSearchPlan:
        """Turn a raw user query into an engine-specific search plan."""
        run = await Runner.run(input=query, starting_agent=query_opt_agent)
        return run.final_output_as(WebSearchPlan)

    async def _perform_search(self, item: WebSearchItem) -> RawSearchResult:
        """Execute a single planned search against its target engine."""
        provider = WebSearchProviderFactory().get_provider(item.engine_name)
        content = await provider.search(query=item.query, use_browser=True)
        return RawSearchResult(engine_name=item.engine_name, content=content)

    async def _format_result(self, raw_result: RawSearchResult) -> SearchResult:
        """Let the formatter agent turn raw Markdown into structured items."""
        run = await Runner.run(
            input=raw_result.model_dump_json(), starting_agent=result_format_agent
        )
        return run.final_output_as(SearchResult)

    async def _execute_search_plan(self, plan: WebSearchPlan) -> list[RawSearchResult]:
        """Run every planned search concurrently and collect the raw pages."""
        async with TaskGroup() as tg:
            pending = [tg.create_task(self._perform_search(s)) for s in plan.searches]
        # The TaskGroup has exited, so every task is guaranteed finished here.
        return [task.result() for task in pending]

    def _merge_search_results(self, search_results: list[SearchResult]) -> SearchResult:
        """Flatten the per-engine results into one SearchResult."""
        merged = []
        for result in search_results:
            merged.extend(result.items)
        return SearchResult(items=merged)

    async def search(self, query: str) -> SearchResult:
        """Full pipeline entry point: plan, search, format, merge."""
        plan = await self._optimize_query(query=query)
        raw_results = await self._execute_search_plan(plan=plan)
        async with TaskGroup() as tg:
            pending = [tg.create_task(self._format_result(r)) for r in raw_results]
        formatted = [task.result() for task in pending]
        return self._merge_search_results(formatted)


agent_search = AgentSearch()
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from agents import Agent
|
|
2
|
+
from config import settings
|
|
3
|
+
from agents.extensions.models.litellm_model import LitellmModel
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class WebSearchItem(BaseModel):
    """One planned search: which engine to use and the optimized query for it."""

    engine_name: str = Field(
        description="websearch engine name in lowercase eg: bing, baidu"
    )
    query: str = Field(description="optimized query")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class WebSearchPlan(BaseModel):
    """Output of the query-optimizer agent: the searches to run concurrently."""

    searches: list[WebSearchItem]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# System prompt for the query-optimizer agent: rewrite the user's query to
# fit the strengths of the chosen engine (Bing vs Baidu) and its language.
INSTRUCTION = """
You are a Query Optimizer Agent.
Your task is to reformulate user-provided search queries to maximize search effectiveness, based on the selected search engine and the query's language.
You must understand and adapt to the strengths and limitations of each search engine, and generate optimized queries that match their search capabilities.
---
🔍 Bing (Microsoft Search Engine)
- Strengths:
- Excellent for English-language queries.
- Supports advanced operators like `site:`, `filetype:`, `intitle:`, `before:`, `after:`.
- Well-suited for technical, financial, and global information.
- Handles both keyword and natural language queries.

- Query Optimization Strategy:
- For English queries, use precise keywords + advanced operators.
- For Chinese queries, use concise keywords + add clarity-enhancing modifiers (e.g., 实时, 走势).
- Add context (e.g., date, topic scope) to disambiguate broad queries.
---

🔍 Baidu (Chinese Search Engine)

- Strengths:
- Strong Chinese-language understanding and entity recognition.
- Excellent access to local Chinese content, government sources, brands, and services.
- Excels in natural-language, brand-aware queries.

- Weaknesses:
- Poor support for English queries and advanced operators.
- Limited support for Boolean/structured syntax.

- Query Optimization Strategy:
- Use natural, fluent Chinese queries.
- Add brand names, location terms, or entity types to improve precision.
- Avoid using English or complex operators. Translate English concepts into Chinese and simplify.

---

🎯 Optimization Rules:

- Detect the language of the original query.
- Match query complexity to search engine capabilities.
- Add specificity (brand, time, location) and intent indicators (e.g., "实时", "technical analysis", "forecast").
- Avoid redundant words or vague terms.
- Your goal is to maximize the relevance and precision of search results for the target engine.
"""

# The optimizer agent; structured output is enforced via WebSearchPlan.
query_opt_agent = Agent(
    name="Query Optimizer Agent",
    instructions=INSTRUCTION,
    model=LitellmModel(
        model=f"openai/{settings.llm_model_name}",
        base_url=settings.llm_base_url,
        api_key=settings.llm_api_key,
    ),
    output_type=WebSearchPlan,
)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
from agents import Agent
|
|
3
|
+
from agents.extensions.models.litellm_model import LitellmModel
|
|
4
|
+
from config import settings
|
|
5
|
+
|
|
6
|
+
class SearchResultItem(BaseModel):
    """One structured search hit extracted from a raw Markdown result page."""

    title: str = Field(description="search item title")
    description: str = Field(description="search item description")
    source_url: str = Field(description="search item source url")
    engine: str = Field(description="search engine name")
|
|
11
|
+
|
|
12
|
+
class SearchResult(BaseModel):
    """Structured output of the formatter agent: the extracted result items."""

    items: list[SearchResultItem]
|
|
14
|
+
|
|
15
|
+
# System prompt for the result-formatter agent. The constant was historically
# misspelled "INSTRCUTION"; the correct spelling is now primary and the old
# name is kept below as a backward-compatible alias.
INSTRUCTION = """
You are a Search Result Formatter Agent.

Your task is to extract structured search result items from unstructured or semi-structured search result content written in Markdown.

Each result must be extracted as a SearchResultItem object with the following fields:
- title: The clickable title of the result (typically found in bold or header formatting).
- description: A concise summary or snippet describing the content of the result.
- source_url: The original source URL (typically found in parentheses or Markdown links).

---

🧪 Input Format:
You will receive search results formatted in Markdown, for example:

- **Gold Price Today - Kitco**
[https://www.kitco.com/gold-price-today](https://www.kitco.com/gold-price-today)
Live spot gold prices and charts. Get historical data and news updates on XAUUSD.

- **Shanghai Gold Exchange Daily Price**
[https://www.sge.com.cn](https://www.sge.com.cn)
Official gold trading prices published by the Shanghai Gold Exchange.

---

✅ Extraction Rules:
1. For each search engine group (e.g., Bing, Baidu), extract **at most the top 3 results**.
2. Extract each result as one `SearchResultItem`.
3. If a result has multiple lines, group them based on proximity (title + description + link).
4. Ignore Markdown formatting — only extract plain text content.
5. If any field is missing, skip the entry.
6. Return a **JSON array** of SearchResultItem objects.
"""

# Backward-compatible alias for the original (misspelled) public name.
INSTRCUTION = INSTRUCTION

# Formatter agent; structured output is enforced via SearchResult.
result_format_agent = Agent(
    name="ResultFormatAgent",
    instructions=INSTRUCTION,
    model=LitellmModel(
        model=f"openai/{settings.llm_model_name}",
        base_url=settings.llm_base_url,
        api_key=settings.llm_api_key,
    ),
    output_type=SearchResult,
)
|