iflow-mcp_howe829-websearch-mcp-server 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
config.py ADDED
@@ -0,0 +1,20 @@
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+ class Settings(BaseSettings):
+     model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
+
+     bing_search_base_url: str
+     google_search_base_url: str
+     cc: str
+     language: str
+     impersonate: str
+     host: str
+     port: int
+     server_mode: str
+     llm_base_url: str
+     llm_api_key: str
+     llm_model_name: str
+
+
+ settings = Settings()
http_client.py ADDED
@@ -0,0 +1,66 @@
+ import asyncio
+ import sys
+ import nodriver as uc
+ from curl_cffi import requests, AsyncSession
+ from markitdown import MarkItDown
+ from io import BytesIO
+ from requests import Request
+ from typing import Optional
+ from config import settings
+
+ md = MarkItDown(enable_plugins=False)
+
+
+ class AsyncHttpClient:
+     def __init__(self):
+         if self.is_windows:
+             self._session = requests.Session(impersonate=settings.impersonate)
+         else:
+             self._session = AsyncSession(impersonate=settings.impersonate)
+
+     @property
+     def is_windows(self):
+         return sys.platform.startswith("win")
+
+     async def _get_by_curl_async(self, url: str, params: Optional[dict]) -> str:
+         self._session.acurl.loop = asyncio.get_running_loop()
+         response = await self._session.get(url=url, params=params)
+         return response.text
+
+     def _get_by_curl(self, url: str, params: Optional[dict]) -> str:
+         response = self._session.get(url=url, params=params)
+         return response.text
+
+     async def _get_by_browser(self, url: str, params: Optional[dict]) -> str:
+         driver = await uc.start()
+         try:
+             req = Request("GET", url, params=params).prepare()
+             url = req.url if req.url is not None else url
+             tab = await driver.get(url)
+             await asyncio.sleep(5)
+             text = await tab.get_content()
+             return text
+         except Exception as e:
+             return f"Error when using browser: {str(e)}"
+         finally:
+             driver.stop()
+
+     async def get(
+         self, url: str, params: Optional[dict] = None, use_browser: bool = False
+     ) -> str:
+         if use_browser:
+             return await self._get_by_browser(url, params)
+         if self.is_windows:
+             return self._get_by_curl(url, params)
+         return await self._get_by_curl_async(url, params)
+
+     async def get_markdown(
+         self, url: str, params: Optional[dict] = None, use_browser: bool = False
+     ) -> str:
+         text = await self.get(url=url, params=params, use_browser=use_browser)
+         buffer = BytesIO(text.encode("utf-8"))
+         result = md.convert(buffer)
+         return result.text_content
+
+
+ aio_client = AsyncHttpClient()
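(Illustrative sketch, not part of the package: how the shared `aio_client` above is typically used; the URL is a placeholder.)

```python
import asyncio

from http_client import aio_client  # module shown above


async def demo() -> None:
    # Fetch a page and convert it to Markdown via MarkItDown, as get_markdown does internally.
    text = await aio_client.get_markdown("https://example.com")
    print(text[:200])


asyncio.run(demo())
```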
iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,113 @@
+ Metadata-Version: 2.4
+ Name: iflow-mcp_howe829-websearch-mcp-server
+ Version: 0.1.0
+ Summary: Add your description here
+ License-File: LICENSE
+ Requires-Python: >=3.11
+ Requires-Dist: curl-cffi>=0.11.1
+ Requires-Dist: fastmcp>=2.5.1
+ Requires-Dist: loguru>=0.7.3
+ Requires-Dist: markitdown[all]>=0.1.1
+ Requires-Dist: nodriver>=0.46.1
+ Requires-Dist: openai-agents[litellm]>=0.0.17
+ Requires-Dist: pydantic-settings>=2.9.1
+ Requires-Dist: pydantic>=2.11.5
+ Requires-Dist: pytest-asyncio>=1.0.0
+ Requires-Dist: pytest>=8.3.5
+ Requires-Dist: ruff>=0.11.12
+ Description-Content-Type: text/markdown
+
+ # WebSearch MCP Server
+
+ A simple web search server that supports free search and converts URL content to Markdown. [中文](README-zh.md)
+
+ ## Features
+
+ - **Web Search:** Perform web searches using different providers. Supported providers: Bing, Baidu, Google (with browser use).
+ - **Markdown Conversion:** Convert the content of a URL into Markdown format.
+ - **WeChat Official Account Articles:** Search and retrieve WeChat official account article content.
+ - **GitHub Search:** Easily search for anything on GitHub — repositories, users, issues, and more.
+
+ ## Getting Started
+
+ ### Prerequisites
+
+ - Python 3.12 or higher
+ - uv for dependency management (or pip)
+
+ ### Installation
+
+ 1. Clone the repository:
+
+ ```bash
+ git clone https://github.com/Howe829/websearch-mcp-server.git
+ cd websearch-mcp-server
+ ```
+
+ 2. Install the dependencies using uv (recommended):
+
+ ```bash
+ uv venv && uv sync
+ ```
+
+ Or, if you prefer using pip:
+
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ ### Configuration
+
+ 1. Create a `.env` file based on the `.env.example` file:
+
+ ```bash
+ cp .env.example .env
+ ```
+
+ 2. Modify the `.env` file with your desired settings:
+
+ ```
+ BING_SEARCH_BASE_URL="https://www.bing.com"
+ LANGUAGE="en"
+ CC="us"
+ IMPERSONATE="edge"
+ HOST=127.0.0.1
+ PORT=8002
+ SERVER_MODE=streamable-http
+ ```
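(Note, not part of the package README: `config.py` in this release declares additional required settings with no defaults, so, assuming pydantic-settings' usual env-var mapping, the `.env` file likely also needs entries along these lines; the URL, key, and model name below are placeholders.)

```
GOOGLE_SEARCH_BASE_URL="https://www.google.com"
LLM_BASE_URL="https://your-openai-compatible-endpoint/v1"
LLM_API_KEY="your-api-key"
LLM_MODEL_NAME="your-model-name"
```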
+
+ ### Usage
+
+ Run the server using uv:
+
+ ```bash
+ uv run python server.py
+ ```
+
+ Or run the server using python:
+
+ ```bash
+ source .venv/bin/activate
+ python server.py
+ ```
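(Illustrative sketch, not part of the package: with the server running in `streamable-http` mode on the host/port configured above, a tool could be called from Python roughly as follows; the fastmcp `Client` usage and the `/mcp` path are assumptions, and the query is a placeholder.)

```python
import asyncio

from fastmcp import Client  # fastmcp is already a dependency of this package


async def demo() -> None:
    # Connect to the streamable-http endpoint exposed by server.py.
    async with Client("http://127.0.0.1:8002/mcp") as client:
        # Invoke the BingSearch tool registered in server.py.
        result = await client.call_tool(
            "BingSearch", {"query": "fastmcp", "cc": "us", "lang": "en"}
        )
        print(result)


asyncio.run(demo())
```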
+
+ ## Contribution
+ Contributions are welcome!
+
+ This project uses pytest for unit tests:
+ ```bash
+ uv pip install pytest
+ uv pip install pytest-asyncio
+ uv run pytest
+ ```
+
+ And ruff for code style formatting:
+ ```bash
+ uv pip install ruff
+ ruff format .
+ ```
+
+ Use the MCP Inspector to debug this server:
+ ```bash
+ uv run fastmcp dev server.py
+ ```
iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,21 @@
+ config.py,sha256=YJ-ghBsucZW3X4jAUhhklVAAleNt7Sp0K4_jnxdbqXk,431
+ http_client.py,sha256=S5n21NcVI27C4nMMgssHFEB3LR8vMGaUqtPAYMkjJvI,2214
+ providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ providers/baidu.py,sha256=1Ed0FANU-B4UBMwRZLHs6gbP1s-Wcq2NPkvlygwwMLg,1286
+ providers/base.py,sha256=2KrFcQQyemQzxHnyzFyX4Wq4ZXvKkjZMz2L-FcmOalg,652
+ providers/bing.py,sha256=UcW5rGClRVwdLUnKgR5ai72hCvtUztFH23HhDdCr1Fs,1468
+ providers/enums.py,sha256=ASog8l8ruMDLxBYRb_I2HOQ-kypbKGXKtf6nAcS2Ee0,475
+ providers/factory.py,sha256=VLMHjTO6IsSa9nllxcL43ejCsu1rTR4F6xLjVnxGK68,991
+ providers/github.py,sha256=PIZcXVvOir9Z57xKOFsDNGNSPE7efpRkl_tQKPb8hrY,539
+ providers/google.py,sha256=woJaA-C6MZU8osn5KywNGooSKGduKaZJJmVNxZxTYsM,942
+ providers/wechat.py,sha256=n7v5KHiQmrhHtKc4cI7YGNMd4GDp3MFy7XhdTc0PvCM,371
+ server.py,sha256=W8UcidG39nqAIzPO_cpaNeohTQ4WGLD5Nki0pbWvLTA,5549
+ xagents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ xagents/agent.py,sha256=kKXNywezkhqOUeojP8vEkSk9f1EKtUG8pa3DRmLV6uA,2333
+ xagents/query_optimizer.py,sha256=Z8Sz4Lpkyfnl0PoVv6tuXv3eGYBXPFZiZg-rmHu1Vvc,2677
+ xagents/result_formatter.py,sha256=le9Nw4WLayUoWZqqvAYjgb1J2BEvXqtCvkEgdYqC018,2195
+ iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/METADATA,sha256=_HzWBiojgoItnep9Wmy3AAqv8tzXFjp-44Gqnph1bqU,2458
+ iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/entry_points.txt,sha256=iILKixFfn48v6KB6N8J_cw2DaW4eU6fdzHqhg5XYLDc,53
+ iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/licenses/LICENSE,sha256=sWqgeExPhDaW2FhUEKSl7rsxIfMzSgo8Nj8i0ilWnJs,1072
+ iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/RECORD,,
iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.28.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ websearch-mcp-server = server:main
iflow_mcp_howe829_websearch_mcp_server-0.1.0.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) [2025] [Howard Chan]
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
providers/__init__.py ADDED
File without changes
providers/baidu.py ADDED
@@ -0,0 +1,35 @@
+ from providers.base import BaseWebSearchProvider
+
+
+ class BaiduSearch(BaseWebSearchProvider):
+     def _get_url(self):
+         return "https://www.baidu.com/s"
+
+     def _get_params(self, query: str, **kwargs) -> dict:
+         params = {
+             "wd": query,
+         }
+         return params
+
+     @property
+     def description(self):
+         return """
+         Strengths:
+         - Strong understanding of Chinese language and entity recognition
+         - Better coverage of local Chinese content (government, regional news, company sites)
+         - Excellent performance with brand names and trending topics
+         - Fast, responsive search experience
+
+         Weaknesses:
+         - Weak support for English-language queries
+         - Limited support for structured queries (e.g., Boolean logic, site:, filetype:)
+         - Poor international or technical content retrieval compared to Bing or Google
+
+         Query Strategy:
+         - For Chinese: Use natural language phrasing with brand or location keywords (e.g., “今日金价 上海黄金交易所”)
+         - Avoid complex English queries; translate and rephrase into Chinese where possible
+         - Emphasize relevance over syntax — focus on intent expression
+         """
+
+
+ baidu_search = BaiduSearch()
providers/base.py ADDED
@@ -0,0 +1,21 @@
+ from abc import abstractmethod
+ from pydantic import BaseModel
+ from config import settings
+ from http_client import aio_client
+
+
+ class BaseWebSearchProvider(BaseModel):
+     @abstractmethod
+     def _get_url(self) -> str:
+         raise NotImplementedError
+
+     @abstractmethod
+     def _get_params(self, query: str, **kwargs) -> dict:
+         raise NotImplementedError
+
+     async def search(self, query: str, use_browser: bool = False, **kwargs) -> str:
+         url = self._get_url()
+         params = self._get_params(query=query, **kwargs)
+         return await aio_client.get_markdown(
+             url=url, params=params, use_browser=use_browser
+         )
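(Illustrative sketch, not part of the package: a new provider follows the same pattern as `baidu.py` above; the DuckDuckGo engine, URL, and parameters here are hypothetical.)

```python
from providers.base import BaseWebSearchProvider


class DuckDuckGoSearch(BaseWebSearchProvider):
    def _get_url(self) -> str:
        return "https://duckduckgo.com/html"

    def _get_params(self, query: str, **kwargs) -> dict:
        return {"q": query}


# Usage mirrors the bundled providers: `await duckduckgo_search.search("some query")`.
duckduckgo_search = DuckDuckGoSearch()
```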
providers/bing.py ADDED
@@ -0,0 +1,41 @@
+ from providers.base import BaseWebSearchProvider
+ from config import settings
+
+
+ class BingSearch(BaseWebSearchProvider):
+     """
+     please check out https://www.bing.com/account/general for more information
+     """
+
+     def _get_url(self):
+         return f"{settings.bing_search_base_url}/search"
+
+     def _get_params(self, query: str, **kwargs) -> dict:
+         params = {
+             "q": query,
+             "cc": kwargs.get("cc") or settings.cc,
+             "setlang": kwargs.get("lang") or settings.language,
+         }
+         return params
+
+     @property
+     def description(self):
+         return """
+         Strengths:
+         - Excellent support for English queries
+         - Handles structured search operators well (site:, filetype:, before:, after:, etc.)
+         - Good at answering natural language questions (especially in English)
+         - Strong performance for technical, coding, and international finance topics
+
+         Weaknesses:
+         - Less effective at retrieving localized Chinese content
+         - Sometimes inferior Chinese-language result quality compared to Baidu
+
+         Query Strategy:
+         - For English: Use precise keywords + operators (e.g., `gold price today site:kitco.com`)
+         - For Chinese: Keep queries concise but add intent-enhancing modifiers (e.g., “实时”, “走势图”)
+         - Prefer Bing for technical, financial, or global information needs
+         """
+
+
+ bing_search = BingSearch()
providers/enums.py ADDED
@@ -0,0 +1,23 @@
+ from enum import Enum
+
+
+ class WebSearchProvidersEnum(str, Enum):
+     BAIDU = "baidu"
+     BING = "bing"
+     WECHAT = "wechat"
+     GITHUB = "github"
+     GOOGLE = "google"
+
+
+ class GithubSearchTypesEnum(str, Enum):
+     REPO = "repositories"
+     CODE = "code"
+     ISSUE = "issues"
+     PR = "pullrequests"
+     DISCUSS = "discussions"
+     USER = "users"
+     COMMIT = "commits"
+     PACKAGE = "registrypackages"
+     WIKI = "wikis"
+     TOPIC = "topics"
+     MARKETPLACE = "marketplace"
providers/factory.py ADDED
@@ -0,0 +1,24 @@
+ from providers.base import BaseWebSearchProvider
+ from providers.enums import WebSearchProvidersEnum
+ from providers.bing import bing_search
+ from providers.wechat import wechat_search
+ from providers.github import github_search
+ from providers.baidu import baidu_search
+ from providers.google import google_search
+
+
+ class WebSearchProviderFactory:
+     def get_provider(self, provider_name: str) -> BaseWebSearchProvider:
+         match provider_name:
+             case WebSearchProvidersEnum.BAIDU.value:
+                 return baidu_search
+             case WebSearchProvidersEnum.BING.value:
+                 return bing_search
+             case WebSearchProvidersEnum.WECHAT.value:
+                 return wechat_search
+             case WebSearchProvidersEnum.GITHUB.value:
+                 return github_search
+             case WebSearchProvidersEnum.GOOGLE.value:
+                 return google_search
+             case _:
+                 raise ValueError(f"Unsupported provider: [{provider_name}]")
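(Illustrative sketch, not part of the package: how the factory is used, mirroring the MCP tools in `server.py` later in this diff; the query is a placeholder and the call performs a real network request.)

```python
import asyncio

from providers.enums import WebSearchProvidersEnum
from providers.factory import WebSearchProviderFactory


async def demo() -> None:
    # Look up a provider by name and run a search, returning Markdown text.
    engine = WebSearchProviderFactory().get_provider(WebSearchProvidersEnum.BING.value)
    markdown = await engine.search(query="gold price today", use_browser=False)
    print(markdown[:200])


asyncio.run(demo())
```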
providers/github.py ADDED
@@ -0,0 +1,22 @@
+ from providers.base import BaseWebSearchProvider
+ from providers.enums import GithubSearchTypesEnum
+
+
+ class GithubSearch(BaseWebSearchProvider):
+     """
+     https://www.github.com
+     """
+
+     def _get_url(self):
+         return "https://www.github.com/search"
+
+     def _get_params(self, query: str, **kwargs) -> dict:
+         params = {
+             "q": query,
+             "type": kwargs.get("type", GithubSearchTypesEnum.REPO.value),
+             "p": kwargs.get("page", 1),
+         }
+         return params
+
+
+ github_search = GithubSearch()
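(Illustrative note, not part of the package: with the defaults above, `GithubSearch` fetches a GitHub search results page, roughly the following URL; the query value is a placeholder.)

```python
from urllib.parse import urlencode

# Approximate request built by GithubSearch for query="fastmcp" with default kwargs.
params = {"q": "fastmcp", "type": "repositories", "p": 1}
print("https://www.github.com/search?" + urlencode(params))
# -> https://www.github.com/search?q=fastmcp&type=repositories&p=1
```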
providers/google.py ADDED
@@ -0,0 +1,30 @@
+ from providers.base import BaseWebSearchProvider
+ from config import settings
+ from http_client import aio_client
+
+
+ class GoogleSearch(BaseWebSearchProvider):
+     def _get_url(self):
+         return f"{settings.google_search_base_url}/search"
+
+     def _get_params(self, query: str, **kwargs) -> dict:
+         cc = kwargs.get("cc") or settings.cc
+         lang = kwargs.get("lang") or settings.language
+         params = {
+             "q": query,
+             "lr": f"lang_{lang}",
+             "cr": f"country{cc.upper()}",
+         }
+         return params
+
+     async def search(self, query: str, **kwargs) -> str:
+         url = self._get_url()
+         params = self._get_params(query=query, **kwargs)
+         return await aio_client.get_markdown(
+             url=url,
+             params=params,
+             use_browser=True,  # Google search must use a browser due to its strict anti-bot policy
+         )
+
+
+ google_search = GoogleSearch()
providers/wechat.py ADDED
@@ -0,0 +1,17 @@
+ from providers.base import BaseWebSearchProvider
+
+
+ class WechatSearch(BaseWebSearchProvider):
+     """
+     https://weixin.sogou.com
+     """
+
+     def _get_url(self):
+         return "https://weixin.sogou.com/weixin"
+
+     def _get_params(self, query: str, **kwargs) -> dict:
+         params = {"query": query, "type": 2}
+         return params
+
+
+ wechat_search = WechatSearch()
server.py ADDED
@@ -0,0 +1,211 @@
+ import re
+ import asyncio
+ from fastmcp import FastMCP
+ from urllib.parse import urljoin
+ from typing import Literal
+
+ from providers.factory import WebSearchProviderFactory
+ from providers.enums import WebSearchProvidersEnum, GithubSearchTypesEnum
+ from http_client import aio_client
+ from config import settings
+ from xagents.agent import agent_search, SearchResult
+
+ server = FastMCP("WebSearch MCP Server")
+
+ provider_factory = WebSearchProviderFactory()
+
+ @server.tool(name="AgentSearch")
+ async def agent_search_tool(
+     query: str,
+ ) -> SearchResult:
+     """
+     Perform an agentic search.
+
+     Args:
+         query: The search query.
+
+     Returns:
+         Instance of SearchResult.
+     """
+     return await agent_search.search(query)
+
+
+ @server.tool(name="BingSearch")
+ async def bing_search(
+     query: str,
+     cc: str = "us",
+     lang: str = "en",
+     use_browser: bool = False,
+ ) -> str:
+     """
+     Perform a Bing search.
+
+     Args:
+         query: The search query.
+         cc: Country/region code, for example: us, cn, jp, etc.
+         lang: Language such as en, zh-CN, ja, etc.
+         use_browser: Whether to use a browser for the query; try use_browser=False first.
+
+     Returns:
+         Search result in markdown syntax.
+     """
+
+     engine = provider_factory.get_provider(
+         provider_name=WebSearchProvidersEnum.BING.value
+     )
+     result = await engine.search(query=query, use_browser=use_browser, cc=cc, lang=lang)
+     return result
+
+
+ @server.tool(name="BaiduSearch")
+ async def baidu_search(
+     query: str,
+     use_browser: bool = False,
+ ) -> str:
+     """
+     Perform a Baidu search.
+
+     Args:
+         query: The search query.
+         use_browser: Whether to use a browser for the query; try use_browser=False first.
+
+     Returns:
+         Search result in markdown syntax.
+     """
+
+     engine = provider_factory.get_provider(
+         provider_name=WebSearchProvidersEnum.BAIDU.value
+     )
+     result = await engine.search(query=query, use_browser=use_browser)
+     return result
+
+
+ @server.tool(name="GoogleSearch")
+ async def websearch(
+     query: str,
+     cc: str = "us",
+     lang: str = "en",
+ ) -> str:
+     """
+     Perform a Google search.
+
+     Args:
+         query: The search query.
+         cc: Country/region code, for example: us, cn, jp, etc.
+         lang: Language such as en, zh-CN, ja, etc.
+
+     Returns:
+         Search result in markdown syntax.
+     """
+
+     engine = provider_factory.get_provider(
+         provider_name=WebSearchProvidersEnum.GOOGLE.value
+     )
+     result = await engine.search(query=query, cc=cc, lang=lang)
+     return result
+
+
+ @server.tool(name="OpenUrl")
+ async def open_url(url: str) -> str:
+     """
+     Open a URL and retrieve its content.
+
+     Args:
+         url: The URL to be opened.
+
+     Returns:
+         Web content in markdown syntax.
+     """
+     return await aio_client.get_markdown(url)
+
+
+ @server.tool(name="OpenWechatArticleLink")
+ async def open_wechat_article_link(link: str) -> dict:
+     """
+     Open a WeChat article link and retrieve its content.
+     Remember to run WechatSearch first before opening the link; otherwise the request will fail.
+
+     Args:
+         link: The link to be opened; generally starts with '/link'.
+
+     Returns:
+         Web content in markdown syntax.
+     """
+     url = urljoin("https://weixin.sogou.com", link)
+     result = await aio_client.get(url)
+     parts = re.findall(r"url\s*\+=\s*'([^']+)'", result)
+     full_url = "".join(parts)
+     if not full_url.startswith("https"):
+         return {"error": f"bad request with link [{link}]"}
+     text = await aio_client.get_markdown(full_url)
+     return {"url": full_url, "content": text}
+
+
+ @server.tool(name="WechatSearch")
+ async def wechat_search(query: str) -> str:
+     """
+     Search WeChat articles.
+     Args:
+         query: The search query.
+
+     Returns:
+         Search result in markdown syntax.
+     """
+
+     engine = provider_factory.get_provider(
+         provider_name=WebSearchProvidersEnum.WECHAT.value
+     )
+     result = await engine.search(query=query)
+     return result
+
+
+ @server.tool(name="GithubSearch")
+ async def github_search(
+     query: str,
+     type: Literal[
+         GithubSearchTypesEnum.REPO,
+         GithubSearchTypesEnum.CODE,
+         GithubSearchTypesEnum.ISSUE,
+         GithubSearchTypesEnum.PR,
+         GithubSearchTypesEnum.DISCUSS,
+         GithubSearchTypesEnum.USER,
+         GithubSearchTypesEnum.COMMIT,
+         GithubSearchTypesEnum.PACKAGE,
+         GithubSearchTypesEnum.WIKI,
+         GithubSearchTypesEnum.TOPIC,
+         GithubSearchTypesEnum.MARKETPLACE,
+     ],
+     page: int = 1,
+ ) -> str:
+     """
+     Search all of GitHub.
+     Args:
+         query: The search query.
+         type: Search type; supported: repositories, code, issues, pullrequests, users, discussions, commits, registrypackages, wikis, topics, marketplace.
+         page: Pagination parameter; default is 1.
+
+     Returns:
+         Search result in markdown syntax.
+     """
+
+     engine = provider_factory.get_provider(
+         provider_name=WebSearchProvidersEnum.GITHUB.value
+     )
+     result = await engine.search(query=query, type=type, page=page)
+     return result
+
+
+ async def main():
+     match settings.server_mode:
+         case "stdio":
+             await server.run_async(transport=settings.server_mode)
+         case "sse" | "streamable-http":
+             await server.run_async(
+                 transport=settings.server_mode, host=settings.host, port=settings.port
+             )
+         case _:
+             raise ValueError(f"Unsupported server mode [{settings.server_mode}]")
+
+
+ if __name__ == "__main__":
+     asyncio.run(main())
xagents/__init__.py ADDED
File without changes
xagents/agent.py ADDED
@@ -0,0 +1,59 @@
+ from agents import Runner
+ from pydantic import BaseModel
+ from asyncio import TaskGroup
+ from loguru import logger
+
+ from xagents.query_optimizer import query_opt_agent, WebSearchPlan, WebSearchItem
+ from xagents.result_formatter import result_format_agent, SearchResult
+ from providers.factory import WebSearchProviderFactory
+
+
+ class RawSearchResult(BaseModel):
+     engine_name: str
+     content: str
+
+
+ class AgentSearch:
+     async def _optimize_query(self, query: str) -> WebSearchPlan:
+         result = await Runner.run(input=query, starting_agent=query_opt_agent)
+         return result.final_output_as(WebSearchPlan)
+
+     async def _perform_search(self, item: WebSearchItem) -> RawSearchResult:
+         engine = WebSearchProviderFactory().get_provider(item.engine_name)
+         result = await engine.search(query=item.query, use_browser=True)
+         return RawSearchResult(engine_name=item.engine_name, content=result)
+
+     async def _format_result(self, raw_result: RawSearchResult) -> SearchResult:
+         result = await Runner.run(
+             input=raw_result.model_dump_json(), starting_agent=result_format_agent
+         )
+         return result.final_output_as(SearchResult)
+
+     async def _execute_search_plan(self, plan: WebSearchPlan) -> list[RawSearchResult]:
+         tasks = []
+         async with TaskGroup() as tg:
+             for search_item in plan.searches:
+                 task = tg.create_task(self._perform_search(search_item))
+                 tasks.append(task)
+
+         results: list[RawSearchResult] = [t.result() for t in tasks]
+         return results
+
+     def _merge_search_results(self, search_results: list[SearchResult]) -> SearchResult:
+         items = []
+         for r in search_results:
+             items.extend(r.items)
+         return SearchResult(items=items)
+
+     async def search(self, query: str) -> SearchResult:
+         search_plan = await self._optimize_query(query=query)
+         raw_results = await self._execute_search_plan(plan=search_plan)
+         tasks = []
+         async with TaskGroup() as tg:
+             for raw_result in raw_results:
+                 task = tg.create_task(self._format_result(raw_result))
+                 tasks.append(task)
+         results: list[SearchResult] = [t.result() for t in tasks]
+         return self._merge_search_results(results)
+
+ agent_search = AgentSearch()
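(Illustrative sketch, not part of the package: `server.py`'s AgentSearch tool simply awaits this orchestrator; calling it directly could look like the following, assuming a configured `.env` with LLM credentials; the query is a placeholder.)

```python
import asyncio

from xagents.agent import agent_search


async def demo() -> None:
    # Plans engine-specific queries, runs them concurrently, then formats the results.
    result = await agent_search.search("gold price today")
    for item in result.items:
        print(item.engine, item.title, item.source_url)


asyncio.run(demo())
```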
xagents/query_optimizer.py ADDED
@@ -0,0 +1,72 @@
+ from agents import Agent
+ from config import settings
+ from agents.extensions.models.litellm_model import LitellmModel
+ from pydantic import BaseModel, Field
+
+
+ class WebSearchItem(BaseModel):
+     engine_name: str = Field(
+         description="websearch engine name in lowercase eg: bing, baidu"
+     )
+     query: str = Field(description="optimized query")
+
+
+ class WebSearchPlan(BaseModel):
+     searches: list[WebSearchItem]
+
+
+ INSTRUCTION = """
+ You are a Query Optimizer Agent.
+ Your task is to reformulate user-provided search queries to maximize search effectiveness, based on the selected search engine and the query's language.
+ You must understand and adapt to the strengths and limitations of each search engine, and generate optimized queries that match their search capabilities.
+ ---
+ 🔍 Bing (Microsoft Search Engine)
+ - Strengths:
+   - Excellent for English-language queries.
+   - Supports advanced operators like `site:`, `filetype:`, `intitle:`, `before:`, `after:`.
+   - Well-suited for technical, financial, and global information.
+   - Handles both keyword and natural language queries.
+
+ - Query Optimization Strategy:
+   - For English queries, use precise keywords + advanced operators.
+   - For Chinese queries, use concise keywords + add clarity-enhancing modifiers (e.g., 实时, 走势).
+   - Add context (e.g., date, topic scope) to disambiguate broad queries.
+ ---
+
+ 🔍 Baidu (Chinese Search Engine)
+
+ - Strengths:
+   - Strong Chinese-language understanding and entity recognition.
+   - Excellent access to local Chinese content, government sources, brands, and services.
+   - Excels in natural-language, brand-aware queries.
+
+ - Weaknesses:
+   - Poor support for English queries and advanced operators.
+   - Limited support for Boolean/structured syntax.
+
+ - Query Optimization Strategy:
+   - Use natural, fluent Chinese queries.
+   - Add brand names, location terms, or entity types to improve precision.
+   - Avoid using English or complex operators. Translate English concepts into Chinese and simplify.
+
+ ---
+
+ 🎯 Optimization Rules:
+
+ - Detect the language of the original query.
+ - Match query complexity to search engine capabilities.
+ - Add specificity (brand, time, location) and intent indicators (e.g., "实时", "technical analysis", "forecast").
+ - Avoid redundant words or vague terms.
+ - Your goal is to maximize the relevance and precision of search results for the target engine.
+ """
+
+ query_opt_agent = Agent(
+     name="Query Optimizer Agent",
+     instructions=INSTRUCTION,
+     model=LitellmModel(
+         model=f"openai/{settings.llm_model_name}",
+         base_url=settings.llm_base_url,
+         api_key=settings.llm_api_key,
+     ),
+     output_type=WebSearchPlan,
+ )
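(Illustrative note, not part of the package: the optimizer agent's structured output is a `WebSearchPlan`; a hand-written equivalent would look like this, with made-up values.)

```python
from xagents.query_optimizer import WebSearchPlan

# Build the same structure the agent is asked to return.
plan = WebSearchPlan.model_validate(
    {
        "searches": [
            {"engine_name": "bing", "query": "gold price today site:kitco.com"},
            {"engine_name": "baidu", "query": "今日金价 上海黄金交易所"},
        ]
    }
)
print(plan.searches[0].engine_name)  # "bing"
```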
xagents/result_formatter.py ADDED
@@ -0,0 +1,53 @@
+ from pydantic import BaseModel, Field
+ from agents import Agent
+ from agents.extensions.models.litellm_model import LitellmModel
+ from config import settings
+
+ class SearchResultItem(BaseModel):
+     title: str = Field(description="search item title")
+     description: str = Field(description="search item description")
+     source_url: str = Field(description="search item source url")
+     engine: str = Field(description="search engine name")
+
+ class SearchResult(BaseModel):
+     items: list[SearchResultItem]
+
+ INSTRUCTION = """
+ You are a Search Result Formatter Agent.
+
+ Your task is to extract structured search result items from unstructured or semi-structured search result content written in Markdown.
+
+ Each result must be extracted as a SearchResultItem object with the following fields:
+ - title: The clickable title of the result (typically found in bold or header formatting).
+ - description: A concise summary or snippet describing the content of the result.
+ - source_url: The original source URL (typically found in parentheses or Markdown links).
+
+ ---
+
+ 🧪 Input Format:
+ You will receive search results formatted in Markdown, for example:
+
+ - **Gold Price Today - Kitco**
+   [https://www.kitco.com/gold-price-today](https://www.kitco.com/gold-price-today)
+   Live spot gold prices and charts. Get historical data and news updates on XAUUSD.
+
+ - **Shanghai Gold Exchange Daily Price**
+   [https://www.sge.com.cn](https://www.sge.com.cn)
+   Official gold trading prices published by the Shanghai Gold Exchange.
+
+ ---
+
+ ✅ Extraction Rules:
+ 1. For each search engine group (e.g., Bing, Baidu), extract **at most the top 3 results**.
+ 2. Extract each result as one `SearchResultItem`.
+ 3. If a result has multiple lines, group them based on proximity (title + description + link).
+ 4. Ignore Markdown formatting — only extract plain text content.
+ 5. If any field is missing, skip the entry.
+ 6. Return a **JSON array** of SearchResultItem objects.
+ """
+ result_format_agent = Agent(
+     name="ResultFormatAgent",
+     instructions=INSTRUCTION,
+     model=LitellmModel(model=f"openai/{settings.llm_model_name}", base_url=settings.llm_base_url, api_key=settings.llm_api_key),
+     output_type=SearchResult
+ )