mcpcn-weixin-search 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcpcn_weixin_search-0.1.2/PKG-INFO +155 -0
- mcpcn_weixin_search-0.1.2/README.md +130 -0
- mcpcn_weixin_search-0.1.2/mcpcn_weixin_search.egg-info/PKG-INFO +155 -0
- mcpcn_weixin_search-0.1.2/mcpcn_weixin_search.egg-info/SOURCES.txt +11 -0
- mcpcn_weixin_search-0.1.2/mcpcn_weixin_search.egg-info/dependency_links.txt +1 -0
- mcpcn_weixin_search-0.1.2/mcpcn_weixin_search.egg-info/entry_points.txt +2 -0
- mcpcn_weixin_search-0.1.2/mcpcn_weixin_search.egg-info/requires.txt +14 -0
- mcpcn_weixin_search-0.1.2/mcpcn_weixin_search.egg-info/top_level.txt +1 -0
- mcpcn_weixin_search-0.1.2/pyproject.toml +73 -0
- mcpcn_weixin_search-0.1.2/setup.cfg +4 -0
- mcpcn_weixin_search-0.1.2/weixin_search_mcp/__init__.py +5 -0
- mcpcn_weixin_search-0.1.2/weixin_search_mcp/main.py +79 -0
- mcpcn_weixin_search-0.1.2/weixin_search_mcp/tools/weixin_search.py +165 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mcpcn-weixin-search
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: 微信公众号内容搜索和获取工具
|
|
5
|
+
Author: mini
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.12
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: mcp[cli]>=1.6.0
|
|
13
|
+
Requires-Dist: fastmcp>=2.10.4
|
|
14
|
+
Requires-Dist: requests>=2.32.3
|
|
15
|
+
Requires-Dist: loguru>=0.7.2
|
|
16
|
+
Requires-Dist: python-dotenv>=1.1.1
|
|
17
|
+
Requires-Dist: fastapi>=0.110.0
|
|
18
|
+
Requires-Dist: uvicorn>=0.34.2
|
|
19
|
+
Requires-Dist: pydantic>=2.0.0
|
|
20
|
+
Requires-Dist: lxml>=4.9.3
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: build>=1.2.2.post1; extra == "dev"
|
|
23
|
+
Requires-Dist: setuptools>=45; extra == "dev"
|
|
24
|
+
Requires-Dist: twine>=6.1.0; extra == "dev"
|
|
25
|
+
|
|
26
|
+
# 微信公众号内容搜索工具 (Weixin Search MCP)
|
|
27
|
+
|
|
28
|
+
这是一个基于MCP (Model Context Protocol)的工具,用于搜索和获取微信公众号文章内容。通过搜狗微信搜索接口,可以方便地获取公众号文章并提取内容。
|
|
29
|
+
|
|
30
|
+
## 功能特点
|
|
31
|
+
|
|
32
|
+
- 通过关键词在搜狗微信搜索中查找公众号文章
|
|
33
|
+
- 自动获取文章的真实链接(从搜狗跳转链接转为微信原始链接)
|
|
34
|
+
- 提取文章的完整正文内容
|
|
35
|
+
- 支持批量获取多篇文章
|
|
36
|
+
|
|
37
|
+
## 安装方法
|
|
38
|
+
|
|
39
|
+
### 环境要求
|
|
40
|
+
|
|
41
|
+
- Python 3.12 或更高版本
|
|
42
|
+
- uv 包管理工具
|
|
43
|
+
|
|
44
|
+
### 安装步骤
|
|
45
|
+
|
|
46
|
+
使用 uv 创建虚拟环境并安装:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# 创建虚拟环境
|
|
50
|
+
uv venv
|
|
51
|
+
|
|
52
|
+
# 激活虚拟环境
|
|
53
|
+
source .venv/bin/activate # Linux/Mac
|
|
54
|
+
# 或者在Windows上:
|
|
55
|
+
# .venv\Scripts\activate
|
|
56
|
+
|
|
57
|
+
# 安装项目
|
|
58
|
+
uv pip install -e .
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## 使用方法
|
|
62
|
+
|
|
63
|
+
### 启动服务
|
|
64
|
+
|
|
65
|
+
启动HTTP服务器:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
weixin_search_mcp --transport http --port 8809 --host 0.0.0.0
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
或者使用标准输入/输出模式(用于与其他应用集成):
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
weixin_search_mcp --transport stdio
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 配置MCP服务
|
|
78
|
+
|
|
79
|
+
有两种方式可以配置和启动MCP服务:
|
|
80
|
+
|
|
81
|
+
#### 方式一:使用stdio协议(直接连接)
|
|
82
|
+
|
|
83
|
+
在Claude配置中添加以下内容:
|
|
84
|
+
|
|
85
|
+
```json
|
|
86
|
+
{
|
|
87
|
+
"mcpServers": {
|
|
88
|
+
"weixin_search_mcp": {
|
|
89
|
+
"command": "uvx",
|
|
90
|
+
"args": ["weixin_search_mcp", "--transport", "stdio"]
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
#### 方式二:使用HTTP协议
|
|
97
|
+
|
|
98
|
+
1. 启动HTTP服务:
|
|
99
|
+
|
|
100
|
+
```sh
|
|
101
|
+
uvx weixin_search_mcp --transport http --port 8809
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
2. 在Claude配置中添加以下内容:
|
|
105
|
+
|
|
106
|
+
```json
|
|
107
|
+
{
|
|
108
|
+
"mcpServers": {
|
|
109
|
+
"weixin_search_mcp": {
|
|
110
|
+
"type": "http",
|
|
111
|
+
"url": "http://localhost:8809/mcp"
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## 工具使用说明
|
|
118
|
+
|
|
119
|
+
本项目提供了以下工具来搜索和获取微信公众号内容:
|
|
120
|
+
|
|
121
|
+
### 微信搜索工具
|
|
122
|
+
- **weixin_search**: 在搜狗微信搜索中搜索指定关键词并返回结果列表
|
|
123
|
+
- 参数: `query` - 搜索关键词
|
|
124
|
+
- 返回: 包含标题、链接、真实URL和发布时间的文章列表
|
|
125
|
+
|
|
126
|
+
### 内容获取工具
|
|
127
|
+
- **get_weixin_article_content**: 获取微信公众号文章的正文内容
|
|
128
|
+
- 参数:
|
|
129
|
+
- `real_url` - 真实微信公众号文章链接
|
|
130
|
+
- `referer` - 可选,请求来源,通常为weixin_search返回的链接
|
|
131
|
+
- 返回: 文章正文内容
|
|
132
|
+
|
|
133
|
+
### 使用示例
|
|
134
|
+
|
|
135
|
+
1. 搜索关键词相关的微信公众号文章:
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
results = weixin_search("人工智能")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
2. 获取文章内容:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
article_content = get_weixin_article_content(real_url="https://mp.weixin.qq.com/...", referer="https://weixin.sogou.com/...")
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## 注意事项
|
|
148
|
+
|
|
149
|
+
- 该工具依赖于搜狗微信搜索接口,如果接口变更可能会影响工具功能
|
|
150
|
+
- 请合理控制请求频率,避免被搜狗或微信官方限制访问
|
|
151
|
+
- 获取的内容仅供学习研究使用,请遵守相关法律法规
|
|
152
|
+
|
|
153
|
+
## 许可证
|
|
154
|
+
|
|
155
|
+
MIT
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# 微信公众号内容搜索工具 (Weixin Search MCP)
|
|
2
|
+
|
|
3
|
+
这是一个基于MCP (Model Context Protocol)的工具,用于搜索和获取微信公众号文章内容。通过搜狗微信搜索接口,可以方便地获取公众号文章并提取内容。
|
|
4
|
+
|
|
5
|
+
## 功能特点
|
|
6
|
+
|
|
7
|
+
- 通过关键词在搜狗微信搜索中查找公众号文章
|
|
8
|
+
- 自动获取文章的真实链接(从搜狗跳转链接转为微信原始链接)
|
|
9
|
+
- 提取文章的完整正文内容
|
|
10
|
+
- 支持批量获取多篇文章
|
|
11
|
+
|
|
12
|
+
## 安装方法
|
|
13
|
+
|
|
14
|
+
### 环境要求
|
|
15
|
+
|
|
16
|
+
- Python 3.12 或更高版本
|
|
17
|
+
- uv 包管理工具
|
|
18
|
+
|
|
19
|
+
### 安装步骤
|
|
20
|
+
|
|
21
|
+
使用 uv 创建虚拟环境并安装:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
# 创建虚拟环境
|
|
25
|
+
uv venv
|
|
26
|
+
|
|
27
|
+
# 激活虚拟环境
|
|
28
|
+
source .venv/bin/activate # Linux/Mac
|
|
29
|
+
# 或者在Windows上:
|
|
30
|
+
# .venv\Scripts\activate
|
|
31
|
+
|
|
32
|
+
# 安装项目
|
|
33
|
+
uv pip install -e .
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## 使用方法
|
|
37
|
+
|
|
38
|
+
### 启动服务
|
|
39
|
+
|
|
40
|
+
启动HTTP服务器:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
weixin_search_mcp --transport http --port 8809 --host 0.0.0.0
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
或者使用标准输入/输出模式(用于与其他应用集成):
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
weixin_search_mcp --transport stdio
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### 配置MCP服务
|
|
53
|
+
|
|
54
|
+
有两种方式可以配置和启动MCP服务:
|
|
55
|
+
|
|
56
|
+
#### 方式一:使用stdio协议(直接连接)
|
|
57
|
+
|
|
58
|
+
在Claude配置中添加以下内容:
|
|
59
|
+
|
|
60
|
+
```json
|
|
61
|
+
{
|
|
62
|
+
"mcpServers": {
|
|
63
|
+
"weixin_search_mcp": {
|
|
64
|
+
"command": "uvx",
|
|
65
|
+
"args": ["weixin_search_mcp", "--transport", "stdio"]
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
#### 方式二:使用HTTP协议
|
|
72
|
+
|
|
73
|
+
1. 启动HTTP服务:
|
|
74
|
+
|
|
75
|
+
```sh
|
|
76
|
+
uvx weixin_search_mcp --transport http --port 8809
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
2. 在Claude配置中添加以下内容:
|
|
80
|
+
|
|
81
|
+
```json
|
|
82
|
+
{
|
|
83
|
+
"mcpServers": {
|
|
84
|
+
"weixin_search_mcp": {
|
|
85
|
+
"type": "http",
|
|
86
|
+
"url": "http://localhost:8809/mcp"
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## 工具使用说明
|
|
93
|
+
|
|
94
|
+
本项目提供了以下工具来搜索和获取微信公众号内容:
|
|
95
|
+
|
|
96
|
+
### 微信搜索工具
|
|
97
|
+
- **weixin_search**: 在搜狗微信搜索中搜索指定关键词并返回结果列表
|
|
98
|
+
- 参数: `query` - 搜索关键词
|
|
99
|
+
- 返回: 包含标题、链接、真实URL和发布时间的文章列表
|
|
100
|
+
|
|
101
|
+
### 内容获取工具
|
|
102
|
+
- **get_weixin_article_content**: 获取微信公众号文章的正文内容
|
|
103
|
+
- 参数:
|
|
104
|
+
- `real_url` - 真实微信公众号文章链接
|
|
105
|
+
- `referer` - 可选,请求来源,通常为weixin_search返回的链接
|
|
106
|
+
- 返回: 文章正文内容
|
|
107
|
+
|
|
108
|
+
### 使用示例
|
|
109
|
+
|
|
110
|
+
1. 搜索关键词相关的微信公众号文章:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
results = weixin_search("人工智能")
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
2. 获取文章内容:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
article_content = get_weixin_article_content(real_url="https://mp.weixin.qq.com/...", referer="https://weixin.sogou.com/...")
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## 注意事项
|
|
123
|
+
|
|
124
|
+
- 该工具依赖于搜狗微信搜索接口,如果接口变更可能会影响工具功能
|
|
125
|
+
- 请合理控制请求频率,避免被搜狗或微信官方限制访问
|
|
126
|
+
- 获取的内容仅供学习研究使用,请遵守相关法律法规
|
|
127
|
+
|
|
128
|
+
## 许可证
|
|
129
|
+
|
|
130
|
+
MIT
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mcpcn-weixin-search
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: 微信公众号内容搜索和获取工具
|
|
5
|
+
Author: mini
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.12
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
Requires-Dist: mcp[cli]>=1.6.0
|
|
13
|
+
Requires-Dist: fastmcp>=2.10.4
|
|
14
|
+
Requires-Dist: requests>=2.32.3
|
|
15
|
+
Requires-Dist: loguru>=0.7.2
|
|
16
|
+
Requires-Dist: python-dotenv>=1.1.1
|
|
17
|
+
Requires-Dist: fastapi>=0.110.0
|
|
18
|
+
Requires-Dist: uvicorn>=0.34.2
|
|
19
|
+
Requires-Dist: pydantic>=2.0.0
|
|
20
|
+
Requires-Dist: lxml>=4.9.3
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: build>=1.2.2.post1; extra == "dev"
|
|
23
|
+
Requires-Dist: setuptools>=45; extra == "dev"
|
|
24
|
+
Requires-Dist: twine>=6.1.0; extra == "dev"
|
|
25
|
+
|
|
26
|
+
# 微信公众号内容搜索工具 (Weixin Search MCP)
|
|
27
|
+
|
|
28
|
+
这是一个基于MCP (Model Context Protocol)的工具,用于搜索和获取微信公众号文章内容。通过搜狗微信搜索接口,可以方便地获取公众号文章并提取内容。
|
|
29
|
+
|
|
30
|
+
## 功能特点
|
|
31
|
+
|
|
32
|
+
- 通过关键词在搜狗微信搜索中查找公众号文章
|
|
33
|
+
- 自动获取文章的真实链接(从搜狗跳转链接转为微信原始链接)
|
|
34
|
+
- 提取文章的完整正文内容
|
|
35
|
+
- 支持批量获取多篇文章
|
|
36
|
+
|
|
37
|
+
## 安装方法
|
|
38
|
+
|
|
39
|
+
### 环境要求
|
|
40
|
+
|
|
41
|
+
- Python 3.12 或更高版本
|
|
42
|
+
- uv 包管理工具
|
|
43
|
+
|
|
44
|
+
### 安装步骤
|
|
45
|
+
|
|
46
|
+
使用 uv 创建虚拟环境并安装:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# 创建虚拟环境
|
|
50
|
+
uv venv
|
|
51
|
+
|
|
52
|
+
# 激活虚拟环境
|
|
53
|
+
source .venv/bin/activate # Linux/Mac
|
|
54
|
+
# 或者在Windows上:
|
|
55
|
+
# .venv\Scripts\activate
|
|
56
|
+
|
|
57
|
+
# 安装项目
|
|
58
|
+
uv pip install -e .
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## 使用方法
|
|
62
|
+
|
|
63
|
+
### 启动服务
|
|
64
|
+
|
|
65
|
+
启动HTTP服务器:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
weixin_search_mcp --transport http --port 8809 --host 0.0.0.0
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
或者使用标准输入/输出模式(用于与其他应用集成):
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
weixin_search_mcp --transport stdio
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 4. 配置MCP服务
|
|
78
|
+
|
|
79
|
+
有两种方式可以配置和启动MCP服务:
|
|
80
|
+
|
|
81
|
+
#### 方式一:使用stdio协议(直接连接)
|
|
82
|
+
|
|
83
|
+
在Claude配置中添加以下内容:
|
|
84
|
+
|
|
85
|
+
```json
|
|
86
|
+
{
|
|
87
|
+
"mcpServers": {
|
|
88
|
+
"weixin_search_mcp": {
|
|
89
|
+
"command": "uvx",
|
|
90
|
+
"args": ["weixin_search_mcp", "--transport", "stdio"]
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
#### 方式二:使用HTTP协议
|
|
97
|
+
|
|
98
|
+
1. 启动HTTP服务:
|
|
99
|
+
|
|
100
|
+
```sh
|
|
101
|
+
uvx weixin_search_mcp --transport http --port 8809
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
2. 在Claude配置中添加以下内容:
|
|
105
|
+
|
|
106
|
+
```json
|
|
107
|
+
{
|
|
108
|
+
"mcpServers": {
|
|
109
|
+
"weixin_search_mcp": {
|
|
110
|
+
"type": "http",
|
|
111
|
+
"url": "http://localhost:8809/mcp"
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## 工具使用说明
|
|
118
|
+
|
|
119
|
+
本项目提供了以下工具来搜索和获取微信公众号内容:
|
|
120
|
+
|
|
121
|
+
### 微信搜索工具
|
|
122
|
+
- **weixin_search**: 在搜狗微信搜索中搜索指定关键词并返回结果列表
|
|
123
|
+
- 参数: `query` - 搜索关键词
|
|
124
|
+
- 返回: 包含标题、链接、真实URL和发布时间的文章列表
|
|
125
|
+
|
|
126
|
+
### 内容获取工具
|
|
127
|
+
- **get_weixin_article_content**: 获取微信公众号文章的正文内容
|
|
128
|
+
- 参数:
|
|
129
|
+
- `real_url` - 真实微信公众号文章链接
|
|
130
|
+
- `referer` - 可选,请求来源,通常为weixin_search返回的链接
|
|
131
|
+
- 返回: 文章正文内容
|
|
132
|
+
|
|
133
|
+
### 使用示例
|
|
134
|
+
|
|
135
|
+
1. 搜索关键词相关的微信公众号文章:
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
results = weixin_search("人工智能")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
2. 获取文章内容:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
article_content = get_weixin_article_content(real_url="https://mp.weixin.qq.com/...", referer="https://weixin.sogou.com/...")
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## 注意事项
|
|
148
|
+
|
|
149
|
+
- 该工具依赖于搜狗微信搜索接口,如果接口变更可能会影响工具功能
|
|
150
|
+
- 请合理控制请求频率,避免被搜狗或微信官方限制访问
|
|
151
|
+
- 获取的内容仅供学习研究使用,请遵守相关法律法规
|
|
152
|
+
|
|
153
|
+
## 许可证
|
|
154
|
+
|
|
155
|
+
MIT
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
mcpcn_weixin_search.egg-info/PKG-INFO
|
|
4
|
+
mcpcn_weixin_search.egg-info/SOURCES.txt
|
|
5
|
+
mcpcn_weixin_search.egg-info/dependency_links.txt
|
|
6
|
+
mcpcn_weixin_search.egg-info/entry_points.txt
|
|
7
|
+
mcpcn_weixin_search.egg-info/requires.txt
|
|
8
|
+
mcpcn_weixin_search.egg-info/top_level.txt
|
|
9
|
+
weixin_search_mcp/__init__.py
|
|
10
|
+
weixin_search_mcp/main.py
|
|
11
|
+
weixin_search_mcp/tools/weixin_search.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
weixin_search_mcp
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mcpcn-weixin-search"
|
|
3
|
+
version = "0.1.2"
|
|
4
|
+
description = "微信公众号内容搜索和获取工具"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
authors = [
|
|
9
|
+
{name = "mini"}
|
|
10
|
+
]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Programming Language :: Python :: 3",
|
|
13
|
+
"Programming Language :: Python :: 3.12",
|
|
14
|
+
"Operating System :: OS Independent",
|
|
15
|
+
]
|
|
16
|
+
dependencies = [
|
|
17
|
+
"mcp[cli]>=1.6.0",
|
|
18
|
+
"fastmcp>=2.10.4",
|
|
19
|
+
"requests>=2.32.3",
|
|
20
|
+
"loguru>=0.7.2",
|
|
21
|
+
"python-dotenv>=1.1.1",
|
|
22
|
+
"fastapi>=0.110.0",
|
|
23
|
+
"uvicorn>=0.34.2",
|
|
24
|
+
"pydantic>=2.0.0",
|
|
25
|
+
"lxml>=4.9.3"
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.scripts]
|
|
29
|
+
weixin_search_mcp = "weixin_search_mcp.main:app"
|
|
30
|
+
|
|
31
|
+
[build-system]
|
|
32
|
+
requires = ["setuptools>=45", "wheel"]
|
|
33
|
+
build-backend = "setuptools.build_meta"
|
|
34
|
+
|
|
35
|
+
[tool.setuptools]
|
|
36
|
+
packages = ["weixin_search_mcp", "weixin_search_mcp.tools"]
|
|
37
|
+
include-package-data = true
|
|
38
|
+
|
|
39
|
+
[tool.setuptools.package-data]
|
|
40
|
+
"weixin_search_mcp.tools" = ["*.js"]
|
|
41
|
+
|
|
42
|
+
[project.optional-dependencies]
|
|
43
|
+
dev = [
|
|
44
|
+
"build>=1.2.2.post1",
|
|
45
|
+
"setuptools>=45",
|
|
46
|
+
"twine>=6.1.0",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
[tool.black]
|
|
50
|
+
line-length = 88
|
|
51
|
+
target-version = ["py312"]
|
|
52
|
+
|
|
53
|
+
[tool.isort]
|
|
54
|
+
profile = "black"
|
|
55
|
+
line_length = 88
|
|
56
|
+
|
|
57
|
+
[tool.mypy]
|
|
58
|
+
python_version = "3.12.1"
|
|
59
|
+
disallow_untyped_defs = true
|
|
60
|
+
disallow_incomplete_defs = true
|
|
61
|
+
check_untyped_defs = true
|
|
62
|
+
disallow_untyped_decorators = true
|
|
63
|
+
no_implicit_optional = true
|
|
64
|
+
strict_optional = true
|
|
65
|
+
warn_redundant_casts = true
|
|
66
|
+
warn_return_any = true
|
|
67
|
+
warn_unused_ignores = true
|
|
68
|
+
|
|
69
|
+
[dependency-groups]
|
|
70
|
+
dev = [
|
|
71
|
+
"build>=1.3.0",
|
|
72
|
+
]
|
|
73
|
+
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from typing import Annotated, Any, List, Dict, Optional
|
|
3
|
+
import asyncio
|
|
4
|
+
import time
|
|
5
|
+
import os
|
|
6
|
+
from fastmcp import FastMCP
|
|
7
|
+
from dotenv import load_dotenv
|
|
8
|
+
|
|
9
|
+
from pydantic import Field
|
|
10
|
+
import requests
|
|
11
|
+
from loguru import logger
|
|
12
|
+
from urllib.parse import urlparse, parse_qs
|
|
13
|
+
import argparse
|
|
14
|
+
|
|
15
|
+
# 导入工具函数
|
|
16
|
+
from weixin_search_mcp.tools.weixin_search import sogou_weixin_search, get_real_url, get_article_content
|
|
17
|
+
|
|
18
|
+
# 配置日志
|
|
19
|
+
def setup_logger(log_level="INFO"):
    """Configure the loguru logger: drop default handlers, add a stderr sink.

    Args:
        log_level: minimum level to emit (default "INFO").
    """
    log_format = (
        "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
        "<level>{level: <4}</level> | "
        "<cyan>using_function:{function}</cyan> | "
        "<cyan>{file}:{line}</cyan> | "
        "<level>{message}</level>"
    )
    # Remove loguru's default handler so only our formatted sink is active.
    logger.remove()
    logger.add(sys.stderr, level=log_level, format=log_format)
|
|
27
|
+
|
|
28
|
+
# Initialize logging before the server or any tool call runs.
setup_logger(log_level="INFO")

# NOTE(review): arguments are parsed at import time (the console-script entry
# point is this module's app()), so importing this module consumes sys.argv —
# confirm this is intended if the module is ever imported as a library.
parser = argparse.ArgumentParser()

# Transport selection: "http" (default) or "stdio"; host/port apply to http.
parser.add_argument("--transport", type=str, default='http')
parser.add_argument("--port", type=int, default=8809)
parser.add_argument("--host", type=str, default='0.0.0.0')

args = parser.parse_args()

# FastMCP server instance; the name string is what MCP clients see.
mcp = FastMCP("微信公众号内容获取", port=args.port)
|
|
39
|
+
|
|
40
|
+
@mcp.tool
def weixin_search(query: Annotated[str, "搜索关键词"]) -> List[Dict[str, str]]:
    """Search Sogou Weixin for the given keyword and return the result list.

    Args:
        query: the search keyword.

    Returns:
        List[Dict[str, str]]: one dict per hit (title, link, real_url,
        publish_time), as produced by sogou_weixin_search.
    """
    # Thin MCP wrapper; all scraping logic lives in tools/weixin_search.py.
    search_hits = sogou_weixin_search(query)
    return search_hits
|
|
49
|
+
|
|
50
|
+
@mcp.tool
def get_weixin_article_content(real_url: Annotated[str, "真实微信公众号文章链接"], referer: Annotated[Optional[str], "请求来源,weixin_search的返回值"] = None) -> str:
    """Fetch the body text of a Weixin (WeChat) official-account article.

    Args:
        real_url: the real mp.weixin.qq.com article URL.
        referer: optional Referer header value, usually the Sogou link
            returned by weixin_search. Defaults to None (header omitted).

    Returns:
        str: the extracted article body text, or an error-message string
        on failure (see get_article_content).
    """
    # Fix: `referer` was annotated Optional and documented as optional, but
    # had no default value, making it a required argument for MCP clients.
    return get_article_content(real_url, referer)
|
|
60
|
+
|
|
61
|
+
def app():
    """CLI entry point: run the MCP server over HTTP or stdio.

    Host/port/transport come from the module-level argparse `args`.
    Logs and exits with status 1 on unexpected errors; a Ctrl-C is
    logged as a normal shutdown.
    """
    host = args.host
    port = args.port
    transport = args.transport
    try:
        if transport == "http":
            mcp.run(host=host, port=port, transport=transport)
        elif transport == "stdio":
            mcp.run(transport=transport)
        else:
            # Fix: the old message said "端口" (port) but this branch rejects
            # an unsupported *transport* value; say so, and include it.
            raise ValueError(f"不支持的传输方式: {transport}")
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
    except Exception as e:
        logger.error(f"Error running server: {str(e)}")
        sys.exit(1)

if __name__ == "__main__":
    app()
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import asyncio
|
|
3
|
+
from typing import Annotated, Any, Dict, List, Optional
|
|
4
|
+
import requests
|
|
5
|
+
from lxml import html
|
|
6
|
+
from urllib.parse import quote
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
def sogou_weixin_search(query: Annotated[str, "搜索关键词"]) -> List[Dict[str, str]]:
    """Search Sogou Weixin for `query` and return hits with resolved real URLs.

    Args:
        query: the search keyword.

    Returns:
        List[Dict[str, str]]: one dict per hit with keys `title`,
        `link` (Sogou redirect URL), `real_url` (resolved
        mp.weixin.qq.com URL, "" when resolution fails) and
        `publish_time`. Returns [] on any request/parse failure
        (deliberate best-effort behavior).
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Referer': f'https://weixin.sogou.com/weixin?query={quote(query)}',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0',
    }

    params = {
        'type': '2',
        's_from': 'input',
        'query': query,
        'ie': 'utf8',
        '_sug_': 'n',
        '_sug_type_': '',
    }

    try:
        # Fix: add a timeout so a stalled connection cannot hang the tool forever.
        response = requests.get('https://weixin.sogou.com/weixin', params=params, headers=headers, timeout=15)

        if response.status_code != 200:
            return []

        tree = html.fromstring(response.text)
        results = []

        # Title anchors and publish-time spans are matched positionally (zip).
        elements = tree.xpath("//a[contains(@id, 'sogou_vr_11002601_title_')]")
        publish_time = tree.xpath(
            "//li[contains(@id, 'sogou_vr_11002601_box_')]/div[@class='txt-box']/div[@class='s-p']/span[@class='s2']")

        for element, time_elem in zip(elements, publish_time):
            title = element.text_content().strip()
            link = element.get('href')
            if link and not link.startswith('http'):
                # Sogou result hrefs are site-relative; make them absolute.
                link = 'https://weixin.sogou.com' + link

            # Best effort: resolve the real mp.weixin.qq.com URL; keep the
            # hit with real_url="" if resolution fails.
            real_url = ""
            try:
                real_url = get_real_url_from_sogou(link)
            except Exception:
                pass

            results.append({
                'title': title,
                'link': link,
                'real_url': real_url,
                'publish_time': time_elem.text_content().strip()
            })

        return results
    except Exception:
        # Deliberate best-effort: any network/parse error yields no results.
        return []
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_real_url_from_sogou(sogou_url: str) -> str:
    """Resolve a Sogou redirect link to the real mp.weixin.qq.com article URL.

    Sogou's interstitial page assembles the destination in JavaScript from a
    series of `url += '...';` fragments; this scrapes those fragments back out
    of the response text.

    Args:
        sogou_url: the Sogou redirect URL from a search result.

    Returns:
        str: the resolved article URL, or "" on any failure (best effort).
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0',
        'Cookie': 'ABTEST=7|1750756616|v1; SUID=0A5BF4788E52A20B00000000685A6D08; IPLOC=CN1100; SUID=605BF4783954A20B00000000685A6D08; SUV=006817F578F45BFE685A6D0B913DA642; SNUID=B3E34CC0B8BF80F5737E3561B9B78454; ariaDefaultTheme=undefined',
    }
    # NOTE(review): the hard-coded Cookie above carries session identifiers
    # captured from a browser; such values expire — confirm and refresh.

    try:
        # Fix: add a timeout so a stalled connection cannot hang resolution.
        response = requests.get(sogou_url, headers=headers, timeout=15)

        script_content = response.text
        # NOTE(review): this seeds the scan *past* the first `url += '` match,
        # so the loop collects fragments starting from the second occurrence —
        # presumably intentional given the "https://mp." prefix added below;
        # confirm against a live Sogou response before changing the parsing.
        start_index = script_content.find("url += '") + len("url += '")
        url_parts = []
        while True:
            part_start = script_content.find("url += '", start_index)
            if part_start == -1:
                break
            part_end = script_content.find("'", part_start + len("url += '"))
            part = script_content[part_start + len("url += '"):part_end]
            url_parts.append(part)
            start_index = part_end + 1

        # '@' characters are junk inserted by Sogou's obfuscation; strip them.
        full_url = ''.join(url_parts).replace("@", "")
        return "https://mp." + full_url
    except Exception:
        return ""
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def get_real_url(sogou_url: Annotated[str, "搜狗微信链接,来自于sogou_weixin_search工具结果"]) -> str:
    """Resolve a Sogou redirect link to the real WeChat article URL.

    Thin public wrapper around get_real_url_from_sogou().
    """
    resolved = get_real_url_from_sogou(sogou_url)
    return resolved
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def get_article_content(real_url: Annotated[str, "真实微信公众号文章链接"], referer: Annotated[Optional[str], "请求来源,get_real_url的返回值"] = None) -> str:
    """Fetch and extract the plain-text body of a WeChat article.

    Args:
        real_url: real mp.weixin.qq.com article URL.
        referer: optional Referer header (the originating Sogou link);
            omitted from the request when falsy. Defaults to None.

    Returns:
        str: the article body (one extracted text node per line), or an
        error-message string starting with "获取文章内容失败" on failure.
    """
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'pragma': 'no-cache',
        'priority': 'u=0, i',
        'referer': referer,
        'sec-ch-ua': '"Microsoft Edge";v="137", "Chromium";v="137", "Not/A)Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'sec-fetch-site': 'cross-site',
        'sec-fetch-user': '?1',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36 Edg/137.0.0.0',
    }
    # Don't send an empty/None Referer header.
    if not referer:
        headers.pop('referer')

    try:
        # Fix: add a timeout so a stalled connection cannot hang extraction.
        response = requests.get(real_url, headers=headers, timeout=15)
        tree = html.fromstring(response.text)
        # js_content is the <div> WeChat renders the article body into.
        content_elements = tree.xpath("//div[@id='js_content']//text()")
        cleaned_content = [text.strip() for text in content_elements if text.strip()]
        main_content = '\n'.join(cleaned_content)
        return main_content
    except Exception as e:
        return f"获取文章内容失败: {str(e)}"
|
|
137
|
+
|
|
138
|
+
def get_wechat_article(query: str, number=10):
    """
    Fetch the first `number` articles matching `query` (default 10).

    Returns a list of article dicts (title, publish_time, real_url, content),
    or a "no results" message string when the search comes back empty.
    """
    started_at = time.time()
    hits = sogou_weixin_search(query)
    if not hits:
        return f"没有搜索到{query}相关的文章"
    articles = []
    for hit in hits[:number]:
        sougou_link = hit["link"]
        resolved_url = get_real_url(sougou_link)
        # Pass the Sogou link as the Referer for the article request.
        body = get_article_content(resolved_url, referer=sougou_link)
        articles.append({
            "title": hit["title"],
            "publish_time": hit["publish_time"],
            "real_url": resolved_url,
            "content": body,
        })
    elapsed = time.time() - started_at
    print(f"关键词{query}相关的文章已经获取完毕,获取到{len(articles)}篇, 耗时{elapsed}秒")
    return articles

if __name__ == '__main__':
    get_wechat_article(query="吉利汽车",number=2)
|