@youhaozhao/cninfo-mcp 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/bin/cninfo-mcp.js +91 -49
- package/package.json +1 -1
- package/python/__pycache__/spider.cpython-314.pyc +0 -0
- package/python/mcp_server.py +6 -2
- package/python/spider.py +120 -65
- package/scripts/install-python-deps.js +29 -4
package/README.md
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
在 Claude Desktop / Claude Code 配置文件中添加:
|
|
10
10
|
|
|
11
11
|
**macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
12
|
+
|
|
12
13
|
**Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
|
|
13
14
|
|
|
14
15
|
```json
|
|
@@ -48,3 +49,4 @@
|
|
|
48
49
|
## Credits
|
|
49
50
|
|
|
50
51
|
爬虫逻辑基于 [gaodechen/cninfo_process](https://github.com/gaodechen/cninfo_process)。
|
|
52
|
+
|
package/bin/cninfo-mcp.js
CHANGED
|
@@ -5,22 +5,45 @@
|
|
|
5
5
|
* 自动检测 Python 并安装依赖,然后启动 Python MCP 服务器。
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
const { spawn } = require(
|
|
9
|
-
const path = require(
|
|
10
|
-
const fs = require(
|
|
8
|
+
const { spawn } = require("child_process");
|
|
9
|
+
const path = require("path");
|
|
10
|
+
const fs = require("fs");
|
|
11
|
+
const os = require("os");
|
|
11
12
|
|
|
12
13
|
// 配置路径
|
|
13
|
-
const PYTHON_SCRIPT = path.join(__dirname,
|
|
14
|
-
const PYTHON_REQUIREMENTS = path.join(
|
|
14
|
+
const PYTHON_SCRIPT = path.join(__dirname, "..", "python", "mcp_server.py");
|
|
15
|
+
const PYTHON_REQUIREMENTS = path.join(
|
|
16
|
+
__dirname,
|
|
17
|
+
"..",
|
|
18
|
+
"python",
|
|
19
|
+
"requirements.txt",
|
|
20
|
+
);
|
|
21
|
+
|
|
22
|
+
// 虚拟环境目录,放在用户目录下保证跨 npx 调用持久化
|
|
23
|
+
const VENV_DIR = path.join(os.homedir(), ".cninfo-mcp", "venv");
|
|
24
|
+
|
|
25
|
+
// 获取虚拟环境中的 Python 可执行文件路径
|
|
26
|
+
function getVenvPython() {
|
|
27
|
+
if (process.platform === "win32") {
|
|
28
|
+
return path.join(VENV_DIR, "Scripts", "python.exe");
|
|
29
|
+
}
|
|
30
|
+
return path.join(VENV_DIR, "bin", "python3");
|
|
31
|
+
}
|
|
15
32
|
|
|
16
|
-
//
|
|
33
|
+
// 查找可用的系统 Python 可执行文件(仅用于创建 venv)
|
|
17
34
|
async function findPython() {
|
|
18
|
-
const pythonCommands = [
|
|
35
|
+
const pythonCommands = [
|
|
36
|
+
"python3",
|
|
37
|
+
"python",
|
|
38
|
+
"python3.12",
|
|
39
|
+
"python3.11",
|
|
40
|
+
"python3.10",
|
|
41
|
+
];
|
|
19
42
|
|
|
20
43
|
for (const cmd of pythonCommands) {
|
|
21
44
|
try {
|
|
22
|
-
const result = await spawnAsync(cmd, [
|
|
23
|
-
if (result.stdout && result.stdout.includes(
|
|
45
|
+
const result = await spawnAsync(cmd, ["--version"]);
|
|
46
|
+
if (result.stdout && result.stdout.includes("Python")) {
|
|
24
47
|
return cmd;
|
|
25
48
|
}
|
|
26
49
|
} catch (error) {
|
|
@@ -29,38 +52,57 @@ async function findPython() {
|
|
|
29
52
|
}
|
|
30
53
|
|
|
31
54
|
throw new Error(
|
|
32
|
-
|
|
33
|
-
|
|
55
|
+
"Python not found. Please install Python 3.10+ from https://python.org\n" +
|
|
56
|
+
"After installation, restart your terminal and try again.",
|
|
34
57
|
);
|
|
35
58
|
}
|
|
36
59
|
|
|
37
|
-
//
|
|
38
|
-
async function
|
|
60
|
+
// 创建虚拟环境(如果不存在)
|
|
61
|
+
async function ensureVenv(systemPythonCmd) {
|
|
62
|
+
const venvPython = getVenvPython();
|
|
63
|
+
if (fs.existsSync(venvPython)) {
|
|
64
|
+
return venvPython;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
console.error("Creating Python virtual environment...");
|
|
68
|
+
fs.mkdirSync(path.dirname(VENV_DIR), { recursive: true });
|
|
69
|
+
await spawnAsync(systemPythonCmd, ["-m", "venv", VENV_DIR], {
|
|
70
|
+
stdio: "inherit",
|
|
71
|
+
});
|
|
72
|
+
console.error("Virtual environment created\n");
|
|
73
|
+
return venvPython;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// 检查并安装 Python 依赖(使用 venv 中的 python)
|
|
77
|
+
async function ensureDependencies(venvPython) {
|
|
39
78
|
const requirementsPath = PYTHON_REQUIREMENTS;
|
|
40
79
|
|
|
41
80
|
if (!fs.existsSync(requirementsPath)) {
|
|
42
|
-
console.error(
|
|
81
|
+
console.error("Error: requirements.txt not found at", requirementsPath);
|
|
43
82
|
process.exit(1);
|
|
44
83
|
}
|
|
45
84
|
|
|
46
85
|
try {
|
|
47
86
|
// 检查 mcp 包是否已安装
|
|
48
|
-
|
|
87
|
+
await spawnAsync(venvPython, ["-c", "import mcp"]);
|
|
49
88
|
} catch (error) {
|
|
50
89
|
// 未安装,执行安装
|
|
51
|
-
console.error(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
90
|
+
console.error("Installing Python dependencies...");
|
|
91
|
+
try {
|
|
92
|
+
await spawnAsync(
|
|
93
|
+
venvPython,
|
|
94
|
+
["-m", "pip", "install", "-r", requirementsPath],
|
|
95
|
+
{
|
|
96
|
+
stdio: "inherit",
|
|
97
|
+
},
|
|
98
|
+
);
|
|
99
|
+
console.error("Python dependencies installed successfully\n");
|
|
100
|
+
} catch (installError) {
|
|
101
|
+
console.error("\n❌ Failed to install Python dependencies");
|
|
102
|
+
console.error("Please run manually:");
|
|
103
|
+
console.error(` ${venvPython} -m pip install -r ${requirementsPath}`);
|
|
60
104
|
process.exit(1);
|
|
61
105
|
}
|
|
62
|
-
|
|
63
|
-
console.error('✅ Python dependencies installed successfully\n');
|
|
64
106
|
}
|
|
65
107
|
}
|
|
66
108
|
|
|
@@ -68,28 +110,28 @@ async function ensureDependencies(pythonCmd) {
|
|
|
68
110
|
function spawnAsync(command, args, options = {}) {
|
|
69
111
|
return new Promise((resolve, reject) => {
|
|
70
112
|
const child = spawn(command, args, {
|
|
71
|
-
stdio: options.stdio ||
|
|
72
|
-
shell: process.platform ===
|
|
73
|
-
...options
|
|
113
|
+
stdio: options.stdio || "pipe",
|
|
114
|
+
shell: process.platform === "win32",
|
|
115
|
+
...options,
|
|
74
116
|
});
|
|
75
117
|
|
|
76
|
-
let stdout =
|
|
77
|
-
let stderr =
|
|
118
|
+
let stdout = "";
|
|
119
|
+
let stderr = "";
|
|
78
120
|
let code = null;
|
|
79
121
|
|
|
80
122
|
if (child.stdout) {
|
|
81
|
-
child.stdout.on(
|
|
123
|
+
child.stdout.on("data", (data) => {
|
|
82
124
|
stdout += data.toString();
|
|
83
125
|
});
|
|
84
126
|
}
|
|
85
127
|
|
|
86
128
|
if (child.stderr) {
|
|
87
|
-
child.stderr.on(
|
|
129
|
+
child.stderr.on("data", (data) => {
|
|
88
130
|
stderr += data.toString();
|
|
89
131
|
});
|
|
90
132
|
}
|
|
91
133
|
|
|
92
|
-
child.on(
|
|
134
|
+
child.on("close", (exitCode) => {
|
|
93
135
|
code = exitCode;
|
|
94
136
|
if (code === 0) {
|
|
95
137
|
resolve({ stdout, stderr, code });
|
|
@@ -102,7 +144,7 @@ function spawnAsync(command, args, options = {}) {
|
|
|
102
144
|
}
|
|
103
145
|
});
|
|
104
146
|
|
|
105
|
-
child.on(
|
|
147
|
+
child.on("error", (error) => {
|
|
106
148
|
reject(error);
|
|
107
149
|
});
|
|
108
150
|
});
|
|
@@ -112,36 +154,36 @@ async function main() {
|
|
|
112
154
|
try {
|
|
113
155
|
// 检查 Python 脚本是否存在
|
|
114
156
|
if (!fs.existsSync(PYTHON_SCRIPT)) {
|
|
115
|
-
console.error(
|
|
157
|
+
console.error("Error: mcp_server.py not found at", PYTHON_SCRIPT);
|
|
116
158
|
process.exit(1);
|
|
117
159
|
}
|
|
118
160
|
|
|
119
|
-
const
|
|
120
|
-
await
|
|
161
|
+
const systemPython = await findPython();
|
|
162
|
+
const venvPython = await ensureVenv(systemPython);
|
|
163
|
+
await ensureDependencies(venvPython);
|
|
121
164
|
|
|
122
165
|
// 启动 MCP 服务器
|
|
123
|
-
console.error(
|
|
124
|
-
const child = spawn(
|
|
125
|
-
stdio:
|
|
126
|
-
shell: process.platform ===
|
|
166
|
+
console.error("巨潮资讯 MCP 服务器已启动,等待连接...");
|
|
167
|
+
const child = spawn(venvPython, [PYTHON_SCRIPT], {
|
|
168
|
+
stdio: "inherit",
|
|
169
|
+
shell: process.platform === "win32",
|
|
127
170
|
env: {
|
|
128
171
|
...process.env,
|
|
129
|
-
PYTHONPATH: path.join(__dirname,
|
|
130
|
-
}
|
|
172
|
+
PYTHONPATH: path.join(__dirname, "..", "python"),
|
|
173
|
+
},
|
|
131
174
|
});
|
|
132
175
|
|
|
133
176
|
// 处理子进程退出
|
|
134
|
-
child.on(
|
|
135
|
-
console.error(
|
|
177
|
+
child.on("error", (error) => {
|
|
178
|
+
console.error("Failed to start MCP Server:", error.message);
|
|
136
179
|
process.exit(1);
|
|
137
180
|
});
|
|
138
181
|
|
|
139
|
-
child.on(
|
|
182
|
+
child.on("exit", (code) => {
|
|
140
183
|
process.exit(code || 0);
|
|
141
184
|
});
|
|
142
|
-
|
|
143
185
|
} catch (error) {
|
|
144
|
-
console.error(
|
|
186
|
+
console.error("Error:", error.message);
|
|
145
187
|
process.exit(1);
|
|
146
188
|
}
|
|
147
189
|
}
|
package/package.json
CHANGED
|
Binary file
|
package/python/mcp_server.py
CHANGED
|
@@ -96,7 +96,9 @@ def query_annual_reports_tool(stock_code: str, year: Optional[int] = None) -> di
|
|
|
96
96
|
|
|
97
97
|
|
|
98
98
|
@mcp.tool()
|
|
99
|
-
def download_annual_reports_tool(
|
|
99
|
+
def download_annual_reports_tool(
|
|
100
|
+
stock_code: str, year: Optional[int] = None, save_path: Optional[str] = None
|
|
101
|
+
) -> dict:
|
|
100
102
|
"""
|
|
101
103
|
Download annual reports for a Chinese listed company
|
|
102
104
|
|
|
@@ -170,7 +172,9 @@ def query_prospectus_tool(stock_code: str) -> dict:
|
|
|
170
172
|
"announcementTime": r.get("announcementTime", ""),
|
|
171
173
|
"secCode": r.get("secCode", ""),
|
|
172
174
|
"secName": r.get("secName", ""),
|
|
173
|
-
"adjunctUrl": base_url + r.get("adjunctUrl", "")
|
|
175
|
+
"adjunctUrl": base_url + r.get("adjunctUrl", "")
|
|
176
|
+
if r.get("adjunctUrl")
|
|
177
|
+
else "",
|
|
174
178
|
}
|
|
175
179
|
for r in reports
|
|
176
180
|
]
|
package/python/spider.py
CHANGED
|
@@ -3,9 +3,12 @@
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import datetime
|
|
6
|
+
import logging
|
|
6
7
|
import os
|
|
7
8
|
import random
|
|
9
|
+
import re
|
|
8
10
|
import time
|
|
11
|
+
from typing import Optional, Union
|
|
9
12
|
|
|
10
13
|
import requests
|
|
11
14
|
|
|
@@ -13,6 +16,7 @@ download_path = "https://static.cninfo.com.cn/"
|
|
|
13
16
|
# 使用脚本所在目录的相对路径
|
|
14
17
|
_saving_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pdf")
|
|
15
18
|
saving_path = _saving_path + "/"
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
16
20
|
|
|
17
21
|
User_Agent = [
|
|
18
22
|
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
|
|
@@ -25,7 +29,7 @@ User_Agent = [
|
|
|
25
29
|
]
|
|
26
30
|
|
|
27
31
|
|
|
28
|
-
|
|
32
|
+
BASE_HEADERS = {
|
|
29
33
|
"Accept": "application/json, text/javascript, */*; q=0.01",
|
|
30
34
|
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
|
|
31
35
|
"Accept-Encoding": "gzip, deflate",
|
|
@@ -37,10 +41,64 @@ headers = {
|
|
|
37
41
|
}
|
|
38
42
|
|
|
39
43
|
|
|
44
|
+
def _build_headers() -> dict:
|
|
45
|
+
"""构造请求头,避免在并发场景下修改全局字典。"""
|
|
46
|
+
headers = BASE_HEADERS.copy()
|
|
47
|
+
headers["User-Agent"] = random.choice(User_Agent)
|
|
48
|
+
return headers
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _date_range(start_date: str) -> str:
|
|
52
|
+
"""构造查询时间区间,结束日期取当天,避免硬编码过期。"""
|
|
53
|
+
datetime.datetime.strptime(start_date, "%Y-%m-%d")
|
|
54
|
+
today = datetime.date.today().strftime("%Y-%m-%d")
|
|
55
|
+
return f"{start_date}~{today}"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _is_annual_report_title(
|
|
59
|
+
title: str, year_filter: Optional[Union[int, str]] = None
|
|
60
|
+
) -> bool:
|
|
61
|
+
"""
|
|
62
|
+
判断标题是否为“年度报告正文”。
|
|
63
|
+
|
|
64
|
+
支持常见变体:
|
|
65
|
+
- 2024年年度报告
|
|
66
|
+
- 2024年度报告
|
|
67
|
+
- 2024年报
|
|
68
|
+
"""
|
|
69
|
+
compact_title = re.sub(r"\s+", "", title or "")
|
|
70
|
+
|
|
71
|
+
# 非正文公告关键词过滤
|
|
72
|
+
exclude_keywords = [
|
|
73
|
+
"摘要",
|
|
74
|
+
"确认意见",
|
|
75
|
+
"取消",
|
|
76
|
+
"更正",
|
|
77
|
+
"补充",
|
|
78
|
+
"说明",
|
|
79
|
+
"提示",
|
|
80
|
+
"致歉",
|
|
81
|
+
"修订",
|
|
82
|
+
"英文",
|
|
83
|
+
]
|
|
84
|
+
if any(keyword in compact_title for keyword in exclude_keywords):
|
|
85
|
+
return False
|
|
86
|
+
|
|
87
|
+
year_expr = re.escape(str(year_filter)) if year_filter is not None else r"\d{4}"
|
|
88
|
+
suffix_expr = r"(?:[((]更新后[))])?"
|
|
89
|
+
patterns = [
|
|
90
|
+
rf".*{year_expr}年年度报告{suffix_expr}",
|
|
91
|
+
rf".*{year_expr}年度报告{suffix_expr}",
|
|
92
|
+
]
|
|
93
|
+
if year_filter is not None:
|
|
94
|
+
patterns.append(rf".*{year_expr}年报{suffix_expr}")
|
|
95
|
+
|
|
96
|
+
return any(re.fullmatch(pattern, compact_title) for pattern in patterns)
|
|
97
|
+
|
|
98
|
+
|
|
40
99
|
# 深市 年度报告
|
|
41
100
|
def szseAnnual(page, stock):
|
|
42
101
|
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
43
|
-
headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
|
|
44
102
|
query = {
|
|
45
103
|
"pageNum": page, # 页码
|
|
46
104
|
"pageSize": 30,
|
|
@@ -52,10 +110,12 @@ def szseAnnual(page, stock):
|
|
|
52
110
|
"plate": "sz",
|
|
53
111
|
"category": "category_ndbg_szsh", # 年度报告
|
|
54
112
|
"trade": "",
|
|
55
|
-
"seDate": "2020-01-01
|
|
113
|
+
"seDate": _date_range("2020-01-01"), # 时间区间
|
|
56
114
|
}
|
|
57
115
|
|
|
58
|
-
namelist = requests.post(
|
|
116
|
+
namelist = requests.post(
|
|
117
|
+
query_path, headers=_build_headers(), data=query, timeout=30
|
|
118
|
+
)
|
|
59
119
|
result = namelist.json()
|
|
60
120
|
if result and "announcements" in result and result["announcements"]:
|
|
61
121
|
return result["announcements"]
|
|
@@ -65,7 +125,6 @@ def szseAnnual(page, stock):
|
|
|
65
125
|
# 沪市 年度报告
|
|
66
126
|
def sseAnnual(page, stock):
|
|
67
127
|
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
68
|
-
headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
|
|
69
128
|
query = {
|
|
70
129
|
"pageNum": page, # 页码
|
|
71
130
|
"pageSize": 30,
|
|
@@ -77,10 +136,12 @@ def sseAnnual(page, stock):
|
|
|
77
136
|
"plate": "sh",
|
|
78
137
|
"category": "category_ndbg_szsh", # 年度报告
|
|
79
138
|
"trade": "",
|
|
80
|
-
"seDate": "2020-01-01
|
|
139
|
+
"seDate": _date_range("2020-01-01"), # 时间区间
|
|
81
140
|
}
|
|
82
141
|
|
|
83
|
-
namelist = requests.post(
|
|
142
|
+
namelist = requests.post(
|
|
143
|
+
query_path, headers=_build_headers(), data=query, timeout=30
|
|
144
|
+
)
|
|
84
145
|
result = namelist.json()
|
|
85
146
|
if result and "announcements" in result and result["announcements"]:
|
|
86
147
|
return result["announcements"]
|
|
@@ -90,7 +151,6 @@ def sseAnnual(page, stock):
|
|
|
90
151
|
# 深市 招股
|
|
91
152
|
def szseStock(page, stock):
|
|
92
153
|
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
93
|
-
headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
|
|
94
154
|
query = {
|
|
95
155
|
"pageNum": page, # 页码
|
|
96
156
|
"pageSize": 30,
|
|
@@ -102,10 +162,12 @@ def szseStock(page, stock):
|
|
|
102
162
|
"plate": "sz",
|
|
103
163
|
"category": "",
|
|
104
164
|
"trade": "",
|
|
105
|
-
"seDate": "2015-01-01
|
|
165
|
+
"seDate": _date_range("2015-01-01"), # 时间区间
|
|
106
166
|
}
|
|
107
167
|
|
|
108
|
-
namelist = requests.post(
|
|
168
|
+
namelist = requests.post(
|
|
169
|
+
query_path, headers=_build_headers(), data=query, timeout=30
|
|
170
|
+
)
|
|
109
171
|
result = namelist.json()
|
|
110
172
|
if result and "announcements" in result and result["announcements"]:
|
|
111
173
|
return result["announcements"]
|
|
@@ -115,7 +177,6 @@ def szseStock(page, stock):
|
|
|
115
177
|
# 沪市 招股
|
|
116
178
|
def sseStock(page, stock):
|
|
117
179
|
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
118
|
-
headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
|
|
119
180
|
query = {
|
|
120
181
|
"pageNum": page, # 页码
|
|
121
182
|
"pageSize": 30,
|
|
@@ -127,44 +188,27 @@ def sseStock(page, stock):
|
|
|
127
188
|
"plate": "sh",
|
|
128
189
|
"category": "",
|
|
129
190
|
"trade": "",
|
|
130
|
-
"seDate": "2015-01-01
|
|
191
|
+
"seDate": _date_range("2015-01-01"), # 时间区间
|
|
131
192
|
}
|
|
132
193
|
|
|
133
|
-
namelist = requests.post(
|
|
194
|
+
namelist = requests.post(
|
|
195
|
+
query_path, headers=_build_headers(), data=query, timeout=30
|
|
196
|
+
)
|
|
134
197
|
result = namelist.json()
|
|
135
198
|
if result and "announcements" in result and result["announcements"]:
|
|
136
199
|
return result["announcements"]
|
|
137
200
|
return []
|
|
138
201
|
|
|
139
202
|
|
|
140
|
-
def Download(
|
|
203
|
+
def Download(
|
|
204
|
+
single_page,
|
|
205
|
+
year_filter: Optional[Union[int, str]] = None,
|
|
206
|
+
save_path: Optional[str] = None,
|
|
207
|
+
):
|
|
141
208
|
"""下载公告列表中的 PDF 文件"""
|
|
142
209
|
if single_page is None:
|
|
143
210
|
return
|
|
144
211
|
|
|
145
|
-
headers = {
|
|
146
|
-
"Accept": "application/json, text/javascript, */*; q=0.01",
|
|
147
|
-
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
|
|
148
|
-
"Accept-Encoding": "gzip, deflate",
|
|
149
|
-
"Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-HK;q=0.6,zh-TW;q=0.5",
|
|
150
|
-
"Host": "www.cninfo.com.cn",
|
|
151
|
-
"Origin": "http://www.cninfo.com.cn",
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
# 按年份筛选允许下载的标题
|
|
155
|
-
allowed_list = []
|
|
156
|
-
if year_filter:
|
|
157
|
-
allowed_list = [
|
|
158
|
-
f"{year_filter}年年度报告(更新后)",
|
|
159
|
-
f"{year_filter}年年度报告",
|
|
160
|
-
]
|
|
161
|
-
else:
|
|
162
|
-
# 默认下载 2015 年至当前年份
|
|
163
|
-
current_year = datetime.datetime.now().year
|
|
164
|
-
for year in range(2015, current_year + 1):
|
|
165
|
-
allowed_list.append(f"{year}年年度报告(更新后)")
|
|
166
|
-
allowed_list.append(f"{year}年年度报告")
|
|
167
|
-
|
|
168
212
|
allowed_list_2 = [
|
|
169
213
|
"招股书",
|
|
170
214
|
"招股说明书",
|
|
@@ -181,20 +225,13 @@ def Download(single_page, year_filter=None, save_path=None):
|
|
|
181
225
|
if "确认意见" in title or "取消" in title or "摘要" in title:
|
|
182
226
|
continue
|
|
183
227
|
|
|
184
|
-
#
|
|
185
|
-
|
|
186
|
-
for item in allowed_list:
|
|
187
|
-
if title == item:
|
|
188
|
-
allowed = True
|
|
189
|
-
break
|
|
228
|
+
# 年报标题匹配:支持“2024年年度报告/2024年度报告/2024年报”等变体
|
|
229
|
+
is_annual_report = _is_annual_report_title(title, year_filter=year_filter)
|
|
190
230
|
|
|
191
231
|
# 检查招股书
|
|
192
|
-
for item in allowed_list_2
|
|
193
|
-
if item in title:
|
|
194
|
-
allowed = True
|
|
195
|
-
break
|
|
232
|
+
is_prospectus = any(item in title for item in allowed_list_2)
|
|
196
233
|
|
|
197
|
-
if
|
|
234
|
+
if is_annual_report or is_prospectus:
|
|
198
235
|
download = download_path + i["adjunctUrl"]
|
|
199
236
|
name = (
|
|
200
237
|
i["secCode"]
|
|
@@ -209,19 +246,19 @@ def Download(single_page, year_filter=None, save_path=None):
|
|
|
209
246
|
file_path = output_dir + name
|
|
210
247
|
|
|
211
248
|
# 显示下载进度
|
|
212
|
-
|
|
249
|
+
logger.info("↓ %s", name)
|
|
213
250
|
|
|
214
251
|
# 确保目录存在
|
|
215
252
|
os.makedirs(output_dir, exist_ok=True)
|
|
216
253
|
|
|
217
254
|
time.sleep(random.random() * 2)
|
|
218
255
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
256
|
+
r = requests.get(
|
|
257
|
+
download, headers={"User-Agent": random.choice(User_Agent)}, timeout=30
|
|
258
|
+
)
|
|
259
|
+
r.raise_for_status()
|
|
260
|
+
with open(file_path, "wb") as f:
|
|
261
|
+
f.write(r.content)
|
|
225
262
|
downloaded_count += 1
|
|
226
263
|
else:
|
|
227
264
|
continue
|
|
@@ -237,17 +274,18 @@ def query_prospectus(stock_code):
|
|
|
237
274
|
announcements_sse = sseStock(1, stock_code)
|
|
238
275
|
all_announcements.extend(announcements_sse)
|
|
239
276
|
except Exception as e:
|
|
240
|
-
|
|
277
|
+
logger.warning("沪市招股书查询失败: %s", e)
|
|
241
278
|
|
|
242
279
|
try:
|
|
243
280
|
announcements_szse = szseStock(1, stock_code)
|
|
244
281
|
all_announcements.extend(announcements_szse)
|
|
245
282
|
except Exception as e:
|
|
246
|
-
|
|
283
|
+
logger.warning("深市招股书查询失败: %s", e)
|
|
247
284
|
|
|
248
285
|
prospectus_keywords = ["招股书", "招股说明书", "招股意向书"]
|
|
249
286
|
filtered = [
|
|
250
|
-
a
|
|
287
|
+
a
|
|
288
|
+
for a in all_announcements
|
|
251
289
|
if any(kw in a.get("announcementTitle", "") for kw in prospectus_keywords)
|
|
252
290
|
]
|
|
253
291
|
|
|
@@ -288,21 +326,30 @@ def query_annual_reports(stock_code, year=None):
|
|
|
288
326
|
announcements_sse = sseAnnual(1, stock_code)
|
|
289
327
|
all_announcements.extend(announcements_sse)
|
|
290
328
|
except Exception as e:
|
|
291
|
-
|
|
329
|
+
logger.warning("沪市年报查询失败: %s", e)
|
|
292
330
|
|
|
293
331
|
# 查询深市
|
|
294
332
|
try:
|
|
295
333
|
announcements_szse = szseAnnual(1, stock_code)
|
|
296
334
|
all_announcements.extend(announcements_szse)
|
|
297
335
|
except Exception as e:
|
|
298
|
-
|
|
336
|
+
logger.warning("深市年报查询失败: %s", e)
|
|
299
337
|
|
|
300
338
|
# 按年份过滤
|
|
301
339
|
if year:
|
|
302
|
-
|
|
340
|
+
year_expr = re.escape(str(year))
|
|
341
|
+
year_patterns = [
|
|
342
|
+
rf"{year_expr}年年度报告",
|
|
343
|
+
rf"{year_expr}年度报告",
|
|
344
|
+
rf"{year_expr}年报",
|
|
345
|
+
]
|
|
303
346
|
filtered = []
|
|
304
347
|
for announcement in all_announcements:
|
|
305
|
-
|
|
348
|
+
title = re.sub(r"\s+", "", announcement.get("announcementTitle", ""))
|
|
349
|
+
# 这里故意使用宽松匹配作为“预筛选”以保留候选项。
|
|
350
|
+
# 真正的严格判定(fullmatch + 排除词)在 Download() 的
|
|
351
|
+
# _is_annual_report_title() 中执行,形成两层防线。
|
|
352
|
+
if any(re.search(pattern, title) for pattern in year_patterns):
|
|
306
353
|
filtered.append(announcement)
|
|
307
354
|
all_announcements = filtered
|
|
308
355
|
|
|
@@ -337,17 +384,22 @@ def download_annual_reports(stock_code, year=None, save_path=None):
|
|
|
337
384
|
|
|
338
385
|
|
|
339
386
|
def Run(page_number, stock):
|
|
387
|
+
annual_report = []
|
|
388
|
+
stock_report = []
|
|
389
|
+
annual_report_ = []
|
|
390
|
+
stock_report_ = []
|
|
391
|
+
|
|
340
392
|
try:
|
|
341
393
|
annual_report = szseAnnual(page_number, stock)
|
|
342
394
|
stock_report = szseStock(page_number, stock)
|
|
343
395
|
annual_report_ = sseAnnual(page_number, stock)
|
|
344
396
|
stock_report_ = sseStock(page_number, stock)
|
|
345
397
|
except Exception:
|
|
346
|
-
|
|
398
|
+
logger.warning("%s page error, retrying", page_number)
|
|
347
399
|
try:
|
|
348
400
|
annual_report = szseAnnual(page_number, stock)
|
|
349
401
|
except Exception:
|
|
350
|
-
|
|
402
|
+
logger.warning("%s page error", page_number)
|
|
351
403
|
Download(annual_report)
|
|
352
404
|
Download(stock_report)
|
|
353
405
|
Download(annual_report_)
|
|
@@ -355,9 +407,12 @@ def Run(page_number, stock):
|
|
|
355
407
|
|
|
356
408
|
|
|
357
409
|
if __name__ == "__main__":
|
|
410
|
+
logging.basicConfig(
|
|
411
|
+
level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s"
|
|
412
|
+
)
|
|
358
413
|
with open("company_id.txt") as file:
|
|
359
414
|
lines = file.readlines()
|
|
360
415
|
for line in lines:
|
|
361
416
|
stock = line
|
|
362
417
|
Run(1, line)
|
|
363
|
-
|
|
418
|
+
logger.info("%s done", line.strip())
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
const { spawn } = require("child_process");
|
|
8
8
|
const fs = require("fs");
|
|
9
9
|
const path = require("path");
|
|
10
|
+
const os = require("os");
|
|
10
11
|
|
|
11
12
|
const REQUIREMENTS_FILE = path.join(
|
|
12
13
|
__dirname,
|
|
@@ -15,6 +16,15 @@ const REQUIREMENTS_FILE = path.join(
|
|
|
15
16
|
"requirements.txt",
|
|
16
17
|
);
|
|
17
18
|
|
|
19
|
+
const VENV_DIR = path.join(os.homedir(), ".cninfo-mcp", "venv");
|
|
20
|
+
|
|
21
|
+
function getVenvPython() {
|
|
22
|
+
if (process.platform === "win32") {
|
|
23
|
+
return path.join(VENV_DIR, "Scripts", "python.exe");
|
|
24
|
+
}
|
|
25
|
+
return path.join(VENV_DIR, "bin", "python3");
|
|
26
|
+
}
|
|
27
|
+
|
|
18
28
|
async function findPython() {
|
|
19
29
|
const pythonCommands = [
|
|
20
30
|
"python3",
|
|
@@ -75,16 +85,31 @@ async function main() {
|
|
|
75
85
|
return;
|
|
76
86
|
}
|
|
77
87
|
|
|
88
|
+
// 创建虚拟环境(如果不存在)
|
|
89
|
+
const venvPython = getVenvPython();
|
|
90
|
+
if (!fs.existsSync(venvPython)) {
|
|
91
|
+
console.log("Creating Python virtual environment...");
|
|
92
|
+
try {
|
|
93
|
+
fs.mkdirSync(path.dirname(VENV_DIR), { recursive: true });
|
|
94
|
+
await spawnCommand(pythonCmd, ["-m", "venv", VENV_DIR]);
|
|
95
|
+
console.log("Virtual environment created");
|
|
96
|
+
} catch (venvError) {
|
|
97
|
+
console.warn(" Failed to create virtual environment during npm install");
|
|
98
|
+
console.warn(" It will be created automatically on first run");
|
|
99
|
+
return;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
78
103
|
try {
|
|
79
|
-
// 检查 mcp
|
|
80
|
-
await spawnCommand(
|
|
104
|
+
// 检查 mcp 是否已安装(用 venv 的 python)
|
|
105
|
+
await spawnCommand(venvPython, ["-c", "import mcp"]);
|
|
81
106
|
console.log("✅ Python dependencies already installed");
|
|
82
107
|
} catch (error) {
|
|
83
|
-
//
|
|
108
|
+
// 执行安装(用 venv 的 pip)
|
|
84
109
|
console.log("📦 Installing Python dependencies...");
|
|
85
110
|
try {
|
|
86
111
|
await spawnCommand(
|
|
87
|
-
|
|
112
|
+
venvPython,
|
|
88
113
|
["-m", "pip", "install", "-r", REQUIREMENTS_FILE],
|
|
89
114
|
{
|
|
90
115
|
stdio: "inherit",
|