@youhaozhao/cninfo-mcp 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,6 +9,7 @@
9
9
  在 Claude Desktop / Claude Code 配置文件中添加:
10
10
 
11
11
  **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
12
+
12
13
  **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
13
14
 
14
15
  ```json
@@ -48,3 +49,4 @@
48
49
  ## Credits
49
50
 
50
51
  爬虫逻辑基于 [gaodechen/cninfo_process](https://github.com/gaodechen/cninfo_process)。
52
+
package/bin/cninfo-mcp.js CHANGED
@@ -5,22 +5,45 @@
5
5
  * 自动检测 Python 并安装依赖,然后启动 Python MCP 服务器。
6
6
  */
7
7
 
8
- const { spawn } = require('child_process');
9
- const path = require('path');
10
- const fs = require('fs');
8
+ const { spawn } = require("child_process");
9
+ const path = require("path");
10
+ const fs = require("fs");
11
+ const os = require("os");
11
12
 
12
13
  // 配置路径
13
- const PYTHON_SCRIPT = path.join(__dirname, '..', 'python', 'mcp_server.py');
14
- const PYTHON_REQUIREMENTS = path.join(__dirname, '..', 'python', 'requirements.txt');
14
+ const PYTHON_SCRIPT = path.join(__dirname, "..", "python", "mcp_server.py");
15
+ const PYTHON_REQUIREMENTS = path.join(
16
+ __dirname,
17
+ "..",
18
+ "python",
19
+ "requirements.txt",
20
+ );
21
+
22
+ // 虚拟环境目录,放在用户目录下保证跨 npx 调用持久化
23
+ const VENV_DIR = path.join(os.homedir(), ".cninfo-mcp", "venv");
24
+
25
+ // 获取虚拟环境中的 Python 可执行文件路径
26
+ function getVenvPython() {
27
+ if (process.platform === "win32") {
28
+ return path.join(VENV_DIR, "Scripts", "python.exe");
29
+ }
30
+ return path.join(VENV_DIR, "bin", "python3");
31
+ }
15
32
 
16
- // 查找可用的 Python 可执行文件
33
+ // 查找可用的系统 Python 可执行文件(仅用于创建 venv)
17
34
  async function findPython() {
18
- const pythonCommands = ['python3', 'python', 'python3.12', 'python3.11', 'python3.10'];
35
+ const pythonCommands = [
36
+ "python3",
37
+ "python",
38
+ "python3.12",
39
+ "python3.11",
40
+ "python3.10",
41
+ ];
19
42
 
20
43
  for (const cmd of pythonCommands) {
21
44
  try {
22
- const result = await spawnAsync(cmd, ['--version']);
23
- if (result.stdout && result.stdout.includes('Python')) {
45
+ const result = await spawnAsync(cmd, ["--version"]);
46
+ if (result.stdout && result.stdout.includes("Python")) {
24
47
  return cmd;
25
48
  }
26
49
  } catch (error) {
@@ -29,38 +52,57 @@ async function findPython() {
29
52
  }
30
53
 
31
54
  throw new Error(
32
- 'Python not found. Please install Python 3.10+ from https://python.org\n' +
33
- 'After installation, restart your terminal and try again.'
55
+ "Python not found. Please install Python 3.10+ from https://python.org\n" +
56
+ "After installation, restart your terminal and try again.",
34
57
  );
35
58
  }
36
59
 
37
- // 检查并安装 Python 依赖
38
- async function ensureDependencies(pythonCmd) {
60
+ // 创建虚拟环境(如果不存在)
61
+ async function ensureVenv(systemPythonCmd) {
62
+ const venvPython = getVenvPython();
63
+ if (fs.existsSync(venvPython)) {
64
+ return venvPython;
65
+ }
66
+
67
+ console.error("Creating Python virtual environment...");
68
+ fs.mkdirSync(path.dirname(VENV_DIR), { recursive: true });
69
+ await spawnAsync(systemPythonCmd, ["-m", "venv", VENV_DIR], {
70
+ stdio: "inherit",
71
+ });
72
+ console.error("Virtual environment created\n");
73
+ return venvPython;
74
+ }
75
+
76
+ // 检查并安装 Python 依赖(使用 venv 中的 python)
77
+ async function ensureDependencies(venvPython) {
39
78
  const requirementsPath = PYTHON_REQUIREMENTS;
40
79
 
41
80
  if (!fs.existsSync(requirementsPath)) {
42
- console.error('Error: requirements.txt not found at', requirementsPath);
81
+ console.error("Error: requirements.txt not found at", requirementsPath);
43
82
  process.exit(1);
44
83
  }
45
84
 
46
85
  try {
47
86
  // 检查 mcp 包是否已安装
48
- const checkResult = await spawnAsync(pythonCmd, ['-c', 'import mcp']);
87
+ await spawnAsync(venvPython, ["-c", "import mcp"]);
49
88
  } catch (error) {
50
89
  // 未安装,执行安装
51
- console.error('Installing Python dependencies...');
52
- const installResult = await spawnAsync(pythonCmd, ['-m', 'pip', 'install', '-r', requirementsPath], {
53
- stdio: 'inherit'
54
- });
55
-
56
- if (installResult.code !== 0) {
57
- console.error('\n❌ Failed to install Python dependencies');
58
- console.error('Please run manually:');
59
- console.error(` ${pythonCmd} -m pip install -r ${requirementsPath}`);
90
+ console.error("Installing Python dependencies...");
91
+ try {
92
+ await spawnAsync(
93
+ venvPython,
94
+ ["-m", "pip", "install", "-r", requirementsPath],
95
+ {
96
+ stdio: "inherit",
97
+ },
98
+ );
99
+ console.error("Python dependencies installed successfully\n");
100
+ } catch (installError) {
101
+ console.error("\n❌ Failed to install Python dependencies");
102
+ console.error("Please run manually:");
103
+ console.error(` ${venvPython} -m pip install -r ${requirementsPath}`);
60
104
  process.exit(1);
61
105
  }
62
-
63
- console.error('✅ Python dependencies installed successfully\n');
64
106
  }
65
107
  }
66
108
 
@@ -68,28 +110,28 @@ async function ensureDependencies(pythonCmd) {
68
110
  function spawnAsync(command, args, options = {}) {
69
111
  return new Promise((resolve, reject) => {
70
112
  const child = spawn(command, args, {
71
- stdio: options.stdio || 'pipe',
72
- shell: process.platform === 'win32',
73
- ...options
113
+ stdio: options.stdio || "pipe",
114
+ shell: process.platform === "win32",
115
+ ...options,
74
116
  });
75
117
 
76
- let stdout = '';
77
- let stderr = '';
118
+ let stdout = "";
119
+ let stderr = "";
78
120
  let code = null;
79
121
 
80
122
  if (child.stdout) {
81
- child.stdout.on('data', (data) => {
123
+ child.stdout.on("data", (data) => {
82
124
  stdout += data.toString();
83
125
  });
84
126
  }
85
127
 
86
128
  if (child.stderr) {
87
- child.stderr.on('data', (data) => {
129
+ child.stderr.on("data", (data) => {
88
130
  stderr += data.toString();
89
131
  });
90
132
  }
91
133
 
92
- child.on('close', (exitCode) => {
134
+ child.on("close", (exitCode) => {
93
135
  code = exitCode;
94
136
  if (code === 0) {
95
137
  resolve({ stdout, stderr, code });
@@ -102,7 +144,7 @@ function spawnAsync(command, args, options = {}) {
102
144
  }
103
145
  });
104
146
 
105
- child.on('error', (error) => {
147
+ child.on("error", (error) => {
106
148
  reject(error);
107
149
  });
108
150
  });
@@ -112,36 +154,36 @@ async function main() {
112
154
  try {
113
155
  // 检查 Python 脚本是否存在
114
156
  if (!fs.existsSync(PYTHON_SCRIPT)) {
115
- console.error('Error: mcp_server.py not found at', PYTHON_SCRIPT);
157
+ console.error("Error: mcp_server.py not found at", PYTHON_SCRIPT);
116
158
  process.exit(1);
117
159
  }
118
160
 
119
- const pythonCmd = await findPython();
120
- await ensureDependencies(pythonCmd);
161
+ const systemPython = await findPython();
162
+ const venvPython = await ensureVenv(systemPython);
163
+ await ensureDependencies(venvPython);
121
164
 
122
165
  // 启动 MCP 服务器
123
- console.error('巨潮资讯 MCP 服务器已启动,等待连接...');
124
- const child = spawn(pythonCmd, [PYTHON_SCRIPT], {
125
- stdio: 'inherit',
126
- shell: process.platform === 'win32',
166
+ console.error("巨潮资讯 MCP 服务器已启动,等待连接...");
167
+ const child = spawn(venvPython, [PYTHON_SCRIPT], {
168
+ stdio: "inherit",
169
+ shell: process.platform === "win32",
127
170
  env: {
128
171
  ...process.env,
129
- PYTHONPATH: path.join(__dirname, '..', 'python')
130
- }
172
+ PYTHONPATH: path.join(__dirname, "..", "python"),
173
+ },
131
174
  });
132
175
 
133
176
  // 处理子进程退出
134
- child.on('error', (error) => {
135
- console.error('Failed to start MCP Server:', error.message);
177
+ child.on("error", (error) => {
178
+ console.error("Failed to start MCP Server:", error.message);
136
179
  process.exit(1);
137
180
  });
138
181
 
139
- child.on('exit', (code) => {
182
+ child.on("exit", (code) => {
140
183
  process.exit(code || 0);
141
184
  });
142
-
143
185
  } catch (error) {
144
- console.error('Error:', error.message);
186
+ console.error("Error:", error.message);
145
187
  process.exit(1);
146
188
  }
147
189
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@youhaozhao/cninfo-mcp",
3
- "version": "1.0.5",
3
+ "version": "1.0.7",
4
4
  "description": "MCP Server for querying and downloading Chinese listed companies' annual reports from CNINFO (巨潮资讯网)",
5
5
  "keywords": [
6
6
  "mcp",
package/python/mcp_server.py CHANGED
@@ -96,7 +96,9 @@ def query_annual_reports_tool(stock_code: str, year: Optional[int] = None) -> di
96
96
 
97
97
 
98
98
  @mcp.tool()
99
- def download_annual_reports_tool(stock_code: str, year: Optional[int] = None, save_path: Optional[str] = None) -> dict:
99
+ def download_annual_reports_tool(
100
+ stock_code: str, year: Optional[int] = None, save_path: Optional[str] = None
101
+ ) -> dict:
100
102
  """
101
103
  Download annual reports for a Chinese listed company
102
104
 
@@ -170,7 +172,9 @@ def query_prospectus_tool(stock_code: str) -> dict:
170
172
  "announcementTime": r.get("announcementTime", ""),
171
173
  "secCode": r.get("secCode", ""),
172
174
  "secName": r.get("secName", ""),
173
- "adjunctUrl": base_url + r.get("adjunctUrl", "") if r.get("adjunctUrl") else "",
175
+ "adjunctUrl": base_url + r.get("adjunctUrl", "")
176
+ if r.get("adjunctUrl")
177
+ else "",
174
178
  }
175
179
  for r in reports
176
180
  ]
package/python/spider.py CHANGED
@@ -3,9 +3,12 @@
3
3
  """
4
4
 
5
5
  import datetime
6
+ import logging
6
7
  import os
7
8
  import random
9
+ import re
8
10
  import time
11
+ from typing import Optional, Union
9
12
 
10
13
  import requests
11
14
 
@@ -13,6 +16,7 @@ download_path = "https://static.cninfo.com.cn/"
13
16
  # 使用脚本所在目录的相对路径
14
17
  _saving_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pdf")
15
18
  saving_path = _saving_path + "/"
19
+ logger = logging.getLogger(__name__)
16
20
 
17
21
  User_Agent = [
18
22
  "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
@@ -25,7 +29,7 @@ User_Agent = [
25
29
  ]
26
30
 
27
31
 
28
- headers = {
32
+ BASE_HEADERS = {
29
33
  "Accept": "application/json, text/javascript, */*; q=0.01",
30
34
  "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
31
35
  "Accept-Encoding": "gzip, deflate",
@@ -37,10 +41,64 @@ headers = {
37
41
  }
38
42
 
39
43
 
44
+ def _build_headers() -> dict:
45
+ """构造请求头,避免在并发场景下修改全局字典。"""
46
+ headers = BASE_HEADERS.copy()
47
+ headers["User-Agent"] = random.choice(User_Agent)
48
+ return headers
49
+
50
+
51
+ def _date_range(start_date: str) -> str:
52
+ """构造查询时间区间,结束日期取当天,避免硬编码过期。"""
53
+ datetime.datetime.strptime(start_date, "%Y-%m-%d")
54
+ today = datetime.date.today().strftime("%Y-%m-%d")
55
+ return f"{start_date}~{today}"
56
+
57
+
58
+ def _is_annual_report_title(
59
+ title: str, year_filter: Optional[Union[int, str]] = None
60
+ ) -> bool:
61
+ """
62
+ 判断标题是否为“年度报告正文”。
63
+
64
+ 支持常见变体:
65
+ - 2024年年度报告
66
+ - 2024年度报告
67
+ - 2024年报
68
+ """
69
+ compact_title = re.sub(r"\s+", "", title or "")
70
+
71
+ # 非正文公告关键词过滤
72
+ exclude_keywords = [
73
+ "摘要",
74
+ "确认意见",
75
+ "取消",
76
+ "更正",
77
+ "补充",
78
+ "说明",
79
+ "提示",
80
+ "致歉",
81
+ "修订",
82
+ "英文",
83
+ ]
84
+ if any(keyword in compact_title for keyword in exclude_keywords):
85
+ return False
86
+
87
+ year_expr = re.escape(str(year_filter)) if year_filter is not None else r"\d{4}"
88
+ suffix_expr = r"(?:[((]更新后[))])?"
89
+ patterns = [
90
+ rf".*{year_expr}年年度报告{suffix_expr}",
91
+ rf".*{year_expr}年度报告{suffix_expr}",
92
+ ]
93
+ if year_filter is not None:
94
+ patterns.append(rf".*{year_expr}年报{suffix_expr}")
95
+
96
+ return any(re.fullmatch(pattern, compact_title) for pattern in patterns)
97
+
98
+
40
99
  # 深市 年度报告
41
100
  def szseAnnual(page, stock):
42
101
  query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
43
- headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
44
102
  query = {
45
103
  "pageNum": page, # 页码
46
104
  "pageSize": 30,
@@ -52,10 +110,12 @@ def szseAnnual(page, stock):
52
110
  "plate": "sz",
53
111
  "category": "category_ndbg_szsh", # 年度报告
54
112
  "trade": "",
55
- "seDate": "2020-01-01~2026-02-15", # 时间区间
113
+ "seDate": _date_range("2020-01-01"), # 时间区间
56
114
  }
57
115
 
58
- namelist = requests.post(query_path, headers=headers, data=query)
116
+ namelist = requests.post(
117
+ query_path, headers=_build_headers(), data=query, timeout=30
118
+ )
59
119
  result = namelist.json()
60
120
  if result and "announcements" in result and result["announcements"]:
61
121
  return result["announcements"]
@@ -65,7 +125,6 @@ def szseAnnual(page, stock):
65
125
  # 沪市 年度报告
66
126
  def sseAnnual(page, stock):
67
127
  query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
68
- headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
69
128
  query = {
70
129
  "pageNum": page, # 页码
71
130
  "pageSize": 30,
@@ -77,10 +136,12 @@ def sseAnnual(page, stock):
77
136
  "plate": "sh",
78
137
  "category": "category_ndbg_szsh", # 年度报告
79
138
  "trade": "",
80
- "seDate": "2020-01-01~2026-02-15", # 时间区间
139
+ "seDate": _date_range("2020-01-01"), # 时间区间
81
140
  }
82
141
 
83
- namelist = requests.post(query_path, headers=headers, data=query)
142
+ namelist = requests.post(
143
+ query_path, headers=_build_headers(), data=query, timeout=30
144
+ )
84
145
  result = namelist.json()
85
146
  if result and "announcements" in result and result["announcements"]:
86
147
  return result["announcements"]
@@ -90,7 +151,6 @@ def sseAnnual(page, stock):
90
151
  # 深市 招股
91
152
  def szseStock(page, stock):
92
153
  query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
93
- headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
94
154
  query = {
95
155
  "pageNum": page, # 页码
96
156
  "pageSize": 30,
@@ -102,10 +162,12 @@ def szseStock(page, stock):
102
162
  "plate": "sz",
103
163
  "category": "",
104
164
  "trade": "",
105
- "seDate": "2015-01-01~2026-02-15", # 时间区间
165
+ "seDate": _date_range("2015-01-01"), # 时间区间
106
166
  }
107
167
 
108
- namelist = requests.post(query_path, headers=headers, data=query)
168
+ namelist = requests.post(
169
+ query_path, headers=_build_headers(), data=query, timeout=30
170
+ )
109
171
  result = namelist.json()
110
172
  if result and "announcements" in result and result["announcements"]:
111
173
  return result["announcements"]
@@ -115,7 +177,6 @@ def szseStock(page, stock):
115
177
  # 沪市 招股
116
178
  def sseStock(page, stock):
117
179
  query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
118
- headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
119
180
  query = {
120
181
  "pageNum": page, # 页码
121
182
  "pageSize": 30,
@@ -127,44 +188,27 @@ def sseStock(page, stock):
127
188
  "plate": "sh",
128
189
  "category": "",
129
190
  "trade": "",
130
- "seDate": "2015-01-01~2026-02-15", # 时间区间
191
+ "seDate": _date_range("2015-01-01"), # 时间区间
131
192
  }
132
193
 
133
- namelist = requests.post(query_path, headers=headers, data=query)
194
+ namelist = requests.post(
195
+ query_path, headers=_build_headers(), data=query, timeout=30
196
+ )
134
197
  result = namelist.json()
135
198
  if result and "announcements" in result and result["announcements"]:
136
199
  return result["announcements"]
137
200
  return []
138
201
 
139
202
 
140
- def Download(single_page, year_filter=None, save_path=None):
203
+ def Download(
204
+ single_page,
205
+ year_filter: Optional[Union[int, str]] = None,
206
+ save_path: Optional[str] = None,
207
+ ):
141
208
  """下载公告列表中的 PDF 文件"""
142
209
  if single_page is None:
143
210
  return
144
211
 
145
- headers = {
146
- "Accept": "application/json, text/javascript, */*; q=0.01",
147
- "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
148
- "Accept-Encoding": "gzip, deflate",
149
- "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-HK;q=0.6,zh-TW;q=0.5",
150
- "Host": "www.cninfo.com.cn",
151
- "Origin": "http://www.cninfo.com.cn",
152
- }
153
-
154
- # 按年份筛选允许下载的标题
155
- allowed_list = []
156
- if year_filter:
157
- allowed_list = [
158
- f"{year_filter}年年度报告(更新后)",
159
- f"{year_filter}年年度报告",
160
- ]
161
- else:
162
- # 默认下载 2015 年至当前年份
163
- current_year = datetime.datetime.now().year
164
- for year in range(2015, current_year + 1):
165
- allowed_list.append(f"{year}年年度报告(更新后)")
166
- allowed_list.append(f"{year}年年度报告")
167
-
168
212
  allowed_list_2 = [
169
213
  "招股书",
170
214
  "招股说明书",
@@ -181,20 +225,13 @@ def Download(single_page, year_filter=None, save_path=None):
181
225
  if "确认意见" in title or "取消" in title or "摘要" in title:
182
226
  continue
183
227
 
184
- # 检查标题是否精确匹配(避免"摘要"等变体被误下载)
185
- allowed = False
186
- for item in allowed_list:
187
- if title == item:
188
- allowed = True
189
- break
228
+ # 年报标题匹配:支持“2024年年度报告/2024年度报告/2024年报”等变体
229
+ is_annual_report = _is_annual_report_title(title, year_filter=year_filter)
190
230
 
191
231
  # 检查招股书
192
- for item in allowed_list_2:
193
- if item in title:
194
- allowed = True
195
- break
232
+ is_prospectus = any(item in title for item in allowed_list_2)
196
233
 
197
- if allowed:
234
+ if is_annual_report or is_prospectus:
198
235
  download = download_path + i["adjunctUrl"]
199
236
  name = (
200
237
  i["secCode"]
@@ -209,19 +246,19 @@ def Download(single_page, year_filter=None, save_path=None):
209
246
  file_path = output_dir + name
210
247
 
211
248
  # 显示下载进度
212
- print(f" {name}")
249
+ logger.info("↓ %s", name)
213
250
 
214
251
  # 确保目录存在
215
252
  os.makedirs(output_dir, exist_ok=True)
216
253
 
217
254
  time.sleep(random.random() * 2)
218
255
 
219
- headers["User-Agent"] = random.choice(User_Agent)
220
- r = requests.get(download)
221
-
222
- f = open(file_path, "wb")
223
- f.write(r.content)
224
- f.close()
256
+ r = requests.get(
257
+ download, headers={"User-Agent": random.choice(User_Agent)}, timeout=30
258
+ )
259
+ r.raise_for_status()
260
+ with open(file_path, "wb") as f:
261
+ f.write(r.content)
225
262
  downloaded_count += 1
226
263
  else:
227
264
  continue
@@ -237,17 +274,18 @@ def query_prospectus(stock_code):
237
274
  announcements_sse = sseStock(1, stock_code)
238
275
  all_announcements.extend(announcements_sse)
239
276
  except Exception as e:
240
- print(f"沪市招股书查询失败: {e}")
277
+ logger.warning("沪市招股书查询失败: %s", e)
241
278
 
242
279
  try:
243
280
  announcements_szse = szseStock(1, stock_code)
244
281
  all_announcements.extend(announcements_szse)
245
282
  except Exception as e:
246
- print(f"深市招股书查询失败: {e}")
283
+ logger.warning("深市招股书查询失败: %s", e)
247
284
 
248
285
  prospectus_keywords = ["招股书", "招股说明书", "招股意向书"]
249
286
  filtered = [
250
- a for a in all_announcements
287
+ a
288
+ for a in all_announcements
251
289
  if any(kw in a.get("announcementTitle", "") for kw in prospectus_keywords)
252
290
  ]
253
291
 
@@ -288,21 +326,30 @@ def query_annual_reports(stock_code, year=None):
288
326
  announcements_sse = sseAnnual(1, stock_code)
289
327
  all_announcements.extend(announcements_sse)
290
328
  except Exception as e:
291
- print(f"沪市年报查询失败: {e}")
329
+ logger.warning("沪市年报查询失败: %s", e)
292
330
 
293
331
  # 查询深市
294
332
  try:
295
333
  announcements_szse = szseAnnual(1, stock_code)
296
334
  all_announcements.extend(announcements_szse)
297
335
  except Exception as e:
298
- print(f"深市年报查询失败: {e}")
336
+ logger.warning("深市年报查询失败: %s", e)
299
337
 
300
338
  # 按年份过滤
301
339
  if year:
302
- year_str = str(year)
340
+ year_expr = re.escape(str(year))
341
+ year_patterns = [
342
+ rf"{year_expr}年年度报告",
343
+ rf"{year_expr}年度报告",
344
+ rf"{year_expr}年报",
345
+ ]
303
346
  filtered = []
304
347
  for announcement in all_announcements:
305
- if year_str in announcement.get("announcementTitle", ""):
348
+ title = re.sub(r"\s+", "", announcement.get("announcementTitle", ""))
349
+ # 这里故意使用宽松匹配作为“预筛选”以保留候选项。
350
+ # 真正的严格判定(fullmatch + 排除词)在 Download() 的
351
+ # _is_annual_report_title() 中执行,形成两层防线。
352
+ if any(re.search(pattern, title) for pattern in year_patterns):
306
353
  filtered.append(announcement)
307
354
  all_announcements = filtered
308
355
 
@@ -337,17 +384,22 @@ def download_annual_reports(stock_code, year=None, save_path=None):
337
384
 
338
385
 
339
386
  def Run(page_number, stock):
387
+ annual_report = []
388
+ stock_report = []
389
+ annual_report_ = []
390
+ stock_report_ = []
391
+
340
392
  try:
341
393
  annual_report = szseAnnual(page_number, stock)
342
394
  stock_report = szseStock(page_number, stock)
343
395
  annual_report_ = sseAnnual(page_number, stock)
344
396
  stock_report_ = sseStock(page_number, stock)
345
397
  except Exception:
346
- print(page_number, "page error, retrying")
398
+ logger.warning("%s page error, retrying", page_number)
347
399
  try:
348
400
  annual_report = szseAnnual(page_number, stock)
349
401
  except Exception:
350
- print(page_number, "page error")
402
+ logger.warning("%s page error", page_number)
351
403
  Download(annual_report)
352
404
  Download(stock_report)
353
405
  Download(annual_report_)
@@ -355,9 +407,12 @@ def Run(page_number, stock):
355
407
 
356
408
 
357
409
  if __name__ == "__main__":
410
+ logging.basicConfig(
411
+ level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s"
412
+ )
358
413
  with open("company_id.txt") as file:
359
414
  lines = file.readlines()
360
415
  for line in lines:
361
416
  stock = line
362
417
  Run(1, line)
363
- print(line, "done")
418
+ logger.info("%s done", line.strip())
@@ -7,6 +7,7 @@
7
7
  const { spawn } = require("child_process");
8
8
  const fs = require("fs");
9
9
  const path = require("path");
10
+ const os = require("os");
10
11
 
11
12
  const REQUIREMENTS_FILE = path.join(
12
13
  __dirname,
@@ -15,6 +16,15 @@ const REQUIREMENTS_FILE = path.join(
15
16
  "requirements.txt",
16
17
  );
17
18
 
19
+ const VENV_DIR = path.join(os.homedir(), ".cninfo-mcp", "venv");
20
+
21
+ function getVenvPython() {
22
+ if (process.platform === "win32") {
23
+ return path.join(VENV_DIR, "Scripts", "python.exe");
24
+ }
25
+ return path.join(VENV_DIR, "bin", "python3");
26
+ }
27
+
18
28
  async function findPython() {
19
29
  const pythonCommands = [
20
30
  "python3",
@@ -75,16 +85,31 @@ async function main() {
75
85
  return;
76
86
  }
77
87
 
88
+ // 创建虚拟环境(如果不存在)
89
+ const venvPython = getVenvPython();
90
+ if (!fs.existsSync(venvPython)) {
91
+ console.log("Creating Python virtual environment...");
92
+ try {
93
+ fs.mkdirSync(path.dirname(VENV_DIR), { recursive: true });
94
+ await spawnCommand(pythonCmd, ["-m", "venv", VENV_DIR]);
95
+ console.log("Virtual environment created");
96
+ } catch (venvError) {
97
+ console.warn(" Failed to create virtual environment during npm install");
98
+ console.warn(" It will be created automatically on first run");
99
+ return;
100
+ }
101
+ }
102
+
78
103
  try {
79
- // 检查 mcp 是否已安装
80
- await spawnCommand(pythonCmd, ["-c", "import mcp"]);
104
+ // 检查 mcp 是否已安装(用 venv 的 python)
105
+ await spawnCommand(venvPython, ["-c", "import mcp"]);
81
106
  console.log("✅ Python dependencies already installed");
82
107
  } catch (error) {
83
- // 执行安装
108
+ // 执行安装(用 venv 的 pip)
84
109
  console.log("📦 Installing Python dependencies...");
85
110
  try {
86
111
  await spawnCommand(
87
- pythonCmd,
112
+ venvPython,
88
113
  ["-m", "pip", "install", "-r", REQUIREMENTS_FILE],
89
114
  {
90
115
  stdio: "inherit",