@youhaozhao/cninfo-mcp 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/package.json +5 -3
- package/python/spider.py +126 -32
- package/python/bridge.py +0 -161
package/README.md
CHANGED
|
@@ -35,6 +35,7 @@
|
|
|
35
35
|
```
|
|
36
36
|
查询 000888 的 2024 年报
|
|
37
37
|
下载 688777 的年报
|
|
38
|
+
查询 920185 的年报 # 北交所,新旧代码(如 835185)均可
|
|
38
39
|
```
|
|
39
40
|
|
|
40
41
|
## 系统要求
|
|
@@ -44,7 +45,7 @@
|
|
|
44
45
|
|
|
45
46
|
## 数据来源
|
|
46
47
|
|
|
47
|
-
[巨潮资讯网](https://www.cninfo.com.cn) —
|
|
48
|
+
[巨潮资讯网](https://www.cninfo.com.cn) — 支持沪深两市(主板、创业板、科创板)及北京证券交易所(北交所)
|
|
48
49
|
|
|
49
50
|
## Credits
|
|
50
51
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@youhaozhao/cninfo-mcp",
|
|
3
|
-
"version": "1.1.0",
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "MCP Server for querying and downloading Chinese listed companies' annual reports from CNINFO (巨潮资讯网)",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mcp",
|
|
@@ -45,8 +45,10 @@
|
|
|
45
45
|
],
|
|
46
46
|
"files": [
|
|
47
47
|
"bin/",
|
|
48
|
-
"python/",
|
|
49
|
-
"
|
|
48
|
+
"python/mcp_server.py",
|
|
49
|
+
"python/spider.py",
|
|
50
|
+
"python/requirements.txt",
|
|
51
|
+
"scripts/install-python-deps.js",
|
|
50
52
|
"README.md",
|
|
51
53
|
"LICENSE"
|
|
52
54
|
]
|
package/python/spider.py
CHANGED
|
@@ -24,6 +24,13 @@ EARLIEST_DATE = "2001-01-01"
|
|
|
24
24
|
PAGE_SIZE = 30
|
|
25
25
|
# 翻页安全上限,防止异常情况下无限循环
|
|
26
26
|
MAX_PAGES = 100
|
|
27
|
+
# 巨潮搜索联想接口:把股票代码/简称解析为 orgId(北交所查询必需)
|
|
28
|
+
TOP_SEARCH_URL = "http://www.cninfo.com.cn/new/information/topSearch/query"
|
|
29
|
+
# 公告查询接口
|
|
30
|
+
QUERY_URL = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
31
|
+
# 瞬时失败(网络抖动/限流)的重试次数与退避基数(秒)
|
|
32
|
+
MAX_RETRIES = 3
|
|
33
|
+
RETRY_BACKOFF = 1.0
|
|
27
34
|
|
|
28
35
|
User_Agent = [
|
|
29
36
|
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
|
|
@@ -55,6 +62,85 @@ def _build_headers() -> dict:
|
|
|
55
62
|
return headers
|
|
56
63
|
|
|
57
64
|
|
|
65
|
+
def _post_json(url: str, data: dict) -> dict:
|
|
66
|
+
"""POST 请求并解析 JSON,仅对可重试的瞬时失败按指数退避重试。
|
|
67
|
+
|
|
68
|
+
巨潮接口在批量/高频访问下会偶发超时或限流,导致本可成功的查询失败。
|
|
69
|
+
可重试:网络异常(超时/连接错误)、5xx、429,以及空/截断响应导致的
|
|
70
|
+
JSON 解析失败(requests 会抛 JSONDecodeError,实测多为限流时返回空体)。
|
|
71
|
+
不可重试:4xx(除 429)等客户端错误,快速失败,避免无谓的退避等待。
|
|
72
|
+
重试用尽后抛出最后一次异常,交由调用方的 try/except 记录并降级。
|
|
73
|
+
"""
|
|
74
|
+
last_exc = None
|
|
75
|
+
for attempt in range(MAX_RETRIES):
|
|
76
|
+
try:
|
|
77
|
+
resp = requests.post(
|
|
78
|
+
url, headers=_build_headers(), data=data, timeout=30
|
|
79
|
+
)
|
|
80
|
+
resp.raise_for_status()
|
|
81
|
+
return resp.json()
|
|
82
|
+
except requests.exceptions.HTTPError as e:
|
|
83
|
+
status = e.response.status_code if e.response is not None else None
|
|
84
|
+
# 4xx(429 除外)不会自愈,立即失败,不浪费退避等待
|
|
85
|
+
if status is not None and status != 429 and 400 <= status < 500:
|
|
86
|
+
raise
|
|
87
|
+
last_exc = e
|
|
88
|
+
except requests.exceptions.RequestException as e:
|
|
89
|
+
# 网络异常 + JSONDecodeError(空/截断响应,多为瞬时限流)
|
|
90
|
+
last_exc = e
|
|
91
|
+
if attempt < MAX_RETRIES - 1:
|
|
92
|
+
time.sleep(RETRY_BACKOFF * (2**attempt) + random.random())
|
|
93
|
+
if last_exc is not None:
|
|
94
|
+
raise last_exc
|
|
95
|
+
raise RuntimeError(f"_post_json 未执行任何请求(MAX_RETRIES={MAX_RETRIES})")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _query_announcements(query: dict) -> list:
|
|
99
|
+
"""调用公告查询接口并返回 announcements 列表(带重试)。"""
|
|
100
|
+
result = _post_json(QUERY_URL, query)
|
|
101
|
+
if result and "announcements" in result and result["announcements"]:
|
|
102
|
+
return result["announcements"]
|
|
103
|
+
return []
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _is_bse_code(stock_code) -> bool:
|
|
107
|
+
"""判断是否为北交所代码。
|
|
108
|
+
|
|
109
|
+
北交所代码段:4xxxxx / 8xxxxx(原新三板平移),以及 92xxxx 标准段
|
|
110
|
+
(920,2024-04 起启用,预留 920-929)。这里特意只匹配 92 而非整个 9
|
|
111
|
+
开头,以排除沪市 B 股 900xxx,避免为其多发一次无效的北交所查询。
|
|
112
|
+
"""
|
|
113
|
+
digits = re.sub(r"\D", "", str(stock_code or ""))
|
|
114
|
+
return digits[:1] in ("4", "8") or digits[:2] == "92"
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _resolve_org_id(stock_code) -> Optional[tuple]:
|
|
118
|
+
"""通过巨潮搜索联想接口把股票代码解析为 (code, orgId)。
|
|
119
|
+
|
|
120
|
+
北交所的 hisAnnouncement 接口不接受 searchkey 或裸代码,必须以
|
|
121
|
+
stock="代码,orgId" 的形式查询,因此需要先解析 orgId。
|
|
122
|
+
优先返回 code 完全等于输入的条目;找不到精确匹配则取第一条
|
|
123
|
+
(同一公司新旧代码共用同一 orgId)。无结果返回 None。
|
|
124
|
+
"""
|
|
125
|
+
try:
|
|
126
|
+
hits = _post_json(TOP_SEARCH_URL, {"keyWord": stock_code, "maxNum": 10})
|
|
127
|
+
except Exception as e:
|
|
128
|
+
logger.warning("orgId 解析失败(%s): %s", stock_code, e)
|
|
129
|
+
return None
|
|
130
|
+
|
|
131
|
+
if not isinstance(hits, list) or not hits:
|
|
132
|
+
return None
|
|
133
|
+
|
|
134
|
+
target = re.sub(r"\D", "", str(stock_code or ""))
|
|
135
|
+
for it in hits:
|
|
136
|
+
if str(it.get("code")) == target and it.get("orgId"):
|
|
137
|
+
return str(it.get("code")), str(it.get("orgId"))
|
|
138
|
+
first = hits[0]
|
|
139
|
+
if first.get("orgId"):
|
|
140
|
+
return str(first.get("code")), str(first.get("orgId"))
|
|
141
|
+
return None
|
|
142
|
+
|
|
143
|
+
|
|
58
144
|
def _date_range(start_date: str) -> str:
|
|
59
145
|
"""构造查询时间区间,结束日期取当天,避免硬编码过期。"""
|
|
60
146
|
datetime.datetime.strptime(start_date, "%Y-%m-%d")
|
|
@@ -125,7 +211,6 @@ def _is_annual_report_title(
|
|
|
125
211
|
|
|
126
212
|
# 深市 年度报告
|
|
127
213
|
def szseAnnual(page, stock):
|
|
128
|
-
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
129
214
|
query = {
|
|
130
215
|
"pageNum": page, # 页码
|
|
131
216
|
"pageSize": PAGE_SIZE,
|
|
@@ -140,18 +225,11 @@ def szseAnnual(page, stock):
|
|
|
140
225
|
"seDate": _date_range(EARLIEST_DATE), # 时间区间
|
|
141
226
|
}
|
|
142
227
|
|
|
143
|
-
namelist = requests.post(
|
144
|
-
query_path, headers=_build_headers(), data=query, timeout=30
|
|
145
|
-
)
|
|
146
|
-
result = namelist.json()
|
|
147
|
-
if result and "announcements" in result and result["announcements"]:
|
|
148
|
-
return result["announcements"]
|
|
149
|
-
return []
|
|
228
|
+
return _query_announcements(query)
|
|
150
229
|
|
|
151
230
|
|
|
152
231
|
# 沪市 年度报告
|
|
153
232
|
def sseAnnual(page, stock):
|
|
154
|
-
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
155
233
|
query = {
|
|
156
234
|
"pageNum": page, # 页码
|
|
157
235
|
"pageSize": PAGE_SIZE,
|
|
@@ -166,18 +244,35 @@ def sseAnnual(page, stock):
|
|
|
166
244
|
"seDate": _date_range(EARLIEST_DATE), # 时间区间
|
|
167
245
|
}
|
|
168
246
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
247
|
+
return _query_announcements(query)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# 北交所 年度报告
|
|
251
|
+
def bseAnnual(page, stock):
|
|
252
|
+
"""北交所年报查询。
|
|
253
|
+
|
|
254
|
+
stock 形如 "代码,orgId",由 _resolve_org_id 解析得到。北交所必须
|
|
255
|
+
通过 plate=bj + stock="代码,orgId" 查询,searchkey/裸代码均返回空。
|
|
256
|
+
"""
|
|
257
|
+
query = {
|
|
258
|
+
"pageNum": page, # 页码
|
|
259
|
+
"pageSize": PAGE_SIZE,
|
|
260
|
+
"tabName": "fulltext",
|
|
261
|
+
"column": "bj", # 北交所
|
|
262
|
+
"stock": stock, # 必须为 "代码,orgId"
|
|
263
|
+
"searchkey": "",
|
|
264
|
+
"secid": "",
|
|
265
|
+
"plate": "bj",
|
|
266
|
+
"category": "category_ndbg_szsh", # 年度报告
|
|
267
|
+
"trade": "",
|
|
268
|
+
"seDate": _date_range(EARLIEST_DATE), # 时间区间
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
return _query_announcements(query)
|
|
176
272
|
|
|
177
273
|
|
|
178
274
|
# 深市 招股
|
|
179
275
|
def szseStock(page, stock):
|
|
180
|
-
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
181
276
|
query = {
|
|
182
277
|
"pageNum": page, # 页码
|
|
183
278
|
"pageSize": PAGE_SIZE,
|
|
@@ -192,18 +287,11 @@ def szseStock(page, stock):
|
|
|
192
287
|
"seDate": _date_range(EARLIEST_DATE), # 时间区间
|
|
193
288
|
}
|
|
194
289
|
|
|
195
|
-
namelist = requests.post(
|
196
|
-
query_path, headers=_build_headers(), data=query, timeout=30
|
|
197
|
-
)
|
|
198
|
-
result = namelist.json()
|
|
199
|
-
if result and "announcements" in result and result["announcements"]:
|
|
200
|
-
return result["announcements"]
|
|
201
|
-
return []
|
|
290
|
+
return _query_announcements(query)
|
|
202
291
|
|
|
203
292
|
|
|
204
293
|
# 沪市 招股
|
|
205
294
|
def sseStock(page, stock):
|
|
206
|
-
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
207
295
|
query = {
|
|
208
296
|
"pageNum": page, # 页码
|
|
209
297
|
"pageSize": PAGE_SIZE,
|
|
@@ -218,13 +306,7 @@ def sseStock(page, stock):
|
|
|
218
306
|
"seDate": _date_range(EARLIEST_DATE), # 时间区间
|
|
219
307
|
}
|
|
220
308
|
|
|
221
|
-
namelist = requests.post(
|
222
|
-
query_path, headers=_build_headers(), data=query, timeout=30
|
|
223
|
-
)
|
|
224
|
-
result = namelist.json()
|
|
225
|
-
if result and "announcements" in result and result["announcements"]:
|
|
226
|
-
return result["announcements"]
|
|
227
|
-
return []
|
|
309
|
+
return _query_announcements(query)
|
|
228
310
|
|
|
229
311
|
|
|
230
312
|
def Download(
|
|
@@ -362,6 +444,18 @@ def query_annual_reports(stock_code, year=None):
|
|
|
362
444
|
except Exception as e:
|
|
363
445
|
logger.warning("深市年报查询失败: %s", e)
|
|
364
446
|
|
|
447
|
+
# 查询北交所(代码以 4/8/9 开头)。北交所接口必须用 orgId,
|
|
448
|
+
# 故先解析 orgId 再以 stock="代码,orgId" 翻页查询。
|
|
449
|
+
if _is_bse_code(stock_code):
|
|
450
|
+
try:
|
|
451
|
+
resolved = _resolve_org_id(stock_code)
|
|
452
|
+
if resolved:
|
|
453
|
+
code, org_id = resolved
|
|
454
|
+
announcements_bse = _paginate(bseAnnual, f"{code},{org_id}")
|
|
455
|
+
all_announcements.extend(announcements_bse)
|
|
456
|
+
except Exception as e:
|
|
457
|
+
logger.warning("北交所年报查询失败: %s", e)
|
|
458
|
+
|
|
365
459
|
# 按年份过滤
|
|
366
460
|
if year:
|
|
367
461
|
year_expr = re.escape(str(year))
|
package/python/bridge.py
DELETED
|
@@ -1,161 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""CLI bridge that exposes cninfo spider functions as JSON in/out for Node.js."""
|
|
3
|
-
|
|
4
|
-
import json
|
|
5
|
-
import os
|
|
6
|
-
import sys
|
|
7
|
-
import traceback
|
|
8
|
-
from typing import Any, Optional
|
|
9
|
-
|
|
10
|
-
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
11
|
-
|
|
12
|
-
from spider import ( # noqa: E402
|
|
13
|
-
download_annual_reports,
|
|
14
|
-
download_prospectus,
|
|
15
|
-
query_annual_reports,
|
|
16
|
-
query_prospectus,
|
|
17
|
-
saving_path,
|
|
18
|
-
)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
BASE_URL = "https://static.cninfo.com.cn/"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def _format_reports(reports: list[dict]) -> list[dict]:
|
|
25
|
-
formatted = []
|
|
26
|
-
for report in reports:
|
|
27
|
-
adj = report.get("adjunctUrl", "")
|
|
28
|
-
formatted.append(
|
|
29
|
-
{
|
|
30
|
-
"announcementTitle": report.get("announcementTitle", ""),
|
|
31
|
-
"announcementTime": report.get("announcementTime", ""),
|
|
32
|
-
"secCode": report.get("secCode", ""),
|
|
33
|
-
"secName": report.get("secName", ""),
|
|
34
|
-
"adjunctUrl": BASE_URL + adj if adj else "",
|
|
35
|
-
}
|
|
36
|
-
)
|
|
37
|
-
return formatted
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def _require_stock_code(payload: dict) -> str:
|
|
41
|
-
stock_code = (payload.get("stock_code") or "").strip()
|
|
42
|
-
if not stock_code:
|
|
43
|
-
raise ValueError("stock_code is required")
|
|
44
|
-
return stock_code
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def _optional_year(payload: dict) -> Optional[int]:
|
|
48
|
-
year = payload.get("year")
|
|
49
|
-
if year is None:
|
|
50
|
-
return None
|
|
51
|
-
return int(year)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def _resolve_save_path(payload: dict) -> str:
|
|
55
|
-
save_path = payload.get("save_path")
|
|
56
|
-
return save_path if save_path else saving_path
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def action_query_annual_reports(payload: dict) -> dict:
|
|
60
|
-
stock_code = _require_stock_code(payload)
|
|
61
|
-
year = _optional_year(payload)
|
|
62
|
-
reports = query_annual_reports(stock_code, year)
|
|
63
|
-
suffix = f" for year {year}" if year else ""
|
|
64
|
-
if not reports:
|
|
65
|
-
return {
|
|
66
|
-
"success": False,
|
|
67
|
-
"stock_code": stock_code,
|
|
68
|
-
"year": year,
|
|
69
|
-
"count": 0,
|
|
70
|
-
"reports": [],
|
|
71
|
-
"message": f"No annual reports found for stock {stock_code}{suffix}",
|
|
72
|
-
}
|
|
73
|
-
return {
|
|
74
|
-
"success": True,
|
|
75
|
-
"stock_code": stock_code,
|
|
76
|
-
"year": year,
|
|
77
|
-
"count": len(reports),
|
|
78
|
-
"reports": _format_reports(reports),
|
|
79
|
-
"message": f"Found {len(reports)} annual report(s){suffix}",
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def action_download_annual_reports(payload: dict) -> dict:
|
|
84
|
-
stock_code = _require_stock_code(payload)
|
|
85
|
-
year = _optional_year(payload)
|
|
86
|
-
output_dir = _resolve_save_path(payload)
|
|
87
|
-
os.makedirs(output_dir, exist_ok=True)
|
|
88
|
-
result = download_annual_reports(stock_code, year, save_path=output_dir)
|
|
89
|
-
result["stock_code"] = stock_code
|
|
90
|
-
result["year"] = year
|
|
91
|
-
return result
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def action_query_prospectus(payload: dict) -> dict:
|
|
95
|
-
stock_code = _require_stock_code(payload)
|
|
96
|
-
reports = query_prospectus(stock_code)
|
|
97
|
-
if not reports:
|
|
98
|
-
return {
|
|
99
|
-
"success": False,
|
|
100
|
-
"stock_code": stock_code,
|
|
101
|
-
"count": 0,
|
|
102
|
-
"reports": [],
|
|
103
|
-
"message": f"No prospectus found for stock {stock_code}",
|
|
104
|
-
}
|
|
105
|
-
return {
|
|
106
|
-
"success": True,
|
|
107
|
-
"stock_code": stock_code,
|
|
108
|
-
"count": len(reports),
|
|
109
|
-
"reports": _format_reports(reports),
|
|
110
|
-
"message": f"Found {len(reports)} prospectus document(s)",
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def action_download_prospectus(payload: dict) -> dict:
|
|
115
|
-
stock_code = _require_stock_code(payload)
|
|
116
|
-
output_dir = _resolve_save_path(payload)
|
|
117
|
-
os.makedirs(output_dir, exist_ok=True)
|
|
118
|
-
result = download_prospectus(stock_code, save_path=output_dir)
|
|
119
|
-
result["stock_code"] = stock_code
|
|
120
|
-
return result
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
ACTIONS = {
|
|
124
|
-
"query_annual_reports": action_query_annual_reports,
|
|
125
|
-
"download_annual_reports": action_download_annual_reports,
|
|
126
|
-
"query_prospectus": action_query_prospectus,
|
|
127
|
-
"download_prospectus": action_download_prospectus,
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def main() -> int:
|
|
132
|
-
if len(sys.argv) < 3:
|
|
133
|
-
sys.stderr.write("usage: bridge.py <action> <json-payload>\n")
|
|
134
|
-
return 2
|
|
135
|
-
|
|
136
|
-
action = sys.argv[1]
|
|
137
|
-
raw_payload = sys.argv[2]
|
|
138
|
-
|
|
139
|
-
handler = ACTIONS.get(action)
|
|
140
|
-
if handler is None:
|
|
141
|
-
sys.stdout.write(json.dumps({"ok": False, "error": f"Unknown action: {action}"}))
|
|
142
|
-
return 1
|
|
143
|
-
|
|
144
|
-
try:
|
|
145
|
-
payload = json.loads(raw_payload) if raw_payload else {}
|
|
146
|
-
except json.JSONDecodeError as exc:
|
|
147
|
-
sys.stdout.write(json.dumps({"ok": False, "error": f"Invalid JSON payload: {exc}"}))
|
|
148
|
-
return 1
|
|
149
|
-
|
|
150
|
-
try:
|
|
151
|
-
result: Any = handler(payload)
|
|
152
|
-
sys.stdout.write(json.dumps({"ok": True, "payload": result}, ensure_ascii=False))
|
|
153
|
-
return 0
|
|
154
|
-
except Exception as exc:
|
|
155
|
-
sys.stderr.write(traceback.format_exc())
|
|
156
|
-
sys.stdout.write(json.dumps({"ok": False, "error": str(exc)}))
|
|
157
|
-
return 1
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
if __name__ == "__main__":
|
|
161
|
-
raise SystemExit(main())
|