@youhaozhao/cninfo-mcp 1.0.7 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/package.json +6 -4
- package/python/spider.py +165 -44
- package/python/__pycache__/spider.cpython-314.pyc +0 -0
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://www.npmjs.com/package/@youhaozhao/cninfo-mcp)
|
|
4
4
|
|
|
5
|
-
通过 MCP
|
|
5
|
+
通过 MCP 协议查询和下载巨潮资讯网上市公司年报 PDF 的工具,适用于 Claude Desktop。
|
|
6
6
|
|
|
7
7
|
## 使用方法
|
|
8
8
|
|
|
@@ -35,6 +35,7 @@
|
|
|
35
35
|
```
|
|
36
36
|
查询 000888 的 2024 年报
|
|
37
37
|
下载 688777 的年报
|
|
38
|
+
查询 920185 的年报 # 北交所,新旧代码(如 835185)均可
|
|
38
39
|
```
|
|
39
40
|
|
|
40
41
|
## 系统要求
|
|
@@ -44,9 +45,10 @@
|
|
|
44
45
|
|
|
45
46
|
## 数据来源
|
|
46
47
|
|
|
47
|
-
[巨潮资讯网](https://www.cninfo.com.cn) —
|
|
48
|
+
[巨潮资讯网](https://www.cninfo.com.cn) — 支持沪深两市(主板、创业板、科创板)及北京证券交易所(北交所)
|
|
48
49
|
|
|
49
50
|
## Credits
|
|
50
51
|
|
|
51
52
|
爬虫逻辑基于 [gaodechen/cninfo_process](https://github.com/gaodechen/cninfo_process)。
|
|
52
53
|
|
|
54
|
+
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@youhaozhao/cninfo-mcp",
|
|
3
|
-
"version": "1.0
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"description": "MCP Server for querying and downloading Chinese listed companies' annual reports from CNINFO (巨潮资讯网)",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mcp",
|
|
@@ -45,9 +45,11 @@
|
|
|
45
45
|
],
|
|
46
46
|
"files": [
|
|
47
47
|
"bin/",
|
|
48
|
-
"python/",
|
|
49
|
-
"
|
|
48
|
+
"python/mcp_server.py",
|
|
49
|
+
"python/spider.py",
|
|
50
|
+
"python/requirements.txt",
|
|
51
|
+
"scripts/install-python-deps.js",
|
|
50
52
|
"README.md",
|
|
51
53
|
"LICENSE"
|
|
52
54
|
]
|
|
53
|
-
}
|
|
55
|
+
}
|
package/python/spider.py
CHANGED
|
@@ -18,6 +18,20 @@ _saving_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pdf")
|
|
|
18
18
|
saving_path = _saving_path + "/"
|
|
19
19
|
logger = logging.getLogger(__name__)
|
|
20
20
|
|
|
21
|
+
# 巨潮资讯历史公告的实际下限约为 2001 会计年度,再往前查询无数据返回。
|
|
22
|
+
EARLIEST_DATE = "2001-01-01"
|
|
23
|
+
# 接口单页最大返回条数
|
|
24
|
+
PAGE_SIZE = 30
|
|
25
|
+
# 翻页安全上限,防止异常情况下无限循环
|
|
26
|
+
MAX_PAGES = 100
|
|
27
|
+
# 巨潮搜索联想接口:把股票代码/简称解析为 orgId(北交所查询必需)
|
|
28
|
+
TOP_SEARCH_URL = "http://www.cninfo.com.cn/new/information/topSearch/query"
|
|
29
|
+
# 公告查询接口
|
|
30
|
+
QUERY_URL = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
31
|
+
# 瞬时失败(网络抖动/限流)的重试次数与退避基数(秒)
|
|
32
|
+
MAX_RETRIES = 3
|
|
33
|
+
RETRY_BACKOFF = 1.0
|
|
34
|
+
|
|
21
35
|
User_Agent = [
|
|
22
36
|
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
|
|
23
37
|
"Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
|
|
@@ -48,6 +62,85 @@ def _build_headers() -> dict:
|
|
|
48
62
|
return headers
|
|
49
63
|
|
|
50
64
|
|
|
65
|
+
def _post_json(url: str, data: dict) -> dict:
    """POST form data to ``url`` and return the decoded JSON body.

    Transient failures are retried up to ``MAX_RETRIES`` times, sleeping
    ``RETRY_BACKOFF * 2**attempt`` seconds plus up to one second of random
    jitter between attempts.

    Retried:
        * network-level ``requests`` errors (timeouts, connection errors);
        * HTTP 5xx and HTTP 429 responses;
        * empty or truncated bodies that fail JSON decoding.
    Not retried:
        * any other HTTP 4xx response; it is re-raised immediately.

    Args:
        url: Endpoint to POST to.
        data: Form fields sent as the request body.

    Returns:
        Whatever ``Response.json()`` yields. The announcement query returns
        a dict; the topSearch caller in this file expects a list.

    Raises:
        requests.exceptions.HTTPError: Non-retryable 4xx, or the last
            retryable HTTP error once the attempts are exhausted.
        requests.exceptions.RequestException: Last network error once the
            attempts are exhausted.
        ValueError: Last JSON decode error once the attempts are exhausted.
        RuntimeError: No request was attempted (``MAX_RETRIES`` <= 0).
    """
    last_exc = None
    for attempt in range(MAX_RETRIES):
        try:
            resp = requests.post(
                url, headers=_build_headers(), data=data, timeout=30
            )
            resp.raise_for_status()
            return resp.json()
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else None
            # 4xx other than 429 will not heal on its own: fail fast.
            if status is not None and status != 429 and 400 <= status < 500:
                raise
            last_exc = e
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError is listed explicitly because older requests releases
            # raise json/simplejson decode errors from Response.json(); those
            # are ValueError subclasses but not RequestException subclasses,
            # so they would otherwise skip the retry loop.
            last_exc = e
        # Back off before the next attempt, but not after the final one.
        if attempt < MAX_RETRIES - 1:
            time.sleep(RETRY_BACKOFF * (2**attempt) + random.random())
    if last_exc is not None:
        raise last_exc
    raise RuntimeError(f"_post_json 未执行任何请求(MAX_RETRIES={MAX_RETRIES})")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _query_announcements(query: dict) -> list:
    """Run one announcement query and return its ``announcements`` value.

    The request goes through ``_post_json`` against ``QUERY_URL``, so it
    inherits that helper's retry behaviour. An empty list is returned when
    the response is falsy, has no ``announcements`` key, or carries a falsy
    value under that key.
    """
    payload = _post_json(QUERY_URL, query)
    if not payload or "announcements" not in payload:
        return []
    return payload["announcements"] or []
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _is_bse_code(stock_code) -> bool:
|
|
107
|
+
"""判断是否为北交所代码。
|
|
108
|
+
|
|
109
|
+
北交所代码段:4xxxxx / 8xxxxx(原新三板平移),以及 92xxxx 标准段
|
|
110
|
+
(920,2024-04 起启用,预留 920-929)。这里特意只匹配 92 而非整个 9
|
|
111
|
+
开头,以排除沪市 B 股 900xxx,避免为其多发一次无效的北交所查询。
|
|
112
|
+
"""
|
|
113
|
+
digits = re.sub(r"\D", "", str(stock_code or ""))
|
|
114
|
+
return digits[:1] in ("4", "8") or digits[:2] == "92"
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _resolve_org_id(stock_code) -> Optional[tuple]:
    """Resolve ``stock_code`` to a ``(code, orgId)`` pair of strings.

    The lookup POSTs the code to ``TOP_SEARCH_URL`` and inspects the returned
    list of hits. A hit whose ``code`` equals the digits of ``stock_code``
    and that carries an ``orgId`` wins; otherwise the first hit is used,
    provided it has an ``orgId``.

    Returns:
        ``(code, orgId)`` as strings, or ``None`` when the request fails,
        the response is not a non-empty list, or the selected hit has no
        ``orgId``. Request failures are logged as warnings, not raised.
    """
    try:
        candidates = _post_json(
            TOP_SEARCH_URL, {"keyWord": stock_code, "maxNum": 10}
        )
    except Exception as exc:
        logger.warning("orgId 解析失败(%s): %s", stock_code, exc)
        return None

    if not (isinstance(candidates, list) and candidates):
        return None

    wanted = re.sub(r"\D", "", str(stock_code or ""))
    # First hit with an exact code match and a usable orgId, if any.
    exact = next(
        (
            hit
            for hit in candidates
            if str(hit.get("code")) == wanted and hit.get("orgId")
        ),
        None,
    )
    chosen = exact if exact is not None else candidates[0]
    if not chosen.get("orgId"):
        return None
    return str(chosen.get("code")), str(chosen.get("orgId"))
|
|
142
|
+
|
|
143
|
+
|
|
51
144
|
def _date_range(start_date: str) -> str:
|
|
52
145
|
"""构造查询时间区间,结束日期取当天,避免硬编码过期。"""
|
|
53
146
|
datetime.datetime.strptime(start_date, "%Y-%m-%d")
|
|
@@ -55,6 +148,26 @@ def _date_range(start_date: str) -> str:
|
|
|
55
148
|
return f"{start_date}~{today}"
|
|
56
149
|
|
|
57
150
|
|
|
151
|
+
def _paginate(fetch_fn, stock):
    """Collect the results of every page returned by a single-page fetcher.

    ``fetch_fn(page, stock)`` is called with page numbers starting at 1.
    Paging stops at the first empty page or the first page holding fewer
    than ``PAGE_SIZE`` items. If ``MAX_PAGES`` full pages are read without
    hitting either condition, a warning is logged because the result may be
    truncated.

    Returns:
        A list with the items of all fetched pages, in page order.
    """
    collected = []
    page = 1
    while page <= MAX_PAGES:
        batch = fetch_fn(page, stock)
        if not batch:
            return collected
        collected.extend(batch)
        if len(batch) < PAGE_SIZE:
            # A short page means this was the last one.
            return collected
        page += 1
    # Only reached when every page up to the safety cap was full.
    logger.warning("翻页达到上限 %s,结果可能被截断(%s)", MAX_PAGES, stock)
    return collected
|
|
169
|
+
|
|
170
|
+
|
|
58
171
|
def _is_annual_report_title(
|
|
59
172
|
title: str, year_filter: Optional[Union[int, str]] = None
|
|
60
173
|
) -> bool:
|
|
@@ -98,10 +211,9 @@ def _is_annual_report_title(
|
|
|
98
211
|
|
|
99
212
|
# 深市 年度报告
|
|
100
213
|
def szseAnnual(page, stock):
|
|
101
|
-
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
102
214
|
query = {
|
|
103
215
|
"pageNum": page, # 页码
|
|
104
|
-
"pageSize":
|
|
216
|
+
"pageSize": PAGE_SIZE,
|
|
105
217
|
"tabName": "fulltext",
|
|
106
218
|
"column": "szse", # 深交所
|
|
107
219
|
"stock": "",
|
|
@@ -110,24 +222,17 @@ def szseAnnual(page, stock):
|
|
|
110
222
|
"plate": "sz",
|
|
111
223
|
"category": "category_ndbg_szsh", # 年度报告
|
|
112
224
|
"trade": "",
|
|
113
|
-
"seDate": _date_range(
|
|
225
|
+
"seDate": _date_range(EARLIEST_DATE), # 时间区间
|
|
114
226
|
}
|
|
115
227
|
|
|
116
|
-
|
|
117
|
-
query_path, headers=_build_headers(), data=query, timeout=30
|
|
118
|
-
)
|
|
119
|
-
result = namelist.json()
|
|
120
|
-
if result and "announcements" in result and result["announcements"]:
|
|
121
|
-
return result["announcements"]
|
|
122
|
-
return []
|
|
228
|
+
return _query_announcements(query)
|
|
123
229
|
|
|
124
230
|
|
|
125
231
|
# 沪市 年度报告
|
|
126
232
|
def sseAnnual(page, stock):
|
|
127
|
-
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
128
233
|
query = {
|
|
129
234
|
"pageNum": page, # 页码
|
|
130
|
-
"pageSize":
|
|
235
|
+
"pageSize": PAGE_SIZE,
|
|
131
236
|
"tabName": "fulltext",
|
|
132
237
|
"column": "sse",
|
|
133
238
|
"stock": "",
|
|
@@ -136,24 +241,41 @@ def sseAnnual(page, stock):
|
|
|
136
241
|
"plate": "sh",
|
|
137
242
|
"category": "category_ndbg_szsh", # 年度报告
|
|
138
243
|
"trade": "",
|
|
139
|
-
"seDate": _date_range(
|
|
244
|
+
"seDate": _date_range(EARLIEST_DATE), # 时间区间
|
|
140
245
|
}
|
|
141
246
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
247
|
+
return _query_announcements(query)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# 北交所 年度报告
|
|
251
|
+
def bseAnnual(page, stock):
    """Fetch one page of annual-report announcements from the BSE plate.

    Args:
        page: Page number to request.
        stock: Value for the ``stock`` form field, expected in the form
            ``"code,orgId"`` as produced by ``_resolve_org_id``. The
            original author notes that the endpoint returns nothing for a
            bare code or a ``searchkey`` on this plate.

    Returns:
        The list of announcements for that page (possibly empty).
    """
    form = dict(
        pageNum=page,
        pageSize=PAGE_SIZE,
        tabName="fulltext",
        column="bj",  # Beijing Stock Exchange
        stock=stock,
        searchkey="",
        secid="",
        plate="bj",
        category="category_ndbg_szsh",  # annual reports
        trade="",
        seDate=_date_range(EARLIEST_DATE),  # query date window
    )
    return _query_announcements(form)
|
|
149
272
|
|
|
150
273
|
|
|
151
274
|
# 深市 招股
|
|
152
275
|
def szseStock(page, stock):
|
|
153
|
-
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
154
276
|
query = {
|
|
155
277
|
"pageNum": page, # 页码
|
|
156
|
-
"pageSize":
|
|
278
|
+
"pageSize": PAGE_SIZE,
|
|
157
279
|
"tabName": "fulltext",
|
|
158
280
|
"column": "szse",
|
|
159
281
|
"stock": "",
|
|
@@ -162,24 +284,17 @@ def szseStock(page, stock):
|
|
|
162
284
|
"plate": "sz",
|
|
163
285
|
"category": "",
|
|
164
286
|
"trade": "",
|
|
165
|
-
"seDate": _date_range(
|
|
287
|
+
"seDate": _date_range(EARLIEST_DATE), # 时间区间
|
|
166
288
|
}
|
|
167
289
|
|
|
168
|
-
|
|
169
|
-
query_path, headers=_build_headers(), data=query, timeout=30
|
|
170
|
-
)
|
|
171
|
-
result = namelist.json()
|
|
172
|
-
if result and "announcements" in result and result["announcements"]:
|
|
173
|
-
return result["announcements"]
|
|
174
|
-
return []
|
|
290
|
+
return _query_announcements(query)
|
|
175
291
|
|
|
176
292
|
|
|
177
293
|
# 沪市 招股
|
|
178
294
|
def sseStock(page, stock):
|
|
179
|
-
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
180
295
|
query = {
|
|
181
296
|
"pageNum": page, # 页码
|
|
182
|
-
"pageSize":
|
|
297
|
+
"pageSize": PAGE_SIZE,
|
|
183
298
|
"tabName": "fulltext",
|
|
184
299
|
"column": "sse",
|
|
185
300
|
"stock": "",
|
|
@@ -188,16 +303,10 @@ def sseStock(page, stock):
|
|
|
188
303
|
"plate": "sh",
|
|
189
304
|
"category": "",
|
|
190
305
|
"trade": "",
|
|
191
|
-
"seDate": _date_range(
|
|
306
|
+
"seDate": _date_range(EARLIEST_DATE), # 时间区间
|
|
192
307
|
}
|
|
193
308
|
|
|
194
|
-
|
|
195
|
-
query_path, headers=_build_headers(), data=query, timeout=30
|
|
196
|
-
)
|
|
197
|
-
result = namelist.json()
|
|
198
|
-
if result and "announcements" in result and result["announcements"]:
|
|
199
|
-
return result["announcements"]
|
|
200
|
-
return []
|
|
309
|
+
return _query_announcements(query)
|
|
201
310
|
|
|
202
311
|
|
|
203
312
|
def Download(
|
|
@@ -271,13 +380,13 @@ def query_prospectus(stock_code):
|
|
|
271
380
|
all_announcements = []
|
|
272
381
|
|
|
273
382
|
try:
|
|
274
|
-
announcements_sse = sseStock
|
|
383
|
+
announcements_sse = _paginate(sseStock, stock_code)
|
|
275
384
|
all_announcements.extend(announcements_sse)
|
|
276
385
|
except Exception as e:
|
|
277
386
|
logger.warning("沪市招股书查询失败: %s", e)
|
|
278
387
|
|
|
279
388
|
try:
|
|
280
|
-
announcements_szse = szseStock
|
|
389
|
+
announcements_szse = _paginate(szseStock, stock_code)
|
|
281
390
|
all_announcements.extend(announcements_szse)
|
|
282
391
|
except Exception as e:
|
|
283
392
|
logger.warning("深市招股书查询失败: %s", e)
|
|
@@ -323,18 +432,30 @@ def query_annual_reports(stock_code, year=None):
|
|
|
323
432
|
|
|
324
433
|
# 查询沪市
|
|
325
434
|
try:
|
|
326
|
-
announcements_sse = sseAnnual
|
|
435
|
+
announcements_sse = _paginate(sseAnnual, stock_code)
|
|
327
436
|
all_announcements.extend(announcements_sse)
|
|
328
437
|
except Exception as e:
|
|
329
438
|
logger.warning("沪市年报查询失败: %s", e)
|
|
330
439
|
|
|
331
440
|
# 查询深市
|
|
332
441
|
try:
|
|
333
|
-
announcements_szse = szseAnnual
|
|
442
|
+
announcements_szse = _paginate(szseAnnual, stock_code)
|
|
334
443
|
all_announcements.extend(announcements_szse)
|
|
335
444
|
except Exception as e:
|
|
336
445
|
logger.warning("深市年报查询失败: %s", e)
|
|
337
446
|
|
|
447
|
+
# 查询北交所(代码以 4/8 或 92 开头,不含沪市 B 股 900xxx)。北交所接口必须用 orgId,
|
|
448
|
+
# 故先解析 orgId 再以 stock="代码,orgId" 翻页查询。
|
|
449
|
+
if _is_bse_code(stock_code):
|
|
450
|
+
try:
|
|
451
|
+
resolved = _resolve_org_id(stock_code)
|
|
452
|
+
if resolved:
|
|
453
|
+
code, org_id = resolved
|
|
454
|
+
announcements_bse = _paginate(bseAnnual, f"{code},{org_id}")
|
|
455
|
+
all_announcements.extend(announcements_bse)
|
|
456
|
+
except Exception as e:
|
|
457
|
+
logger.warning("北交所年报查询失败: %s", e)
|
|
458
|
+
|
|
338
459
|
# 按年份过滤
|
|
339
460
|
if year:
|
|
340
461
|
year_expr = re.escape(str(year))
|
|
Binary file
|