@youhaozhao/cninfo-mcp 1.0.7 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![npm version](https://img.shields.io/npm/v/@youhaozhao/cninfo-mcp)](https://www.npmjs.com/package/@youhaozhao/cninfo-mcp)
4
4
 
5
- 通过 MCP 协议查询和下载巨潮资讯网上市公司年报的工具,适用于 Claude Desktop。
5
+ 通过 MCP 协议查询和下载巨潮资讯网上市公司年报 PDF 的工具,适用于 Claude Desktop。
6
6
 
7
7
  ## 使用方法
8
8
 
@@ -35,6 +35,7 @@
35
35
  ```
36
36
  查询 000888 的 2024 年报
37
37
  下载 688777 的年报
38
+ 查询 920185 的年报 # 北交所,新旧代码(如 835185)均可
38
39
  ```
39
40
 
40
41
  ## 系统要求
@@ -44,9 +45,10 @@
44
45
 
45
46
  ## 数据来源
46
47
 
47
- [巨潮资讯网](https://www.cninfo.com.cn) — 支持沪深两市及科创板
48
+ [巨潮资讯网](https://www.cninfo.com.cn) — 支持沪深两市(主板、创业板、科创板)及北京证券交易所(北交所)
48
49
 
49
50
  ## Credits
50
51
 
51
52
  爬虫逻辑基于 [gaodechen/cninfo_process](https://github.com/gaodechen/cninfo_process)。
52
53
 
54
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@youhaozhao/cninfo-mcp",
3
- "version": "1.0.7",
3
+ "version": "1.2.0",
4
4
  "description": "MCP Server for querying and downloading Chinese listed companies' annual reports from CNINFO (巨潮资讯网)",
5
5
  "keywords": [
6
6
  "mcp",
@@ -45,9 +45,11 @@
45
45
  ],
46
46
  "files": [
47
47
  "bin/",
48
- "python/",
49
- "scripts/",
48
+ "python/mcp_server.py",
49
+ "python/spider.py",
50
+ "python/requirements.txt",
51
+ "scripts/install-python-deps.js",
50
52
  "README.md",
51
53
  "LICENSE"
52
54
  ]
53
- }
55
+ }
package/python/spider.py CHANGED
@@ -18,6 +18,20 @@ _saving_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pdf")
18
18
  saving_path = _saving_path + "/"
19
19
  logger = logging.getLogger(__name__)
20
20
 
21
+ # 巨潮资讯历史公告的实际下限约为 2001 会计年度,再往前查询无数据返回。
22
+ EARLIEST_DATE = "2001-01-01"
23
+ # 接口单页最大返回条数
24
+ PAGE_SIZE = 30
25
+ # 翻页安全上限,防止异常情况下无限循环
26
+ MAX_PAGES = 100
27
+ # 巨潮搜索联想接口:把股票代码/简称解析为 orgId(北交所查询必需)
28
+ TOP_SEARCH_URL = "http://www.cninfo.com.cn/new/information/topSearch/query"
29
+ # 公告查询接口
30
+ QUERY_URL = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
31
+ # 瞬时失败(网络抖动/限流)的重试次数与退避基数(秒)
32
+ MAX_RETRIES = 3
33
+ RETRY_BACKOFF = 1.0
34
+
21
35
  User_Agent = [
22
36
  "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
23
37
  "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
@@ -48,6 +62,85 @@ def _build_headers() -> dict:
48
62
  return headers
49
63
 
50
64
 
65
+ def _post_json(url: str, data: dict) -> dict:
66
+ """POST 请求并解析 JSON,仅对可重试的瞬时失败按指数退避重试。
67
+
68
+ 巨潮接口在批量/高频访问下会偶发超时或限流,导致本可成功的查询失败。
69
+ 可重试:网络异常(超时/连接错误)、5xx、429,以及空/截断响应导致的
70
+ JSON 解析失败(requests 会抛 JSONDecodeError,实测多为限流时返回空体)。
71
+ 不可重试:4xx(除 429)等客户端错误,快速失败,避免无谓的退避等待。
72
+ 重试用尽后抛出最后一次异常,交由调用方的 try/except 记录并降级。
73
+ """
74
+ last_exc = None
75
+ for attempt in range(MAX_RETRIES):
76
+ try:
77
+ resp = requests.post(
78
+ url, headers=_build_headers(), data=data, timeout=30
79
+ )
80
+ resp.raise_for_status()
81
+ return resp.json()
82
+ except requests.exceptions.HTTPError as e:
83
+ status = e.response.status_code if e.response is not None else None
84
+ # 4xx(429 除外)不会自愈,立即失败,不浪费退避等待
85
+ if status is not None and status != 429 and 400 <= status < 500:
86
+ raise
87
+ last_exc = e
88
+ except requests.exceptions.RequestException as e:
89
+ # 网络异常 + JSONDecodeError(空/截断响应,多为瞬时限流;注意 requests ≥ 2.27 起它才是 RequestException 的子类,更早版本抛出的是普通 ValueError,不会进入此分支)
90
+ last_exc = e
91
+ if attempt < MAX_RETRIES - 1:
92
+ time.sleep(RETRY_BACKOFF * (2**attempt) + random.random())
93
+ if last_exc is not None:
94
+ raise last_exc
95
+ raise RuntimeError(f"_post_json 未执行任何请求(MAX_RETRIES={MAX_RETRIES})")
96
+
97
+
98
+ def _query_announcements(query: dict) -> list:
99
+ """调用公告查询接口并返回 announcements 列表(带重试)。"""
100
+ result = _post_json(QUERY_URL, query)
101
+ if result and "announcements" in result and result["announcements"]:
102
+ return result["announcements"]
103
+ return []
104
+
105
+
106
+ def _is_bse_code(stock_code) -> bool:
107
+ """判断是否为北交所代码。
108
+
109
+ 北交所代码段:4xxxxx / 8xxxxx(原新三板平移),以及 92xxxx 标准段
110
+ (920,2024-04 起启用,预留 920-929)。这里特意只匹配 92 而非整个 9
111
+ 开头,以排除沪市 B 股 900xxx,避免为其多发一次无效的北交所查询。
112
+ """
113
+ digits = re.sub(r"\D", "", str(stock_code or ""))
114
+ return digits[:1] in ("4", "8") or digits[:2] == "92"
115
+
116
+
117
+ def _resolve_org_id(stock_code) -> Optional[tuple]:
118
+ """通过巨潮搜索联想接口把股票代码解析为 (code, orgId)。
119
+
120
+ 北交所的 hisAnnouncement 接口不接受 searchkey 或裸代码,必须以
121
+ stock="代码,orgId" 的形式查询,因此需要先解析 orgId。
122
+ 优先返回 code 完全等于输入的条目;找不到精确匹配则取第一条
123
+ (同一公司新旧代码共用同一 orgId)。无结果返回 None。
124
+ """
125
+ try:
126
+ hits = _post_json(TOP_SEARCH_URL, {"keyWord": stock_code, "maxNum": 10})
127
+ except Exception as e:
128
+ logger.warning("orgId 解析失败(%s): %s", stock_code, e)
129
+ return None
130
+
131
+ if not isinstance(hits, list) or not hits:
132
+ return None
133
+
134
+ target = re.sub(r"\D", "", str(stock_code or ""))
135
+ for it in hits:
136
+ if str(it.get("code")) == target and it.get("orgId"):
137
+ return str(it.get("code")), str(it.get("orgId"))
138
+ first = hits[0]
139
+ if first.get("orgId"):
140
+ return str(first.get("code")), str(first.get("orgId"))
141
+ return None
142
+
143
+
51
144
  def _date_range(start_date: str) -> str:
52
145
  """构造查询时间区间,结束日期取当天,避免硬编码过期。"""
53
146
  datetime.datetime.strptime(start_date, "%Y-%m-%d")
@@ -55,6 +148,26 @@ def _date_range(start_date: str) -> str:
55
148
  return f"{start_date}~{today}"
56
149
 
57
150
 
151
+ def _paginate(fetch_fn, stock):
152
+ """
153
+ 对单页查询函数翻页,汇总所有页的公告。
154
+
155
+ 巨潮接口单页最多返回 PAGE_SIZE 条,放开时间区间后历史年报会跨越多页,
156
+ 必须翻页才能取全。以“返回数量不足一页”作为终止条件,并设安全上限。
157
+ """
158
+ all_items = []
159
+ for page in range(1, MAX_PAGES + 1):
160
+ items = fetch_fn(page, stock)
161
+ if not items:
162
+ break
163
+ all_items.extend(items)
164
+ if len(items) < PAGE_SIZE: # 不足一页说明已到最后一页
165
+ break
166
+ else:
167
+ logger.warning("翻页达到上限 %s,结果可能被截断(%s)", MAX_PAGES, stock)
168
+ return all_items
169
+
170
+
58
171
  def _is_annual_report_title(
59
172
  title: str, year_filter: Optional[Union[int, str]] = None
60
173
  ) -> bool:
@@ -98,10 +211,9 @@ def _is_annual_report_title(
98
211
 
99
212
  # 深市 年度报告
100
213
  def szseAnnual(page, stock):
101
- query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
102
214
  query = {
103
215
  "pageNum": page, # 页码
104
- "pageSize": 30,
216
+ "pageSize": PAGE_SIZE,
105
217
  "tabName": "fulltext",
106
218
  "column": "szse", # 深交所
107
219
  "stock": "",
@@ -110,24 +222,17 @@ def szseAnnual(page, stock):
110
222
  "plate": "sz",
111
223
  "category": "category_ndbg_szsh", # 年度报告
112
224
  "trade": "",
113
- "seDate": _date_range("2020-01-01"), # 时间区间
225
+ "seDate": _date_range(EARLIEST_DATE), # 时间区间
114
226
  }
115
227
 
116
- namelist = requests.post(
117
- query_path, headers=_build_headers(), data=query, timeout=30
118
- )
119
- result = namelist.json()
120
- if result and "announcements" in result and result["announcements"]:
121
- return result["announcements"]
122
- return []
228
+ return _query_announcements(query)
123
229
 
124
230
 
125
231
  # 沪市 年度报告
126
232
  def sseAnnual(page, stock):
127
- query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
128
233
  query = {
129
234
  "pageNum": page, # 页码
130
- "pageSize": 30,
235
+ "pageSize": PAGE_SIZE,
131
236
  "tabName": "fulltext",
132
237
  "column": "sse",
133
238
  "stock": "",
@@ -136,24 +241,41 @@ def sseAnnual(page, stock):
136
241
  "plate": "sh",
137
242
  "category": "category_ndbg_szsh", # 年度报告
138
243
  "trade": "",
139
- "seDate": _date_range("2020-01-01"), # 时间区间
244
+ "seDate": _date_range(EARLIEST_DATE), # 时间区间
140
245
  }
141
246
 
142
- namelist = requests.post(
143
- query_path, headers=_build_headers(), data=query, timeout=30
144
- )
145
- result = namelist.json()
146
- if result and "announcements" in result and result["announcements"]:
147
- return result["announcements"]
148
- return []
247
+ return _query_announcements(query)
248
+
249
+
250
+ # 北交所 年度报告
251
+ def bseAnnual(page, stock):
252
+ """北交所年报查询。
253
+
254
+ stock 形如 "代码,orgId",由 _resolve_org_id 解析得到。北交所必须
255
+ 通过 plate=bj + stock="代码,orgId" 查询,searchkey/裸代码均返回空。
256
+ """
257
+ query = {
258
+ "pageNum": page, # 页码
259
+ "pageSize": PAGE_SIZE,
260
+ "tabName": "fulltext",
261
+ "column": "bj", # 北交所
262
+ "stock": stock, # 必须为 "代码,orgId"
263
+ "searchkey": "",
264
+ "secid": "",
265
+ "plate": "bj",
266
+ "category": "category_ndbg_szsh", # 年度报告
267
+ "trade": "",
268
+ "seDate": _date_range(EARLIEST_DATE), # 时间区间
269
+ }
270
+
271
+ return _query_announcements(query)
149
272
 
150
273
 
151
274
  # 深市 招股
152
275
  def szseStock(page, stock):
153
- query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
154
276
  query = {
155
277
  "pageNum": page, # 页码
156
- "pageSize": 30,
278
+ "pageSize": PAGE_SIZE,
157
279
  "tabName": "fulltext",
158
280
  "column": "szse",
159
281
  "stock": "",
@@ -162,24 +284,17 @@ def szseStock(page, stock):
162
284
  "plate": "sz",
163
285
  "category": "",
164
286
  "trade": "",
165
- "seDate": _date_range("2015-01-01"), # 时间区间
287
+ "seDate": _date_range(EARLIEST_DATE), # 时间区间
166
288
  }
167
289
 
168
- namelist = requests.post(
169
- query_path, headers=_build_headers(), data=query, timeout=30
170
- )
171
- result = namelist.json()
172
- if result and "announcements" in result and result["announcements"]:
173
- return result["announcements"]
174
- return []
290
+ return _query_announcements(query)
175
291
 
176
292
 
177
293
  # 沪市 招股
178
294
  def sseStock(page, stock):
179
- query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
180
295
  query = {
181
296
  "pageNum": page, # 页码
182
- "pageSize": 30,
297
+ "pageSize": PAGE_SIZE,
183
298
  "tabName": "fulltext",
184
299
  "column": "sse",
185
300
  "stock": "",
@@ -188,16 +303,10 @@ def sseStock(page, stock):
188
303
  "plate": "sh",
189
304
  "category": "",
190
305
  "trade": "",
191
- "seDate": _date_range("2015-01-01"), # 时间区间
306
+ "seDate": _date_range(EARLIEST_DATE), # 时间区间
192
307
  }
193
308
 
194
- namelist = requests.post(
195
- query_path, headers=_build_headers(), data=query, timeout=30
196
- )
197
- result = namelist.json()
198
- if result and "announcements" in result and result["announcements"]:
199
- return result["announcements"]
200
- return []
309
+ return _query_announcements(query)
201
310
 
202
311
 
203
312
  def Download(
@@ -271,13 +380,13 @@ def query_prospectus(stock_code):
271
380
  all_announcements = []
272
381
 
273
382
  try:
274
- announcements_sse = sseStock(1, stock_code)
383
+ announcements_sse = _paginate(sseStock, stock_code)
275
384
  all_announcements.extend(announcements_sse)
276
385
  except Exception as e:
277
386
  logger.warning("沪市招股书查询失败: %s", e)
278
387
 
279
388
  try:
280
- announcements_szse = szseStock(1, stock_code)
389
+ announcements_szse = _paginate(szseStock, stock_code)
281
390
  all_announcements.extend(announcements_szse)
282
391
  except Exception as e:
283
392
  logger.warning("深市招股书查询失败: %s", e)
@@ -323,18 +432,30 @@ def query_annual_reports(stock_code, year=None):
323
432
 
324
433
  # 查询沪市
325
434
  try:
326
- announcements_sse = sseAnnual(1, stock_code)
435
+ announcements_sse = _paginate(sseAnnual, stock_code)
327
436
  all_announcements.extend(announcements_sse)
328
437
  except Exception as e:
329
438
  logger.warning("沪市年报查询失败: %s", e)
330
439
 
331
440
  # 查询深市
332
441
  try:
333
- announcements_szse = szseAnnual(1, stock_code)
442
+ announcements_szse = _paginate(szseAnnual, stock_code)
334
443
  all_announcements.extend(announcements_szse)
335
444
  except Exception as e:
336
445
  logger.warning("深市年报查询失败: %s", e)
337
446
 
447
+ # 查询北交所(代码以 4/8 或 92 开头,不含沪市 B 股 900xxx)。北交所接口必须用 orgId,
448
+ # 故先解析 orgId 再以 stock="代码,orgId" 翻页查询。
449
+ if _is_bse_code(stock_code):
450
+ try:
451
+ resolved = _resolve_org_id(stock_code)
452
+ if resolved:
453
+ code, org_id = resolved
454
+ announcements_bse = _paginate(bseAnnual, f"{code},{org_id}")
455
+ all_announcements.extend(announcements_bse)
456
+ except Exception as e:
457
+ logger.warning("北交所年报查询失败: %s", e)
458
+
338
459
  # 按年份过滤
339
460
  if year:
340
461
  year_expr = re.escape(str(year))