@youhaozhao/cninfo-mcp 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -35,6 +35,7 @@
35
35
  ```
36
36
  查询 000888 的 2024 年报
37
37
  下载 688777 的年报
38
+ 查询 920185 的年报 # 北交所,新旧代码(如 835185)均可
38
39
  ```
39
40
 
40
41
  ## 系统要求
@@ -44,7 +45,7 @@
44
45
 
45
46
  ## 数据来源
46
47
 
47
- [巨潮资讯网](https://www.cninfo.com.cn) — 支持沪深两市及科创板
48
+ [巨潮资讯网](https://www.cninfo.com.cn) — 支持沪深两市(主板、创业板、科创板)及北京证券交易所(北交所)
48
49
 
49
50
  ## Credits
50
51
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@youhaozhao/cninfo-mcp",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "description": "MCP Server for querying and downloading Chinese listed companies' annual reports from CNINFO (巨潮资讯网)",
5
5
  "keywords": [
6
6
  "mcp",
@@ -45,8 +45,10 @@
45
45
  ],
46
46
  "files": [
47
47
  "bin/",
48
- "python/",
49
- "scripts/",
48
+ "python/mcp_server.py",
49
+ "python/spider.py",
50
+ "python/requirements.txt",
51
+ "scripts/install-python-deps.js",
50
52
  "README.md",
51
53
  "LICENSE"
52
54
  ]
package/python/spider.py CHANGED
@@ -24,6 +24,13 @@ EARLIEST_DATE = "2001-01-01"
24
24
  PAGE_SIZE = 30
25
25
  # 翻页安全上限,防止异常情况下无限循环
26
26
  MAX_PAGES = 100
27
+ # 巨潮搜索联想接口:把股票代码/简称解析为 orgId(北交所查询必需)
28
+ TOP_SEARCH_URL = "http://www.cninfo.com.cn/new/information/topSearch/query"
29
+ # 公告查询接口
30
+ QUERY_URL = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
31
+ # 瞬时失败(网络抖动/限流)的重试次数与退避基数(秒)
32
+ MAX_RETRIES = 3
33
+ RETRY_BACKOFF = 1.0
27
34
 
28
35
  User_Agent = [
29
36
  "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
@@ -55,6 +62,85 @@ def _build_headers() -> dict:
55
62
  return headers
56
63
 
57
64
 
65
+ def _post_json(url: str, data: dict) -> dict:
66
+ """POST 请求并解析 JSON,仅对可重试的瞬时失败按指数退避重试。
67
+
68
+ 巨潮接口在批量/高频访问下会偶发超时或限流,导致本可成功的查询失败。
69
+ 可重试:网络异常(超时/连接错误)、5xx、429,以及空/截断响应导致的
70
+ JSON 解析失败(requests 会抛 JSONDecodeError,实测多为限流时返回空体)。
71
+ 不可重试:4xx(除 429)等客户端错误,快速失败,避免无谓的退避等待。
72
+ 重试用尽后抛出最后一次异常,交由调用方的 try/except 记录并降级。
73
+ """
74
+ last_exc = None
75
+ for attempt in range(MAX_RETRIES):
76
+ try:
77
+ resp = requests.post(
78
+ url, headers=_build_headers(), data=data, timeout=30
79
+ )
80
+ resp.raise_for_status()
81
+ return resp.json()
82
+ except requests.exceptions.HTTPError as e:
83
+ status = e.response.status_code if e.response is not None else None
84
+ # 4xx(429 除外)不会自愈,立即失败,不浪费退避等待
85
+ if status is not None and status != 429 and 400 <= status < 500:
86
+ raise
87
+ last_exc = e
88
+ except requests.exceptions.RequestException as e:
89
+ # 网络异常 + JSONDecodeError(空/截断响应,多为瞬时限流)
90
+ last_exc = e
91
+ if attempt < MAX_RETRIES - 1:
92
+ time.sleep(RETRY_BACKOFF * (2**attempt) + random.random())
93
+ if last_exc is not None:
94
+ raise last_exc
95
+ raise RuntimeError(f"_post_json 未执行任何请求(MAX_RETRIES={MAX_RETRIES})")
96
+
97
+
98
+ def _query_announcements(query: dict) -> list:
99
+ """调用公告查询接口并返回 announcements 列表(带重试)。"""
100
+ result = _post_json(QUERY_URL, query)
101
+ if result and "announcements" in result and result["announcements"]:
102
+ return result["announcements"]
103
+ return []
104
+
105
+
106
+ def _is_bse_code(stock_code) -> bool:
107
+ """判断是否为北交所代码。
108
+
109
+ 北交所代码段:4xxxxx / 8xxxxx(原新三板平移),以及 92xxxx 标准段
110
+ (920,2024-04 起启用,预留 920-929)。这里特意只匹配 92 而非整个 9
111
+ 开头,以排除沪市 B 股 900xxx,避免为其多发一次无效的北交所查询。
112
+ """
113
+ digits = re.sub(r"\D", "", str(stock_code or ""))
114
+ return digits[:1] in ("4", "8") or digits[:2] == "92"
115
+
116
+
117
+ def _resolve_org_id(stock_code) -> Optional[tuple]:
118
+ """通过巨潮搜索联想接口把股票代码解析为 (code, orgId)。
119
+
120
+ 北交所的 hisAnnouncement 接口不接受 searchkey 或裸代码,必须以
121
+ stock="代码,orgId" 的形式查询,因此需要先解析 orgId。
122
+ 优先返回 code 完全等于输入的条目;找不到精确匹配则取第一条
123
+ (同一公司新旧代码共用同一 orgId)。无结果返回 None。
124
+ """
125
+ try:
126
+ hits = _post_json(TOP_SEARCH_URL, {"keyWord": stock_code, "maxNum": 10})
127
+ except Exception as e:
128
+ logger.warning("orgId 解析失败(%s): %s", stock_code, e)
129
+ return None
130
+
131
+ if not isinstance(hits, list) or not hits:
132
+ return None
133
+
134
+ target = re.sub(r"\D", "", str(stock_code or ""))
135
+ for it in hits:
136
+ if str(it.get("code")) == target and it.get("orgId"):
137
+ return str(it.get("code")), str(it.get("orgId"))
138
+ first = hits[0]
139
+ if first.get("orgId"):
140
+ return str(first.get("code")), str(first.get("orgId"))
141
+ return None
142
+
143
+
58
144
  def _date_range(start_date: str) -> str:
59
145
  """构造查询时间区间,结束日期取当天,避免硬编码过期。"""
60
146
  datetime.datetime.strptime(start_date, "%Y-%m-%d")
@@ -125,7 +211,6 @@ def _is_annual_report_title(
125
211
 
126
212
  # 深市 年度报告
127
213
  def szseAnnual(page, stock):
128
- query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
129
214
  query = {
130
215
  "pageNum": page, # 页码
131
216
  "pageSize": PAGE_SIZE,
@@ -140,18 +225,11 @@ def szseAnnual(page, stock):
140
225
  "seDate": _date_range(EARLIEST_DATE), # 时间区间
141
226
  }
142
227
 
143
- namelist = requests.post(
144
- query_path, headers=_build_headers(), data=query, timeout=30
145
- )
146
- result = namelist.json()
147
- if result and "announcements" in result and result["announcements"]:
148
- return result["announcements"]
149
- return []
228
+ return _query_announcements(query)
150
229
 
151
230
 
152
231
  # 沪市 年度报告
153
232
  def sseAnnual(page, stock):
154
- query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
155
233
  query = {
156
234
  "pageNum": page, # 页码
157
235
  "pageSize": PAGE_SIZE,
@@ -166,18 +244,35 @@ def sseAnnual(page, stock):
166
244
  "seDate": _date_range(EARLIEST_DATE), # 时间区间
167
245
  }
168
246
 
169
- namelist = requests.post(
170
- query_path, headers=_build_headers(), data=query, timeout=30
171
- )
172
- result = namelist.json()
173
- if result and "announcements" in result and result["announcements"]:
174
- return result["announcements"]
175
- return []
247
+ return _query_announcements(query)
248
+
249
+
250
+ # 北交所 年度报告
251
+ def bseAnnual(page, stock):
252
+ """北交所年报查询。
253
+
254
+ stock 形如 "代码,orgId",由 _resolve_org_id 解析得到。北交所必须
255
+ 通过 plate=bj + stock="代码,orgId" 查询,searchkey/裸代码均返回空。
256
+ """
257
+ query = {
258
+ "pageNum": page, # 页码
259
+ "pageSize": PAGE_SIZE,
260
+ "tabName": "fulltext",
261
+ "column": "bj", # 北交所
262
+ "stock": stock, # 必须为 "代码,orgId"
263
+ "searchkey": "",
264
+ "secid": "",
265
+ "plate": "bj",
266
+ "category": "category_ndbg_szsh", # 年度报告
267
+ "trade": "",
268
+ "seDate": _date_range(EARLIEST_DATE), # 时间区间
269
+ }
270
+
271
+ return _query_announcements(query)
176
272
 
177
273
 
178
274
  # 深市 招股
179
275
  def szseStock(page, stock):
180
- query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
181
276
  query = {
182
277
  "pageNum": page, # 页码
183
278
  "pageSize": PAGE_SIZE,
@@ -192,18 +287,11 @@ def szseStock(page, stock):
192
287
  "seDate": _date_range(EARLIEST_DATE), # 时间区间
193
288
  }
194
289
 
195
- namelist = requests.post(
196
- query_path, headers=_build_headers(), data=query, timeout=30
197
- )
198
- result = namelist.json()
199
- if result and "announcements" in result and result["announcements"]:
200
- return result["announcements"]
201
- return []
290
+ return _query_announcements(query)
202
291
 
203
292
 
204
293
  # 沪市 招股
205
294
  def sseStock(page, stock):
206
- query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
207
295
  query = {
208
296
  "pageNum": page, # 页码
209
297
  "pageSize": PAGE_SIZE,
@@ -218,13 +306,7 @@ def sseStock(page, stock):
218
306
  "seDate": _date_range(EARLIEST_DATE), # 时间区间
219
307
  }
220
308
 
221
- namelist = requests.post(
222
- query_path, headers=_build_headers(), data=query, timeout=30
223
- )
224
- result = namelist.json()
225
- if result and "announcements" in result and result["announcements"]:
226
- return result["announcements"]
227
- return []
309
+ return _query_announcements(query)
228
310
 
229
311
 
230
312
  def Download(
@@ -362,6 +444,18 @@ def query_annual_reports(stock_code, year=None):
362
444
  except Exception as e:
363
445
  logger.warning("深市年报查询失败: %s", e)
364
446
 
447
+ # 查询北交所(代码以 4/8 或 92 开头,不含沪市 B 股 900xxx)。北交所接口必须用 orgId,
448
+ # 故先解析 orgId 再以 stock="代码,orgId" 翻页查询。
449
+ if _is_bse_code(stock_code):
450
+ try:
451
+ resolved = _resolve_org_id(stock_code)
452
+ if resolved:
453
+ code, org_id = resolved
454
+ announcements_bse = _paginate(bseAnnual, f"{code},{org_id}")
455
+ all_announcements.extend(announcements_bse)
456
+ except Exception as e:
457
+ logger.warning("北交所年报查询失败: %s", e)
458
+
365
459
  # 按年份过滤
366
460
  if year:
367
461
  year_expr = re.escape(str(year))
package/python/bridge.py DELETED
@@ -1,161 +0,0 @@
1
- #!/usr/bin/env python3
2
- """CLI bridge that exposes cninfo spider functions as JSON in/out for Node.js."""
3
-
4
- import json
5
- import os
6
- import sys
7
- import traceback
8
- from typing import Any, Optional
9
-
10
- sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
11
-
12
- from spider import ( # noqa: E402
13
- download_annual_reports,
14
- download_prospectus,
15
- query_annual_reports,
16
- query_prospectus,
17
- saving_path,
18
- )
19
-
20
-
21
- BASE_URL = "https://static.cninfo.com.cn/"
22
-
23
-
24
- def _format_reports(reports: list[dict]) -> list[dict]:
25
- formatted = []
26
- for report in reports:
27
- adj = report.get("adjunctUrl", "")
28
- formatted.append(
29
- {
30
- "announcementTitle": report.get("announcementTitle", ""),
31
- "announcementTime": report.get("announcementTime", ""),
32
- "secCode": report.get("secCode", ""),
33
- "secName": report.get("secName", ""),
34
- "adjunctUrl": BASE_URL + adj if adj else "",
35
- }
36
- )
37
- return formatted
38
-
39
-
40
- def _require_stock_code(payload: dict) -> str:
41
- stock_code = (payload.get("stock_code") or "").strip()
42
- if not stock_code:
43
- raise ValueError("stock_code is required")
44
- return stock_code
45
-
46
-
47
- def _optional_year(payload: dict) -> Optional[int]:
48
- year = payload.get("year")
49
- if year is None:
50
- return None
51
- return int(year)
52
-
53
-
54
- def _resolve_save_path(payload: dict) -> str:
55
- save_path = payload.get("save_path")
56
- return save_path if save_path else saving_path
57
-
58
-
59
- def action_query_annual_reports(payload: dict) -> dict:
60
- stock_code = _require_stock_code(payload)
61
- year = _optional_year(payload)
62
- reports = query_annual_reports(stock_code, year)
63
- suffix = f" for year {year}" if year else ""
64
- if not reports:
65
- return {
66
- "success": False,
67
- "stock_code": stock_code,
68
- "year": year,
69
- "count": 0,
70
- "reports": [],
71
- "message": f"No annual reports found for stock {stock_code}{suffix}",
72
- }
73
- return {
74
- "success": True,
75
- "stock_code": stock_code,
76
- "year": year,
77
- "count": len(reports),
78
- "reports": _format_reports(reports),
79
- "message": f"Found {len(reports)} annual report(s){suffix}",
80
- }
81
-
82
-
83
- def action_download_annual_reports(payload: dict) -> dict:
84
- stock_code = _require_stock_code(payload)
85
- year = _optional_year(payload)
86
- output_dir = _resolve_save_path(payload)
87
- os.makedirs(output_dir, exist_ok=True)
88
- result = download_annual_reports(stock_code, year, save_path=output_dir)
89
- result["stock_code"] = stock_code
90
- result["year"] = year
91
- return result
92
-
93
-
94
- def action_query_prospectus(payload: dict) -> dict:
95
- stock_code = _require_stock_code(payload)
96
- reports = query_prospectus(stock_code)
97
- if not reports:
98
- return {
99
- "success": False,
100
- "stock_code": stock_code,
101
- "count": 0,
102
- "reports": [],
103
- "message": f"No prospectus found for stock {stock_code}",
104
- }
105
- return {
106
- "success": True,
107
- "stock_code": stock_code,
108
- "count": len(reports),
109
- "reports": _format_reports(reports),
110
- "message": f"Found {len(reports)} prospectus document(s)",
111
- }
112
-
113
-
114
- def action_download_prospectus(payload: dict) -> dict:
115
- stock_code = _require_stock_code(payload)
116
- output_dir = _resolve_save_path(payload)
117
- os.makedirs(output_dir, exist_ok=True)
118
- result = download_prospectus(stock_code, save_path=output_dir)
119
- result["stock_code"] = stock_code
120
- return result
121
-
122
-
123
- ACTIONS = {
124
- "query_annual_reports": action_query_annual_reports,
125
- "download_annual_reports": action_download_annual_reports,
126
- "query_prospectus": action_query_prospectus,
127
- "download_prospectus": action_download_prospectus,
128
- }
129
-
130
-
131
- def main() -> int:
132
- if len(sys.argv) < 3:
133
- sys.stderr.write("usage: bridge.py <action> <json-payload>\n")
134
- return 2
135
-
136
- action = sys.argv[1]
137
- raw_payload = sys.argv[2]
138
-
139
- handler = ACTIONS.get(action)
140
- if handler is None:
141
- sys.stdout.write(json.dumps({"ok": False, "error": f"Unknown action: {action}"}))
142
- return 1
143
-
144
- try:
145
- payload = json.loads(raw_payload) if raw_payload else {}
146
- except json.JSONDecodeError as exc:
147
- sys.stdout.write(json.dumps({"ok": False, "error": f"Invalid JSON payload: {exc}"}))
148
- return 1
149
-
150
- try:
151
- result: Any = handler(payload)
152
- sys.stdout.write(json.dumps({"ok": True, "payload": result}, ensure_ascii=False))
153
- return 0
154
- except Exception as exc:
155
- sys.stderr.write(traceback.format_exc())
156
- sys.stdout.write(json.dumps({"ok": False, "error": str(exc)}))
157
- return 1
158
-
159
-
160
- if __name__ == "__main__":
161
- raise SystemExit(main())