@youhaozhao/cninfo-mcp 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@youhaozhao/cninfo-mcp",
3
- "version": "1.0.2",
3
+ "version": "1.0.3",
4
4
  "description": "MCP Server for querying and downloading Chinese listed companies' annual reports from CNINFO (巨潮资讯网)",
5
5
  "keywords": [
6
6
  "mcp",
@@ -20,7 +20,7 @@
20
20
  "url": "https://github.com/youhaozhao/cninfo_mcp/issues"
21
21
  },
22
22
  "bin": {
23
- "cninfo-mcp": "./bin/cninfo-mcp.js"
23
+ "cninfo-mcp": "bin/cninfo-mcp.js"
24
24
  },
25
25
  "scripts": {
26
26
  "postinstall": "node scripts/install-python-deps.js",
@@ -12,20 +12,23 @@ from typing import Optional
12
12
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
13
13
 
14
14
  from mcp.server import FastMCP
15
- from spider import query_annual_reports, download_annual_reports, saving_path
15
+ from spider import (
16
+ query_annual_reports,
17
+ download_annual_reports,
18
+ query_prospectus,
19
+ download_prospectus,
20
+ saving_path,
21
+ )
16
22
 
17
23
  # Create MCP server
18
24
  mcp = FastMCP(
19
25
  name="cninfo-server",
20
- instructions="CNINFO annual reports server - Query and download Chinese listed companies' annual reports from cninfo.com.cn"
26
+ instructions="CNINFO annual reports server - Query and download Chinese listed companies' annual reports from cninfo.com.cn",
21
27
  )
22
28
 
23
29
 
24
30
  @mcp.tool()
25
- def query_annual_reports_tool(
26
- stock_code: str,
27
- year: Optional[int] = None
28
- ) -> dict:
31
+ def query_annual_reports_tool(stock_code: str, year: Optional[int] = None) -> dict:
29
32
  """
30
33
  Query annual reports for a Chinese listed company
31
34
 
@@ -46,57 +49,59 @@ def query_annual_reports_tool(
46
49
 
47
50
  if not reports:
48
51
  return {
49
- 'success': False,
50
- 'stock_code': stock_code,
51
- 'year': year,
52
- 'count': 0,
53
- 'reports': [],
54
- 'message': f'No annual reports found for stock {stock_code}' + (f' in year {year}' if year else '')
52
+ "success": False,
53
+ "stock_code": stock_code,
54
+ "year": year,
55
+ "count": 0,
56
+ "reports": [],
57
+ "message": f"No annual reports found for stock {stock_code}"
58
+ + (f" in year {year}" if year else ""),
55
59
  }
56
60
 
57
61
  # Extract relevant information
58
62
  report_details = []
59
63
  for report in reports:
60
- report_details.append({
61
- 'announcementTitle': report.get('announcementTitle', ''),
62
- 'announcementTime': report.get('announcementTime', ''),
63
- 'secCode': report.get('secCode', ''),
64
- 'secName': report.get('secName', ''),
65
- 'adjunctUrl': report.get('adjunctUrl', '')
66
- })
64
+ report_details.append(
65
+ {
66
+ "announcementTitle": report.get("announcementTitle", ""),
67
+ "announcementTime": report.get("announcementTime", ""),
68
+ "secCode": report.get("secCode", ""),
69
+ "secName": report.get("secName", ""),
70
+ "adjunctUrl": report.get("adjunctUrl", ""),
71
+ }
72
+ )
67
73
 
68
74
  return {
69
- 'success': True,
70
- 'stock_code': stock_code,
71
- 'year': year,
72
- 'count': len(reports),
73
- 'reports': report_details,
74
- 'message': f'Found {len(reports)} annual report(s)' + (f' for year {year}' if year else '')
75
+ "success": True,
76
+ "stock_code": stock_code,
77
+ "year": year,
78
+ "count": len(reports),
79
+ "reports": report_details,
80
+ "message": f"Found {len(reports)} annual report(s)"
81
+ + (f" for year {year}" if year else ""),
75
82
  }
76
83
 
77
84
  except Exception as e:
78
85
  return {
79
- 'success': False,
80
- 'stock_code': stock_code,
81
- 'year': year,
82
- 'count': 0,
83
- 'reports': [],
84
- 'error': str(e),
85
- 'message': f'Error querying annual reports: {str(e)}'
86
+ "success": False,
87
+ "stock_code": stock_code,
88
+ "year": year,
89
+ "count": 0,
90
+ "reports": [],
91
+ "error": str(e),
92
+ "message": f"Error querying annual reports: {str(e)}",
86
93
  }
87
94
 
88
95
 
89
96
  @mcp.tool()
90
- def download_annual_reports_tool(
91
- stock_code: str,
92
- year: Optional[int] = None
93
- ) -> dict:
97
+ def download_annual_reports_tool(stock_code: str, year: Optional[int] = None, save_path: Optional[str] = None) -> dict:
94
98
  """
95
99
  Download annual reports for a Chinese listed company
96
100
 
97
101
  Args:
98
102
  stock_code: Stock code (e.g., '000888' for 峨眉山, '688777' for 中科德芯)
99
103
  year: Optional year to filter (e.g., 2024). If not provided, downloads all available years
104
+ save_path: Optional directory to save files (e.g., '/Users/me/reports'). Defaults to pdf/ in package directory
100
105
 
101
106
  Returns:
102
107
  Dictionary containing:
@@ -108,26 +113,118 @@ def download_annual_reports_tool(
108
113
  - message: Status message
109
114
  """
110
115
  try:
111
- # Ensure download directory exists
112
- os.makedirs(saving_path, exist_ok=True)
116
+ output_dir = save_path or saving_path
117
+ os.makedirs(output_dir, exist_ok=True)
118
+
119
+ result = download_annual_reports(stock_code, year, save_path=output_dir)
120
+ result["stock_code"] = stock_code
121
+ result["year"] = year
122
+
123
+ return result
124
+
125
+ except Exception as e:
126
+ return {
127
+ "success": False,
128
+ "stock_code": stock_code,
129
+ "year": year,
130
+ "downloaded": 0,
131
+ "path": save_path or saving_path,
132
+ "error": str(e),
133
+ "message": f"Error downloading annual reports: {str(e)}",
134
+ }
135
+
136
+
137
+ @mcp.tool()
138
+ def query_prospectus_tool(stock_code: str) -> dict:
139
+ """
140
+ Query prospectus documents for a Chinese listed company
141
+
142
+ Args:
143
+ stock_code: Stock code (e.g., '000888' for 峨眉山, '688777' for 中科德芯)
144
+
145
+ Returns:
146
+ Dictionary containing:
147
+ - success: Boolean indicating if the query was successful
148
+ - stock_code: The queried stock code
149
+ - count: Number of documents found
150
+ - reports: List of document details (announcementTitle, announcementTime, secCode, secName)
151
+ """
152
+ try:
153
+ reports = query_prospectus(stock_code)
154
+
155
+ if not reports:
156
+ return {
157
+ "success": False,
158
+ "stock_code": stock_code,
159
+ "count": 0,
160
+ "reports": [],
161
+ "message": f"No prospectus found for stock {stock_code}",
162
+ }
113
163
 
114
- result = download_annual_reports(stock_code, year)
164
+ report_details = [
165
+ {
166
+ "announcementTitle": r.get("announcementTitle", ""),
167
+ "announcementTime": r.get("announcementTime", ""),
168
+ "secCode": r.get("secCode", ""),
169
+ "secName": r.get("secName", ""),
170
+ "adjunctUrl": r.get("adjunctUrl", ""),
171
+ }
172
+ for r in reports
173
+ ]
174
+
175
+ return {
176
+ "success": True,
177
+ "stock_code": stock_code,
178
+ "count": len(reports),
179
+ "reports": report_details,
180
+ "message": f"Found {len(reports)} prospectus document(s)",
181
+ }
182
+
183
+ except Exception as e:
184
+ return {
185
+ "success": False,
186
+ "stock_code": stock_code,
187
+ "count": 0,
188
+ "reports": [],
189
+ "error": str(e),
190
+ "message": f"Error querying prospectus: {str(e)}",
191
+ }
192
+
193
+
194
+ @mcp.tool()
195
+ def download_prospectus_tool(stock_code: str, save_path: Optional[str] = None) -> dict:
196
+ """
197
+ Download prospectus documents for a Chinese listed company
198
+
199
+ Args:
200
+ stock_code: Stock code (e.g., '000888' for 峨眉山, '688777' for 中科德芯)
201
+ save_path: Optional directory to save files (e.g., '/Users/me/reports'). Defaults to pdf/ in package directory
202
+
203
+ Returns:
204
+ Dictionary containing:
205
+ - success: Boolean indicating if download was successful
206
+ - stock_code: The stock code
207
+ - downloaded: Number of files downloaded
208
+ - path: Directory where files were saved
209
+ - message: Status message
210
+ """
211
+ try:
212
+ output_dir = save_path or saving_path
213
+ os.makedirs(output_dir, exist_ok=True)
115
214
 
116
- # Add path information
117
- result['stock_code'] = stock_code
118
- result['year'] = year
215
+ result = download_prospectus(stock_code, save_path=output_dir)
216
+ result["stock_code"] = stock_code
119
217
 
120
218
  return result
121
219
 
122
220
  except Exception as e:
123
221
  return {
124
- 'success': False,
125
- 'stock_code': stock_code,
126
- 'year': year,
127
- 'downloaded': 0,
128
- 'path': saving_path,
129
- 'error': str(e),
130
- 'message': f'Error downloading annual reports: {str(e)}'
222
+ "success": False,
223
+ "stock_code": stock_code,
224
+ "downloaded": 0,
225
+ "path": save_path or saving_path,
226
+ "error": str(e),
227
+ "message": f"Error downloading prospectus: {str(e)}",
131
228
  }
132
229
 
133
230
 
@@ -151,9 +248,9 @@ def get_annual_reports_list(stock_code: str) -> str:
151
248
  output = [f"Annual Reports for {stock_code}:", "=" * 60]
152
249
 
153
250
  for report in reports:
154
- title = report.get('announcementTitle', 'N/A')
155
- time = report.get('announcementTime', 'N/A')
156
- name = report.get('secName', 'N/A')
251
+ title = report.get("announcementTitle", "N/A")
252
+ time = report.get("announcementTime", "N/A")
253
+ name = report.get("secName", "N/A")
157
254
  output.append(f"\n📄 {title}")
158
255
  output.append(f" Company: {name}")
159
256
  output.append(f" Date: {time}")
package/python/spider.py CHANGED
@@ -1,17 +1,18 @@
1
1
  """
2
- downloads:
3
- 公开招股书(招股说明书/招股意向书)
4
- 《年度报告》 16 17 18
2
+ downloads:
3
+ 公开招股书(招股说明书/招股意向书)
5
4
  """
5
+
6
6
  import os
7
7
  import random
8
8
  import time
9
+
9
10
  import requests
10
11
 
11
- download_path = 'https://static.cninfo.com.cn/'
12
+ download_path = "https://static.cninfo.com.cn/"
12
13
  # 使用脚本所在目录的相对路径
13
- _saving_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'pdf')
14
- saving_path = _saving_path + '/'
14
+ _saving_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pdf")
15
+ saving_path = _saving_path + "/"
15
16
 
16
17
  User_Agent = [
17
18
  "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
@@ -20,163 +21,170 @@ User_Agent = [
20
21
  "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
21
22
  "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
22
23
  "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
23
- "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0"
24
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
24
25
  ]
25
26
 
26
27
 
27
- headers = {'Accept': 'application/json, text/javascript, */*; q=0.01',
28
- "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
29
- "Accept-Encoding": "gzip, deflate",
30
- "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-HK;q=0.6,zh-TW;q=0.5",
31
- 'Host': 'www.cninfo.com.cn',
32
- 'Origin': 'http://www.cninfo.com.cn',
33
- 'Referer': 'http://www.cninfo.com.cn/new/commonUrl?url=disclosure/list/notice',
34
- 'X-Requested-With': 'XMLHttpRequest'
35
- }
36
-
37
-
28
+ headers = {
29
+ "Accept": "application/json, text/javascript, */*; q=0.01",
30
+ "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
31
+ "Accept-Encoding": "gzip, deflate",
32
+ "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-HK;q=0.6,zh-TW;q=0.5",
33
+ "Host": "www.cninfo.com.cn",
34
+ "Origin": "http://www.cninfo.com.cn",
35
+ "Referer": "http://www.cninfo.com.cn/new/commonUrl?url=disclosure/list/notice",
36
+ "X-Requested-With": "XMLHttpRequest",
37
+ }
38
38
 
39
39
 
40
40
  # 深市 年度报告
41
41
  def szseAnnual(page, stock):
42
- query_path = 'http://www.cninfo.com.cn/new/hisAnnouncement/query'
43
- headers['User-Agent'] = random.choice(User_Agent) # 定义User_Agent
44
- query = {'pageNum': page, # 页码
45
- 'pageSize': 30,
46
- 'tabName': 'fulltext',
47
- 'column': 'szse', # 深交所
48
- 'stock': '',
49
- 'searchkey': stock, # 使用searchkey查询股票代码或公司名
50
- 'secid': '',
51
- 'plate': 'sz',
52
- 'category': 'category_ndbg_szsh', # 年度报告
53
- 'trade': '',
54
- 'seDate': '2020-01-01~2026-02-15' # 时间区间
55
- }
42
+ query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
43
+ headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
44
+ query = {
45
+ "pageNum": page, # 页码
46
+ "pageSize": 30,
47
+ "tabName": "fulltext",
48
+ "column": "szse", # 深交所
49
+ "stock": "",
50
+ "searchkey": stock, # 使用searchkey查询股票代码或公司名
51
+ "secid": "",
52
+ "plate": "sz",
53
+ "category": "category_ndbg_szsh", # 年度报告
54
+ "trade": "",
55
+ "seDate": "2020-01-01~2026-02-15", # 时间区间
56
+ }
56
57
 
57
58
  namelist = requests.post(query_path, headers=headers, data=query)
58
59
  result = namelist.json()
59
- if result and 'announcements' in result and result['announcements']:
60
- return result['announcements']
60
+ if result and "announcements" in result and result["announcements"]:
61
+ return result["announcements"]
61
62
  return []
62
63
 
63
64
 
64
65
  # 沪市 年度报告
65
66
  def sseAnnual(page, stock):
66
- query_path = 'http://www.cninfo.com.cn/new/hisAnnouncement/query'
67
- headers['User-Agent'] = random.choice(User_Agent) # 定义User_Agent
68
- query = {'pageNum': page, # 页码
69
- 'pageSize': 30,
70
- 'tabName': 'fulltext',
71
- 'column': 'sse',
72
- 'stock': '',
73
- 'searchkey': stock, # 使用searchkey查询股票代码或公司名
74
- 'secid': '',
75
- 'plate': 'sh',
76
- 'category': 'category_ndbg_szsh', # 年度报告
77
- 'trade': '',
78
- 'seDate': '2020-01-01~2026-02-15' # 时间区间
79
- }
67
+ query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
68
+ headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
69
+ query = {
70
+ "pageNum": page, # 页码
71
+ "pageSize": 30,
72
+ "tabName": "fulltext",
73
+ "column": "sse",
74
+ "stock": "",
75
+ "searchkey": stock, # 使用searchkey查询股票代码或公司名
76
+ "secid": "",
77
+ "plate": "sh",
78
+ "category": "category_ndbg_szsh", # 年度报告
79
+ "trade": "",
80
+ "seDate": "2020-01-01~2026-02-15", # 时间区间
81
+ }
80
82
 
81
83
  namelist = requests.post(query_path, headers=headers, data=query)
82
84
  result = namelist.json()
83
- if result and 'announcements' in result and result['announcements']:
84
- return result['announcements']
85
+ if result and "announcements" in result and result["announcements"]:
86
+ return result["announcements"]
85
87
  return []
86
88
 
87
89
 
88
90
  # 深市 招股
89
91
  def szseStock(page, stock):
90
- query_path = 'http://www.cninfo.com.cn/new/hisAnnouncement/query'
91
- headers['User-Agent'] = random.choice(User_Agent) # 定义User_Agent
92
- query = {'pageNum': page, # 页码
93
- 'pageSize': 30,
94
- 'tabName': 'fulltext',
95
- 'column': 'szse',
96
- 'stock': '',
97
- 'searchkey': stock + ' 招股', # 组合搜索:股票代码 + 招股
98
- 'secid': '',
99
- 'plate': 'sz',
100
- 'category': '',
101
- 'trade': '',
102
- 'seDate': '2015-01-01~2026-02-15' # 时间区间
103
- }
92
+ query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
93
+ headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
94
+ query = {
95
+ "pageNum": page, # 页码
96
+ "pageSize": 30,
97
+ "tabName": "fulltext",
98
+ "column": "szse",
99
+ "stock": "",
100
+ "searchkey": stock + " 招股", # 组合搜索:股票代码 + 招股
101
+ "secid": "",
102
+ "plate": "sz",
103
+ "category": "",
104
+ "trade": "",
105
+ "seDate": "2015-01-01~2026-02-15", # 时间区间
106
+ }
104
107
 
105
108
  namelist = requests.post(query_path, headers=headers, data=query)
106
109
  result = namelist.json()
107
- if result and 'announcements' in result and result['announcements']:
108
- return result['announcements']
110
+ if result and "announcements" in result and result["announcements"]:
111
+ return result["announcements"]
109
112
  return []
110
113
 
111
114
 
112
115
  # 沪市 招股
113
116
  def sseStock(page, stock):
114
- query_path = 'http://www.cninfo.com.cn/new/hisAnnouncement/query'
115
- headers['User-Agent'] = random.choice(User_Agent) # 定义User_Agent
116
- query = {'pageNum': page, # 页码
117
- 'pageSize': 30,
118
- 'tabName': 'fulltext',
119
- 'column': 'sse',
120
- 'stock': '',
121
- 'searchkey': stock + ' 招股', # 组合搜索:股票代码 + 招股
122
- 'secid': '',
123
- 'plate': 'sh',
124
- 'category': '',
125
- 'trade': '',
126
- 'seDate': '2015-01-01~2026-02-15' # 时间区间
127
- }
117
+ query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
118
+ headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
119
+ query = {
120
+ "pageNum": page, # 页码
121
+ "pageSize": 30,
122
+ "tabName": "fulltext",
123
+ "column": "sse",
124
+ "stock": "",
125
+ "searchkey": stock + " 招股", # 组合搜索:股票代码 + 招股
126
+ "secid": "",
127
+ "plate": "sh",
128
+ "category": "",
129
+ "trade": "",
130
+ "seDate": "2015-01-01~2026-02-15", # 时间区间
131
+ }
128
132
 
129
133
  namelist = requests.post(query_path, headers=headers, data=query)
130
134
  result = namelist.json()
131
- if result and 'announcements' in result and result['announcements']:
132
- return result['announcements']
135
+ if result and "announcements" in result and result["announcements"]:
136
+ return result["announcements"]
133
137
  return []
134
138
 
135
139
 
136
140
  # download PDF
137
- def Download(single_page, year_filter=None):
141
+ def Download(single_page, year_filter=None, save_path=None):
138
142
  """
139
143
  Download PDF files from announcement list
140
144
 
141
145
  Args:
142
146
  single_page: List of announcement dictionaries
143
147
  year_filter: Optional year to filter (e.g., 2024). If None, downloads all years
148
+ save_path: Optional custom directory to save files. Defaults to saving_path
144
149
  """
145
150
  if single_page is None:
146
151
  return
147
152
 
148
- headers = {'Accept': 'application/json, text/javascript, */*; q=0.01',
149
- "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
150
- "Accept-Encoding": "gzip, deflate",
151
- "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-HK;q=0.6,zh-TW;q=0.5",
152
- 'Host': 'www.cninfo.com.cn',
153
- 'Origin': 'http://www.cninfo.com.cn'
154
- }
153
+ headers = {
154
+ "Accept": "application/json, text/javascript, */*; q=0.01",
155
+ "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
156
+ "Accept-Encoding": "gzip, deflate",
157
+ "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-HK;q=0.6,zh-TW;q=0.5",
158
+ "Host": "www.cninfo.com.cn",
159
+ "Origin": "http://www.cninfo.com.cn",
160
+ }
155
161
 
156
162
  # Build allowed list dynamically based on year_filter
157
163
  allowed_list = []
158
164
  if year_filter:
159
165
  allowed_list = [
160
- f'{year_filter}年年度报告(更新后)',
161
- f'{year_filter}年年度报告',
166
+ f"{year_filter}年年度报告(更新后)",
167
+ f"{year_filter}年年度报告",
162
168
  ]
163
169
  else:
164
170
  # Default: all years from 2016-2025
165
171
  for year in range(2016, 2026):
166
- allowed_list.append(f'{year}年年度报告(更新后)')
167
- allowed_list.append(f'{year}年年度报告')
172
+ allowed_list.append(f"{year}年年度报告(更新后)")
173
+ allowed_list.append(f"{year}年年度报告")
168
174
 
169
175
  allowed_list_2 = [
170
- '招股书',
171
- '招股说明书',
172
- '招股意向书',
176
+ "招股书",
177
+ "招股说明书",
178
+ "招股意向书",
173
179
  ]
174
180
 
181
+ output_dir = (save_path or saving_path).rstrip("/") + "/"
182
+
175
183
  for i in single_page:
176
- title = i['announcementTitle']
184
+ title = i["announcementTitle"]
177
185
 
178
186
  # 跳过确认意见等非正式报告
179
- if '确认意见' in title or '取消' in title:
187
+ if "确认意见" in title or "取消" in title:
180
188
  continue
181
189
 
182
190
  # 检查标题是否包含允许的文本
@@ -194,20 +202,27 @@ def Download(single_page, year_filter=None):
194
202
 
195
203
  if allowed:
196
204
  download = download_path + i["adjunctUrl"]
197
- name = i["secCode"] + '_' + i['secName'] + '_' + i['announcementTitle'] + '.pdf'
198
- if '*' in name:
199
- name = name.replace('*', '')
200
- file_path = saving_path + name
205
+ name = (
206
+ i["secCode"]
207
+ + "_"
208
+ + i["secName"]
209
+ + "_"
210
+ + i["announcementTitle"]
211
+ + ".pdf"
212
+ )
213
+ if "*" in name:
214
+ name = name.replace("*", "")
215
+ file_path = output_dir + name
201
216
 
202
217
  # 显示下载进度
203
218
  print(f" ↓ {name}")
204
219
 
205
220
  # 确保目录存在
206
- os.makedirs(os.path.dirname(file_path), exist_ok=True)
221
+ os.makedirs(output_dir, exist_ok=True)
207
222
 
208
223
  time.sleep(random.random() * 2)
209
224
 
210
- headers['User-Agent'] = random.choice(User_Agent)
225
+ headers["User-Agent"] = random.choice(User_Agent)
211
226
  r = requests.get(download)
212
227
 
213
228
  f = open(file_path, "wb")
@@ -219,6 +234,70 @@ def Download(single_page, year_filter=None):
219
234
  return True
220
235
 
221
236
 
237
+ def query_prospectus(stock_code):
238
+ """
239
+ Query prospectus documents (招股书/招股说明书/招股意向书) for a specific stock code
240
+
241
+ Args:
242
+ stock_code: Stock code (e.g., '000888', '688777')
243
+
244
+ Returns:
245
+ List of announcement dictionaries
246
+ """
247
+ all_announcements = []
248
+
249
+ try:
250
+ announcements_sse = sseStock(1, stock_code)
251
+ all_announcements.extend(announcements_sse)
252
+ except Exception as e:
253
+ print(f"Error querying SSE prospectus: {e}")
254
+
255
+ try:
256
+ announcements_szse = szseStock(1, stock_code)
257
+ all_announcements.extend(announcements_szse)
258
+ except Exception as e:
259
+ print(f"Error querying SZSE prospectus: {e}")
260
+
261
+ prospectus_keywords = ["招股书", "招股说明书", "招股意向书"]
262
+ filtered = [
263
+ a for a in all_announcements
264
+ if any(kw in a.get("announcementTitle", "") for kw in prospectus_keywords)
265
+ ]
266
+
267
+ return filtered
268
+
269
+
270
+ def download_prospectus(stock_code, save_path=None):
271
+ """
272
+ Download prospectus documents for a specific stock code
273
+
274
+ Args:
275
+ stock_code: Stock code (e.g., '000888', '688777')
276
+ save_path: Optional custom directory to save files
277
+
278
+ Returns:
279
+ Dictionary with status and message
280
+ """
281
+ announcements = query_prospectus(stock_code)
282
+
283
+ if not announcements:
284
+ return {
285
+ "success": False,
286
+ "message": f"No prospectus found for stock {stock_code}",
287
+ "downloaded": 0,
288
+ }
289
+
290
+ output_dir = save_path or saving_path
291
+ result = Download(announcements, save_path=output_dir)
292
+
293
+ return {
294
+ "success": result,
295
+ "message": f"Downloaded prospectus for {stock_code}",
296
+ "downloaded": len(announcements),
297
+ "path": output_dir,
298
+ }
299
+
300
+
222
301
  def query_annual_reports(stock_code, year=None):
223
302
  """
224
303
  Query annual reports for a specific stock code
@@ -251,20 +330,21 @@ def query_annual_reports(stock_code, year=None):
251
330
  year_str = str(year)
252
331
  filtered = []
253
332
  for announcement in all_announcements:
254
- if year_str in announcement.get('announcementTitle', ''):
333
+ if year_str in announcement.get("announcementTitle", ""):
255
334
  filtered.append(announcement)
256
335
  all_announcements = filtered
257
336
 
258
337
  return all_announcements
259
338
 
260
339
 
261
- def download_annual_reports(stock_code, year=None):
340
+ def download_annual_reports(stock_code, year=None, save_path=None):
262
341
  """
263
342
  Download annual reports for a specific stock code
264
343
 
265
344
  Args:
266
345
  stock_code: Stock code (e.g., '000888', '688777')
267
346
  year: Optional year filter (e.g., 2024). If None, downloads all years
347
+ save_path: Optional custom directory to save files
268
348
 
269
349
  Returns:
270
350
  Dictionary with status and message
@@ -273,19 +353,21 @@ def download_annual_reports(stock_code, year=None):
273
353
 
274
354
  if not announcements:
275
355
  return {
276
- 'success': False,
277
- 'message': f'No annual reports found for stock {stock_code}' + (f' in year {year}' if year else ''),
278
- 'downloaded': 0
356
+ "success": False,
357
+ "message": f"No annual reports found for stock {stock_code}"
358
+ + (f" in year {year}" if year else ""),
359
+ "downloaded": 0,
279
360
  }
280
361
 
281
- # Download PDFs
282
- result = Download(announcements, year_filter=year)
362
+ output_dir = save_path or saving_path
363
+ result = Download(announcements, year_filter=year, save_path=output_dir)
283
364
 
284
365
  return {
285
- 'success': result,
286
- 'message': f'Downloaded reports for {stock_code}' + (f' year {year}' if year else ''),
287
- 'downloaded': len(announcements),
288
- 'path': saving_path
366
+ "success": result,
367
+ "message": f"Downloaded reports for {stock_code}"
368
+ + (f" year {year}" if year else ""),
369
+ "downloaded": len(announcements),
370
+ "path": output_dir,
289
371
  }
290
372
 
291
373
 
@@ -297,19 +379,19 @@ def Run(page_number, stock):
297
379
  annual_report_ = sseAnnual(page_number, stock)
298
380
  stock_report_ = sseStock(page_number, stock)
299
381
  except Exception:
300
- print(page_number, 'page error, retrying')
382
+ print(page_number, "page error, retrying")
301
383
  try:
302
384
  annual_report = szseAnnual(page_number, stock)
303
385
  except Exception:
304
- print(page_number, 'page error')
386
+ print(page_number, "page error")
305
387
  Download(annual_report)
306
388
  Download(stock_report)
307
389
  Download(annual_report_)
308
390
  Download(stock_report_)
309
391
 
310
392
 
311
- if __name__ == '__main__':
312
- with open('company_id.txt') as file:
393
+ if __name__ == "__main__":
394
+ with open("company_id.txt") as file:
313
395
  lines = file.readlines()
314
396
  for line in lines:
315
397
  stock = line