@youhaozhao/cninfo-mcp 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/python/__pycache__/spider.cpython-314.pyc +0 -0
- package/python/mcp_server.py +149 -52
- package/python/spider.py +202 -120
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@youhaozhao/cninfo-mcp",
|
|
3
|
-
"version": "1.0.2",
|
|
3
|
+
"version": "1.0.3",
|
|
4
4
|
"description": "MCP Server for querying and downloading Chinese listed companies' annual reports from CNINFO (巨潮资讯网)",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mcp",
|
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
"url": "https://github.com/youhaozhao/cninfo_mcp/issues"
|
|
21
21
|
},
|
|
22
22
|
"bin": {
|
|
23
|
-
"cninfo-mcp": "
|
|
23
|
+
"cninfo-mcp": "bin/cninfo-mcp.js"
|
|
24
24
|
},
|
|
25
25
|
"scripts": {
|
|
26
26
|
"postinstall": "node scripts/install-python-deps.js",
|
|
package/python/__pycache__/spider.cpython-314.pyc: Binary file
|
package/python/mcp_server.py
CHANGED
|
@@ -12,20 +12,23 @@ from typing import Optional
|
|
|
12
12
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
13
13
|
|
|
14
14
|
from mcp.server import FastMCP
|
|
15
|
-
from spider import
|
|
15
|
+
from spider import (
|
|
16
|
+
query_annual_reports,
|
|
17
|
+
download_annual_reports,
|
|
18
|
+
query_prospectus,
|
|
19
|
+
download_prospectus,
|
|
20
|
+
saving_path,
|
|
21
|
+
)
|
|
16
22
|
|
|
17
23
|
# Create MCP server
|
|
18
24
|
mcp = FastMCP(
|
|
19
25
|
name="cninfo-server",
|
|
20
|
-
instructions="CNINFO annual reports server - Query and download Chinese listed companies' annual reports from cninfo.com.cn"
|
|
26
|
+
instructions="CNINFO annual reports server - Query and download Chinese listed companies' annual reports from cninfo.com.cn",
|
|
21
27
|
)
|
|
22
28
|
|
|
23
29
|
|
|
24
30
|
@mcp.tool()
|
|
25
|
-
def query_annual_reports_tool(
|
|
26
|
-
stock_code: str,
|
|
27
|
-
year: Optional[int] = None
|
|
28
|
-
) -> dict:
|
|
31
|
+
def query_annual_reports_tool(stock_code: str, year: Optional[int] = None) -> dict:
|
|
29
32
|
"""
|
|
30
33
|
Query annual reports for a Chinese listed company
|
|
31
34
|
|
|
@@ -46,57 +49,59 @@ def query_annual_reports_tool(
|
|
|
46
49
|
|
|
47
50
|
if not reports:
|
|
48
51
|
return {
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
52
|
+
"success": False,
|
|
53
|
+
"stock_code": stock_code,
|
|
54
|
+
"year": year,
|
|
55
|
+
"count": 0,
|
|
56
|
+
"reports": [],
|
|
57
|
+
"message": f"No annual reports found for stock {stock_code}"
|
|
58
|
+
+ (f" in year {year}" if year else ""),
|
|
55
59
|
}
|
|
56
60
|
|
|
57
61
|
# Extract relevant information
|
|
58
62
|
report_details = []
|
|
59
63
|
for report in reports:
|
|
60
|
-
report_details.append(
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
64
|
+
report_details.append(
|
|
65
|
+
{
|
|
66
|
+
"announcementTitle": report.get("announcementTitle", ""),
|
|
67
|
+
"announcementTime": report.get("announcementTime", ""),
|
|
68
|
+
"secCode": report.get("secCode", ""),
|
|
69
|
+
"secName": report.get("secName", ""),
|
|
70
|
+
"adjunctUrl": report.get("adjunctUrl", ""),
|
|
71
|
+
}
|
|
72
|
+
)
|
|
67
73
|
|
|
68
74
|
return {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
+
"success": True,
|
|
76
|
+
"stock_code": stock_code,
|
|
77
|
+
"year": year,
|
|
78
|
+
"count": len(reports),
|
|
79
|
+
"reports": report_details,
|
|
80
|
+
"message": f"Found {len(reports)} annual report(s)"
|
|
81
|
+
+ (f" for year {year}" if year else ""),
|
|
75
82
|
}
|
|
76
83
|
|
|
77
84
|
except Exception as e:
|
|
78
85
|
return {
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
+
"success": False,
|
|
87
|
+
"stock_code": stock_code,
|
|
88
|
+
"year": year,
|
|
89
|
+
"count": 0,
|
|
90
|
+
"reports": [],
|
|
91
|
+
"error": str(e),
|
|
92
|
+
"message": f"Error querying annual reports: {str(e)}",
|
|
86
93
|
}
|
|
87
94
|
|
|
88
95
|
|
|
89
96
|
@mcp.tool()
|
|
90
|
-
def download_annual_reports_tool(
|
|
91
|
-
stock_code: str,
|
|
92
|
-
year: Optional[int] = None
|
|
93
|
-
) -> dict:
|
|
97
|
+
def download_annual_reports_tool(stock_code: str, year: Optional[int] = None, save_path: Optional[str] = None) -> dict:
|
|
94
98
|
"""
|
|
95
99
|
Download annual reports for a Chinese listed company
|
|
96
100
|
|
|
97
101
|
Args:
|
|
98
102
|
stock_code: Stock code (e.g., '000888' for 峨眉山, '688777' for 中科德芯)
|
|
99
103
|
year: Optional year to filter (e.g., 2024). If not provided, downloads all available years
|
|
104
|
+
save_path: Optional directory to save files (e.g., '/Users/me/reports'). Defaults to pdf/ in package directory
|
|
100
105
|
|
|
101
106
|
Returns:
|
|
102
107
|
Dictionary containing:
|
|
@@ -108,26 +113,118 @@ def download_annual_reports_tool(
|
|
|
108
113
|
- message: Status message
|
|
109
114
|
"""
|
|
110
115
|
try:
|
|
111
|
-
|
|
112
|
-
os.makedirs(
|
|
116
|
+
output_dir = save_path or saving_path
|
|
117
|
+
os.makedirs(output_dir, exist_ok=True)
|
|
118
|
+
|
|
119
|
+
result = download_annual_reports(stock_code, year, save_path=output_dir)
|
|
120
|
+
result["stock_code"] = stock_code
|
|
121
|
+
result["year"] = year
|
|
122
|
+
|
|
123
|
+
return result
|
|
124
|
+
|
|
125
|
+
except Exception as e:
|
|
126
|
+
return {
|
|
127
|
+
"success": False,
|
|
128
|
+
"stock_code": stock_code,
|
|
129
|
+
"year": year,
|
|
130
|
+
"downloaded": 0,
|
|
131
|
+
"path": save_path or saving_path,
|
|
132
|
+
"error": str(e),
|
|
133
|
+
"message": f"Error downloading annual reports: {str(e)}",
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@mcp.tool()
|
|
138
|
+
def query_prospectus_tool(stock_code: str) -> dict:
|
|
139
|
+
"""
|
|
140
|
+
Query prospectus documents for a Chinese listed company
|
|
141
|
+
|
|
142
|
+
Args:
|
|
143
|
+
stock_code: Stock code (e.g., '000888' for 峨眉山, '688777' for 中科德芯)
|
|
144
|
+
|
|
145
|
+
Returns:
|
|
146
|
+
Dictionary containing:
|
|
147
|
+
- success: Boolean indicating if the query was successful
|
|
148
|
+
- stock_code: The queried stock code
|
|
149
|
+
- count: Number of documents found
|
|
150
|
+
- reports: List of document details (announcementTitle, announcementTime, secCode, secName)
|
|
151
|
+
"""
|
|
152
|
+
try:
|
|
153
|
+
reports = query_prospectus(stock_code)
|
|
154
|
+
|
|
155
|
+
if not reports:
|
|
156
|
+
return {
|
|
157
|
+
"success": False,
|
|
158
|
+
"stock_code": stock_code,
|
|
159
|
+
"count": 0,
|
|
160
|
+
"reports": [],
|
|
161
|
+
"message": f"No prospectus found for stock {stock_code}",
|
|
162
|
+
}
|
|
113
163
|
|
|
114
|
-
|
|
164
|
+
report_details = [
|
|
165
|
+
{
|
|
166
|
+
"announcementTitle": r.get("announcementTitle", ""),
|
|
167
|
+
"announcementTime": r.get("announcementTime", ""),
|
|
168
|
+
"secCode": r.get("secCode", ""),
|
|
169
|
+
"secName": r.get("secName", ""),
|
|
170
|
+
"adjunctUrl": r.get("adjunctUrl", ""),
|
|
171
|
+
}
|
|
172
|
+
for r in reports
|
|
173
|
+
]
|
|
174
|
+
|
|
175
|
+
return {
|
|
176
|
+
"success": True,
|
|
177
|
+
"stock_code": stock_code,
|
|
178
|
+
"count": len(reports),
|
|
179
|
+
"reports": report_details,
|
|
180
|
+
"message": f"Found {len(reports)} prospectus document(s)",
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
except Exception as e:
|
|
184
|
+
return {
|
|
185
|
+
"success": False,
|
|
186
|
+
"stock_code": stock_code,
|
|
187
|
+
"count": 0,
|
|
188
|
+
"reports": [],
|
|
189
|
+
"error": str(e),
|
|
190
|
+
"message": f"Error querying prospectus: {str(e)}",
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@mcp.tool()
|
|
195
|
+
def download_prospectus_tool(stock_code: str, save_path: Optional[str] = None) -> dict:
|
|
196
|
+
"""
|
|
197
|
+
Download prospectus documents for a Chinese listed company
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
stock_code: Stock code (e.g., '000888' for 峨眉山, '688777' for 中科德芯)
|
|
201
|
+
save_path: Optional directory to save files (e.g., '/Users/me/reports'). Defaults to pdf/ in package directory
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
Dictionary containing:
|
|
205
|
+
- success: Boolean indicating if download was successful
|
|
206
|
+
- stock_code: The stock code
|
|
207
|
+
- downloaded: Number of files downloaded
|
|
208
|
+
- path: Directory where files were saved
|
|
209
|
+
- message: Status message
|
|
210
|
+
"""
|
|
211
|
+
try:
|
|
212
|
+
output_dir = save_path or saving_path
|
|
213
|
+
os.makedirs(output_dir, exist_ok=True)
|
|
115
214
|
|
|
116
|
-
|
|
117
|
-
result[
|
|
118
|
-
result['year'] = year
|
|
215
|
+
result = download_prospectus(stock_code, save_path=output_dir)
|
|
216
|
+
result["stock_code"] = stock_code
|
|
119
217
|
|
|
120
218
|
return result
|
|
121
219
|
|
|
122
220
|
except Exception as e:
|
|
123
221
|
return {
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
'message': f'Error downloading annual reports: {str(e)}'
|
|
222
|
+
"success": False,
|
|
223
|
+
"stock_code": stock_code,
|
|
224
|
+
"downloaded": 0,
|
|
225
|
+
"path": save_path or saving_path,
|
|
226
|
+
"error": str(e),
|
|
227
|
+
"message": f"Error downloading prospectus: {str(e)}",
|
|
131
228
|
}
|
|
132
229
|
|
|
133
230
|
|
|
@@ -151,9 +248,9 @@ def get_annual_reports_list(stock_code: str) -> str:
|
|
|
151
248
|
output = [f"Annual Reports for {stock_code}:", "=" * 60]
|
|
152
249
|
|
|
153
250
|
for report in reports:
|
|
154
|
-
title = report.get(
|
|
155
|
-
time = report.get(
|
|
156
|
-
name = report.get(
|
|
251
|
+
title = report.get("announcementTitle", "N/A")
|
|
252
|
+
time = report.get("announcementTime", "N/A")
|
|
253
|
+
name = report.get("secName", "N/A")
|
|
157
254
|
output.append(f"\n📄 {title}")
|
|
158
255
|
output.append(f" Company: {name}")
|
|
159
256
|
output.append(f" Date: {time}")
|
package/python/spider.py
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
"""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
《年度报告》 16 17 18
|
|
2
|
+
downloads:
|
|
3
|
+
公开招股书(招股说明书/招股意向书)
|
|
5
4
|
"""
|
|
5
|
+
|
|
6
6
|
import os
|
|
7
7
|
import random
|
|
8
8
|
import time
|
|
9
|
+
|
|
9
10
|
import requests
|
|
10
11
|
|
|
11
|
-
download_path =
|
|
12
|
+
download_path = "https://static.cninfo.com.cn/"
|
|
12
13
|
# 使用脚本所在目录的相对路径
|
|
13
|
-
_saving_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
|
|
14
|
-
saving_path = _saving_path +
|
|
14
|
+
_saving_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pdf")
|
|
15
|
+
saving_path = _saving_path + "/"
|
|
15
16
|
|
|
16
17
|
User_Agent = [
|
|
17
18
|
"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
|
|
@@ -20,163 +21,170 @@ User_Agent = [
|
|
|
20
21
|
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
|
|
21
22
|
"Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
|
|
22
23
|
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
|
|
23
|
-
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0"
|
|
24
|
+
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
|
|
24
25
|
]
|
|
25
26
|
|
|
26
27
|
|
|
27
|
-
headers = {
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
28
|
+
headers = {
|
|
29
|
+
"Accept": "application/json, text/javascript, */*; q=0.01",
|
|
30
|
+
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
|
|
31
|
+
"Accept-Encoding": "gzip, deflate",
|
|
32
|
+
"Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-HK;q=0.6,zh-TW;q=0.5",
|
|
33
|
+
"Host": "www.cninfo.com.cn",
|
|
34
|
+
"Origin": "http://www.cninfo.com.cn",
|
|
35
|
+
"Referer": "http://www.cninfo.com.cn/new/commonUrl?url=disclosure/list/notice",
|
|
36
|
+
"X-Requested-With": "XMLHttpRequest",
|
|
37
|
+
}
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
# 深市 年度报告
|
|
41
41
|
def szseAnnual(page, stock):
|
|
42
|
-
query_path =
|
|
43
|
-
headers[
|
|
44
|
-
query = {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
42
|
+
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
43
|
+
headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
|
|
44
|
+
query = {
|
|
45
|
+
"pageNum": page, # 页码
|
|
46
|
+
"pageSize": 30,
|
|
47
|
+
"tabName": "fulltext",
|
|
48
|
+
"column": "szse", # 深交所
|
|
49
|
+
"stock": "",
|
|
50
|
+
"searchkey": stock, # 使用searchkey查询股票代码或公司名
|
|
51
|
+
"secid": "",
|
|
52
|
+
"plate": "sz",
|
|
53
|
+
"category": "category_ndbg_szsh", # 年度报告
|
|
54
|
+
"trade": "",
|
|
55
|
+
"seDate": "2020-01-01~2026-02-15", # 时间区间
|
|
56
|
+
}
|
|
56
57
|
|
|
57
58
|
namelist = requests.post(query_path, headers=headers, data=query)
|
|
58
59
|
result = namelist.json()
|
|
59
|
-
if result and
|
|
60
|
-
return result[
|
|
60
|
+
if result and "announcements" in result and result["announcements"]:
|
|
61
|
+
return result["announcements"]
|
|
61
62
|
return []
|
|
62
63
|
|
|
63
64
|
|
|
64
65
|
# 沪市 年度报告
|
|
65
66
|
def sseAnnual(page, stock):
|
|
66
|
-
query_path =
|
|
67
|
-
headers[
|
|
68
|
-
query = {
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
67
|
+
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
68
|
+
headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
|
|
69
|
+
query = {
|
|
70
|
+
"pageNum": page, # 页码
|
|
71
|
+
"pageSize": 30,
|
|
72
|
+
"tabName": "fulltext",
|
|
73
|
+
"column": "sse",
|
|
74
|
+
"stock": "",
|
|
75
|
+
"searchkey": stock, # 使用searchkey查询股票代码或公司名
|
|
76
|
+
"secid": "",
|
|
77
|
+
"plate": "sh",
|
|
78
|
+
"category": "category_ndbg_szsh", # 年度报告
|
|
79
|
+
"trade": "",
|
|
80
|
+
"seDate": "2020-01-01~2026-02-15", # 时间区间
|
|
81
|
+
}
|
|
80
82
|
|
|
81
83
|
namelist = requests.post(query_path, headers=headers, data=query)
|
|
82
84
|
result = namelist.json()
|
|
83
|
-
if result and
|
|
84
|
-
return result[
|
|
85
|
+
if result and "announcements" in result and result["announcements"]:
|
|
86
|
+
return result["announcements"]
|
|
85
87
|
return []
|
|
86
88
|
|
|
87
89
|
|
|
88
90
|
# 深市 招股
|
|
89
91
|
def szseStock(page, stock):
|
|
90
|
-
query_path =
|
|
91
|
-
headers[
|
|
92
|
-
query = {
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
92
|
+
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
93
|
+
headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
|
|
94
|
+
query = {
|
|
95
|
+
"pageNum": page, # 页码
|
|
96
|
+
"pageSize": 30,
|
|
97
|
+
"tabName": "fulltext",
|
|
98
|
+
"column": "szse",
|
|
99
|
+
"stock": "",
|
|
100
|
+
"searchkey": stock + " 招股", # 组合搜索:股票代码 + 招股
|
|
101
|
+
"secid": "",
|
|
102
|
+
"plate": "sz",
|
|
103
|
+
"category": "",
|
|
104
|
+
"trade": "",
|
|
105
|
+
"seDate": "2015-01-01~2026-02-15", # 时间区间
|
|
106
|
+
}
|
|
104
107
|
|
|
105
108
|
namelist = requests.post(query_path, headers=headers, data=query)
|
|
106
109
|
result = namelist.json()
|
|
107
|
-
if result and
|
|
108
|
-
return result[
|
|
110
|
+
if result and "announcements" in result and result["announcements"]:
|
|
111
|
+
return result["announcements"]
|
|
109
112
|
return []
|
|
110
113
|
|
|
111
114
|
|
|
112
115
|
# 沪市 招股
|
|
113
116
|
def sseStock(page, stock):
|
|
114
|
-
query_path =
|
|
115
|
-
headers[
|
|
116
|
-
query = {
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
117
|
+
query_path = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
|
|
118
|
+
headers["User-Agent"] = random.choice(User_Agent) # 定义User_Agent
|
|
119
|
+
query = {
|
|
120
|
+
"pageNum": page, # 页码
|
|
121
|
+
"pageSize": 30,
|
|
122
|
+
"tabName": "fulltext",
|
|
123
|
+
"column": "sse",
|
|
124
|
+
"stock": "",
|
|
125
|
+
"searchkey": stock + " 招股", # 组合搜索:股票代码 + 招股
|
|
126
|
+
"secid": "",
|
|
127
|
+
"plate": "sh",
|
|
128
|
+
"category": "",
|
|
129
|
+
"trade": "",
|
|
130
|
+
"seDate": "2015-01-01~2026-02-15", # 时间区间
|
|
131
|
+
}
|
|
128
132
|
|
|
129
133
|
namelist = requests.post(query_path, headers=headers, data=query)
|
|
130
134
|
result = namelist.json()
|
|
131
|
-
if result and
|
|
132
|
-
return result[
|
|
135
|
+
if result and "announcements" in result and result["announcements"]:
|
|
136
|
+
return result["announcements"]
|
|
133
137
|
return []
|
|
134
138
|
|
|
135
139
|
|
|
136
140
|
# download PDF
|
|
137
|
-
def Download(single_page, year_filter=None):
|
|
141
|
+
def Download(single_page, year_filter=None, save_path=None):
|
|
138
142
|
"""
|
|
139
143
|
Download PDF files from announcement list
|
|
140
144
|
|
|
141
145
|
Args:
|
|
142
146
|
single_page: List of announcement dictionaries
|
|
143
147
|
year_filter: Optional year to filter (e.g., 2024). If None, downloads all years
|
|
148
|
+
save_path: Optional custom directory to save files. Defaults to saving_path
|
|
144
149
|
"""
|
|
145
150
|
if single_page is None:
|
|
146
151
|
return
|
|
147
152
|
|
|
148
|
-
headers = {
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
153
|
+
headers = {
|
|
154
|
+
"Accept": "application/json, text/javascript, */*; q=0.01",
|
|
155
|
+
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
|
|
156
|
+
"Accept-Encoding": "gzip, deflate",
|
|
157
|
+
"Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7,zh-HK;q=0.6,zh-TW;q=0.5",
|
|
158
|
+
"Host": "www.cninfo.com.cn",
|
|
159
|
+
"Origin": "http://www.cninfo.com.cn",
|
|
160
|
+
}
|
|
155
161
|
|
|
156
162
|
# Build allowed list dynamically based on year_filter
|
|
157
163
|
allowed_list = []
|
|
158
164
|
if year_filter:
|
|
159
165
|
allowed_list = [
|
|
160
|
-
f
|
|
161
|
-
f
|
|
166
|
+
f"{year_filter}年年度报告(更新后)",
|
|
167
|
+
f"{year_filter}年年度报告",
|
|
162
168
|
]
|
|
163
169
|
else:
|
|
164
170
|
# Default: all years from 2016-2025
|
|
165
171
|
for year in range(2016, 2026):
|
|
166
|
-
allowed_list.append(f
|
|
167
|
-
allowed_list.append(f
|
|
172
|
+
allowed_list.append(f"{year}年年度报告(更新后)")
|
|
173
|
+
allowed_list.append(f"{year}年年度报告")
|
|
168
174
|
|
|
169
175
|
allowed_list_2 = [
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
176
|
+
"招股书",
|
|
177
|
+
"招股说明书",
|
|
178
|
+
"招股意向书",
|
|
173
179
|
]
|
|
174
180
|
|
|
181
|
+
output_dir = (save_path or saving_path).rstrip("/") + "/"
|
|
182
|
+
|
|
175
183
|
for i in single_page:
|
|
176
|
-
title = i[
|
|
184
|
+
title = i["announcementTitle"]
|
|
177
185
|
|
|
178
186
|
# 跳过确认意见等非正式报告
|
|
179
|
-
if
|
|
187
|
+
if "确认意见" in title or "取消" in title:
|
|
180
188
|
continue
|
|
181
189
|
|
|
182
190
|
# 检查标题是否包含允许的文本
|
|
@@ -194,20 +202,27 @@ def Download(single_page, year_filter=None):
|
|
|
194
202
|
|
|
195
203
|
if allowed:
|
|
196
204
|
download = download_path + i["adjunctUrl"]
|
|
197
|
-
name =
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
205
|
+
name = (
|
|
206
|
+
i["secCode"]
|
|
207
|
+
+ "_"
|
|
208
|
+
+ i["secName"]
|
|
209
|
+
+ "_"
|
|
210
|
+
+ i["announcementTitle"]
|
|
211
|
+
+ ".pdf"
|
|
212
|
+
)
|
|
213
|
+
if "*" in name:
|
|
214
|
+
name = name.replace("*", "")
|
|
215
|
+
file_path = output_dir + name
|
|
201
216
|
|
|
202
217
|
# 显示下载进度
|
|
203
218
|
print(f" ↓ {name}")
|
|
204
219
|
|
|
205
220
|
# 确保目录存在
|
|
206
|
-
os.makedirs(
|
|
221
|
+
os.makedirs(output_dir, exist_ok=True)
|
|
207
222
|
|
|
208
223
|
time.sleep(random.random() * 2)
|
|
209
224
|
|
|
210
|
-
headers[
|
|
225
|
+
headers["User-Agent"] = random.choice(User_Agent)
|
|
211
226
|
r = requests.get(download)
|
|
212
227
|
|
|
213
228
|
f = open(file_path, "wb")
|
|
@@ -219,6 +234,70 @@ def Download(single_page, year_filter=None):
|
|
|
219
234
|
return True
|
|
220
235
|
|
|
221
236
|
|
|
237
|
+
def query_prospectus(stock_code):
|
|
238
|
+
"""
|
|
239
|
+
Query prospectus documents (招股书/招股说明书/招股意向书) for a specific stock code
|
|
240
|
+
|
|
241
|
+
Args:
|
|
242
|
+
stock_code: Stock code (e.g., '000888', '688777')
|
|
243
|
+
|
|
244
|
+
Returns:
|
|
245
|
+
List of announcement dictionaries
|
|
246
|
+
"""
|
|
247
|
+
all_announcements = []
|
|
248
|
+
|
|
249
|
+
try:
|
|
250
|
+
announcements_sse = sseStock(1, stock_code)
|
|
251
|
+
all_announcements.extend(announcements_sse)
|
|
252
|
+
except Exception as e:
|
|
253
|
+
print(f"Error querying SSE prospectus: {e}")
|
|
254
|
+
|
|
255
|
+
try:
|
|
256
|
+
announcements_szse = szseStock(1, stock_code)
|
|
257
|
+
all_announcements.extend(announcements_szse)
|
|
258
|
+
except Exception as e:
|
|
259
|
+
print(f"Error querying SZSE prospectus: {e}")
|
|
260
|
+
|
|
261
|
+
prospectus_keywords = ["招股书", "招股说明书", "招股意向书"]
|
|
262
|
+
filtered = [
|
|
263
|
+
a for a in all_announcements
|
|
264
|
+
if any(kw in a.get("announcementTitle", "") for kw in prospectus_keywords)
|
|
265
|
+
]
|
|
266
|
+
|
|
267
|
+
return filtered
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def download_prospectus(stock_code, save_path=None):
|
|
271
|
+
"""
|
|
272
|
+
Download prospectus documents for a specific stock code
|
|
273
|
+
|
|
274
|
+
Args:
|
|
275
|
+
stock_code: Stock code (e.g., '000888', '688777')
|
|
276
|
+
save_path: Optional custom directory to save files
|
|
277
|
+
|
|
278
|
+
Returns:
|
|
279
|
+
Dictionary with status and message
|
|
280
|
+
"""
|
|
281
|
+
announcements = query_prospectus(stock_code)
|
|
282
|
+
|
|
283
|
+
if not announcements:
|
|
284
|
+
return {
|
|
285
|
+
"success": False,
|
|
286
|
+
"message": f"No prospectus found for stock {stock_code}",
|
|
287
|
+
"downloaded": 0,
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
output_dir = save_path or saving_path
|
|
291
|
+
result = Download(announcements, save_path=output_dir)
|
|
292
|
+
|
|
293
|
+
return {
|
|
294
|
+
"success": result,
|
|
295
|
+
"message": f"Downloaded prospectus for {stock_code}",
|
|
296
|
+
"downloaded": len(announcements),
|
|
297
|
+
"path": output_dir,
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
|
|
222
301
|
def query_annual_reports(stock_code, year=None):
|
|
223
302
|
"""
|
|
224
303
|
Query annual reports for a specific stock code
|
|
@@ -251,20 +330,21 @@ def query_annual_reports(stock_code, year=None):
|
|
|
251
330
|
year_str = str(year)
|
|
252
331
|
filtered = []
|
|
253
332
|
for announcement in all_announcements:
|
|
254
|
-
if year_str in announcement.get(
|
|
333
|
+
if year_str in announcement.get("announcementTitle", ""):
|
|
255
334
|
filtered.append(announcement)
|
|
256
335
|
all_announcements = filtered
|
|
257
336
|
|
|
258
337
|
return all_announcements
|
|
259
338
|
|
|
260
339
|
|
|
261
|
-
def download_annual_reports(stock_code, year=None):
|
|
340
|
+
def download_annual_reports(stock_code, year=None, save_path=None):
|
|
262
341
|
"""
|
|
263
342
|
Download annual reports for a specific stock code
|
|
264
343
|
|
|
265
344
|
Args:
|
|
266
345
|
stock_code: Stock code (e.g., '000888', '688777')
|
|
267
346
|
year: Optional year filter (e.g., 2024). If None, downloads all years
|
|
347
|
+
save_path: Optional custom directory to save files
|
|
268
348
|
|
|
269
349
|
Returns:
|
|
270
350
|
Dictionary with status and message
|
|
@@ -273,19 +353,21 @@ def download_annual_reports(stock_code, year=None):
|
|
|
273
353
|
|
|
274
354
|
if not announcements:
|
|
275
355
|
return {
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
356
|
+
"success": False,
|
|
357
|
+
"message": f"No annual reports found for stock {stock_code}"
|
|
358
|
+
+ (f" in year {year}" if year else ""),
|
|
359
|
+
"downloaded": 0,
|
|
279
360
|
}
|
|
280
361
|
|
|
281
|
-
|
|
282
|
-
result = Download(announcements, year_filter=year)
|
|
362
|
+
output_dir = save_path or saving_path
|
|
363
|
+
result = Download(announcements, year_filter=year, save_path=output_dir)
|
|
283
364
|
|
|
284
365
|
return {
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
366
|
+
"success": result,
|
|
367
|
+
"message": f"Downloaded reports for {stock_code}"
|
|
368
|
+
+ (f" year {year}" if year else ""),
|
|
369
|
+
"downloaded": len(announcements),
|
|
370
|
+
"path": output_dir,
|
|
289
371
|
}
|
|
290
372
|
|
|
291
373
|
|
|
@@ -297,19 +379,19 @@ def Run(page_number, stock):
|
|
|
297
379
|
annual_report_ = sseAnnual(page_number, stock)
|
|
298
380
|
stock_report_ = sseStock(page_number, stock)
|
|
299
381
|
except Exception:
|
|
300
|
-
print(page_number,
|
|
382
|
+
print(page_number, "page error, retrying")
|
|
301
383
|
try:
|
|
302
384
|
annual_report = szseAnnual(page_number, stock)
|
|
303
385
|
except Exception:
|
|
304
|
-
print(page_number,
|
|
386
|
+
print(page_number, "page error")
|
|
305
387
|
Download(annual_report)
|
|
306
388
|
Download(stock_report)
|
|
307
389
|
Download(annual_report_)
|
|
308
390
|
Download(stock_report_)
|
|
309
391
|
|
|
310
392
|
|
|
311
|
-
if __name__ ==
|
|
312
|
-
with open(
|
|
393
|
+
if __name__ == "__main__":
|
|
394
|
+
with open("company_id.txt") as file:
|
|
313
395
|
lines = file.readlines()
|
|
314
396
|
for line in lines:
|
|
315
397
|
stock = line
|