akshare-one 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- akshare_one/__init__.py +32 -0
- akshare_one/adapters/__init__.py +7 -0
- akshare_one/adapters/cache/cache.py +9 -0
- akshare_one/adapters/eastmoney.py +344 -0
- akshare_one/adapters/sina.py +463 -0
- akshare_one/adapters/xueqiu.py +106 -0
- akshare_one/financial.py +121 -0
- akshare_one/insider.py +35 -0
- akshare_one/news.py +28 -0
- akshare_one/stock.py +86 -0
- akshare_one-0.1.0.dist-info/METADATA +61 -0
- akshare_one-0.1.0.dist-info/RECORD +15 -0
- akshare_one-0.1.0.dist-info/WHEEL +5 -0
- akshare_one-0.1.0.dist-info/licenses/LICENSE +21 -0
- akshare_one-0.1.0.dist-info/top_level.txt +1 -0
akshare_one/__init__.py
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
"""Akshare One - Unified interface for Chinese market data
|
2
|
+
|
3
|
+
Provides standardized access to various financial data sources with:
|
4
|
+
- Consistent symbol formats
|
5
|
+
- Unified data schemas
|
6
|
+
- Cleaned and normalized outputs
|
7
|
+
|
8
|
+
Example:
|
9
|
+
>>> from akshare_one import get_hist_data, get_realtime_data
|
10
|
+
>>> df = get_hist_data("600000", interval="day")
|
11
|
+
>>> print(df.head())
|
12
|
+
>>> # Fetch real-time data for a single stock
|
13
|
+
>>> df = get_realtime_data(symbol="600000")
|
14
|
+
>>> # Fetch real-time data for all stocks
|
15
|
+
>>> df = get_realtime_data()
|
16
|
+
"""
|
17
|
+
|
18
|
+
from .stock import get_hist_data, get_realtime_data
|
19
|
+
from .news import get_news_data
|
20
|
+
from .insider import get_inner_trade_data
|
21
|
+
from .financial import get_balance_sheet, get_income_statement, get_cash_flow
|
22
|
+
|
23
|
+
|
24
|
+
__all__ = [
|
25
|
+
"get_hist_data",
|
26
|
+
"get_realtime_data",
|
27
|
+
"get_news_data",
|
28
|
+
"get_inner_trade_data",
|
29
|
+
"get_balance_sheet",
|
30
|
+
"get_income_statement",
|
31
|
+
"get_cash_flow",
|
32
|
+
]
|
@@ -0,0 +1,9 @@
|
|
1
|
+
from cachetools import TTLCache
|
2
|
+
|
3
|
+
# Cache configuration: one TTLCache per data category, imported by the adapters.
# `ttl` is expressed in seconds; `maxsize` is the maximum number of cached entries.
CACHE_CONFIG = {
    'hist_data_cache': TTLCache(maxsize=1000, ttl=3600),  # historical data, cached for 1 hour
    'realtime_cache': TTLCache(maxsize=500, ttl=60),  # real-time data, cached for 1 minute
    'news_cache': TTLCache(maxsize=500, ttl=3600),  # news data, cached for 1 hour
    'financial_cache': TTLCache(maxsize=500, ttl=86400),  # financial data, cached for 24 hours
}
|
@@ -0,0 +1,344 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
import pandas as pd
|
3
|
+
import akshare as ak
|
4
|
+
from cachetools import cached
|
5
|
+
from .cache.cache import CACHE_CONFIG
|
6
|
+
|
7
|
+
|
8
|
+
class EastMoneyAdapter:
|
9
|
+
"""Adapter for EastMoney historical stock data API"""
|
10
|
+
|
11
|
+
@cached(
    CACHE_CONFIG["hist_data_cache"],
    # The key function receives exactly the arguments of the call, so its
    # defaults must mirror the method signature; otherwise a call that relies
    # on the method defaults fails with TypeError while building the key.
    key=lambda self,
    symbol,
    interval="day",
    interval_multiplier=1,
    start_date="1970-01-01",
    end_date="2030-12-31",
    adjust="none": (
        "eastmoney",
        symbol,
        interval,
        interval_multiplier,
        start_date,
        end_date,
        adjust,
    ),
)
def get_hist_data(
    self,
    symbol: str,
    interval: str = "day",
    interval_multiplier: int = 1,
    start_date: str = "1970-01-01",
    end_date: str = "2030-12-31",
    adjust: str = "none",
) -> pd.DataFrame:
    """Fetch historical OHLCV bars for one stock from EastMoney.

    Args:
        symbol: Unified symbol format (e.g. '600000').
        interval: Time granularity
            ('second','minute','hour','day','week','month','year').
            Matching is case-insensitive; 'second' is rejected.
        interval_multiplier: Interval multiplier (e.g. 5 for 5 minutes).
        start_date: Start date in YYYY-MM-DD format. For minute/hour
            intervals " 09:30:00" is appended when no time part is present;
            otherwise the dashes are stripped (YYYYMMDD).
        end_date: End date in YYYY-MM-DD format. For minute/hour intervals
            " 15:00:00" is appended when no time part is present; otherwise
            the dashes are stripped (YYYYMMDD).
        adjust: Adjustment type ('none','qfq','hfq'); 'none' is passed to
            akshare as an empty string.

    Returns:
        Standardized DataFrame with OHLCV data.

    Raises:
        ValueError: If the interval is unsupported, or a minute/hour
            multiplier is smaller than 1.
    """
    interval = interval.lower()
    adjust_flag = adjust if adjust != "none" else ""

    if interval == "second":
        raise ValueError("EastMoney does not support second-level data")

    if interval in ("minute", "hour"):
        if interval_multiplier < 1:
            raise ValueError(
                f"{interval.capitalize()} interval multiplier must be >= 1"
            )

        # Minute bars are built from 1-minute data, hour bars from 60-minute data.
        if interval == "minute":
            source_period = "1"
            rule = f"{interval_multiplier}min"
            label = str(interval_multiplier)
        else:
            source_period = "60"
            rule = f"{interval_multiplier}h"
            label = f"{interval_multiplier}H"

        if " " not in start_date:
            start_date = f"{start_date} 09:30:00"
        if " " not in end_date:
            end_date = f"{end_date} 15:00:00"

        raw_df = ak.stock_zh_a_hist_min_em(
            symbol=symbol,
            period=source_period,
            start_date=start_date,
            end_date=end_date,
            adjust=adjust_flag,
        )
        raw_df["时间"] = pd.to_datetime(raw_df["时间"])
        price_columns = ["开盘", "最高", "最低", "收盘"]
        bars = (
            raw_df.set_index("时间")
            .resample(rule)
            .agg(
                {
                    "开盘": "first",
                    "最高": "max",
                    "最低": "min",
                    "收盘": "last",
                    "成交量": "sum",
                    "成交额": "sum",
                }
            )
            # resample() also creates bins for periods without any input
            # rows (breaks, nights, weekends); those bars carry no prices.
            .dropna(how="all", subset=price_columns)
            .reset_index()
        )
        return self._clean_minute_data(bars, label)

    # akshare offers daily/weekly/monthly bars; yearly bars are aggregated
    # from monthly ones below.
    period_by_interval = {
        "day": "daily",
        "week": "weekly",
        "month": "monthly",
        "year": "monthly",
    }
    if interval not in period_by_interval:
        raise ValueError(f"Unsupported interval: {interval}")

    # Convert date format from YYYY-MM-DD to YYYYMMDD if needed
    start_date = start_date.replace("-", "")
    end_date = end_date.replace("-", "")

    raw_df = ak.stock_zh_a_hist(
        symbol=symbol,
        period=period_by_interval[interval],
        start_date=start_date,
        end_date=end_date,
        adjust=adjust_flag,
    )

    # Yearly data always needs aggregation (the source rows are monthly).
    # The multiplier is passed through unchanged because _resample_data
    # already applies a year-based frequency for interval == "year".
    if interval == "year" or interval_multiplier > 1:
        raw_df = self._resample_data(raw_df, interval, interval_multiplier)

    return self._clean_data(raw_df)
|
141
|
+
|
142
|
+
@cached(
    CACHE_CONFIG["realtime_cache"],
    key=lambda self, symbol=None: f"eastmoney_{symbol if symbol else 'all'}",
)
def get_realtime_data(self, symbol: Optional[str] = None) -> pd.DataFrame:
    """Fetch the real-time quote snapshot from EastMoney.

    The full snapshot is always downloaded and standardized; when ``symbol``
    is truthy the result is narrowed to the rows whose ``symbol`` column
    equals it and the index is renumbered.

    Args:
        symbol: Optional stock code used to filter the snapshot.

    Returns:
        Standardized quote DataFrame (all rows, or only the matching ones).
    """
    quotes = self._clean_spot_data(ak.stock_zh_a_spot_em())
    if not symbol:
        return quotes
    matches = quotes["symbol"] == symbol
    return quotes[matches].reset_index(drop=True)
|
150
|
+
|
151
|
+
def _resample_data(
|
152
|
+
self, df: pd.DataFrame, interval: str, multiplier: int
|
153
|
+
) -> pd.DataFrame:
|
154
|
+
"""Resample the data based on the given interval and multiplier"""
|
155
|
+
if interval == "day":
|
156
|
+
freq = f"{multiplier}D"
|
157
|
+
elif interval == "week":
|
158
|
+
freq = f"{multiplier}W-MON"
|
159
|
+
elif interval == "month":
|
160
|
+
freq = f"{multiplier}MS"
|
161
|
+
elif interval == "year":
|
162
|
+
freq = f"{multiplier}AS-JAN"
|
163
|
+
|
164
|
+
df["日期"] = pd.to_datetime(df["日期"])
|
165
|
+
df = df.set_index("日期")
|
166
|
+
resampled = df.resample(freq).agg(
|
167
|
+
{
|
168
|
+
"开盘": "first",
|
169
|
+
"最高": "max",
|
170
|
+
"最低": "min",
|
171
|
+
"收盘": "last",
|
172
|
+
"成交量": "sum",
|
173
|
+
}
|
174
|
+
)
|
175
|
+
return resampled.reset_index()
|
176
|
+
|
177
|
+
def _clean_minute_data(self, raw_df: pd.DataFrame, period: str) -> pd.DataFrame:
|
178
|
+
if period == "1":
|
179
|
+
column_mapping = {
|
180
|
+
"时间": "timestamp",
|
181
|
+
"开盘": "open",
|
182
|
+
"收盘": "close",
|
183
|
+
"最高": "high",
|
184
|
+
"最低": "low",
|
185
|
+
"成交量": "volume",
|
186
|
+
"成交额": "amount",
|
187
|
+
"均价": "vwap",
|
188
|
+
}
|
189
|
+
else:
|
190
|
+
column_mapping = {
|
191
|
+
"时间": "timestamp",
|
192
|
+
"开盘": "open",
|
193
|
+
"收盘": "close",
|
194
|
+
"最高": "high",
|
195
|
+
"最低": "low",
|
196
|
+
"涨跌幅": "pct_change",
|
197
|
+
"涨跌额": "change",
|
198
|
+
"成交量": "volume",
|
199
|
+
"成交额": "amount",
|
200
|
+
"振幅": "amplitude",
|
201
|
+
"换手率": "turnover",
|
202
|
+
}
|
203
|
+
|
204
|
+
df = raw_df.rename(columns=column_mapping)
|
205
|
+
|
206
|
+
if "timestamp" in df.columns:
|
207
|
+
df["timestamp"] = (
|
208
|
+
pd.to_datetime(df["timestamp"])
|
209
|
+
.dt.tz_localize("Asia/Shanghai")
|
210
|
+
.dt.tz_convert("UTC")
|
211
|
+
)
|
212
|
+
standard_columns = [
|
213
|
+
"timestamp",
|
214
|
+
"open",
|
215
|
+
"high",
|
216
|
+
"low",
|
217
|
+
"close",
|
218
|
+
"volume",
|
219
|
+
]
|
220
|
+
return df[[col for col in standard_columns if col in df.columns]]
|
221
|
+
|
222
|
+
def _clean_data(self, raw_df: pd.DataFrame, adjust: str = "none") -> pd.DataFrame:
|
223
|
+
"""清理和标准化历史数据格式
|
224
|
+
|
225
|
+
Args:
|
226
|
+
raw_df: Raw DataFrame from EastMoney API
|
227
|
+
adjust: Adjustment type ('none','qfq','hfq')
|
228
|
+
|
229
|
+
Returns:
|
230
|
+
Standardized DataFrame with consistent columns
|
231
|
+
"""
|
232
|
+
# Check if required columns exist in raw data
|
233
|
+
required_columns = {
|
234
|
+
"日期": "timestamp",
|
235
|
+
"开盘": "open",
|
236
|
+
"收盘": "close",
|
237
|
+
"最高": "high",
|
238
|
+
"最低": "low",
|
239
|
+
"成交量": "volume",
|
240
|
+
}
|
241
|
+
|
242
|
+
# Find available columns in raw data
|
243
|
+
available_columns = {}
|
244
|
+
for src_col, target_col in required_columns.items():
|
245
|
+
if src_col in raw_df.columns:
|
246
|
+
available_columns[src_col] = target_col
|
247
|
+
|
248
|
+
if not available_columns:
|
249
|
+
raise ValueError("Raw data does not contain any expected columns")
|
250
|
+
|
251
|
+
# Rename available columns
|
252
|
+
df = raw_df.rename(columns=available_columns)
|
253
|
+
|
254
|
+
# Process timestamp if available
|
255
|
+
if "timestamp" in df.columns:
|
256
|
+
df = df.assign(
|
257
|
+
timestamp=lambda x: pd.to_datetime(x["timestamp"])
|
258
|
+
.dt.tz_localize("Asia/Shanghai")
|
259
|
+
.dt.tz_convert("UTC")
|
260
|
+
)
|
261
|
+
|
262
|
+
# Process volume if available
|
263
|
+
if "volume" in df.columns:
|
264
|
+
df = df.assign(volume=lambda x: x["volume"].astype("int64"))
|
265
|
+
|
266
|
+
# Process adjustment flag
|
267
|
+
if adjust != "none":
|
268
|
+
df = df.assign(is_adjusted=lambda x: x["adjust"] != "none")
|
269
|
+
else:
|
270
|
+
df = df.assign(is_adjusted=False)
|
271
|
+
|
272
|
+
# Select available standardized columns
|
273
|
+
standard_columns = [
|
274
|
+
"timestamp",
|
275
|
+
"open",
|
276
|
+
"high",
|
277
|
+
"low",
|
278
|
+
"close",
|
279
|
+
"volume",
|
280
|
+
]
|
281
|
+
return df[[col for col in standard_columns if col in df.columns]]
|
282
|
+
|
283
|
+
@cached(CACHE_CONFIG["news_cache"], key=lambda self, symbol: f"eastmoney_{symbol}")
def get_news_data(self, symbol: str) -> pd.DataFrame:
    """Fetch EastMoney news for a single stock.

    Args:
        symbol: Stock code passed to ``ak.stock_news_em``.

    Returns:
        DataFrame with the Chinese column names mapped to keyword, title,
        content, publish_time, source and url. When present, publish_time is
        localized to Asia/Shanghai and converted to UTC.
    """
    news = ak.stock_news_em(symbol=symbol).rename(
        columns={
            "关键词": "keyword",
            "新闻标题": "title",
            "新闻内容": "content",
            "发布时间": "publish_time",
            "文章来源": "source",
            "新闻链接": "url",
        }
    )

    if "publish_time" not in news.columns:
        return news

    published_utc = (
        pd.to_datetime(news["publish_time"])
        .dt.tz_localize("Asia/Shanghai")
        .dt.tz_convert("UTC")
    )
    return news.assign(publish_time=published_utc)
|
307
|
+
|
308
|
+
def _clean_spot_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
309
|
+
"""清理和标准化实时行情数据"""
|
310
|
+
|
311
|
+
column_mapping = {
|
312
|
+
"代码": "symbol",
|
313
|
+
"最新价": "price",
|
314
|
+
"涨跌额": "change",
|
315
|
+
"涨跌幅": "pct_change",
|
316
|
+
"成交量": "volume",
|
317
|
+
"成交额": "amount",
|
318
|
+
"今开": "open",
|
319
|
+
"最高": "high",
|
320
|
+
"最低": "low",
|
321
|
+
"昨收": "prev_close",
|
322
|
+
}
|
323
|
+
|
324
|
+
df = raw_df.rename(columns=column_mapping)
|
325
|
+
|
326
|
+
# Change time to UTC
|
327
|
+
df = df.assign(
|
328
|
+
timestamp=lambda x: pd.Timestamp.now(tz="Asia/Shanghai").tz_convert("UTC")
|
329
|
+
)
|
330
|
+
|
331
|
+
required_columns = [
|
332
|
+
"symbol",
|
333
|
+
"price",
|
334
|
+
"change",
|
335
|
+
"pct_change",
|
336
|
+
"timestamp",
|
337
|
+
"volume",
|
338
|
+
"amount",
|
339
|
+
"open",
|
340
|
+
"high",
|
341
|
+
"low",
|
342
|
+
"prev_close",
|
343
|
+
]
|
344
|
+
return df[required_columns]
|
@@ -0,0 +1,463 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
import akshare as ak
|
3
|
+
from cachetools import cached
|
4
|
+
from .cache.cache import CACHE_CONFIG
|
5
|
+
|
6
|
+
|
7
|
+
class SinaAdapter:
|
8
|
+
"""Adapter for Sina financial data API
|
9
|
+
|
10
|
+
Includes:
|
11
|
+
- Financial reports (balance sheet, income statement, cash flow)
|
12
|
+
- Historical market data
|
13
|
+
"""
|
14
|
+
|
15
|
+
@cached(
    CACHE_CONFIG["financial_cache"],
    # The financial cache is shared by several report getters, so the key
    # names the report type to keep a balance sheet from being served for
    # another statement of the same symbol.
    key=lambda self, symbol: f"sina_balance_sheet_{symbol}",
)
def get_balance_sheet(self, symbol: str) -> pd.DataFrame:
    """Fetch balance sheet data from Sina.

    Args:
        symbol: Stock code (e.g. "600600"). A code that does not already
            start with "sh", "sz" or "bj" is prefixed with "sh".

    Returns:
        Standardized DataFrame with balance sheet data
    """
    # NOTE(review): unprefixed codes are always treated as Shanghai listings;
    # confirm this is intended for Shenzhen/Beijing codes.
    stock = symbol if symbol.startswith(("sh", "sz", "bj")) else f"sh{symbol}"
    raw_df = ak.stock_financial_report_sina(stock=stock, symbol="资产负债表")
    return self._clean_balance_data(raw_df)
|
28
|
+
|
29
|
+
@cached(
    CACHE_CONFIG["financial_cache"],
    # The financial cache is shared by several report getters, so the key
    # names the report type to keep an income statement from being served
    # for another statement of the same symbol.
    key=lambda self, symbol: f"sina_income_statement_{symbol}",
)
def get_income_statement(self, symbol: str) -> pd.DataFrame:
    """Fetch income statement data from Sina.

    Args:
        symbol: Stock code (e.g. "600600"). A code that does not already
            start with "sh", "sz" or "bj" is prefixed with "sh".

    Returns:
        Standardized DataFrame with income statement data
    """
    # NOTE(review): unprefixed codes are always treated as Shanghai listings;
    # confirm this is intended for Shenzhen/Beijing codes.
    stock = symbol if symbol.startswith(("sh", "sz", "bj")) else f"sh{symbol}"
    raw_df = ak.stock_financial_report_sina(stock=stock, symbol="利润表")
    return self._clean_income_data(raw_df)
|
42
|
+
|
43
|
+
@cached(
    CACHE_CONFIG["financial_cache"],
    # The financial cache is shared by several report getters, so the key
    # names the report type to keep a cash flow statement from being served
    # for another statement of the same symbol.
    key=lambda self, symbol: f"sina_cash_flow_{symbol}",
)
def get_cash_flow(self, symbol: str) -> pd.DataFrame:
    """Fetch cash flow statement data from Sina.

    Args:
        symbol: Stock code (e.g. "600600"). A code that does not already
            start with "sh", "sz" or "bj" is prefixed with "sh".

    Returns:
        Standardized DataFrame with cash flow data
    """
    # NOTE(review): unprefixed codes are always treated as Shanghai listings;
    # confirm this is intended for Shenzhen/Beijing codes.
    stock = symbol if symbol.startswith(("sh", "sz", "bj")) else f"sh{symbol}"
    raw_df = ak.stock_financial_report_sina(stock=stock, symbol="现金流量表")
    return self._clean_cash_data(raw_df)
|
56
|
+
|
57
|
+
def _clean_cash_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
58
|
+
"""清理和标准化现金流量表数据
|
59
|
+
|
60
|
+
Args:
|
61
|
+
raw_df: Raw DataFrame from Sina API
|
62
|
+
|
63
|
+
Returns:
|
64
|
+
Standardized DataFrame with consistent columns
|
65
|
+
"""
|
66
|
+
# Convert timestamp columns if exists
|
67
|
+
if "报告日" in raw_df.columns:
|
68
|
+
raw_df = raw_df.rename(columns={"报告日": "report_date"})
|
69
|
+
raw_df["report_date"] = pd.to_datetime(
|
70
|
+
raw_df["report_date"], format="%Y%m%d"
|
71
|
+
)
|
72
|
+
|
73
|
+
if "更新日期" in raw_df.columns:
|
74
|
+
raw_df = raw_df.rename(columns={"更新日期": "update_time"})
|
75
|
+
raw_df["update_time"] = pd.to_datetime(raw_df["update_time"])
|
76
|
+
|
77
|
+
# Standardize column names
|
78
|
+
column_mapping = {
|
79
|
+
"类型": "report_type",
|
80
|
+
"币种": "currency",
|
81
|
+
"净利润": "net_income",
|
82
|
+
"固定资产折旧、油气资产折耗、生产性生物资产折旧": "depreciation_and_amortization",
|
83
|
+
"无形资产摊销": "share_based_compensation",
|
84
|
+
"经营活动产生的现金流量净额": "net_cash_flow_from_operations",
|
85
|
+
"购建固定资产、无形资产和其他长期资产支付的现金": "capital_expenditure",
|
86
|
+
"取得子公司及其他营业单位支付的现金净额": "business_acquisitions_and_disposals",
|
87
|
+
"投资支付的现金": "investment_acquisitions_and_disposals",
|
88
|
+
"投资活动产生的现金流量净额": "net_cash_flow_from_investing",
|
89
|
+
"取得借款收到的现金": "issuance_or_repayment_of_debt_securities",
|
90
|
+
"吸收投资收到的现金": "issuance_or_purchase_of_equity_shares",
|
91
|
+
"分配股利、利润或偿付利息支付的现金": "dividends_and_other_cash_distributions",
|
92
|
+
"筹资活动产生的现金流量净额": "net_cash_flow_from_financing",
|
93
|
+
"现金及现金等价物净增加额": "change_in_cash_and_equivalents",
|
94
|
+
"汇率变动对现金及现金等价物的影响": "effect_of_exchange_rate_changes",
|
95
|
+
"期末现金及现金等价物余额": "ending_cash_balance",
|
96
|
+
"自由现金流": "free_cash_flow",
|
97
|
+
}
|
98
|
+
raw_df = raw_df.rename(columns=column_mapping)
|
99
|
+
|
100
|
+
# Select only required columns
|
101
|
+
required_columns = [
|
102
|
+
"report_date",
|
103
|
+
"report_period",
|
104
|
+
"period",
|
105
|
+
"currency",
|
106
|
+
"net_income",
|
107
|
+
"depreciation_and_amortization",
|
108
|
+
"share_based_compensation",
|
109
|
+
"net_cash_flow_from_operations",
|
110
|
+
"capital_expenditure",
|
111
|
+
"business_acquisitions_and_disposals",
|
112
|
+
"investment_acquisitions_and_disposals",
|
113
|
+
"net_cash_flow_from_investing",
|
114
|
+
"issuance_or_repayment_of_debt_securities",
|
115
|
+
"issuance_or_purchase_of_equity_shares",
|
116
|
+
"dividends_and_other_cash_distributions",
|
117
|
+
"net_cash_flow_from_financing",
|
118
|
+
"change_in_cash_and_equivalents",
|
119
|
+
"effect_of_exchange_rate_changes",
|
120
|
+
"ending_cash_balance",
|
121
|
+
"free_cash_flow",
|
122
|
+
]
|
123
|
+
|
124
|
+
# Filter columns
|
125
|
+
available_columns = [col for col in required_columns if col in raw_df.columns]
|
126
|
+
return raw_df[available_columns]
|
127
|
+
|
128
|
+
def _clean_balance_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
129
|
+
"""清理和标准化资产负债表数据
|
130
|
+
|
131
|
+
Args:
|
132
|
+
raw_df: Raw DataFrame from Sina API
|
133
|
+
|
134
|
+
Returns:
|
135
|
+
Standardized DataFrame with consistent columns
|
136
|
+
"""
|
137
|
+
# Convert timestamp columns if exists
|
138
|
+
if "报告日" in raw_df.columns:
|
139
|
+
raw_df = raw_df.rename(columns={"报告日": "report_date"})
|
140
|
+
raw_df["report_date"] = pd.to_datetime(
|
141
|
+
raw_df["report_date"], format="%Y%m%d"
|
142
|
+
)
|
143
|
+
|
144
|
+
if "更新日期" in raw_df.columns:
|
145
|
+
raw_df = raw_df.rename(columns={"更新日期": "update_time"})
|
146
|
+
raw_df["update_time"] = pd.to_datetime(raw_df["update_time"])
|
147
|
+
|
148
|
+
# Standardize column names
|
149
|
+
column_mapping = {
|
150
|
+
"类型": "report_type",
|
151
|
+
"币种": "currency",
|
152
|
+
"资产总计": "total_assets",
|
153
|
+
"流动资产合计": "current_assets",
|
154
|
+
"货币资金": "cash_and_equivalents",
|
155
|
+
"存货": "inventory",
|
156
|
+
"交易性金融资产": "current_investments",
|
157
|
+
"应收票据及应收账款": "trade_and_non_trade_receivables",
|
158
|
+
"非流动资产合计": "non_current_assets",
|
159
|
+
"固定资产": "property_plant_and_equipment",
|
160
|
+
"商誉": "goodwill_and_intangible_assets",
|
161
|
+
"长期股权投资": "investments",
|
162
|
+
"其他非流动金融资产": "non_current_investments",
|
163
|
+
"实收资本(或股本)": "outstanding_shares",
|
164
|
+
"递延所得税资产": "tax_assets",
|
165
|
+
"负债合计": "total_liabilities",
|
166
|
+
"流动负债合计": "current_liabilities",
|
167
|
+
"短期借款": "current_debt",
|
168
|
+
"应付票据及应付账款": "trade_and_non_trade_payables",
|
169
|
+
"合同负债": "deferred_revenue",
|
170
|
+
"吸收存款及同业存放": "deposit_liabilities",
|
171
|
+
"非流动负债合计": "non_current_liabilities",
|
172
|
+
"长期借款": "non_current_debt",
|
173
|
+
"递延所得税负债": "tax_liabilities",
|
174
|
+
"所有者权益(或股东权益)合计": "shareholders_equity",
|
175
|
+
"未分配利润": "retained_earnings",
|
176
|
+
"其他综合收益": "accumulated_other_comprehensive_income",
|
177
|
+
}
|
178
|
+
raw_df = raw_df.rename(columns=column_mapping)
|
179
|
+
|
180
|
+
# Select only required columns
|
181
|
+
required_columns = [
|
182
|
+
"report_date",
|
183
|
+
"report_period",
|
184
|
+
"period",
|
185
|
+
"currency",
|
186
|
+
"total_assets",
|
187
|
+
"current_assets",
|
188
|
+
"cash_and_equivalents",
|
189
|
+
"inventory",
|
190
|
+
"current_investments",
|
191
|
+
"trade_and_non_trade_receivables",
|
192
|
+
"non_current_assets",
|
193
|
+
"property_plant_and_equipment",
|
194
|
+
"goodwill_and_intangible_assets",
|
195
|
+
"investments",
|
196
|
+
"non_current_investments",
|
197
|
+
"outstanding_shares",
|
198
|
+
"tax_assets",
|
199
|
+
"total_liabilities",
|
200
|
+
"current_liabilities",
|
201
|
+
"current_debt",
|
202
|
+
"trade_and_non_trade_payables",
|
203
|
+
"deferred_revenue",
|
204
|
+
"deposit_liabilities",
|
205
|
+
"non_current_liabilities",
|
206
|
+
"non_current_debt",
|
207
|
+
"tax_liabilities",
|
208
|
+
"shareholders_equity",
|
209
|
+
"retained_earnings",
|
210
|
+
"accumulated_other_comprehensive_income",
|
211
|
+
]
|
212
|
+
|
213
|
+
# Calculate total_debt
|
214
|
+
if "current_debt" in raw_df.columns and "non_current_debt" in raw_df.columns:
|
215
|
+
raw_df["total_debt"] = raw_df["current_debt"] + raw_df["non_current_debt"]
|
216
|
+
required_columns.append("total_debt")
|
217
|
+
|
218
|
+
# Filter columns
|
219
|
+
available_columns = [col for col in required_columns if col in raw_df.columns]
|
220
|
+
return raw_df[available_columns]
|
221
|
+
|
222
|
+
@cached(
    CACHE_CONFIG["hist_data_cache"],
    # The key function receives exactly the arguments of the call, so its
    # defaults must mirror the method signature; otherwise a call that relies
    # on the method defaults fails with TypeError while building the key.
    key=lambda self,
    symbol,
    interval="day",
    interval_multiplier=1,
    start_date="1970-01-01",
    end_date="2030-12-31",
    adjust="none": (
        "sina",
        symbol,
        interval,
        interval_multiplier,
        start_date,
        end_date,
        adjust,
    ),
)
def get_hist_data(
    self,
    symbol: str,
    interval: str = "day",
    interval_multiplier: int = 1,
    start_date: str = "1970-01-01",
    end_date: str = "2030-12-31",
    adjust: str = "none",
) -> pd.DataFrame:
    """Fetch historical OHLCV bars for one stock from Sina.

    Args:
        symbol: Stock code (e.g. "600000"). A code that does not already
            start with "sh", "sz" or "bj" is prefixed with "sh".
        interval: Time granularity (supported: "minute", "hour", "day",
            "week", "month", "year"); matching is case-insensitive.
        interval_multiplier: Interval multiplier (e.g. 5 for 5-minute bars).
        start_date: Start date (YYYY-MM-DD); only forwarded for the
            day/week/month/year intervals.
        end_date: End date (YYYY-MM-DD); only forwarded for the
            day/week/month/year intervals.
        adjust: Adjustment type ('none','qfq','hfq','qfq-factor',
            'hfq-factor'); 'none' is passed to akshare as an empty string.

    Returns:
        Standardized DataFrame with OHLCV data

    Raises:
        ValueError: If the interval is unsupported, or a minute/hour
            multiplier is smaller than 1.
    """
    interval = interval.lower()
    # NOTE(review): unprefixed codes are always treated as Shanghai listings;
    # confirm this is intended for Shenzhen/Beijing codes.
    stock = symbol if symbol.startswith(("sh", "sz", "bj")) else f"sh{symbol}"
    adjust_flag = adjust if adjust != "none" else ""

    if interval in ("minute", "hour"):
        if interval_multiplier < 1:
            raise ValueError(
                f"{interval.capitalize()} interval multiplier must be >= 1"
            )

        # Minute bars are built from 1-minute data, hour bars from 60-minute data.
        source_period, unit = ("1", "min") if interval == "minute" else ("60", "h")
        raw_df = ak.stock_zh_a_minute(
            symbol=stock,
            period=source_period,
            adjust=adjust_flag,
        )
        raw_df = raw_df.rename(columns={"day": "date"})
        raw_df["date"] = pd.to_datetime(raw_df["date"])
        raw_df = (
            raw_df.set_index("date")
            .resample(f"{interval_multiplier}{unit}")
            .agg(
                {
                    "open": "first",
                    "high": "max",
                    "low": "min",
                    "close": "last",
                    "volume": "sum",
                }
            )
            # resample() also creates bins for periods without any input
            # rows (breaks, nights, weekends); those bars carry no prices.
            .dropna(how="all", subset=["open", "high", "low", "close"])
            .reset_index()
        )

    elif interval in ("day", "week", "month", "year"):
        # Convert date format from YYYY-MM-DD to YYYYMMDD
        start_date = start_date.replace("-", "")
        end_date = end_date.replace("-", "")

        raw_df = ak.stock_zh_a_daily(
            symbol=stock,
            start_date=start_date,
            end_date=end_date,
            adjust=adjust_flag,
        )

        # The source rows are daily, so every coarser interval must be
        # aggregated even when the multiplier is 1.
        if interval != "day" or interval_multiplier > 1:
            raw_df = self._resample_data(raw_df, interval, interval_multiplier)
    else:
        raise ValueError(f"Unsupported interval: {interval}")

    return self._clean_hist_data(raw_df, adjust)
|
326
|
+
|
327
|
+
def _resample_data(
|
328
|
+
self, df: pd.DataFrame, interval: str, multiplier: int
|
329
|
+
) -> pd.DataFrame:
|
330
|
+
if interval == "day":
|
331
|
+
freq = f"{multiplier}D"
|
332
|
+
elif interval == "week":
|
333
|
+
freq = f"{multiplier}W-MON"
|
334
|
+
elif interval == "month":
|
335
|
+
freq = f"{multiplier}MS"
|
336
|
+
elif interval == "year":
|
337
|
+
freq = f"{multiplier}AS-JAN"
|
338
|
+
|
339
|
+
df["date"] = pd.to_datetime(df["date"])
|
340
|
+
df = df.set_index("date")
|
341
|
+
resampled = df.resample(freq).agg(
|
342
|
+
{
|
343
|
+
"open": "first",
|
344
|
+
"high": "max",
|
345
|
+
"low": "min",
|
346
|
+
"close": "last",
|
347
|
+
"volume": "sum",
|
348
|
+
}
|
349
|
+
)
|
350
|
+
return resampled.reset_index()
|
351
|
+
|
352
|
+
def _clean_hist_data(self, raw_df: pd.DataFrame, adjust: str) -> pd.DataFrame:
|
353
|
+
"""清理和标准化历史行情数据
|
354
|
+
|
355
|
+
Args:
|
356
|
+
raw_df: 原始数据DataFrame
|
357
|
+
adjust: 复权类型
|
358
|
+
|
359
|
+
Returns:
|
360
|
+
标准化后的DataFrame
|
361
|
+
"""
|
362
|
+
column_mapping = {
|
363
|
+
"date": "timestamp",
|
364
|
+
"open": "open",
|
365
|
+
"high": "high",
|
366
|
+
"low": "low",
|
367
|
+
"close": "close",
|
368
|
+
"volume": "volume",
|
369
|
+
}
|
370
|
+
|
371
|
+
df = raw_df.rename(columns=column_mapping)
|
372
|
+
|
373
|
+
# Process timestamp
|
374
|
+
if "timestamp" in df.columns:
|
375
|
+
df["timestamp"] = (
|
376
|
+
pd.to_datetime(df["timestamp"])
|
377
|
+
.dt.tz_localize("Asia/Shanghai")
|
378
|
+
.dt.tz_convert("UTC")
|
379
|
+
)
|
380
|
+
|
381
|
+
# Process volume
|
382
|
+
if "volume" in df.columns:
|
383
|
+
df["volume"] = df["volume"].astype("int64")
|
384
|
+
|
385
|
+
# Select and order columns
|
386
|
+
standard_columns = [
|
387
|
+
"timestamp",
|
388
|
+
"open",
|
389
|
+
"high",
|
390
|
+
"low",
|
391
|
+
"close",
|
392
|
+
"volume",
|
393
|
+
]
|
394
|
+
|
395
|
+
return df[[col for col in standard_columns if col in df.columns]]
|
396
|
+
|
397
|
+
def _clean_income_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
398
|
+
"""清理和标准化利润表数据
|
399
|
+
|
400
|
+
Args:
|
401
|
+
raw_df: Raw DataFrame from Sina API
|
402
|
+
|
403
|
+
Returns:
|
404
|
+
Standardized DataFrame with consistent columns
|
405
|
+
"""
|
406
|
+
# Convert timestamp columns if exists
|
407
|
+
if "报告日" in raw_df.columns:
|
408
|
+
raw_df = raw_df.rename(columns={"报告日": "report_date"})
|
409
|
+
raw_df["report_date"] = pd.to_datetime(
|
410
|
+
raw_df["report_date"], format="%Y%m%d"
|
411
|
+
)
|
412
|
+
|
413
|
+
if "更新日期" in raw_df.columns:
|
414
|
+
raw_df = raw_df.rename(columns={"更新日期": "update_time"})
|
415
|
+
raw_df["update_time"] = pd.to_datetime(raw_df["update_time"])
|
416
|
+
|
417
|
+
# Standardize column names
|
418
|
+
column_mapping = {
|
419
|
+
"类型": "report_type",
|
420
|
+
"币种": "currency",
|
421
|
+
"营业总收入": "revenue",
|
422
|
+
"营业成本": "cost_of_revenue",
|
423
|
+
"营业利润": "gross_profit",
|
424
|
+
"销售费用": "selling_general_and_administrative_expenses",
|
425
|
+
"管理费用": "operating_expense",
|
426
|
+
"研发费用": "research_and_development",
|
427
|
+
"利息支出": "interest_expense",
|
428
|
+
"利润总额": "ebit",
|
429
|
+
"所得税费用": "income_tax_expense",
|
430
|
+
"净利润": "net_income",
|
431
|
+
"归属于母公司所有者的净利润": "net_income_common_stock",
|
432
|
+
"少数股东损益": "net_income_non_controlling_interests",
|
433
|
+
"基本每股收益": "earnings_per_share",
|
434
|
+
"稀释每股收益": "earnings_per_share_diluted",
|
435
|
+
}
|
436
|
+
raw_df = raw_df.rename(columns=column_mapping)
|
437
|
+
|
438
|
+
# Select only required columns
|
439
|
+
required_columns = [
|
440
|
+
"report_date",
|
441
|
+
"report_period",
|
442
|
+
"period",
|
443
|
+
"currency",
|
444
|
+
"revenue",
|
445
|
+
"cost_of_revenue",
|
446
|
+
"gross_profit",
|
447
|
+
"operating_expense",
|
448
|
+
"selling_general_and_administrative_expenses",
|
449
|
+
"research_and_development",
|
450
|
+
"operating_income",
|
451
|
+
"interest_expense",
|
452
|
+
"ebit",
|
453
|
+
"income_tax_expense",
|
454
|
+
"net_income",
|
455
|
+
"net_income_common_stock",
|
456
|
+
"net_income_non_controlling_interests",
|
457
|
+
"earnings_per_share",
|
458
|
+
"earnings_per_share_diluted",
|
459
|
+
]
|
460
|
+
|
461
|
+
# Filter columns
|
462
|
+
available_columns = [col for col in required_columns if col in raw_df.columns]
|
463
|
+
return raw_df[available_columns]
|
@@ -0,0 +1,106 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
import pandas as pd
|
3
|
+
import akshare as ak
|
4
|
+
from cachetools import cached
|
5
|
+
from .cache.cache import CACHE_CONFIG
|
6
|
+
|
7
|
+
|
8
|
+
class XueQiuAdapter:
|
9
|
+
"""Adapter for XueQiu insider trading data API"""
|
10
|
+
|
11
|
+
@cached(
    CACHE_CONFIG["hist_data_cache"],
    key=lambda self, symbol=None: f"inner_trade_{symbol if symbol else 'all'}",
)
def get_inner_trade_data(self, symbol: Optional[str] = None) -> pd.DataFrame:
    """Fetch XueQiu insider trading data.

    The complete data set is downloaded on every uncached call; when
    ``symbol`` is truthy only rows whose "股票代码" equals it are kept before
    cleaning.

    Args:
        symbol: Optional stock code such as "600000"; omit it to get all
            records.

    Returns:
        Insider trading DataFrame as produced by ``_clean_data``. Its column
        mapping renames the source columns to symbol, issuer, name, title,
        transaction_date, transaction_shares, transaction_price_per_share,
        shares_owned_after_transaction and relationship.
    """
    trades = ak.stock_inner_trade_xq()
    if symbol:
        wanted_rows = trades["股票代码"] == symbol
        trades = trades[wanted_rows]
    return self._clean_data(trades)
|
34
|
+
|
35
|
+
def _clean_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
|
36
|
+
"""清理和标准化内部交易数据
|
37
|
+
|
38
|
+
Args:
|
39
|
+
raw_df: Raw DataFrame from XueQiu API
|
40
|
+
|
41
|
+
Returns:
|
42
|
+
Standardized DataFrame with consistent columns
|
43
|
+
"""
|
44
|
+
column_mapping = {
|
45
|
+
"股票代码": "symbol",
|
46
|
+
"股票名称": "issuer",
|
47
|
+
"变动人": "name",
|
48
|
+
"董监高职务": "title",
|
49
|
+
"变动日期": "transaction_date",
|
50
|
+
"变动股数": "transaction_shares",
|
51
|
+
"成交均价": "transaction_price_per_share",
|
52
|
+
"变动后持股数": "shares_owned_after_transaction",
|
53
|
+
"与董监高关系": "relationship",
|
54
|
+
}
|
55
|
+
|
56
|
+
df = raw_df.rename(columns=column_mapping)
|
57
|
+
|
58
|
+
# Add is_board_director column
|
59
|
+
df["is_board_director"] = df["title"].str.contains("董事")
|
60
|
+
|
61
|
+
# Calculate transaction_value
|
62
|
+
if (
|
63
|
+
"transaction_shares" in df.columns
|
64
|
+
and "transaction_price_per_share" in df.columns
|
65
|
+
):
|
66
|
+
df["transaction_value"] = (
|
67
|
+
df["transaction_shares"] * df["transaction_price_per_share"]
|
68
|
+
)
|
69
|
+
|
70
|
+
# Add shares_owned_before_transaction if possible
|
71
|
+
if (
|
72
|
+
"shares_owned_after_transaction" in df.columns
|
73
|
+
and "transaction_shares" in df.columns
|
74
|
+
):
|
75
|
+
df["shares_owned_before_transaction"] = (
|
76
|
+
df["shares_owned_after_transaction"] - df["transaction_shares"]
|
77
|
+
)
|
78
|
+
|
79
|
+
# Convert date format
|
80
|
+
if "transaction_date" in df.columns:
|
81
|
+
df["transaction_date"] = (
|
82
|
+
pd.to_datetime(df["transaction_date"])
|
83
|
+
.dt.tz_localize("Asia/Shanghai")
|
84
|
+
.dt.tz_convert("UTC")
|
85
|
+
)
|
86
|
+
|
87
|
+
if "filing_date" in df.columns:
|
88
|
+
df["filing_date"] = (
|
89
|
+
pd.to_datetime(df["filing_date"])
|
90
|
+
.dt.tz_localize("Asia/Shanghai")
|
91
|
+
.dt.tz_convert("UTC")
|
92
|
+
)
|
93
|
+
|
94
|
+
# Convert numeric columns
|
95
|
+
numeric_cols = [
|
96
|
+
"transaction_shares",
|
97
|
+
"transaction_price_per_share",
|
98
|
+
"transaction_value",
|
99
|
+
"shares_owned_before_transaction",
|
100
|
+
"shares_owned_after_transaction",
|
101
|
+
]
|
102
|
+
for col in numeric_cols:
|
103
|
+
if col in df.columns:
|
104
|
+
df[col] = pd.to_numeric(df[col], errors="coerce")
|
105
|
+
|
106
|
+
return df
|
akshare_one/financial.py
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
"""财务报表数据模块
|
2
|
+
|
3
|
+
包含资产负债表、利润表和现金流量表相关功能
|
4
|
+
"""
|
5
|
+
|
6
|
+
import pandas as pd
|
7
|
+
from .adapters import SinaAdapter
|
8
|
+
|
9
|
+
|
10
|
+
def get_balance_sheet(symbol: str, source: str = "sina") -> "pd.DataFrame":
    """Return balance-sheet data for one stock.

    Args:
        symbol: Stock code, e.g. "600600".
        source: Data source; only "sina" is handled.

    Returns:
        The DataFrame produced by ``SinaAdapter().get_balance_sheet``.
        Documented columns:
        - report_date: report date
        - report_period: reporting period
        - period: period
        - currency: currency
        - total_assets: total assets
        - current_assets: total current assets
        - cash_and_equivalents: cash and bank balances
        - inventory: inventories
        - current_investments: trading financial assets
        - trade_and_non_trade_receivables: notes and accounts receivable
        - non_current_assets: total non-current assets
        - property_plant_and_equipment: fixed assets
        - goodwill_and_intangible_assets: goodwill
        - investments: long-term equity investments
        - non_current_investments: other non-current financial assets
        - outstanding_shares: paid-in capital (or share capital)
        - tax_assets: deferred income tax assets
        - total_liabilities: total liabilities
        - current_liabilities: total current liabilities
        - current_debt: short-term borrowings
        - trade_and_non_trade_payables: notes and accounts payable
        - deferred_revenue: contract liabilities
        - deposit_liabilities: customer deposits and interbank placements
        - non_current_liabilities: total non-current liabilities
        - non_current_debt: long-term borrowings
        - tax_liabilities: deferred income tax liabilities
        - shareholders_equity: total owners' (shareholders') equity
        - retained_earnings: undistributed profit
        - accumulated_other_comprehensive_income: other comprehensive income
        - total_debt: total debt (short-term plus long-term borrowings)

    Raises:
        ValueError: If ``source`` is not a supported data source.
    """
    # Reject unknown sources up front, then delegate to the adapter.
    if source != "sina":
        raise ValueError(f"Unsupported data source: {source}")
    adapter = SinaAdapter()
    return adapter.get_balance_sheet(symbol=symbol)
|
53
|
+
|
54
|
+
|
55
|
+
def get_income_statement(symbol: str, source: str = "sina") -> "pd.DataFrame":
    """Return income-statement data for one stock.

    Args:
        symbol: Stock code, e.g. "600600".
        source: Data source; currently only "sina" is handled.

    Returns:
        The DataFrame produced by ``SinaAdapter().get_income_statement``.
        Documented columns:
        - report_date: report date
        - report_period: reporting period
        - period: period
        - currency: currency
        - revenue: total operating revenue
        - cost_of_revenue: operating cost
        - gross_profit: operating profit
        - operating_expense: administrative expenses
        - selling_general_and_administrative_expenses: selling expenses
        - research_and_development: R&D expenses
        - operating_income: operating profit
        - interest_expense: interest expense
        - ebit: total (pre-tax) profit
        - income_tax_expense: income tax expense
        - net_income: net profit
        - net_income_common_stock: net profit attributable to owners of the parent
        - net_income_non_controlling_interests: minority interests' profit or loss
        - earnings_per_share: basic earnings per share
        - earnings_per_share_diluted: diluted earnings per share

        NOTE(review): ``gross_profit`` and ``operating_income`` are both
        documented as operating profit -- confirm against the Sina adapter's
        column mapping.

    Raises:
        ValueError: If ``source`` is not a supported data source.
    """
    # Reject unknown sources up front, then delegate to the adapter.
    if source != "sina":
        raise ValueError(f"Unsupported data source: {source}")
    adapter = SinaAdapter()
    return adapter.get_income_statement(symbol=symbol)
|
87
|
+
|
88
|
+
|
89
|
+
def get_cash_flow(symbol: str, source: str = "sina") -> "pd.DataFrame":
    """Return cash-flow-statement data for one stock.

    Args:
        symbol: Stock code, e.g. "600600".
        source: Data source; currently only "sina" is handled.

    Returns:
        The DataFrame produced by ``SinaAdapter().get_cash_flow``.
        Documented columns:
        - report_date: report date
        - report_period: reporting period
        - period: period
        - currency: currency
        - net_income: net profit
        - depreciation_and_amortization: depreciation of fixed assets,
          depletion of oil and gas assets, depreciation of productive
          biological assets
        - share_based_compensation: amortization of intangible assets
        - net_cash_flow_from_operations: net cash flow from operating activities
        - capital_expenditure: cash paid to acquire fixed assets, intangible
          assets and other long-term assets
        - business_acquisitions_and_disposals: net cash paid to acquire
          subsidiaries and other business units
        - investment_acquisitions_and_disposals: cash paid for investments
        - net_cash_flow_from_investing: net cash flow from investing activities
        - issuance_or_repayment_of_debt_securities: cash received from borrowings
        - issuance_or_purchase_of_equity_shares: cash received from investors
        - dividends_and_other_cash_distributions: cash paid for dividends,
          profit distribution or interest
        - net_cash_flow_from_financing: net cash flow from financing activities
        - change_in_cash_and_equivalents: net increase in cash and cash equivalents
        - effect_of_exchange_rate_changes: effect of exchange-rate changes on
          cash and cash equivalents
        - ending_cash_balance: cash and cash equivalents at end of period
        - free_cash_flow: free cash flow

        NOTE(review): ``share_based_compensation`` is documented as
        amortization of intangible assets, which does not match the column
        name -- confirm against the Sina adapter's column mapping.

    Raises:
        ValueError: If ``source`` is not a supported data source.
    """
    # Reject unknown sources up front, then delegate to the adapter.
    if source != "sina":
        raise ValueError(f"Unsupported data source: {source}")
    adapter = SinaAdapter()
    return adapter.get_cash_flow(symbol=symbol)
|
akshare_one/insider.py
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
"""内部交易数据模块
|
2
|
+
|
3
|
+
包含上市公司内部交易相关功能
|
4
|
+
"""
|
5
|
+
|
6
|
+
from typing import Optional
|
7
|
+
import pandas as pd
|
8
|
+
from .adapters import XueQiuAdapter
|
9
|
+
|
10
|
+
|
11
|
+
def get_inner_trade_data(source: str = "xueqiu", symbol: Optional[str] = None) -> "pd.DataFrame":
    """Return insider-trading data from XueQiu.

    Args:
        source: Data source; currently only "xueqiu" is handled.
        symbol: Optional stock code such as "600000"; when omitted, data for
            all stocks is returned.

    Returns:
        The DataFrame produced by ``XueQiuAdapter().get_inner_trade_data``.
        Documented columns:
        - symbol: stock code
        - issuer: stock name
        - name: person whose holding changed
        - title: position of the director/supervisor/executive
        - transaction_date: date of the change (UTC)
        - transaction_shares: number of shares changed
        - transaction_price_per_share: average transaction price
        - shares_owned_after_transaction: shares held after the change
        - relationship: relationship to the director/supervisor/executive
        - is_board_director: whether the person is a board member
        - transaction_value: transaction amount (shares changed * average price)
        - shares_owned_before_transaction: shares held before the change

    Raises:
        ValueError: If ``source`` is not a supported data source.
    """
    # Reject unknown sources up front, then delegate to the adapter.
    if source != "xueqiu":
        raise ValueError(f"Unsupported data source: {source}")
    adapter = XueQiuAdapter()
    return adapter.get_inner_trade_data(symbol=symbol)
|
akshare_one/news.py
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
"""新闻数据模块
|
2
|
+
|
3
|
+
包含股票新闻相关功能
|
4
|
+
"""
|
5
|
+
|
6
|
+
import pandas as pd
|
7
|
+
from .adapters import EastMoneyAdapter
|
8
|
+
|
9
|
+
|
10
|
+
def get_news_data(symbol: str, source: str = "eastmoney") -> "pd.DataFrame":
    """Return news items for a single stock.

    Args:
        symbol: Stock code, e.g. "300059".
        source: Data source; only "eastmoney" is handled.

    Returns:
        The DataFrame produced by ``EastMoneyAdapter().get_news_data``.
        Documented columns:
        - keyword: keyword
        - title: news headline
        - content: news body
        - publish_time: publication time
        - source: article source
        - url: link to the article

    Raises:
        ValueError: If ``source`` is not a supported data source.
    """
    # Reject unknown sources up front, then delegate to the adapter.
    if source != "eastmoney":
        raise ValueError(f"Unsupported data source: {source}")
    adapter = EastMoneyAdapter()
    return adapter.get_news_data(symbol=symbol)
|
akshare_one/stock.py
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
"""股票市场数据模块
|
2
|
+
|
3
|
+
包含股票历史数据和实时数据相关功能
|
4
|
+
"""
|
5
|
+
|
6
|
+
from typing import Optional
|
7
|
+
import pandas as pd
|
8
|
+
from .adapters import EastMoneyAdapter, SinaAdapter
|
9
|
+
|
10
|
+
|
11
|
+
def get_hist_data(
    symbol: str,
    interval: str,
    interval_multiplier: int = 1,
    start_date: str = "1970-01-01",
    end_date: str = "2030-12-31",
    adjust: str = "none",
    source: str = "eastmoney",
) -> "pd.DataFrame":
    """Return historical market data for one stock.

    Args:
        symbol: Stock code, e.g. '600000'.
        interval: Bar interval ('second', 'minute', 'hour', 'day', 'week',
            'month', 'year').
        interval_multiplier: Multiplier applied to the interval (e.g. 5 for
            5-minute bars).
        start_date: Start date (YYYY-MM-DD).
        end_date: End date (YYYY-MM-DD).
        adjust: Price adjustment type ('none', 'qfq', 'hfq').
        source: Data source ('eastmoney' or 'sina').

    Returns:
        The DataFrame produced by the selected adapter's ``get_hist_data``.
        Documented columns:
        - timestamp: timestamp (UTC)
        - open: opening price
        - high: highest price
        - low: lowest price
        - close: closing price
        - volume: traded volume

    Raises:
        ValueError: If ``source`` is not a supported data source.
    """
    # Validate the source before touching any adapter.
    if source not in ("eastmoney", "sina"):
        raise ValueError(f"Unsupported data source: {source}")

    # Both adapters are called with the same keyword arguments, so pick the
    # adapter once and make a single delegating call.
    adapter = EastMoneyAdapter() if source == "eastmoney" else SinaAdapter()
    return adapter.get_hist_data(
        symbol=symbol,
        interval=interval,
        interval_multiplier=interval_multiplier,
        start_date=start_date,
        end_date=end_date,
        adjust=adjust,
    )
|
59
|
+
|
60
|
+
|
61
|
+
def get_realtime_data(
    source: str = "eastmoney", symbol: Optional[str] = None
) -> "pd.DataFrame":
    """Return real-time market quotes.

    Args:
        source: Data source; only 'eastmoney' is handled.
        symbol: Stock code, e.g. "600000". Defaults to ``None``, which is
            passed through to the adapter unchanged.

    Returns:
        The DataFrame produced by ``EastMoneyAdapter().get_realtime_data``.
        Documented columns:
        - symbol: stock code
        - price: latest price
        - change: price change
        - pct_change: percentage change (%)
        - timestamp: timestamp
        - volume: traded volume (in lots)
        - amount: traded amount (in yuan)
        - open: today's opening price
        - high: highest price
        - low: lowest price
        - prev_close: previous close

    Raises:
        ValueError: If ``source`` is not a supported data source.
    """
    # Reject unknown sources up front, then delegate to the adapter.
    if source != "eastmoney":
        raise ValueError(f"Unsupported data source: {source}")
    adapter = EastMoneyAdapter()
    return adapter.get_realtime_data(symbol=symbol)
|
@@ -0,0 +1,61 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: akshare-one
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Standardized interface for Chinese financial market data, built on AKShare
|
5
|
+
Requires-Python: >=3.12
|
6
|
+
Description-Content-Type: text/markdown
|
7
|
+
License-File: LICENSE
|
8
|
+
Requires-Dist: akshare>=1.16.64
|
9
|
+
Requires-Dist: cachetools>=5.5.2
|
10
|
+
Dynamic: license-file
|
11
|
+
|
12
|
+
# AKShare One
|
13
|
+
|
14
|
+
**AKShare One** 是一个标准化中国金融市场数据接口,基于 [AKShare](https://github.com/akfamily/akshare) 二次封装,解决AKShare多数据源输入输出不统一的问题。
|
15
|
+
|
16
|
+
## 项目背景
|
17
|
+
|
18
|
+
AKShare提供了丰富的中国金融市场数据,但不同数据源的:
|
19
|
+
- 股票代码格式不统一(如东方财富和新浪使用不同格式)
|
20
|
+
- 返回数据结构不一致
|
21
|
+
- 参数命名和用法有差异
|
22
|
+
|
23
|
+
AKShare One希望通过统一封装,提供:
|
24
|
+
- 标准化的股票代码格式
|
25
|
+
- 一致的数据返回结构
|
26
|
+
- 简化的API参数
|
27
|
+
|
28
|
+
## 核心功能
|
29
|
+
|
30
|
+
### 目前仅实现了以下功能:
|
31
|
+
- 历史数据 (`get_hist_data`)
|
32
|
+
- 实时行情 (`get_realtime_data`)
|
33
|
+
- 个股新闻 (`get_news_data`)
|
34
|
+
- 财务数据 (资产负债表/利润表/现金流量表)
|
35
|
+
- 内部交易 (`get_inner_trade_data`)
|
36
|
+
|
37
|
+
### 标准化处理
|
38
|
+
- 统一时间戳为UTC
|
39
|
+
- 自动处理复权数据
|
40
|
+
- 清理异常值和缺失数据
|
41
|
+
- 统一列名和数据类型
|
42
|
+
|
43
|
+
## 快速开始
|
44
|
+
|
45
|
+
使用示例:
|
46
|
+
```python
|
47
|
+
from akshare_one import get_hist_data, get_realtime_data
|
48
|
+
|
49
|
+
# 获取历史数据
|
50
|
+
df_hist = get_hist_data(
|
51
|
+
symbol="600000",
|
52
|
+
interval="day",
|
53
|
+
adjust="hfq"
|
54
|
+
)
|
55
|
+
|
56
|
+
# 获取实时数据
|
57
|
+
df_realtime = get_realtime_data(symbol="600000")
|
58
|
+
```
|
59
|
+
|
60
|
+
## API文档
|
61
|
+
详细API说明请参考 [docs/api.md](docs/api.md)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
akshare_one/__init__.py,sha256=xWUyTnh1Tv-2mEPxDmk5giUPwhrsoAuZcyoW6mAbX4k,923
|
2
|
+
akshare_one/financial.py,sha256=0vPm8LYzV-UIaPIk5GeEJuuCXYYXtWp5h1AYo-9ms38,5005
|
3
|
+
akshare_one/insider.py,sha256=hy1eM0Fa2J4uHU3vl-VDm4uB8zYTSUTbonnxOxbBuGc,1193
|
4
|
+
akshare_one/news.py,sha256=qUeX_1SnViDXBAb4Gcl28ow1qvMugEEPL75JgJhLbCA,707
|
5
|
+
akshare_one/stock.py,sha256=oGd432iZ6_hr63L4VG2iT36wOQ2q8RI8QUOflFY-OPg,2466
|
6
|
+
akshare_one/adapters/__init__.py,sha256=JrpGJ80drrU9Imc3L_gZ5Z90zyMd3PiJ5dG86VAEnmA,252
|
7
|
+
akshare_one/adapters/eastmoney.py,sha256=OP5zNbSRVEKfKX0ikC6onABuifxoWEAbBNYbk_GO3lU,11783
|
8
|
+
akshare_one/adapters/sina.py,sha256=jcpddoEJI7hEIRkxlZl7Ee1PAyDglhsOZGXFOgdnpGs,17412
|
9
|
+
akshare_one/adapters/xueqiu.py,sha256=bwRNTDnXrXJxp4cFG0Uj5LVLBvCxFyxKOHAJS1nnKaM,3624
|
10
|
+
akshare_one/adapters/cache/cache.py,sha256=GhADIdLuh_IFpPXrkhTLv-joTcuh_vC12kRdqT-cpvI,378
|
11
|
+
akshare_one-0.1.0.dist-info/licenses/LICENSE,sha256=Gg6A1GNSJCZWQ73aHJ7TXOa0i8RQ3FejZCTZ6Db07cU,1066
|
12
|
+
akshare_one-0.1.0.dist-info/METADATA,sha256=oHAPghYP-c2rNCZGcqxEYacIxV9pOwuK0eTpnk0l-oQ,1586
|
13
|
+
akshare_one-0.1.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
14
|
+
akshare_one-0.1.0.dist-info/top_level.txt,sha256=kNiucyLVAGa89wmUSpXbBLWD7pF_RuahuiaOfLHZSyw,12
|
15
|
+
akshare_one-0.1.0.dist-info/RECORD,,
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 zwldarren
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1 @@
|
|
1
|
+
akshare_one
|