akshare-one 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 zwldarren
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
@@ -0,0 +1,61 @@
+ Metadata-Version: 2.4
+ Name: akshare-one
+ Version: 0.1.0
+ Summary: Add your description here
+ Requires-Python: >=3.12
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: akshare>=1.16.64
+ Requires-Dist: cachetools>=5.5.2
+ Dynamic: license-file
+
+ # AKShare One
+
+ **AKShare One** is a standardized interface for Chinese financial market data, built as a wrapper around [AKShare](https://github.com/akfamily/akshare) to resolve the inconsistent inputs and outputs across AKShare's multiple data sources.
+
+ ## Background
+
+ AKShare provides a wealth of Chinese financial market data, but across its data sources:
+ - Stock symbol formats differ (e.g. EastMoney and Sina use different formats)
+ - Returned data structures are inconsistent
+ - Parameter names and usage vary
+
+ Through a unified wrapper, AKShare One aims to provide:
+ - A standardized stock symbol format
+ - Consistent return data structures
+ - Simplified API parameters
+
+ ## Core Features
+
+ ### Only the following features are implemented so far:
+ - Historical data (`get_hist_data`)
+ - Real-time quotes (`get_realtime_data`)
+ - Stock news (`get_news_data`)
+ - Financial data (balance sheet / income statement / cash flow statement)
+ - Insider trading (`get_inner_trade_data`)
+
+ ### Standardization
+ - Timestamps unified to UTC
+ - Automatic handling of price-adjusted data
+ - Cleaning of outliers and missing values
+ - Unified column names and data types
+
+ ## Quick Start
+
+ Usage example:
+ ```python
+ from akshare_one import get_hist_data, get_realtime_data
+
+ # Fetch historical data
+ df_hist = get_hist_data(
+     symbol="600000",
+     interval="day",
+     adjust="hfq"
+ )
+
+ # Fetch real-time data
+ df_realtime = get_realtime_data(symbol="600000")
+ ```
+
+ ## API Documentation
+ For detailed API documentation, see [docs/api.md](docs/api.md)
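The standardization described in the README (UTC timestamps, unified column names) can be checked directly on the returned DataFrame. A minimal sketch, assuming the top-level `get_hist_data` returns the column set produced by the bundled EastMoney adapter:

```python
from akshare_one import get_hist_data

# Back-adjusted ("hfq") daily bars with the standardized schema
df = get_hist_data(symbol="600000", interval="day", adjust="hfq")

print(df.columns.tolist())    # expected: timestamp, open, high, low, close, volume
print(df["timestamp"].dt.tz)  # timestamps are tz-aware and converted to UTC
print(df.tail())
```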
@@ -0,0 +1,50 @@
+ # AKShare One
+
+ **AKShare One** is a standardized interface for Chinese financial market data, built as a wrapper around [AKShare](https://github.com/akfamily/akshare) to resolve the inconsistent inputs and outputs across AKShare's multiple data sources.
+
+ ## Background
+
+ AKShare provides a wealth of Chinese financial market data, but across its data sources:
+ - Stock symbol formats differ (e.g. EastMoney and Sina use different formats)
+ - Returned data structures are inconsistent
+ - Parameter names and usage vary
+
+ Through a unified wrapper, AKShare One aims to provide:
+ - A standardized stock symbol format
+ - Consistent return data structures
+ - Simplified API parameters
+
+ ## Core Features
+
+ ### Only the following features are implemented so far:
+ - Historical data (`get_hist_data`)
+ - Real-time quotes (`get_realtime_data`)
+ - Stock news (`get_news_data`)
+ - Financial data (balance sheet / income statement / cash flow statement)
+ - Insider trading (`get_inner_trade_data`)
+
+ ### Standardization
+ - Timestamps unified to UTC
+ - Automatic handling of price-adjusted data
+ - Cleaning of outliers and missing values
+ - Unified column names and data types
+
+ ## Quick Start
+
+ Usage example:
+ ```python
+ from akshare_one import get_hist_data, get_realtime_data
+
+ # Fetch historical data
+ df_hist = get_hist_data(
+     symbol="600000",
+     interval="day",
+     adjust="hfq"
+ )
+
+ # Fetch real-time data
+ df_realtime = get_realtime_data(symbol="600000")
+ ```
+
+ ## API Documentation
+ For detailed API documentation, see [docs/api.md](docs/api.md)
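The README lists `get_news_data` without showing it in the quick-start example. A short usage sketch, assuming the top-level function mirrors the `symbol` parameter and column mapping of the bundled EastMoney adapter:

```python
from akshare_one import get_news_data

# Recent news for a single stock; the adapter maps columns to
# keyword/title/content/publish_time/source/url, with publish_time in UTC
news = get_news_data(symbol="600000")
print(news[["title", "publish_time", "source"]].head())
```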
@@ -0,0 +1,32 @@
+ """Akshare One - Unified interface for Chinese market data
+
+ Provides standardized access to various financial data sources with:
+ - Consistent symbol formats
+ - Unified data schemas
+ - Cleaned and normalized outputs
+
+ Example:
+     >>> from akshare_one import get_hist_data, get_realtime_data
+     >>> df = get_hist_data("600000", interval="day")
+     >>> print(df.head())
+     >>> # Real-time data for a single stock
+     >>> df = get_realtime_data(symbol="600000")
+     >>> # Real-time data for all stocks
+     >>> df = get_realtime_data()
+ """
+
+ from .stock import get_hist_data, get_realtime_data
+ from .news import get_news_data
+ from .insider import get_inner_trade_data
+ from .financial import get_balance_sheet, get_income_statement, get_cash_flow
+
+
+ __all__ = [
+     "get_hist_data",
+     "get_realtime_data",
+     "get_news_data",
+     "get_inner_trade_data",
+     "get_balance_sheet",
+     "get_income_statement",
+     "get_cash_flow",
+ ]
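The financial-statement helpers are exported here but not demonstrated in the module docstring. A minimal sketch, assuming they accept a `symbol` argument like the other top-level functions (their actual signatures live in the `financial` module, which is not part of this diff):

```python
from akshare_one import get_balance_sheet, get_income_statement, get_cash_flow

symbol = "600000"
balance = get_balance_sheet(symbol)    # assumed signature: symbol only
income = get_income_statement(symbol)  # assumed signature: symbol only
cash_flow = get_cash_flow(symbol)      # assumed signature: symbol only
print(balance.head())
```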
@@ -0,0 +1,7 @@
+ # Initialize adapters package
+ from .eastmoney import EastMoneyAdapter
+ from .sina import SinaAdapter
+ from .xueqiu import XueQiuAdapter
+ from .cache.cache import CACHE_CONFIG
+
+ __all__ = ['EastMoneyAdapter', 'SinaAdapter', 'XueQiuAdapter', 'CACHE_CONFIG']
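The adapters exported here can also be used directly when a specific data source is wanted. A usage sketch with the EastMoney adapter defined later in this package (the import path `akshare_one.adapters` is assumed from the package layout; the Sina and Xueqiu adapters are not shown in this diff):

```python
from akshare_one.adapters import EastMoneyAdapter  # import path assumed

adapter = EastMoneyAdapter()

# Forward-adjusted daily bars restricted to a date range
df = adapter.get_hist_data(
    symbol="600000",
    interval="day",
    adjust="qfq",
    start_date="2024-01-01",
    end_date="2024-06-30",
)

# Real-time quote for one symbol (omit symbol for the full market snapshot)
quotes = adapter.get_realtime_data(symbol="600000")
```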
@@ -0,0 +1,9 @@
+ from cachetools import TTLCache
+
+ # Cache configuration
+ CACHE_CONFIG = {
+     'hist_data_cache': TTLCache(maxsize=1000, ttl=3600),  # historical data cached for 1 hour
+     'realtime_cache': TTLCache(maxsize=500, ttl=60),  # real-time data cached for 1 minute
+     'news_cache': TTLCache(maxsize=500, ttl=3600),  # news cached for 1 hour
+     'financial_cache': TTLCache(maxsize=500, ttl=86400),  # financial data cached for 24 hours
+ }
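These TTL caches are shared by the adapters through `cachetools.cached`, as the EastMoney adapter below shows. They can also be inspected or cleared explicitly, for example to force fresh data on the next call (import path assumed from the adapters package):

```python
from akshare_one.adapters import CACHE_CONFIG  # import path assumed

# Inspect the configured size limits and time-to-live values
for name, cache in CACHE_CONFIG.items():
    print(name, "maxsize:", cache.maxsize, "ttl:", cache.ttl)

# Drop all cached historical responses so the next get_hist_data call refetches
CACHE_CONFIG["hist_data_cache"].clear()
```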
@@ -0,0 +1,344 @@
+ from typing import Optional
+ import pandas as pd
+ import akshare as ak
+ from cachetools import cached
+ from .cache.cache import CACHE_CONFIG
+
+
+ class EastMoneyAdapter:
+     """Adapter for EastMoney historical stock data API"""
+
+     @cached(
+         CACHE_CONFIG["hist_data_cache"],
+         # The key lambda mirrors the method defaults so cached lookups also
+         # work when optional arguments are omitted by the caller.
+         key=lambda self, symbol, interval="day", interval_multiplier=1,
+         start_date="1970-01-01", end_date="2030-12-31", adjust="none": (
+             "eastmoney",
+             symbol,
+             interval,
+             interval_multiplier,
+             start_date,
+             end_date,
+             adjust,
+         ),
+     )
+     def get_hist_data(
+         self,
+         symbol: str,
+         interval: str = "day",
+         interval_multiplier: int = 1,
+         start_date: str = "1970-01-01",
+         end_date: str = "2030-12-31",
+         adjust: str = "none",
+     ) -> pd.DataFrame:
+         """Fetch historical quote data from EastMoney.
+
+         Args:
+             symbol: Unified symbol format (e.g. '600000')
+             interval: Time granularity ('second','minute','hour','day','week','month','year')
+             interval_multiplier: Interval multiplier (e.g. 5 for 5 minutes)
+             start_date: Start date in YYYY-MM-DD format (will be converted to YYYYMMDD)
+             end_date: End date in YYYY-MM-DD format (will be converted to YYYYMMDD)
+             adjust: Adjustment type ('none','qfq','hfq')
+
+         Returns:
+             Standardized DataFrame with OHLCV data
+         """
+         # Map standard interval to akshare supported periods
+         interval = interval.lower()
+         if interval == "second":
+             raise ValueError("EastMoney does not support second-level data")
+         elif interval == "minute":
+             if interval_multiplier < 1:
+                 raise ValueError("Minute interval multiplier must be >= 1")
+
+             start_date = (
+                 f"{start_date} 09:30:00" if " " not in start_date else start_date
+             )
+             end_date = f"{end_date} 15:00:00" if " " not in end_date else end_date
+
+             raw_df = ak.stock_zh_a_hist_min_em(
+                 symbol=symbol,
+                 period="1",
+                 start_date=start_date,
+                 end_date=end_date,
+                 adjust=adjust if adjust != "none" else "",
+             )
+             # Resample the data to the desired minute interval
+             raw_df["时间"] = pd.to_datetime(raw_df["时间"])
+             raw_df = raw_df.set_index("时间")
+             resampled = raw_df.resample(f"{interval_multiplier}min").agg(
+                 {
+                     "开盘": "first",
+                     "最高": "max",
+                     "最低": "min",
+                     "收盘": "last",
+                     "成交量": "sum",
+                     "成交额": "sum",
+                 }
+             )
+             raw_df = resampled.reset_index()
+             return self._clean_minute_data(raw_df, str(interval_multiplier))
+         elif interval == "hour":
+             if interval_multiplier < 1:
+                 raise ValueError("Hour interval multiplier must be >= 1")
+
+             start_date = (
+                 f"{start_date} 09:30:00" if " " not in start_date else start_date
+             )
+             end_date = f"{end_date} 15:00:00" if " " not in end_date else end_date
+
+             raw_df = ak.stock_zh_a_hist_min_em(
+                 symbol=symbol,
+                 period="60",
+                 start_date=start_date,
+                 end_date=end_date,
+                 adjust=adjust if adjust != "none" else "",
+             )
+
+             # Resample the data to the desired hour interval
+             raw_df["时间"] = pd.to_datetime(raw_df["时间"])
+             raw_df = raw_df.set_index("时间")
+             resampled = raw_df.resample(f"{interval_multiplier}h").agg(
+                 {
+                     "开盘": "first",
+                     "最高": "max",
+                     "最低": "min",
+                     "收盘": "last",
+                     "成交量": "sum",
+                     "成交额": "sum",
+                 }
+             )
+             raw_df = resampled.reset_index()
+
+             return self._clean_minute_data(raw_df, f"{interval_multiplier}H")
+         elif interval == "day":
+             period = "daily"
+         elif interval == "week":
+             period = "weekly"
+         elif interval == "month":
+             period = "monthly"
+         elif interval == "year":
+             # Fetch monthly bars here; they are resampled to yearly bars below
+             period = "monthly"
+         else:
+             raise ValueError(f"Unsupported interval: {interval}")
+
+         # Convert date format from YYYY-MM-DD to YYYYMMDD if needed
+         start_date = start_date.replace("-", "") if "-" in start_date else start_date
+         end_date = end_date.replace("-", "") if "-" in end_date else end_date
+
+         # Fetch raw data from akshare
+         raw_df = ak.stock_zh_a_hist(
+             symbol=symbol,
+             period=period,  # daily/weekly/monthly
+             start_date=start_date,
+             end_date=end_date,
+             adjust=adjust if adjust != "none" else "",
+         )
+
+         if interval_multiplier > 1 or interval == "year":
+             raw_df = self._resample_data(raw_df, interval, interval_multiplier)
+
+         # Standardize the data format
+         return self._clean_data(raw_df, adjust)
+
+     @cached(CACHE_CONFIG["realtime_cache"], key=lambda self, symbol=None: f"eastmoney_{symbol if symbol else 'all'}")
+     def get_realtime_data(self, symbol: Optional[str] = None) -> pd.DataFrame:
+         """Fetch real-time quotes for Shanghai, Shenzhen, and Beijing A-shares."""
+         raw_df = ak.stock_zh_a_spot_em()
+         df = self._clean_spot_data(raw_df)
+         if symbol:
+             df = df[df["symbol"] == symbol].reset_index(drop=True)
+         return df
+
+     def _resample_data(
+         self, df: pd.DataFrame, interval: str, multiplier: int
+     ) -> pd.DataFrame:
+         """Resample the data based on the given interval and multiplier"""
+         if interval == "day":
+             freq = f"{multiplier}D"
+         elif interval == "week":
+             freq = f"{multiplier}W-MON"
+         elif interval == "month":
+             freq = f"{multiplier}MS"
+         elif interval == "year":
+             freq = f"{multiplier}AS-JAN"
+
+         df["日期"] = pd.to_datetime(df["日期"])
+         df = df.set_index("日期")
+         resampled = df.resample(freq).agg(
+             {
+                 "开盘": "first",
+                 "最高": "max",
+                 "最低": "min",
+                 "收盘": "last",
+                 "成交量": "sum",
+             }
+         )
+         return resampled.reset_index()
+
+     def _clean_minute_data(self, raw_df: pd.DataFrame, period: str) -> pd.DataFrame:
+         """Standardize minute/hour bar columns and convert timestamps to UTC."""
+         if period == "1":
+             column_mapping = {
+                 "时间": "timestamp",
+                 "开盘": "open",
+                 "收盘": "close",
+                 "最高": "high",
+                 "最低": "low",
+                 "成交量": "volume",
+                 "成交额": "amount",
+                 "均价": "vwap",
+             }
+         else:
+             column_mapping = {
+                 "时间": "timestamp",
+                 "开盘": "open",
+                 "收盘": "close",
+                 "最高": "high",
+                 "最低": "low",
+                 "涨跌幅": "pct_change",
+                 "涨跌额": "change",
+                 "成交量": "volume",
+                 "成交额": "amount",
+                 "振幅": "amplitude",
+                 "换手率": "turnover",
+             }
+
+         df = raw_df.rename(columns=column_mapping)
+
+         if "timestamp" in df.columns:
+             df["timestamp"] = (
+                 pd.to_datetime(df["timestamp"])
+                 .dt.tz_localize("Asia/Shanghai")
+                 .dt.tz_convert("UTC")
+             )
+         standard_columns = [
+             "timestamp",
+             "open",
+             "high",
+             "low",
+             "close",
+             "volume",
+         ]
+         return df[[col for col in standard_columns if col in df.columns]]
+
+     def _clean_data(self, raw_df: pd.DataFrame, adjust: str = "none") -> pd.DataFrame:
+         """Clean and standardize the historical data format.
+
+         Args:
+             raw_df: Raw DataFrame from EastMoney API
+             adjust: Adjustment type ('none','qfq','hfq')
+
+         Returns:
+             Standardized DataFrame with consistent columns
+         """
+         # Check if required columns exist in raw data
+         required_columns = {
+             "日期": "timestamp",
+             "开盘": "open",
+             "收盘": "close",
+             "最高": "high",
+             "最低": "low",
+             "成交量": "volume",
+         }
+
+         # Find available columns in raw data
+         available_columns = {}
+         for src_col, target_col in required_columns.items():
+             if src_col in raw_df.columns:
+                 available_columns[src_col] = target_col
+
+         if not available_columns:
+             raise ValueError("Raw data does not contain any expected columns")
+
+         # Rename available columns
+         df = raw_df.rename(columns=available_columns)
+
+         # Process timestamp if available
+         if "timestamp" in df.columns:
+             df = df.assign(
+                 timestamp=lambda x: pd.to_datetime(x["timestamp"])
+                 .dt.tz_localize("Asia/Shanghai")
+                 .dt.tz_convert("UTC")
+             )
+
+         # Process volume if available
+         if "volume" in df.columns:
+             df = df.assign(volume=lambda x: x["volume"].astype("int64"))
+
+         # Flag whether the data has been price-adjusted
+         df = df.assign(is_adjusted=(adjust != "none"))
+
+         # Select available standardized columns
+         standard_columns = [
+             "timestamp",
+             "open",
+             "high",
+             "low",
+             "close",
+             "volume",
+         ]
+         return df[[col for col in standard_columns if col in df.columns]]
+
+     @cached(CACHE_CONFIG["news_cache"], key=lambda self, symbol: f"eastmoney_{symbol}")
+     def get_news_data(self, symbol: str) -> pd.DataFrame:
+         """Fetch EastMoney news data for an individual stock."""
+         raw_df = ak.stock_news_em(symbol=symbol)
+
+         column_mapping = {
+             "关键词": "keyword",
+             "新闻标题": "title",
+             "新闻内容": "content",
+             "发布时间": "publish_time",
+             "文章来源": "source",
+             "新闻链接": "url",
+         }
+
+         df = raw_df.rename(columns=column_mapping)
+
+         if "publish_time" in df.columns:
+             df = df.assign(
+                 publish_time=lambda x: pd.to_datetime(x["publish_time"])
+                 .dt.tz_localize("Asia/Shanghai")
+                 .dt.tz_convert("UTC")
+             )
+
+         return df
+
+     def _clean_spot_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
+         """Clean and standardize real-time quote data."""
+
+         column_mapping = {
+             "代码": "symbol",
+             "最新价": "price",
+             "涨跌额": "change",
+             "涨跌幅": "pct_change",
+             "成交量": "volume",
+             "成交额": "amount",
+             "今开": "open",
+             "最高": "high",
+             "最低": "low",
+             "昨收": "prev_close",
+         }
+
+         df = raw_df.rename(columns=column_mapping)
+
+         # Record the retrieval time in UTC
+         df = df.assign(
+             timestamp=lambda x: pd.Timestamp.now(tz="Asia/Shanghai").tz_convert("UTC")
+         )
+
+         required_columns = [
+             "symbol",
+             "price",
+             "change",
+             "pct_change",
+             "timestamp",
+             "volume",
+             "amount",
+             "open",
+             "high",
+             "low",
+             "prev_close",
+         ]
+         return df[required_columns]
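To illustrate the resampling path in `get_hist_data`: for `interval="minute"` the adapter pulls 1-minute bars from akshare and aggregates them to the requested multiple with pandas `resample`. A usage sketch (dates are illustrative; data availability depends on akshare):

```python
from akshare_one.adapters import EastMoneyAdapter  # import path assumed

adapter = EastMoneyAdapter()

# 15-minute bars for one trading day: fetched as 1-minute data,
# then resampled to 15-minute buckets inside get_hist_data
bars = adapter.get_hist_data(
    symbol="600000",
    interval="minute",
    interval_multiplier=15,
    start_date="2024-06-03",
    end_date="2024-06-03",
)
print(bars[["timestamp", "open", "high", "low", "close", "volume"]].head())
```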